In [None]:
from datetime import datetime
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
import matplotlib.pyplot as plt

# Symbol handed to yfinance's Ticker(); presumably the EUR/USD FX pair - confirm on Yahoo Finance.
ticker = "EURUSD=X"
# Date bounds (ISO-formatted strings) passed as start/end to the price-history download.
start = "2019-01-01"
end = "2025-01-01"

In [None]:


def download(ticker, start, end=None):
  """Fetch the price history of ``ticker`` through yfinance.

  Args:
    ticker: Symbol passed to ``yf.Ticker``.
    start: Start of the requested range, forwarded to ``Ticker.history``.
    end: End of the requested range, forwarded to ``Ticker.history``.
      When omitted, the current time at the moment of the call is used.

  Returns:
    Whatever ``yf.Ticker(ticker).history(...)`` returns (a price DataFrame
    per the yfinance documentation).
  """
  # Resolve "now" per call: a `datetime.now()` default in the signature would be
  # evaluated only once, when the function is defined, and then go stale for as
  # long as the kernel stays alive.
  if end is None:
    end = datetime.now()
  return yf.Ticker(ticker).history(start=start, end=end)

def calculate_rsi(data, p=14):
  """Relative Strength Index of ``data["Close"]`` based on simple rolling means.

  Args:
    data: DataFrame with a ``"Close"`` column. It is not modified.
    p: Number of consecutive price moves averaged in each window.

  Returns:
    A Series aligned with ``data.index`` holding RSI values in [0, 100].
    Entries are NaN until ``p`` price moves are available, and also for windows
    with neither gains nor losses (0/0). Windows without any loss yield 100.
  """
  # Bar-to-bar change; the first entry is NaN because it has no predecessor, and
  # it is kept as NaN so it is not counted as a genuine zero move.
  delta = data["Close"].diff()

  # Both legs are non-negative magnitudes. Averaging the signed (negative) down
  # moves would flip the sign of RS and push the result outside [0, 100].
  gains = delta.clip(lower=0)
  losses = (-delta).clip(lower=0)

  avg_gain = gains.rolling(p).mean()
  avg_loss = losses.rolling(p).mean()

  # avg_loss == 0 with avg_gain > 0 gives RS = inf, which maps to RSI = 100.
  rs = avg_gain / avg_loss
  return (100 - (100 / (1 + rs))).rename("RSI")

def generate_features(data, ma_period=60, rsi_period = 14):
  """Build the model inputs and the next-bar direction label.

  Columns added to the returned frame:
    * ``SMA``    - rolling mean of ``Close`` over ``ma_period`` rows.
    * ``trend``  - ``Open`` minus ``SMA``.
    * ``RSI``    - result of ``calculate_rsi(data, rsi_period)``.
    * ``Target`` - +1 if the NEXT row closes above its open, otherwise -1.

  Args:
    data: DataFrame with at least ``Open`` and ``Close`` columns. It is left
      untouched; all work happens on a copy.
    ma_period: Window length of the moving average.
    rsi_period: Window length forwarded to ``calculate_rsi``.

  Returns:
    ``(frame, features)`` where ``frame`` is the augmented copy with every row
    containing a NaN removed, and ``features`` is the list of input column names.
  """
  # Copy first so the caller's frame does not silently grow extra columns.
  data = data.copy()

  data["SMA"] = data["Close"].rolling(window=ma_period).mean()
  data["trend"] = (data["Open"] - data["SMA"])
  data["RSI"] = calculate_rsi(data, rsi_period)

  # Direction of each bar, then shifted up one row so every row is labelled with
  # the following bar's direction. The last row has no successor and becomes NaN.
  data["Target"] = np.where(data["Close"] - data["Open"] > 0, 1, -1)
  data["Target"] = data["Target"].shift(-1)

  features = ["trend", "RSI"]
  # dropna removes the indicator warm-up rows and the unlabeled final row.
  return data.dropna(), features

def preprocessing(df, features, test_size=0.2):
  """Split the feature columns and the ``"Target"`` column into train/test sets.

  Args:
    df: DataFrame holding the ``features`` columns and a ``"Target"`` column.
    features: List of column names used as model inputs.
    test_size: Forwarded to ``train_test_split``.

  Returns:
    ``(x_train, x_test, y_train, y_test)``.
  """
  frame = df.copy()

  inputs = frame[features]
  # Guard against leaking the label into the model inputs.
  assert "Target" not in inputs.columns
  labels = frame["Target"]

  # shuffle=False keeps the rows in their existing order when splitting.
  splits = train_test_split(inputs, labels, test_size=test_size, shuffle=False)
  return tuple(splits)

def run_svc_gridsearch(x_train, y_train):
    """Grid-search a StandardScaler + SVC pipeline with time-ordered CV folds.

    Args:
        x_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``GridSearchCV`` object (scored on accuracy).
    """
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC())
    ])

    c_values = [0.1, 1, 10, 100, 1000]
    # gamma only influences the rbf/poly kernels, so it is searched for those
    # alone. Crossing it with the linear kernel would fit every linear
    # candidate twice with identical results.
    param_grid = [
        {
            'svc__kernel': ['linear'],
            'svc__C': c_values,
        },
        {
            'svc__kernel': ['rbf', 'poly'],
            'svc__C': c_values,
            'svc__gamma': ['scale', 'auto'],
        },
    ]

    # TimeSeriesSplit validates each fold on rows that come after its training rows.
    tscv = TimeSeriesSplit(n_splits=5)

    grid = GridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        cv=tscv,
        scoring='accuracy',
        n_jobs=-1,
        verbose=1
    )

    grid.fit(x_train, y_train)
    return grid

def classification_metrics(y_true, y_pred, dataset_name=""):
  """Print and return accuracy, confusion matrix and classification report.

  Args:
    y_true: Ground-truth labels.
    y_pred: Predicted labels.
    dataset_name: Prefix placed in front of each printed heading.

  Returns:
    ``(accuracy, confusion_matrix, report_text)``.
  """
  acc = accuracy_score(y_true, y_pred)
  cm = confusion_matrix(y_true, y_pred)
  report = classification_report(y_true, y_pred, digits=4)

  # One print call; sep="\n" reproduces the line breaks of three separate prints.
  print(
    f"{dataset_name} Accuracy: {acc:.4f}",
    f"{dataset_name} Confusion Matrix:\n{cm}",
    f"{dataset_name} Classification Report:\n{report}\n",
    sep="\n",
  )
  return acc, cm, report

In [None]:
# Fetch the price history for the configured ticker and date range.
data = download(ticker=ticker, start=start, end=end)

In [None]:
# Bind the engineered frame to its own name instead of overwriting `data`.
# Feeding this cell's output back into generate_features on a re-run would trim
# another indicator warm-up window of rows each time.
dataset, features = generate_features(data)
x_train, x_test, y_train, y_test = preprocessing(dataset, features)

grid = run_svc_gridsearch(x_train, y_train)

print("Best parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)
print("Test accuracy:", grid.score(x_test, y_test))

# Refit-on-full-train pipeline selected by the search.
model = grid.best_estimator_
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)

train_metrics = classification_metrics(y_train, y_train_pred, dataset_name="Train")
test_metrics = classification_metrics(y_test, y_test_pred, dataset_name="Test")
