In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_recall_curve
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
import xgboost as xgb
import pandas as pd
import numpy as np


class TradingAlgoXGB:
    """Next-row direction classifier for a price series, backed by XGBoost.

    The model is trained to predict whether the ``target`` price of the next
    row is higher than the current row's price (label 1) or not (label 0),
    using the current one-row return and a 5-row simple moving average as
    features. Rows are split chronologically: the first ``1 - test_size``
    fraction is used for training and the remainder is held out.

    Args:
        data: DataFrame containing at least the ``target`` column. It is
            never modified; feature engineering works on a copy.
        target: Name of the price column to model.
        test_size: Fraction of the usable rows placed in the hold-out split.
    """

    def __init__(self, data, target="Adj Close", test_size=0.2):
        self.data = data
        self.target = target
        self.test_size = test_size
        self.model = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Realised next-row return for every hold-out row, aligned with
        # X_test / y_test. Populated by feature_engineering().
        self.test_forward_returns = None

    def feature_engineering(self):
        """Build features and labels and split them chronologically.

        Populates ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
        ``test_forward_returns``. The method is idempotent: it works on a copy
        of ``self.data``, so calling it repeatedly always produces the same
        split and never alters the frame passed to the constructor.
        """
        # Work on a copy so neither the added columns nor the dropped rows
        # leak into the caller's DataFrame (and so repeat calls do not keep
        # shaving rows off the front of the data).
        frame = self.data.copy()
        frame['Return'] = frame[self.target].pct_change()
        frame['SMA_5'] = frame[self.target].rolling(window=5).mean()
        frame = frame.dropna()

        price = frame[self.target]
        next_price = price.shift(-1)
        labels = (next_price > price).astype(int)
        forward_returns = next_price / price - 1

        # The final row has no "next" price, so its label would be a spurious
        # 0 (NaN > x is False). Exclude it from features, labels and returns.
        X = frame[['Return', 'SMA_5']].iloc[:-1]
        y = labels.iloc[:-1]
        forward_returns = forward_returns.iloc[:-1]

        split_idx = int(len(X) * (1 - self.test_size))
        self.X_train, self.X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        self.y_train, self.y_test = y.iloc[:split_idx], y.iloc[split_idx:]
        self.test_forward_returns = forward_returns.iloc[split_idx:]

    def train_model(self):
        """Run feature engineering and fit a default ``XGBClassifier``.

        The fitted estimator is stored in ``self.model``.
        """
        self.feature_engineering()

        self.model = xgb.XGBClassifier()
        self.model.fit(self.X_train, self.y_train)

    def predict(self, test_set):
        """Return class predictions (0/1) for ``test_set``.

        Args:
            test_set: Feature matrix with the same columns as ``X_train``.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("Model is not trained. Call train_model first.")
        return self.model.predict(test_set)

    def cross_validate(self, folds=5):
        """Return the mean accuracy over ``folds`` time-series CV splits.

        Args:
            folds: Number of ``TimeSeriesSplit`` splits.

        Returns:
            Mean of the per-fold accuracy scores.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("Model is not trained. Call train_model first.")

        # Reuse the split the model was trained against; only build it when
        # it is genuinely missing (e.g. the model was assigned by hand).
        if self.X_test is None or self.y_test is None:
            self.feature_engineering()

        tscv = TimeSeriesSplit(n_splits=folds)

        # NOTE(review): scoring runs on the hold-out split, as it always has.
        # cross_val_score refits clones of the estimator on each fold, so the
        # already-fitted self.model is not changed by this call. Consider
        # whether the training split would be the more appropriate input.
        scores = cross_val_score(self.model, self.X_test, self.y_test, cv=tscv, scoring='accuracy')
        return np.mean(scores)

    def _strategy_returns(self, predictions):
        """Return per-row strategy returns for the hold-out split.

        A prediction of 1 earns the realised next-row return of the target
        price; a prediction of 0 stays flat and earns nothing.
        """
        signal = pd.Series(
            np.asarray(predictions).ravel(),
            index=self.test_forward_returns.index,
        )
        return self.test_forward_returns.fillna(0) * signal

    def plot_cumulative_returns(self):
        """Plot the compounded return of following the model on the test split.

        Each point is indexed by the row on which the prediction was made; the
        return it contributes is the price change realised on the next row.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        # Get model predictions
        predictions = self.predict(self.X_test)

        # Earn the actual next-row price return only when prediction is 1
        # (buy). Returns must come from prices, not from the 0/1 labels.
        returns = self._strategy_returns(predictions)

        # Compute cumulative returns
        cumulative_returns = (1 + returns).cumprod()

        plt.figure(figsize=(10, 5))
        plt.plot(cumulative_returns.index, cumulative_returns, label="Cumulative Returns")
        plt.axhline(1, linestyle="--", color="red", alpha=0.7)  # Baseline
        plt.title("Cumulative Returns Over Time")
        plt.xlabel("Date")
        plt.ylabel("Cumulative Return")
        plt.legend()
        plt.show()

    def plot_precision_recall(self, test_set=None, y_true=None):
        """Plot the precision-recall curve for class 1.

        Args:
            test_set: Feature matrix to score.
            y_true: True 0/1 labels for ``test_set``.

        If either argument is omitted, both fall back to the stored hold-out
        split (``X_test`` / ``y_test``).

        Raises:
            ValueError: If the model has not been trained yet.
        """
        # Same guard as predict()/cross_validate() instead of an opaque
        # AttributeError on None.
        if self.model is None:
            raise ValueError("Model is not trained. Call train_model first.")

        if test_set is None or y_true is None:
            test_set = self.X_test
            y_true = self.y_test

        y_scores = self.model.predict_proba(test_set)[:, 1]  # Probabilities for class 1
        precision, recall, _ = precision_recall_curve(y_true, y_scores)
        plt.figure(figsize=(8, 5))
        plt.plot(recall, precision, marker=".")
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.title("Precision-Recall Curve")
        plt.show()

In [None]:
# Load SP.csv, using its "Date" column as the index and asking pandas to
# parse it as dates. TradingAlgoXGB's default target column is "Adj Close".
dat = pd.read_csv("SP.csv", index_col="Date", parse_dates=True)

In [None]:
# Wrap the loaded frame using the class defaults
# (target="Adj Close", test_size=0.2).
mdl = TradingAlgoXGB(dat)

In [None]:
# Build the features, split them chronologically and fit the XGBoost classifier.
mdl.train_model()

In [None]:
# Mean accuracy across TimeSeriesSplit folds (default folds=5).
mdl.cross_validate()

In [None]:
# With no arguments the curve is drawn for the stored test split (X_test / y_test).
mdl.plot_precision_recall()

In [None]:
# Plot the cumulative-return curve derived from the model's test-split predictions.
mdl.plot_cumulative_returns()