In [1]:
from features_labels_extract import extract_features_and_labels
import torch
from torch.utils.data import Dataset
import numpy as np
from sklearn.metrics import (accuracy_score, classification_report, confusion_matrix, 
                             hamming_loss, ConfusionMatrixDisplay, 
                             precision_score, recall_score, f1_score, roc_curve, auc)
import yfinance as yf

In [2]:
tickers = [
    # Technology
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "GOOGL", # Alphabet Inc. (Google)
    "AMZN",  # Amazon.com Inc.
    "TSLA",  # Tesla Inc.
    "NVDA",  # NVIDIA Corporation
    "META",  # Meta Platforms Inc. (Facebook)
    "AMD",   # Advanced Micro Devices Inc.
    "INTC",  # Intel Corporation
    "CSCO",  # Cisco Systems Inc.

    # Finance
    "BRK-B", # Berkshire Hathaway Inc.
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "MA",    # Mastercard Inc.
    "GS",    # Goldman Sachs Group Inc.

    # Consumer Goods & Retail
    "NFLX",  # Netflix Inc.
    "DIS",   # Walt Disney Co.
    "PG",    # Procter & Gamble Co.
    "PEP",   # PepsiCo Inc.
    "KO",    # The Coca-Cola Company
    "MCD",   # McDonald's Corporation
    "WMT",   # Walmart Inc.
    "TGT",   # Target Corporation
    "NKE",   # Nike Inc.

    # Energy & Industrials
    "XOM",   # Exxon Mobil Corporation
    "CVX",   # Chevron Corporation
    "BA",    # Boeing Co.
    "CAT",   # Caterpillar Inc.

    # Healthcare & Pharmaceuticals
    "UNH",   # UnitedHealth Group Incorporated
    "PFE",   # Pfizer Inc.
    "JNJ",   # Johnson & Johnson
    "LLY",   # Eli Lilly and Co.
    "MRNA",  # Moderna Inc.
]

stock_data_train=[]
for stock in tickers:
    stock_data_train.append((stock,yf.download(stock, start="2000-01-01", end="2020-01-01")[["Close", "Volume"]].dropna()))
stock_data_test=[]
for stock in tickers:
    stock_data_test.append((stock, yf.download(stock, start="2020-01-01", end="2025-01-01")[["Close", "Volume"]].dropna()))


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [3]:
class TradingDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3):
        super(CNNBlock, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, padding='same')
        self.bn = nn.BatchNorm1d(out_channels)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.bn(self.conv(x)))  # Conv -> BN -> ReLU
        x = self.pool(x)  # Max pooling
        x = self.dropout(x)  # Dropout
        return x

class TradingCNN(nn.Module):
    def __init__(self, input_channels, input_length):
        super(TradingCNN, self).__init__()

        # Feature Extraction: 3 CNN layers
        self.conv_layers = nn.Sequential(
            CNNBlock(input_channels, 64),
            CNNBlock(64, 128),
            CNNBlock(128, 256)
        )

        # Fully Connected Layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * (input_length // 8), 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 1)  # Output: Binary classification
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, Precision, Recall
from ignite.handlers import EarlyStopping, ModelCheckpoint, TerminateOnNan
from ignite.contrib.handlers import ProgressBar

def train_evaluate_test_strategy(stock_data_train, stock_data_test, strategy, feature_window_length=100, delay=2, batch_size=32, max_epochs=100):
    # Prepare training data
    X_train, y_train = [], []
    for data in stock_data_train:
        features, labels = extract_features_and_labels(data[0], data[1], strategy, feature_window_length, delay)
        X_train.append(features)
        y_train.append(labels)
    X_train = np.concatenate(X_train)
    y_train = np.concatenate(y_train)

    # Prepare testing data
    X_test, y_test = [], []
    for data in stock_data_test:
        features, labels = extract_features_and_labels(data[0], data[1], strategy, feature_window_length, delay)
        X_test.append(features)
        y_test.append(labels)
    X_test = np.concatenate(X_test)
    y_test = np.concatenate(y_test)

    # Create datasets and dataloaders
    train_dataset = TradingDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataset = TradingDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Instantiate the model
    input_channels = X_train.shape[1]
    input_length = X_train.shape[2]
    model = TradingCNN(input_channels, input_length).to(device)

    # Define loss function and optimizer
    criterion = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Create the trainer and evaluators
    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
    def thresholded_output_transform(output):
        y_pred, y = output
        y_pred = torch.round(torch.sigmoid(y_pred))  # Apply sigmoid and round to get binary output
        return y_pred, y
    val_metrics = {
        "accuracy": Accuracy(output_transform=thresholded_output_transform),
        "precision": Precision(output_transform=thresholded_output_transform),
        "recall": Recall(output_transform=thresholded_output_transform),
        "loss": Loss(criterion)
    }
    train_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)
    val_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)
    test_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)

    # Validation after each epoch
    @trainer.on(Events.EPOCH_COMPLETED)
    def run_validation(engine):
        val_evaluator.run(train_loader)
        metrics = val_evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_loss = metrics['loss']

    # Early stopping and model checkpointing
    early_stopping = EarlyStopping(patience=10, score_function=lambda engine: -engine.state.metrics["loss"], trainer=trainer)
    val_evaluator.add_event_handler(Events.COMPLETED, early_stopping)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())

    checkpoint_handler = ModelCheckpoint(dirname='checkpoints', filename_prefix=f'trading_cnn_{strategy}', n_saved=1, create_dir=True, require_empty=False, score_function=lambda e: -e.state.metrics["loss"], score_name="val_loss", global_step_transform=lambda e, _: e.state.epoch)
    val_evaluator.add_event_handler(Events.COMPLETED, checkpoint_handler, {'best_model': model})

    # Run the training loop
    trainer.run(train_loader, max_epochs=max_epochs)
    y_pred_list = []
    y_true_list = []
    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            y_pred = model(X_batch)
            y_pred = torch.round(torch.sigmoid(y_pred))
            y_pred_list.extend(y_pred.cpu().numpy())
            y_true_list.extend(y_batch.cpu().numpy())

    print("\nClassification Report:")
    print(classification_report(y_true_list, y_pred_list))
    # Return the trained model
    return model

In [None]:
# List of strategies to evaluate
strategies = ["momentum", "mean_reversion", "breakout", "rsi", "macd", "vwap"]

# Train, evaluate, and test the model for each strategy
for strategy in strategies:
    print(f"Training, evaluating, and testing strategy: {strategy}")
    model = train_evaluate_test_strategy(stock_data_train, stock_data_test, strategy)
    print(f"Completed training, evaluation, and testing for strategy: {strategy}")

Training, evaluating, and testing strategy: momentum


2025-03-08 12:50:05,290 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training



Classification Report:
              precision    recall  f1-score   support

         0.0       0.64      0.99      0.77      1845
         1.0       0.96      0.38      0.54      1653

    accuracy                           0.70      3498
   macro avg       0.80      0.68      0.66      3498
weighted avg       0.79      0.70      0.66      3498

Completed training, evaluation, and testing for strategy: momentum
Training, evaluating, and testing strategy: mean_reversion

Classification Report:
              precision    recall  f1-score   support

         0.0       0.73      0.99      0.84      2433
         1.0       0.84      0.15      0.25      1065

    accuracy                           0.73      3498
   macro avg       0.78      0.57      0.54      3498
weighted avg       0.76      0.73      0.66      3498

Completed training, evaluation, and testing for strategy: mean_reversion
Training, evaluating, and testing strategy: breakout


2025-03-08 13:02:30,182 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training



Classification Report:
              precision    recall  f1-score   support

         0.0       0.69      0.99      0.81      2350
         1.0       0.91      0.25      0.39      1379

    accuracy                           0.71      3729
   macro avg       0.80      0.62      0.60      3729
weighted avg       0.77      0.71      0.66      3729

Completed training, evaluation, and testing for strategy: breakout
Training, evaluating, and testing strategy: rsi

Classification Report:
              precision    recall  f1-score   support

         0.0       0.64      0.99      0.77      2223
         1.0       0.89      0.16      0.28      1506

    accuracy                           0.65      3729
   macro avg       0.76      0.57      0.52      3729
weighted avg       0.74      0.65      0.57      3729

Completed training, evaluation, and testing for strategy: rsi
Training, evaluating, and testing strategy: macd
