In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from lumibot.backtesting import YahooDataBacktesting
from lumibot.brokers import Alpaca
from datetime import datetime
from backtesting import Backtest
from dotenv import load_dotenv

# Load key/value pairs from a local `.env` file into the process environment so
# that the `os.getenv("ALPACA_API_KEY")` / `os.getenv("ALPACA_API_SECRET")`
# lookups in the final backtest cell can find the broker credentials.
load_dotenv()

## Helpful Notes
- https://www.geeksforgeeks.org/how-to-use-pytorchs-nnmultiheadattention/
- https://github.com/keras-team/tf-keras/issues/139

In [None]:
# Detect the best available accelerator. `is_available` is a function and has
# to be *called*: the bare function object is always truthy, which made the
# first branch win on every machine regardless of the hardware.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
# Deliberate override kept from the original notebook: training is pinned to
# the CPU no matter what was detected above. Delete this line to train on the
# detected accelerator.
device = "cpu"
print(device)

In [None]:
# Model parameters
# Feature counts handed to the three LSTMs as `input_size`. They are expected
# to match the number of columns left after the data cells drop the first and
# the last column of each frame -- verify against the pickles when data changes.
input_size_news = 17
input_size_f_and_g = 14
input_size_commodities = 48
output_size = 1
output_zize = output_size  # misspelled legacy name; kept because the other cells still reference it
hidden_size = 1024
num_layers = 1  # nn.LSTM applies dropout only between stacked layers
dropout = 0.1
embed_dim = 2048
num_heads = 512  # nn.MultiheadAttention requires embed_dim % num_heads == 0 (here: 4 dims per head)

# Training parameters
batch_size = 1
num_epochs = 500  # upper bound; the training loop stops early on stagnating test loss
learning_rate = 0.000001
seq_size = 30  # number of consecutive rows fed to the model per sample

# Training data paths
train_data_path_commodities = "../Data/commodities_train.pkl"
train_data_path_f_and_g = "../Data/train_f_and_g.pkl"
train_data_path_news = "../Data/train_news.pkl"
# Test data paths
test_data_path_commodities = "../Data/commodities_test.pkl"
test_data_path_f_and_g = "../Data/test_f_and_g.pkl"
test_data_path_news = "../Data/test_news.pkl"

# Checkpoint written by the training loop and read by the evaluation cells.
model_path = "../Models/best_model.pt"

# Display name of the lumibot backtest run.
backtest_name = "Multi Input inkl stop_loss, cash at risk 1, take_profit, careful short, careful buy"

In [None]:
class Net(nn.Module):
    """Three LSTM encoders (commodities, fear & greed, news) fused by self-attention.

    Every input stream is encoded by its own LSTM; the output of the last time
    step is projected to ``embed_dim``. The three projected vectors form a
    length-3 token sequence per sample, which goes through multi-head
    self-attention, layer normalisation, a mean over the three tokens and a
    final linear layer.

    Args:
        input_size_commodities: Feature count of the commodities stream.
        input_size_f_and_g: Feature count of the fear & greed stream.
        input_size_news: Feature count of the news stream.
        output_size: Size of the prediction vector.
        hidden_size: Hidden size of each LSTM.
        embed_dim: Size of the per-stream embedding fed to the attention layer.
            Must be divisible by ``num_heads`` (a requirement of
            ``nn.MultiheadAttention``). The defaults (3 and 4) do not satisfy
            this, so callers have to pass a compatible pair.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability for the attention layer and, when
            ``num_layers > 1``, between LSTM layers.
        num_heads: Number of attention heads.
    """

    def __init__(
            self,
            input_size_commodities: int,
            input_size_f_and_g: int,
            input_size_news: int,
            output_size: int = 1,
            hidden_size: int = 10,
            embed_dim: int = 3,
            num_layers: int = 1,
            dropout: float = 0.2,
            num_heads: int = 4,
        ):
        super().__init__()

        # nn.LSTM only applies dropout between stacked layers. With a single
        # layer a non-zero value does nothing except raise a UserWarning.
        lstm_dropout = dropout if num_layers > 1 else 0.0

        self.lstm_commodities = nn.LSTM(input_size_commodities, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc_commodities = nn.Linear(hidden_size, embed_dim)

        self.lstm_f_and_g = nn.LSTM(input_size_f_and_g, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc_f_and_g = nn.Linear(hidden_size, embed_dim)

        self.lstm_news = nn.LSTM(input_size_news, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc_news = nn.Linear(hidden_size, embed_dim)

        # batch_first=True is essential: the fused tensor is laid out as
        # (batch, 3, embed_dim). With the default (sequence-first) layout the
        # layer attends across the *batch* axis instead of across the three
        # streams -- samples leak into each other, and with batch size 1 the
        # attention degenerates to a no-op.
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout, batch_first=True)

        self.norm1 = nn.LayerNorm(embed_dim)
        self.fc = nn.Linear(embed_dim, output_size)

    @staticmethod
    def _encode(lstm, projection, x):
        """Run one stream through its LSTM and project the last time step."""
        sequence_out, _ = lstm(x)
        return projection(sequence_out[:, -1, :])

    def forward(self, x_commodities, x_f_and_g, x_news):
        """Predict from three batch-first sequences of shape (batch, seq, features).

        Returns:
            A tuple ``(output, stream_weights)``. ``output`` has shape
            ``(batch, output_size)``. ``stream_weights`` has shape ``(3,)`` and
            holds the attention each stream receives as a key, averaged over
            heads, queries and the batch, in the order commodities,
            fear & greed, news. It is meant for logging/plotting only.
        """
        # One token per stream -> (batch, 3, embed_dim)
        tokens = torch.stack(
            [
                self._encode(self.lstm_commodities, self.fc_commodities, x_commodities),
                self._encode(self.lstm_f_and_g, self.fc_f_and_g, x_f_and_g),
                self._encode(self.lstm_news, self.fc_news, x_news),
            ],
            dim=1,
        )

        # Self-attention across the three streams of each sample.
        # attn_weights: (batch, 3 queries, 3 keys), already averaged over heads.
        attn_output, attn_weights = self.attn(tokens, tokens, tokens)

        # Normalize, pool the three tokens and map to the prediction.
        attn_output = self.norm1(attn_output)
        output = self.fc(attn_output.mean(dim=1))

        # Collapse batch and query axes so index i is the weight of stream i.
        stream_weights = attn_weights.mean(dim=(0, 1))
        return output, stream_weights


In [None]:
class FinanceDataset(Dataset):
    """Sliding-window dataset over three row-aligned feature streams.

    Item ``i`` consists of rows ``[i, i + seq_size)`` of every stream plus the
    label of the row directly after that window (``labels[i + seq_size]``).
    All four returned values are ``float32`` tensors.

    Args:
        input_commodities: Indexable 2-D array of commodity features.
        input_f_and_g: Indexable 2-D array of fear & greed features.
        input_news: Indexable 2-D array of news features.
        labels: Indexable array of targets, one entry per row.
        seq_size: Window length (number of rows per sample).

    Raises:
        ValueError: If ``seq_size`` is smaller than 1 or the four inputs do not
            all have the same number of rows.
    """

    def __init__(
        self,
        input_commodities,
        input_f_and_g,
        input_news,
        labels,
        seq_size,
    ):
        if seq_size < 1:
            raise ValueError(f"seq_size must be >= 1, got {seq_size}")

        # Windows are cut at the same row offsets in every stream, so a length
        # mismatch would silently pair features with the wrong labels.
        row_counts = {
            "input_commodities": len(input_commodities),
            "input_f_and_g": len(input_f_and_g),
            "input_news": len(input_news),
            "labels": len(labels),
        }
        if len(set(row_counts.values())) != 1:
            raise ValueError(f"All inputs must have the same number of rows, got {row_counts}")

        self.seq_size = seq_size
        self.input_commodities = input_commodities
        self.input_f_and_g = input_f_and_g
        self.input_news = input_news
        self.labels = labels

    def __len__(self):
        # Never negative: fewer rows than one full window plus label means
        # there are simply no samples.
        return max(0, len(self.labels) - self.seq_size)

    def __getitem__(self, idx):
        sample_count = len(self)
        if idx < 0:
            # Support Python-style negative indices instead of silently
            # returning an empty or misaligned window.
            idx += sample_count
        if not 0 <= idx < sample_count:
            raise IndexError(f"index out of range for dataset with {sample_count} samples")

        stop = idx + self.seq_size
        com = torch.tensor(self.input_commodities[idx:stop], dtype=torch.float32)
        fg = torch.tensor(self.input_f_and_g[idx:stop], dtype=torch.float32)
        news = torch.tensor(self.input_news[idx:stop], dtype=torch.float32)
        # The target is the row that immediately follows the window.
        y = torch.tensor(self.labels[stop], dtype=torch.float32)

        return com, fg, news, y

In [None]:
# Training data
# Load the three raw frames from the paths defined in the parameter cell and
# make sure their "Date" columns are real datetimes so they can be compared.
com = pd.read_pickle(train_data_path_commodities)
com["Date"] = pd.to_datetime(com["Date"])

fg = pd.read_pickle(train_data_path_f_and_g)
fg["Date"] = pd.to_datetime(fg["Date"])

news = pd.read_pickle(train_data_path_news)
news["Date"] = pd.to_datetime(news["Date"])

# Find the common dates across all datasets
common_dates = set(com["Date"]).intersection(set(fg["Date"])).intersection(set(news["Date"]))

# Keep only rows with a common date and sort chronologically. Chaining returns
# a new frame, which avoids the in-place sort on a filtered slice (and the
# SettingWithCopyWarning that comes with it).
com = com[com["Date"].isin(common_dates)].sort_values("Date")
fg = fg[fg["Date"].isin(common_dates)].sort_values("Date")
news = news[news["Date"].isin(common_dates)].sort_values("Date")

# The dataset cuts windows at identical row offsets in all three frames, so
# they must line up row by row (duplicate dates in one frame would break that).
assert len(com) == len(fg) == len(news), (
    f"Misaligned training frames: com={len(com)}, fg={len(fg)}, news={len(news)}"
)

## Commodities and Y
# First column ("Date") and last column are dropped from the features; the last
# commodities column is used as the prediction target.
com_inputs = com.iloc[:, 1:-1]
scaler_com = MinMaxScaler()
scaler_y = MinMaxScaler()
scaled_com_inputs = scaler_com.fit_transform(com_inputs.values)
y = scaler_y.fit_transform(com.iloc[:, -1].values.reshape(-1, 1))

## Fear and Greed

fg_inputs = fg.iloc[:, 1:-1]
scaler_fg = MinMaxScaler()
scaled_fg_inputs = scaler_fg.fit_transform(fg_inputs.values)

## News

news_inputs = news.iloc[:, 1:-1]
scaler_news = MinMaxScaler()
scaled_news_inputs = scaler_news.fit_transform(news_inputs.values)

# Initialize dataset and dataloader
dataset = FinanceDataset(
    input_commodities=scaled_com_inputs,
    input_f_and_g=scaled_fg_inputs,
    input_news=scaled_news_inputs,
    labels=y,
    seq_size=seq_size,
)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [None]:
# Test data
# Same pipeline as for the training data, but the scalers fitted on the
# training set are only *applied* here (transform, not fit_transform).
com_test = pd.read_pickle(test_data_path_commodities)
com_test["Date"] = pd.to_datetime(com_test["Date"])

fg_test = pd.read_pickle(test_data_path_f_and_g)
fg_test["Date"] = pd.to_datetime(fg_test["Date"])

news_test = pd.read_pickle(test_data_path_news)
news_test["Date"] = pd.to_datetime(news_test["Date"])

# Find the common dates across all datasets
common_dates_test = set(com_test["Date"]).intersection(set(fg_test["Date"])).intersection(set(news_test["Date"]))

# Keep only rows with a common date and sort chronologically. Chaining returns
# a new frame instead of sorting a filtered slice in place.
com_test = com_test[com_test["Date"].isin(common_dates_test)].sort_values("Date")
fg_test = fg_test[fg_test["Date"].isin(common_dates_test)].sort_values("Date")
news_test = news_test[news_test["Date"].isin(common_dates_test)].sort_values("Date")

# Windows are cut at identical row offsets in all three frames.
assert len(com_test) == len(fg_test) == len(news_test), (
    f"Misaligned test frames: com={len(com_test)}, fg={len(fg_test)}, news={len(news_test)}"
)

## Commodities and Y

com_inputs_test = com_test.iloc[:, 1:-1]  # Exclude "Date" and target column

scaled_com_inputs_test = scaler_com.transform(com_inputs_test.values)
y_test = scaler_y.transform(com_test.iloc[:, -1].values.reshape(-1, 1))

## Fear and Greed

fg_inputs_test = fg_test.iloc[:, 1:-1]  # Exclude "Date" and last column
scaled_fg_inputs_test = scaler_fg.transform(fg_inputs_test.values)

## News

news_inputs_test = news_test.iloc[:, 1:-1]  # Exclude "Date" and last column
scaled_news_inputs_test = scaler_news.transform(news_inputs_test.values)

# Initialize dataset and dataloader.
# NOTE: `dataset` now refers to the *test* dataset; the backtesting cell further
# down rebuilds its loader from this name.
dataset = FinanceDataset(
    input_commodities=scaled_com_inputs_test,
    input_f_and_g=scaled_fg_inputs_test,
    input_news=scaled_news_inputs_test,
    labels=y_test,
    seq_size=seq_size,
)
# Evaluation data is not shuffled: the average loss does not depend on the
# order, and predictions collected from this loader stay chronological.
test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Mean squared error on the (scaled) regression target.
criterion = nn.MSELoss()

# Build the network from the values of the parameter cell. The three input
# sizes are passed positionally, everything else by keyword.
net = Net(
    input_size_commodities,
    input_size_f_and_g,
    input_size_news,
    num_heads=num_heads,
    dropout=dropout,
    num_layers=num_layers,
    embed_dim=embed_dim,
    hidden_size=hidden_size,
    output_size=output_zize,
)
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)

# Move the model to the selected device; as the last expression of the cell
# this also shows the module summary.
net.to(device)

In [None]:
# Create the checkpoint directory if it doesn't exist
os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)

# Resume from an existing checkpoint. map_location makes a checkpoint written
# on another device loadable on the current one.
# NOTE(review): best_test_loss below still starts at infinity, so the first
# epoch overwrites the loaded checkpoint even if it is worse -- confirm intent.
if os.path.exists(model_path):
    net.load_state_dict(torch.load(model_path, map_location=device))
    print(f"Model successfully loaded from {model_path}")

# Early stopping and best model saving parameters
patience = 8
best_test_loss = float('inf')
best_train_loss = float('inf')
best_epoch = None  # stays None if no epoch ever improves (e.g. NaN losses)
early_stopping_counter = 0

# Loss tracking
train_losses = []
test_losses = []

# Attention weight tracking (one list of per-epoch means per input stream)
attention_tracker = {'commodities': [], 'fg': [], 'news': []}

# Training loop with early stopping
for epoch in range(num_epochs):
    # Training phase
    net.train()
    epoch_train_loss = 0
    epoch_attn = {stream: [] for stream in attention_tracker}

    # Dedicated batch names so the loop does not overwrite the `com`, `fg`,
    # `news` and `y` objects created by the data-preparation cells.
    for x_com, x_fg, x_news, target in train_loader:
        # Move data to the appropriate device
        x_com, x_fg, x_news, target = x_com.to(device), x_fg.to(device), x_news.to(device), target.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs, attn_weights = net(x_com, x_fg, x_news)
        # Compare tensors of identical shape. Squeezing only the prediction
        # makes MSELoss broadcast (B,) against (B, 1) into a (B, B) matrix,
        # which is wrong for any batch size above 1.
        loss = criterion(outputs, target.view_as(outputs))

        # Backward pass
        loss.backward()
        optimizer.step()

        # Update metrics
        epoch_train_loss += loss.item()
        # One scalar per stream, whatever the trailing shape of the weights is.
        # Assumes the first axis indexes the streams in the order commodities,
        # fear & greed, news (the order in which the model concatenates them).
        per_stream = attn_weights.detach().reshape(attn_weights.shape[0], -1).mean(dim=1)
        for stream, weight in zip(attention_tracker, per_stream.tolist()):
            epoch_attn[stream].append(weight)

    avg_train_loss = epoch_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)
    for stream, weights in epoch_attn.items():
        attention_tracker[stream].append(np.mean(weights))

    # Validation/Test phase
    net.eval()
    epoch_test_loss = 0
    with torch.no_grad():
        for x_com, x_fg, x_news, target in test_loader:
            # Move data to the appropriate device
            x_com, x_fg, x_news, target = x_com.to(device), x_fg.to(device), x_news.to(device), target.to(device)

            # Forward pass
            test_outputs, _ = net(x_com, x_fg, x_news)
            test_loss = criterion(test_outputs, target.view_as(test_outputs))

            # Update test loss
            epoch_test_loss += test_loss.item()

    avg_test_loss = epoch_test_loss / len(test_loader)
    test_losses.append(avg_test_loss)

    # Save the best model
    if avg_test_loss < best_test_loss:
        best_test_loss = avg_test_loss
        best_train_loss = avg_train_loss
        best_epoch = epoch
        torch.save(net.state_dict(), model_path)
        print(f"Best model saved at epoch {epoch + 1} with Test Loss: {best_test_loss:.4f}")
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    # Print epoch results
    print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}")

    # Early stopping
    if early_stopping_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs. No improvement in Test Loss for {patience} consecutive epochs.")
        break

# Plot Training and Test Loss
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Training Loss', marker='o')
plt.plot(test_losses, label='Test Loss', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Test Loss')
plt.legend()
plt.grid(True)
plt.show()

# Plot Attention Weights over Epochs
plt.figure(figsize=(10, 6))
plt.plot(attention_tracker['commodities'], label='Commodities Attention', marker='o')
plt.plot(attention_tracker['fg'], label='Fear & Greed Attention', marker='o')
plt.plot(attention_tracker['news'], label='News Attention', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Average Attention Weight')
plt.title('Attention Weight Changes Across Epochs')
plt.legend()
plt.grid(True)
plt.show()

# Final Summary
if best_epoch is None:
    # No epoch produced a finite, improving test loss, so nothing was written.
    print("Training completed without any improvement in Test Loss; no model was saved.")
else:
    print(f"Training completed. Best model saved to {model_path}")
    print(f"Best Test Loss: {best_test_loss} at epoch {best_epoch + 1}")
    print(f"Best Train Loss: {best_train_loss} at epoch {best_epoch + 1}")
if train_losses:
    print(f"Final Training Loss: {train_losses[-1]}, Final Test Loss: {test_losses[-1]}")

In [None]:
# Fresh model with the same architecture as the trained network; it stays on
# the CPU because the predictions are converted to NumPy below.
model = Net(
    input_size_commodities=input_size_commodities,
    input_size_f_and_g=input_size_f_and_g,
    input_size_news=input_size_news,
    output_size=output_zize,
    hidden_size=hidden_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    dropout=dropout,
    num_heads=num_heads,
)

# Load state_dict only. map_location keeps this working for a checkpoint that
# was written from a GPU run.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Walk the test set in dataset order so the result rows are chronological,
# independent of how `test_loader` itself was configured.
eval_loader = DataLoader(test_loader.dataset, batch_size=test_loader.batch_size, shuffle=False)

all_predictions = []
all_labels = []

with torch.no_grad():
    for x_com, x_fg, x_news, target in eval_loader:
        out, _ = model(x_com, x_fg, x_news)

        all_predictions.append(out.numpy())
        all_labels.append(target.numpy())

all_predictions = np.concatenate(all_predictions)
all_labels = np.concatenate(all_labels)

# Undo the target scaling so both columns are in the original units.
pred = scaler_y.inverse_transform(all_predictions.reshape(-1, 1))
actual = scaler_y.inverse_transform(all_labels.reshape(-1, 1))

# Build the result DataFrame
df_results = pd.DataFrame({
    "pred": pred.flatten(),
    "actual": actual.flatten(),
})

# Show the DataFrame
print(df_results)

In [None]:

# Backtesting function
def backtest_model(model, dataloader, scaler_y, seq_size):
    """Run ``model`` over ``dataloader`` and return predictions and targets in original units.

    Args:
        model: Module called as ``model(com, fg, news)`` that returns a tuple
            whose first element is the prediction tensor.
        dataloader: Iterable yielding ``(com, fg, news, y)`` tensor batches.
        scaler_y: Fitted scaler providing ``inverse_transform`` for the target.
        seq_size: Unused; kept so existing callers keep working.

    Returns:
        ``(predictions, actuals)`` as flat NumPy arrays in iteration order.
        Both are empty if ``dataloader`` yields no batches.
    """
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        # Iterate the loader that was passed in, not a global one.
        for com, fg, news, y in dataloader:
            output, _ = model(com, fg, news)
            predictions.append(output.detach().cpu().numpy().reshape(-1))
            actuals.append(y.detach().cpu().numpy().reshape(-1))

    if not predictions:
        # Nothing to inverse-transform; scalers reject zero-sample input.
        return np.empty(0), np.empty(0)

    # Map predictions and actual values back to the original scale
    predictions = scaler_y.inverse_transform(np.concatenate(predictions).reshape(-1, 1))
    actuals = scaler_y.inverse_transform(np.concatenate(actuals).reshape(-1, 1))

    return predictions.flatten(), actuals.flatten()

# Run the backtest
def run_backtest(model_path, seq_size):
    """Load the checkpoint at ``model_path``, predict on ``test_loader`` and report errors.

    Builds a ``Net`` from the notebook's parameter values, plots predicted
    against actual values, prints MSE and MAE and returns a DataFrame with the
    columns ``"Actual"`` and ``"Predicted"``. Relies on the notebook-level
    names ``test_loader`` and ``scaler_y``.
    """
    # Load the model
    architecture = dict(
        input_size_commodities=input_size_commodities,
        input_size_f_and_g=input_size_f_and_g,
        input_size_news=input_size_news,
        output_size=output_zize,
        hidden_size=hidden_size,
        embed_dim=embed_dim,
        num_layers=num_layers,
        dropout=dropout,
        num_heads=num_heads,
    )
    model = Net(**architecture)
    state_dict = torch.load(model_path)
    model.load_state_dict(state_dict)
    model.eval()

    # Run the model over the test data
    predictions, actuals = backtest_model(model, test_loader, scaler_y, seq_size)

    # Visualise the results
    plt.figure(figsize=(14, 7))
    for series, label, color in (
        (predictions, "Predicted", "blue"),
        (actuals, "Actual", "orange"),
    ):
        plt.plot(series, label=label, color=color)
    plt.title("Backtesting Results")
    plt.legend()
    plt.show()

    # Compute the error statistics
    df_results = pd.DataFrame({"Actual": actuals, "Predicted": predictions})
    residuals = df_results["Actual"] - df_results["Predicted"]
    mse = (residuals ** 2).mean()
    mae = np.abs(residuals).mean()
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")

    return df_results

# Rebuild the loader over `dataset` without shuffling so the plotted series
# follow dataset order, then run the backtest on the saved checkpoint.
test_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)
results = run_backtest(model_path=model_path, seq_size=seq_size)

In [None]:
os.makedirs("logs", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Training data: loaded here only to re-fit the scalers that are handed to the
# strategy, so this cell does not depend on the scalers of earlier cells.
com = pd.read_pickle(train_data_path_commodities)
com["Date"] = pd.to_datetime(com["Date"])

fg = pd.read_pickle(train_data_path_f_and_g)
fg["Date"] = pd.to_datetime(fg["Date"])

news = pd.read_pickle(train_data_path_news)
news["Date"] = pd.to_datetime(news["Date"])

# Find the common dates across all datasets
common_dates = set(com["Date"]).intersection(set(fg["Date"])).intersection(set(news["Date"]))

# Filter each dataset to include only rows with the common dates
# (no sorting needed: min/max scaling does not depend on row order)
com = com[com["Date"].isin(common_dates)]
fg = fg[fg["Date"].isin(common_dates)]
news = news[news["Date"].isin(common_dates)]

scaler_com = MinMaxScaler()
scaler_com.fit(com.iloc[:, 1:-1].values)
scaler_fg = MinMaxScaler()
scaler_fg.fit(fg.iloc[:, 1:-1].values)
scaler_news = MinMaxScaler()
scaler_news.fit(news.iloc[:, 1:-1].values)
scaler_y = MinMaxScaler()
scaler_y.fit(com.iloc[:, -1].values.reshape(-1, 1))

# The raw test frames are passed to the strategy unfiltered and unscaled.
test_data_commodities = pd.read_pickle(test_data_path_commodities)
test_data_fg = pd.read_pickle(test_data_path_f_and_g)
test_data_news = pd.read_pickle(test_data_path_news)

model = Net(
    input_size_commodities=input_size_commodities,
    input_size_f_and_g=input_size_f_and_g,
    input_size_news=input_size_news,
    output_size=output_zize,
    hidden_size=hidden_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    dropout=dropout,
    num_heads=num_heads,
)
# map_location keeps this working for a checkpoint written from a GPU run.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Credentials come from the environment; the values are None if unset.
ALPACA_CREDS = {
    "API_KEY": os.getenv("ALPACA_API_KEY"),
    "API_SECRET": os.getenv("ALPACA_API_SECRET"),
    "PAPER": True,
}

# Strategy setup
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)
broker = Alpaca(ALPACA_CREDS)

# Single source of truth for the strategy parameters. The strategy instance
# used to receive the misspelled keys "datase_com" / "datase_fg" /
# "datase_news", while the backtest run used "dataset_*"; both now share the
# correctly spelled keys.
backtest_parameters = {
    "symbol": "spy",
    "cash_at_risk": 1,
    "model": model,
    "num_prior_days": 30,
    "dataset_com": test_data_commodities,
    "dataset_fg": test_data_fg,
    "dataset_news": test_data_news,
    "scaler_com": scaler_com,
    "scaler_fg": scaler_fg,
    "scaler_news": scaler_news,
    "scaler_y": scaler_y,
}

strategy = Backtest(
    name=backtest_name,
    broker=broker,
    # The instance keeps its original, more conservative cash_at_risk value.
    parameters={**backtest_parameters, "cash_at_risk": 0.8},
)

# Run backtest
backtest_results = strategy.backtest(
    YahooDataBacktesting,
    start_date,
    end_date,
    name=backtest_name,
    parameters=backtest_parameters,
    benchmark_asset="SPY",
    show_plot=True,
    show_tearsheet=True,
)

# Save results
# NOTE(review): assumes `backtest_results` can be turned into a DataFrame as
# is -- confirm against the return type of the strategy's `backtest` call.
pd.DataFrame(backtest_results).to_csv("results/backtest_results.csv.gz", index=False, compression="gzip")

print("Backtesting complete. Results saved to backtest_results.csv.gz.")