# Test 1

### Import

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from backtesting import Backtest
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
import datetime

### Device

In [None]:
# Pick the compute backend in priority order: CUDA first, then MPS, then CPU.
# The MPS check is only evaluated when CUDA is unavailable.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using device: {device}")

### Hyperparameters

In [None]:
# Model parameter
# Feature count per time step; becomes the input width of Net's first LSTM.
input_size = 9
# Width of Net's final linear layer output.
output_size = 1
# Hidden-state width used by both LSTMs in Net.
hidden_size = 10
# Number of stacked layers in Net's second LSTM.
num_layers = 2
# Dropout probability used by Net's second (stacked) LSTM.
dropout = 0.2

# Training parameter
# Samples per batch for the train and test DataLoaders.
batch_size = 16
# Number of passes over the training loader.
num_epochs = 1
# Initial learning rate handed to the SGD optimizer.
learning_rate = 0.001
# Number of consecutive rows FinanceDataset puts into one input window.
seq_size = 10

### LSTM Model

In [None]:
class Net(nn.Module):
    """LSTM network that maps a feature sequence to one output vector.

    The input passes through a single-layer LSTM, a ReLU, a stacked LSTM with
    ``num_layers`` layers, another ReLU, and finally a linear layer that is
    applied to the last time step only.

    Args:
        input_size: Number of features per time step.
        output_size: Number of values produced per sequence.
        hidden_size: Hidden-state width shared by both LSTMs.
        num_layers: Number of layers in the second (stacked) LSTM.
        dropout: Dropout probability between the layers of the stacked LSTM.
            When omitted, the module-level ``dropout`` hyperparameter is used
            if one is defined (the previous, implicit behaviour); otherwise
            ``0.0``.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers, dropout=None):
        super().__init__()

        if dropout is None:
            # Existing call sites do not pass dropout and rely on the
            # notebook-level hyperparameter, so keep honouring it.
            dropout = globals().get("dropout", 0.0)
        if num_layers < 2:
            # nn.LSTM applies dropout only between stacked layers; with a
            # single layer it has no effect and merely triggers a warning.
            dropout = 0.0

        # Attribute names are part of the checkpoint format (state_dict keys).
        self.layer_1 = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.layer_2 = nn.LSTM(
            hidden_size,
            hidden_size,
            batch_first=True,
            dropout=dropout,
            num_layers=num_layers,
        )
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size) computed from the last time
            step of the second LSTM's output.
        """
        x, _ = self.layer_1(x)
        x = torch.relu(x)

        x, _ = self.layer_2(x)
        x = torch.relu(x)

        # Only the final time step feeds the prediction head.
        return self.output_layer(x[:, -1, :])

### Dataset

In [None]:
class FinanceDataset(Dataset):
    """Sliding-window dataset over a pickled pandas DataFrame.

    Column 0 is skipped, the last column is the label, and every column in
    between is used as an input feature. Sample ``i`` consists of the
    ``seq_size`` feature rows starting at row ``i`` and the label of the row
    that immediately follows that window.

    Args:
        data_path: Path to the pickled DataFrame.
        seq_size: Number of consecutive rows per input window.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            given, it is applied as-is (e.g. to scale test data with training
            statistics). When omitted, a new ``MinMaxScaler`` is fitted on
            this file's features, as before.

    Attributes set here: ``data`` (the loaded frame), ``inputs`` (scaled
    feature array), ``labels`` (label array), ``scaler`` (the scaler used).
    """

    def __init__(self, data_path, seq_size, scaler=None):
        # NOTE(review): read_pickle can execute arbitrary code while loading;
        # only point this at trusted files.
        self.data = pd.read_pickle(data_path)
        self.seq_size = seq_size

        features = self.data.iloc[:, 1:-1].values
        self.labels = self.data.iloc[:, -1].values

        if scaler is None:
            scaler = MinMaxScaler()
            self.inputs = scaler.fit_transform(features)
        else:
            self.inputs = scaler.transform(features)
        # Kept so another dataset can be built with the same scaling.
        self.scaler = scaler

    def __len__(self):
        """Return the number of complete (window, next-row label) samples."""
        # Clamp at zero: fewer rows than seq_size would otherwise yield a
        # negative length, which makes len() raise.
        return max(0, len(self.inputs) - self.seq_size)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float32 tensors for sample ``idx``.

        ``x`` holds ``seq_size`` feature rows; ``y`` is the label of the row
        right after the window. Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside the dataset.
        """
        size = len(self)
        if idx < 0:
            # Without this, a negative start produces an empty/short slice.
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index {idx} out of range for dataset of size {size}")

        window_end = idx + self.seq_size
        x = self.inputs[idx:window_end]
        y = self.labels[window_end]

        # Convert to tensors
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        return x, y

### Init

In [None]:
# Build the network, an MSE loss, and a plain SGD optimizer over its weights.
net = Net(
    input_size=input_size,
    output_size=output_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
criterion = nn.MSELoss()
optimizer = optim.SGD(params=net.parameters(), lr=learning_rate)

In [None]:
# Wrap the training pickle in a windowed dataset and serve it in batches.
dataset = FinanceDataset(data_path='../Data/train_dax_data.pkl', seq_size=seq_size)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size)

### Training

In [None]:
# Mean training loss of each epoch, in order.
losses = []

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs('../Models', exist_ok=True)

# Training loop
for epoch in range(num_epochs):
    # The evaluation cell switches the model to eval mode; switch back so
    # dropout is active when this cell is re-run afterwards.
    net.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = net(inputs.float())
        # Drop only the trailing output dimension. A bare squeeze() would also
        # collapse the batch dimension when the final batch holds one sample,
        # leaving a scalar to be compared against a length-1 label vector.
        loss = criterion(outputs.squeeze(-1), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Save model after each epoch
    model_path = f'../Models/model-{epoch + 1}.pt'
    torch.save(net.state_dict(), model_path)

    # Decay the learning rate once every 10 completed epochs. The previous
    # `epoch % 10 == 0` check already fired after the very first epoch.
    # Updating the param groups in place avoids rebuilding the optimizer and
    # leaves the `learning_rate` hyperparameter untouched between cell runs.
    if (epoch + 1) % 10 == 0:
        for group in optimizer.param_groups:
            group['lr'] *= 0.8

    # Guard against an empty loader so the average never divides by zero.
    epoch_loss = running_loss / max(len(train_loader), 1)
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss}')
    losses.append(epoch_loss)

### Backtesting

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler

# Inference only: disable dropout.
net.eval()

# NOTE(review): when no scaler is passed, FinanceDataset fits a new
# MinMaxScaler on the file it loads, so the test features are not scaled with
# the training statistics - confirm this is intended.
test_data = FinanceDataset('../Data/test_dax_data.pkl', seq_size=seq_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

# Reuse the frame the dataset already loaded instead of reading the pickle a
# second time.
data = test_data.data

all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        # The model expects (batch_size, sequence_length, num_features).
        out = net(inputs)
        # Collect per-batch results as numpy arrays.
        all_predictions.append(out.numpy())
        all_labels.append(labels.numpy())

# Concatenate the per-batch results into one array each.
all_predictions = np.concatenate(all_predictions)
all_labels = np.concatenate(all_labels)

# Print the predictions next to the ground truth.
print(f'Predicted values: {all_predictions.flatten()}')
print(f'Actual values: {all_labels.flatten()}')

# Side-by-side table of predictions and labels (kept in memory, not saved).
output_df = pd.DataFrame({'Predicted': all_predictions.flatten(), 'Actual': all_labels.flatten()})


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Binarize the continuous model outputs at a fixed cut-off.
threshold = 0.5
predicted_classes = (all_predictions.flatten() > threshold).astype(int)

# Score the binarized predictions against the flattened labels, evaluating
# the four metrics in the same order as they are reported below.
true_classes = all_labels.flatten()
accuracy, precision, recall, f1 = (
    metric(true_classes, predicted_classes)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Count label/prediction combinations and draw them as an annotated heatmap.
cm = confusion_matrix(all_labels.flatten(), predicted_classes)
cm_axes = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')

# Label the axes the heatmap was drawn on.
cm_axes.set_xlabel('Predicted')
cm_axes.set_ylabel('Actual')
cm_axes.set_title('Confusion Matrix')
plt.show()


In [None]:
# Gather raw predictions, thresholded classes and labels column by column.
output_columns = {
    'Predicted': all_predictions.flatten(),
    'Predicted_Class': predicted_classes,
    'Actual': all_labels.flatten(),
}
output_df = pd.DataFrame(output_columns)

# Pairwise correlations between the three columns, drawn as a heatmap.
correlation_matrix = output_df.corr()
corr_axes = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
corr_axes.set_title('Correlation Matrix')
plt.show()

# Render the underlying table below the plot.
display(output_df)

In [None]:


# Collect checkpoint files in a stable order. Hidden files (e.g. .DS_Store,
# .gitkeep) are skipped because torch.load cannot read them.
models = sorted(
    f for f in os.listdir("../Models/")
    if not f.startswith(".") and os.path.isfile(os.path.join("../Models/", f))
)
results = []

# Backtest window. The notebook does `import datetime` (the module), so the
# class has to be reached as `datetime.datetime`; calling the module itself
# raises TypeError.
start_date = datetime.datetime(2023, 11, 1)
end_date = datetime.datetime(2023, 12, 31)

# NOTE(review): `Alpaca` and `YahooDataBacktesting` are not imported in the
# cells visible here, and the `Backtest(name=..., broker=..., parameters=...)`
# / `.backtest(...)` usage below does not look like the API of the
# `backtesting` package imported at the top - TODO confirm which library this
# cell is meant to use and add the matching imports.
for model_name in models:

    model_path = f"../Models/{model_name}"
    model = Net(input_size, output_size, hidden_size, num_layers)

    # Load state_dict only
    model.load_state_dict(torch.load(model_path))  # Do not use weights_only
    model.eval()

    # Credentials come from the environment; paper-trading account only.
    ALPACA_CREDS = {
        "API_KEY": os.getenv("ALPACA_API_KEY"),
        "API_SECRET": os.getenv("ALPACA_API_SECRET"),
        "PAPER": True
    }

    # Strategy setup
    broker = Alpaca(ALPACA_CREDS)

    # Instantiate and run the strategy
    strategy = Backtest(
        name=model_name,
        broker=broker,
        parameters={
            "symbol": "DAX",
            "cash_at_risk": 0.5,
            "model": model,
            "num_prior_days": 5
        }
    )

    # Run the backtest
    backtest_results = strategy.backtest(
        YahooDataBacktesting,
        start_date,
        end_date,
        parameters={"symbol": "DAX", "cash_at_risk": 0.5, "model": model}
    )
    # Tag every row with the checkpoint it came from.
    backtest_results = pd.DataFrame(backtest_results)
    backtest_results["model"] = model_name
    results.append(backtest_results)

# Save the results to a CSV file. pd.concat raises on an empty list, so only
# write when at least one model was backtested.
if results:
    pd.concat(results, ignore_index=True).to_csv("results.csv", index=False)
    # The message names the file that is actually written.
    print("Backtesting complete. Results saved to results.csv.")
else:
    print("No model checkpoints found in ../Models/; nothing was backtested.")


In [None]:
# Load the results CSV from the working directory and render it inline.
saved_results = pd.read_csv("results.csv")
display(saved_results)