In [1]:

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import *
from sklearn.decomposition import *
from sklearn.preprocessing import *
from sklearn.metrics import *
import torch
from torch import nn
from torch.utils.data import *
from torchvision import datasets
from torchvision.transforms import *


# Define the hyperparameters
input_size = 7 # Number of features
hidden_size = 16 # Size of the hidden state
num_layers = 1 # Number of LSTM layers
output_size = 1 # Number of classes
batch_size = 32 # Size of the mini-batch
num_epochs = 20 # Number of training epochs
learning_rate = 0.01 # Learning rate for the optimizer
seq_len = 10 # Sequence length

root_results_dir = "/Users/newuser/Projects/robust-algo-trader/data/trades_seq_EURUSD_H1_2007_2023.csv"
df = pd.read_csv(f"{root_results_dir}")

# take first 4096 rows
df = df.iloc[:4000]

y = df["label"]
X = df[["position", "RSI",  "ADX","ATR", "MFI", "CCI", "AROON_Oscillator"]]



# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
split_index = int(len(X) * 0.5)  # 60% training, 40% testing
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.tensor(y_train.values.astype(np.float32))
y_test = torch.tensor(y_test.values.astype(np.float32))

# Reshape the data to (N, T, F) format
X_train = torch.reshape(X_train, (-1, seq_len, X_train.size(1)))
X_test = torch.reshape(X_test, (-1, seq_len, X_test.size(1)))

# Reshape the targets to (N, 1) format
y_train = torch.reshape(y_train, (-1, 1))
y_test = torch.reshape(y_test, (-1, 1))

# Ensure that y_train and y_test have the same number of samples as X_train and X_test
y_train = y_train[:X_train.shape[0]]
y_test = y_test[:X_test.shape[0]]

# Reshape the targets to (N, 1) format again (in case it's needed)
y_train = torch.reshape(y_train, (-1, 1))
y_test = torch.reshape(y_test, (-1, 1))

# Create DataLoader objects for training and testing
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

test_dataset = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



# Define the LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        # Initialize the LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Initialize the linear layer
        self.fc = nn.Linear(hidden_size, output_size)
        # Sigmoid activation
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # Pass the input through the LSTM layer
        out, _ = self.lstm(x)
        # Take the last output of the LSTM layer
        out = out[:, -1, :]
        # Pass the output through the linear layer
        out = self.fc(out)
        # Apply sigmoid activation
        out = self.sigmoid(out)
        return out

# Create an instance of the model
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Define the loss function and the optimizer
criterion = nn.BCELoss()  # Use BCELoss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    # Shuffle the data
    X_tensor = X_train
    y_tensor = y_train
    perm = torch.randperm(X_tensor.size(0))  # Use a different variable name (e.g., X_tensor) for your data tensor
    X_tensor = X_tensor[perm]
    y_tensor = y_tensor[perm]
    # Loop over the mini-batches
    for i in range(0, X_tensor.size(0), batch_size):
        # Get the current batch
        X_batch = X_tensor[i:i+batch_size]
        y_batch = y_tensor[i:i+batch_size].float()  # Ensure labels are of type torch.float32
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(X_batch)
        # Compute the loss
        loss = criterion(outputs, y_batch)
        # Backward pass and update
        loss.backward()
        optimizer.step()
    # Print the loss for the epoch
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')



Epoch 1, Loss: 0.6871
Epoch 2, Loss: 0.6328
Epoch 3, Loss: 0.7277
Epoch 4, Loss: 0.7777
Epoch 5, Loss: 0.6232
Epoch 6, Loss: 0.6414
Epoch 7, Loss: 0.5844
Epoch 8, Loss: 0.7025
Epoch 9, Loss: 0.5811
Epoch 10, Loss: 0.6725
Epoch 11, Loss: 0.7521
Epoch 12, Loss: 0.6250
Epoch 13, Loss: 0.7651
Epoch 14, Loss: 0.6052
Epoch 15, Loss: 0.7253
Epoch 16, Loss: 0.4811
Epoch 17, Loss: 0.4748
Epoch 18, Loss: 0.2720
Epoch 19, Loss: 0.4053
Epoch 20, Loss: 0.4744


In [2]:
# Set the model to evaluation mode
model.eval()

# Lists to store predictions and true labels
all_predictions = []
all_labels = []

# Disable gradient computation during testing
with torch.no_grad():
    for i in range(0, X_test.size(0), batch_size):
        # Get the current batch
        X_batch = X_test[i:i + batch_size]
        y_batch = y_test[i:i + batch_size].float()  # Ensure labels are of type torch.float32

        # Forward pass
        outputs = model(X_batch)

        # Convert probabilities to binary predictions (0 or 1)
        predictions = (outputs > 0.5).float()

        # Append predictions and true labels to the lists
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

# Convert lists to NumPy arrays
all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# Calculate accuracy and other metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.5650
Precision: 0.4848
Recall: 0.1860
F1 Score: 0.2689


In [3]:
all_predictions

array([[0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],