In [9]:

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import *
from sklearn.decomposition import *
from sklearn.preprocessing import *
from sklearn.metrics import *
import torch
from torch import nn
from torch.utils.data import *
from torchvision import datasets
from torchvision.transforms import *


In [10]:


# NOTE: despite its name this is the path of the trades CSV itself, and it is an
# absolute, machine-specific location -- adjust it to your local checkout.
root_results_dir = "/Users/newuser/Projects/robust-algo-trader/data/trades_non_seq_EURUSD_H1_2007_2023.csv"
df = pd.read_csv(root_results_dir)

# Number of leading rows kept for this experiment.
N_ROWS = 6144

# take the first N_ROWS (6144) rows
df = df.iloc[:N_ROWS]

# Target column and the seven indicator/position features fed to the network.
y = df["label"]
X = df[["position", "RSI", "ADX", "ATR", "MFI", "CCI", "AROON_Oscillator"]]


In [11]:

# Show the loaded frame; pandas renders only the first and last rows of a long frame.
df

Unnamed: 0,index,ask_price,take_profit_price,stop_loss_price,position,MACD,MACD_Signal,MACD_Hist,RSI,ATR,...,OBV,CCI,PSAR,AD,ADOSC,VOLUME_RSI,MFI,Date_Time,label,close_time
0,209,1.30870,1.31270,1.30620,1,-0.001955,-0.001962,0.000007,33.833860,0.001827,...,2987.0,-59.563728,1.310000,-3.518072e+03,-314.606376,43.118997,27.493133,2007.03.05 22:00:00,1.0,2007.03.06 22:00:00
1,210,1.30800,1.30400,1.31050,0,-0.001967,-0.001963,-0.000004,31.687411,0.001839,...,2728.0,-77.611586,1.307000,-3.751172e+03,-355.228376,46.286378,29.501138,2007.03.05 23:00:00,0.0,2007.03.06 03:00:00
2,211,1.30870,1.31270,1.30620,1,-0.001898,-0.001950,0.000052,36.056116,0.001765,...,2922.0,-84.009466,1.307058,-3.612600e+03,-295.462070,44.135909,31.102673,2007.03.06 00:00:00,1.0,2007.03.06 22:00:00
3,244,1.31130,1.30730,1.31380,0,0.000262,0.000342,-0.000080,46.186253,0.001341,...,4903.0,-138.344595,1.313834,-3.619358e+03,-94.962257,52.352590,46.481357,2007.03.07 09:00:00,0.0,2007.03.07 15:00:00
4,248,1.31360,1.31760,1.31110,1,0.000325,0.000297,0.000029,60.534359,0.001250,...,5550.0,101.577287,1.310900,-3.351958e+03,-2.545212,50.850305,47.517985,2007.03.07 13:00:00,1.0,2007.03.07 20:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6139,79746,1.11372,1.10972,1.11622,0,0.000490,0.000499,-0.000009,60.047462,0.000731,...,182072.0,35.979440,1.112450,-3.580794e+06,-115.251599,46.241589,66.276466,2020.01.14 04:00:00,0.0,2020.01.16 14:00:00
6140,79751,1.11420,1.11820,1.11170,1,0.000469,0.000467,0.000001,62.642451,0.000668,...,181739.0,137.361219,1.112864,-3.580140e+06,272.936806,55.702155,61.057179,2020.01.14 09:00:00,0.0,2020.01.14 15:00:00
6141,79752,1.11352,1.10952,1.11602,0,0.000415,0.000457,-0.000042,52.983970,0.000696,...,179524.0,4.487179,1.112937,-3.581561e+06,-85.789531,61.988794,46.851101,2020.01.14 10:00:00,0.0,2020.01.16 14:00:00
6142,79769,1.11296,1.11696,1.11046,1,-0.000051,-0.000058,0.000008,50.960844,0.000640,...,183223.0,55.729490,1.111221,-3.578669e+06,452.089005,44.509559,60.484772,2020.01.15 03:00:00,0.0,2020.01.17 15:00:00


In [12]:

# Ordered (unshuffled) hold-out: the first 60% of the rows train the model and
# the remaining 40% test it.
# (Shuffled alternative: train_test_split(X, y, test_size=0.5, random_state=0).)
split_index = int(len(X) * 0.6)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]

# Standardise the features with statistics fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Wrap features and labels as float32 tensors.
X_train, X_test = (
    torch.from_numpy(features.astype(np.float32)) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels.values.astype(np.float32)) for labels in (y_train, y_test)
)

batch_size = 64

# Datasets pair each feature row with its label; only the training loader shuffles.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [13]:
# Display the shapes of the train/test feature and label tensors.
X_train.size(), y_train.size(), X_test.size(), y_test.size()

(torch.Size([3686, 7]),
 torch.Size([3686]),
 torch.Size([2458, 7]),
 torch.Size([2458]))

In [14]:
# Get cpu, gpu or mps device for training.
# `detected_device` records the best accelerator on this machine for reference.
# The MPS lookup is guarded because `torch.backends.mps` is absent from older
# PyTorch builds.
if torch.cuda.is_available():
    detected_device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    detected_device = "mps"
else:
    detected_device = "cpu"

# The notebook runs on the CPU regardless of what was detected: the evaluation
# cell passes the CPU tensor X_test straight to the model and calls .numpy() on
# the result, both of which require the model to live on the CPU.
device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier that ends in a sigmoid.

    The hidden widths are fixed at 128 -> 256 -> 128 -> 128. Sub-module names
    (`flatten`, `linear_relu_stack`) are part of the saved state_dict layout and
    must not be renamed.

    Args:
        input_size: Number of features per sample. Defaults to 7, the width
            that was previously hard-coded.
        output_size: Width of the final layer. Defaults to 1.
    """

    def __init__(self, input_size=7, output_size=1):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, output_size),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return sigmoid outputs in [0, 1] with shape (batch, output_size)."""
        x = self.flatten(x)
        # The stack ends in a Sigmoid, so these are probabilities, not logits.
        probabilities = self.linear_relu_stack(x)
        return probabilities

    
input_size = len(X.columns)
hidden_size = 64  # NOTE: not consumed anywhere; NeuralNetwork fixes its own hidden widths
output_size = 1
model = NeuralNetwork().to(device)

# Fail fast if the selected feature columns and the network's first layer
# disagree, instead of hitting an opaque matmul shape error during training.
first_layer = model.linear_relu_stack[0]
if first_layer.in_features != input_size:
    raise ValueError(
        f"NeuralNetwork expects {first_layer.in_features} input features "
        f"but X has {input_size} columns"
    )


class PrecisionLoss(nn.Module):
    """Binary cross-entropy plus a (1 - precision) penalty.

    The constructor arguments are forwarded unchanged to `nn.BCELoss`.

    Precision is measured on predictions thresholded at `threshold`, exactly as
    before, so the reported loss value is unchanged. Thresholding alone has no
    gradient, which meant the penalty never influenced training. The forward
    pass therefore uses a straight-through estimator: the value is the hard 0/1
    prediction, while the gradient flows as if the prediction were the
    probability itself.
    """

    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'):
        super().__init__()

        # Keyword arguments keep the mapping explicit (size_average / reduce are
        # deprecated in favour of `reduction`).
        self.loss_function = nn.BCELoss(
            weight=weight,
            size_average=size_average,
            reduce=reduce,
            reduction=reduction,
        )

    def forward(self, input, target, threshold=0.5):
        """Return BCE(input, target) + (1 - precision at `threshold`).

        Args:
            input: Predicted probabilities in [0, 1].
            target: Ground-truth labels (0 or 1), same shape as `input`.
            threshold: Probability above which a sample counts as a predicted
                positive.
        """
        # Calculate the binary cross-entropy loss
        bce_loss = self.loss_function(input, target)

        # Hard 0/1 decisions; these carry no gradient on their own.
        hard_predictions = (input > threshold).to(input.dtype)
        # Straight-through estimator: (input - input.detach()) is exactly zero
        # in value but has gradient 1 w.r.t. `input`.
        predictions = hard_predictions + (input - input.detach())

        true_positives = torch.sum(predictions * target)
        false_positives = torch.sum(predictions * (1 - target))

        # Counts are whole numbers, so clamping the denominator at 1 only
        # matters when nothing is predicted positive: precision is then 0 (as
        # with the previous epsilon) but the gradient stays bounded.
        predicted_positives = torch.clamp(true_positives + false_positives, min=1.0)
        precision = true_positives / predicted_positives
        precision_loss = 1 - precision  # Invert precision to minimize the loss

        # Combine BCE loss and precision loss
        total_loss = bce_loss + precision_loss

        return total_loss

# Example usage:
# criterion = PrecisionLoss()
# loss = criterion(output, target)



class CustomBCELoss(nn.Module):
    """Binary cross-entropy where each element is scaled by its squared error.

    Args:
        weight: Optional extra multiplier applied to every element.
        reduction: 'mean' or 'sum'; any other value returns the unreduced,
            element-wise loss.
    """

    def __init__(self, weight=None, reduction='mean'):
        super(CustomBCELoss, self).__init__()
        self.weight = weight
        self.reduction = reduction

    def forward(self, input, target):
        """Return the scaled BCE between probabilities `input` and `target`."""
        # Element-wise BCE, reduced later according to self.reduction.
        elementwise_bce = nn.functional.binary_cross_entropy(input, target, reduction='none')

        # Squared distance between prediction and label, optionally scaled by
        # the user-supplied weight.
        scale = (input - target).abs() ** 2
        if self.weight is not None:
            scale = self.weight * scale
        scaled_loss = scale * elementwise_bce

        if self.reduction == 'mean':
            return scaled_loss.mean()
        if self.reduction == 'sum':
            return scaled_loss.sum()
        return scaled_loss


# Active criterion: BCE plus the precision penalty (PrecisionLoss).
# Alternatives that were tried: CustomBCELoss(weight=3), torch.nn.BCELoss()
loss_fn = PrecisionLoss()

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=7, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=1, bias=True)
    (9): Sigmoid()
  )
)


In [23]:
# Define test function
def test(dataloader, model, criterion):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            label = y.float().view(-1, 1)
            # check the size of the prediction and label
            # print("pred")
            # print(pred.size())
            # print("label")
            # print(label.size())
            
            current_loss = criterion(pred, label).item()
            test_loss += current_loss
            correct += (pred > 0.5).eq(y.view_as(pred)).sum().item()
    test_loss /= num_batches
    # correct /= size
    print(f"Test Loss: {test_loss}")

# Train and test the model
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    model.train()
    loss_sum = 0
    # The batch variables are deliberately not called X / y: at notebook level
    # those names hold the feature DataFrame and label Series, and overwriting
    # them here would break re-running the split and model-construction cells.
    for X_batch, y_batch in train_dataloader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Compute prediction error
        pred = model(X_batch)
        # Labels arrive as a flat vector; the criterion expects (batch, 1).
        label = y_batch.float().view(-1, 1)
        loss = loss_fn(pred, label)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()

    # Mean of the per-batch training losses for this epoch.
    print(f"Train Loss: {loss_sum / len(train_dataloader)}")

    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
Train Loss: 0.9997085672000359
Test Loss: 1.3227008092097747
Epoch 2
-------------------------------
Train Loss: 0.9808186929801415
Test Loss: 1.3577794998120039
Epoch 3
-------------------------------
Train Loss: 0.9911651159154957
Test Loss: 1.3645966450373332
Epoch 4
-------------------------------
Train Loss: 0.9556875290541813
Test Loss: 1.3722648192674687
Epoch 5
-------------------------------
Train Loss: 0.9606499343082823
Test Loss: 1.3623818770433083
Epoch 6
-------------------------------
Train Loss: 0.9360506637343045
Test Loss: 1.3808652468216724
Epoch 7
-------------------------------
Train Loss: 0.9205967285509767
Test Loss: 1.3818442821502686
Epoch 8
-------------------------------
Train Loss: 0.9034152966121147
Test Loss: 1.40446448020446
Epoch 9
-------------------------------
Train Loss: 0.9258301288917147
Test Loss: 1.3918465987229958
Epoch 10
-------------------------------
Train Loss: 0.8841771904764504
Test Loss: 1.37385817

In [47]:
from sklearn.metrics import accuracy_score, precision_score

# Probability cut-off for calling a sample positive. This is stricter than the
# 0.5 used inside test(), so far fewer samples are flagged.
DECISION_THRESHOLD = 0.9

# Make sure inference-mode behaviour is active before scoring.
model.eval()
with torch.no_grad():
    probabilities = model(X_test).detach().cpu().numpy().reshape(-1)

print(probabilities)
# `pred` holds the final 0/1 decisions; the next cell counts them.
pred = np.where(probabilities > DECISION_THRESHOLD, 1, 0)
print(y_test)

# Hand scikit-learn plain NumPy labels rather than a torch tensor.
y_true = y_test.cpu().numpy()
print(accuracy_score(y_true, pred))
# zero_division=0 reports 0 instead of warning when nothing is predicted positive.
print(precision_score(y_true, pred, zero_division=0))

[0.55401146 0.4761544  0.6006379  ... 0.4355289  0.74538505 0.07773386]
tensor([0., 1., 0.,  ..., 0., 0., 1.])
0.595606183889341
0.5555555555555556


In [48]:

# Count how many samples were predicted 0 vs 1.
# `pred` itself is left untouched so this cell can be re-run safely.
pred_counts = pd.Series(pred).value_counts()
pred_counts

0    2449
1       9
Name: count, dtype: int64

In [None]:
import matplotlib.pyplot as plt


def _accuracy_percent(dataloader, model):
    """Return the percentage (0-100) of samples classified correctly at a 0.5 cut-off."""
    model.eval()
    correct = 0
    with torch.no_grad():
        for features, targets in dataloader:
            features, targets = features.to(device), targets.to(device)
            probabilities = model(features)
            labels = targets.float().view_as(probabilities)
            correct += (probabilities > 0.5).float().eq(labels).sum().item()
    return 100.0 * correct / len(dataloader.dataset)


# `train` is not defined in any earlier visible cell, so it is defined here to
# keep this cell runnable on a fresh kernel.
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over `dataloader`; return training accuracy in percent."""
    model.train()
    loss_sum = 0.0
    for features, targets in dataloader:
        features, targets = features.to(device), targets.to(device)
        loss = loss_fn(model(features), targets.float().view(-1, 1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    print(f"Train Loss: {loss_sum / len(dataloader)}")
    # Accuracy is measured after the pass, with the model in eval mode.
    return _accuracy_percent(dataloader, model)


# Per-epoch accuracies (in percent) for the plot below.
train_accuracies = []
test_accuracies = []

# Your training loop with accuracy calculations during each epoch
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_acc = train(train_dataloader, model, loss_fn, optimizer)
    # test() prints the test loss; accuracy is computed separately so this cell
    # does not depend on what test() returns.
    test(test_dataloader, model, loss_fn)
    test_acc = _accuracy_percent(test_dataloader, model)

    # Append accuracies to the lists
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)

# Plot the graph
epochs_range = range(1, epochs + 1)
fig, ax = plt.subplots()
ax.plot(epochs_range, train_accuracies, label='Train Accuracy')
ax.plot(epochs_range, test_accuracies, label='Test Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy (%)')
ax.set_title('Train and Test Accuracy Over Epochs')
ax.legend()
plt.show()



In [None]:
# Single source of truth for the checkpoint location.
MODEL_PATH = "model.pth"

torch.save(model.state_dict(), MODEL_PATH)
print("Saved PyTorch Model State to model.pth")

# Rebuild the network in eval mode, then restore the saved weights.
# map_location loads the tensors onto the device the model uses, and
# weights_only=True restricts unpickling to tensors/containers instead of
# arbitrary Python objects.
model = NeuralNetwork().to(device).eval()
model.load_state_dict(torch.load(MODEL_PATH, map_location=device, weights_only=True))


# test_dataloader
# calculate the accuracy over the whole test set
