In [1]:
import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pickle
from datetime import datetime
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import f1_score


In [2]:
# Parameters
FILE_PATH = './Dataset/EURUSD/EURUSD_M30_features+label_v.2.1.csv'
COLUMNS = ['Close', 'SMA200', 'SMA50', 'RSI14']
LABEL = 'signal'
seq_length = 20
batch_size = 1024
epochs = 100
dropout = 0.4
learning_rate = 0.01

input_size = 4  # ['Close', 'SMA200', 'SMA50', 'RSI14']
hidden_size = 128
middle_size = 64
num_classes = 3  # [buy, sell, nothing]

device = torch.device('cpu' if torch.cuda.is_available() else 'cpu')

In [3]:
# Load data
data = pd.read_csv(FILE_PATH)

In [4]:
# Data preparation
data = data[['Close', 'SMA200', 'SMA50', 'RSI14', 'signal']]
data = data.dropna()  # Drop missing values

In [5]:
scaler = MinMaxScaler()
data[['Close', 'SMA200', 'SMA50', 'RSI14']] = scaler.fit_transform(data[['Close', 'SMA200', 'SMA50', 'RSI14']])
data = data.round(4)

In [6]:
data['signal'] = data['signal'] - 1  # Convert labels: 1 -> 0 (buy), 2 -> 1 (sell), 3 -> 2 (nothing)

In [7]:
# Dataset and DataLoader
class ForexDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        x = self.data.iloc[idx:idx + self.seq_length, :-1].values
        y = self.data.iloc[idx + self.seq_length, -1]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

In [8]:
dataset = ForexDataset(data, seq_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [9]:
# Model definition
class ForexModel(nn.Module):
    def __init__(self, input_size, hidden_size, middle_size, num_classes, dropout):
        super(ForexModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.gelu = nn.GELU()
        self.middle_layer = nn.Linear(hidden_size, middle_size)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(middle_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        hn = self.gelu(hn[-1])
        hn = self.middle_layer(hn)
        hn = self.dropout(hn)
        out = self.fc(hn)
        out = self.softmax(out)
        return out

In [10]:
# Evaluate the model and collect predictions and true labels
def evaluate_model(model, dataloader, device):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for x_batch, y_batch in dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(y_batch.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

In [11]:
model = ForexModel(input_size, hidden_size, middle_size, num_classes, dropout).to(device)

In [12]:
# MODEL_PATH = "D:/Programing/AI Trader/Model/lstmModelv.1.0/Model v.1.0_loss 62.9028_Acc 0.8692_at 20241224-051604.model"

# model.load_state_dict(torch.load(MODEL_PATH, weights_only=False))

# # model.eval()

# print(f"{MODEL_PATH.split('/')[-1]} is loaded.")

In [13]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training loop
for epoch in tqdm(range(epochs), desc="Training Epochs"):
    total_loss = 0
    correct = 0
    total = 0
    model.train()
    for x_batch, y_batch in dataloader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

    accuracy = correct / total
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    
    MODEL_SAVEPATH = f"D:/Programing/AI Trader/Model/lstmModelv.1.1/Model v.1.1_loss {total_loss:.4f}_Acc {accuracy:.4f}_at {timestamp}.model"
    torch.save(model.state_dict(), MODEL_SAVEPATH)
    
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"Epoch {epoch + 1}/{epochs}; Loss: {total_loss:.4f} , Accuracy: {accuracy:.4f} | @ {timestamp}")


Training Epochs:   1%|          | 1/100 [18:19<30:14:50, 1099.90s/it]

Epoch 1/100; Loss: 199.2515 , Accuracy: 0.5125 | @ 2024-12-26 23:37:49


In [None]:
device

In [None]:
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

MODEL_SAVEPATH = f"D:/Programing/AI Trader/Model/lstmModelv.1.0/Model v.1.0_loss {total_loss:.4f}_Acc {accuracy:.4f}_at {timestamp}.model"

print(f"{MODEL_SAVEPATH.split('/')[-1]} was saved.")

torch.save(model.state_dict(), MODEL_SAVEPATH)

In [None]:
# Evaluate on the same dataset
y_true, y_pred = evaluate_model(model, dataloader, device)

In [None]:
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Buy (0)', 'Sell (1)', 'Nothing (2)'])

# Plot confusion matrix
plt.figure(figsize=(8, 6))
disp.plot(cmap='Greens', values_format='d')
plt.title("Confusion Matrix")
plt.show()

# image_path = "./Result/cm-01.png"
# plt.savefig(image_path)

In [None]:
# Calculate F1 score
f1 = f1_score(y_true, y_pred, average='weighted')
print(f"F1 Score: {f1:.4f}")

In [None]:
# Calculate F1 score
f1 = f1_score(y_true, y_pred, average='weighted')
print(f"F1 Score: {f1:.4f}")