In [1]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pickle

In [2]:
# Parameters
FILE_PATH = './Dataset/EURUSD/EURUSD_M30_features+label_v.2.1.csv'
COLUMNS = ['Close', 'SMA200', 'SMA13', 'RSI14']
LABEL = 'signal'
SEQ_LENGTH = 20
NUM_LAYERS = 4
NUM_HEADS = 4
BATCH_SIZE = 1024
EPOCHS = 50
DROPOUT = 0.4
LEARNING_RATE = 0.01
RANDOM_STATE = 13
TRAIN_SHUFFLE = True
TEST_SHUFFLE = True
TRAIN_TEST_RATIO = 0.2
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load and preprocess data
data = pd.read_csv(FILE_PATH)
data = data[COLUMNS + [LABEL]]
data[COLUMNS] = data[COLUMNS].round(4)

In [4]:
# Normalize features
scaler = MinMaxScaler()
data[COLUMNS] = scaler.fit_transform(data[COLUMNS])

In [5]:
# Adjust labels
data[LABEL] = data[LABEL] - 1

In [6]:
# Create sequences
class ForexDataset(Dataset):
    def __init__(self, data, seq_length):
        self.features = []
        self.labels = []
        for i in range(len(data) - seq_length):
            self.features.append(data.iloc[i:i + seq_length][COLUMNS].values)
            self.labels.append(data.iloc[i + seq_length][LABEL])
        self.features = torch.tensor(self.features, dtype=torch.float32)
        self.labels = torch.tensor(self.labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]


In [7]:
# Split data
train_data, test_data = train_test_split(data, test_size=TRAIN_TEST_RATIO, random_state=RANDOM_STATE)

In [8]:
train_dataset = ForexDataset(train_data, SEQ_LENGTH)
test_dataset = ForexDataset(test_data, SEQ_LENGTH)

  self.features = torch.tensor(self.features, dtype=torch.float32)


In [9]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=TRAIN_SHUFFLE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=TEST_SHUFFLE)

In [10]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x

In [11]:
class EnhancedTransformerModel(nn.Module):
    def __init__(self, input_dim, num_classes, seq_length, num_layers, num_heads, dropout):
        super(EnhancedTransformerModel, self).__init__()
        self.input_dim = input_dim
        self.seq_length = seq_length
        
        # Positional Encoding
        self.positional_encoding = PositionalEncoding(input_dim)
        
        # Transformer layers
        self.attention_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dropout=dropout, batch_first=True)
            for _ in range(num_layers)
        ])
        
        # Fully Connected layers
        self.fc1 = nn.Linear(seq_length * input_dim, 128)
        self.fc2 = nn.Linear(128, num_classes)
        
        # Normalization and Dropout
        self.norm = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.ReLU()

    def forward(self, x):
        # Positional Encoding
        x = self.positional_encoding(x)
        
        # Multi-head Attention with Residual Connections
        for attention_layer in self.attention_layers:
            residual = x
            x = attention_layer(x)
            x = x + residual  # Residual connection
            x = self.norm(x)  # Layer Normalization
        
        # Flatten and pass through Fully Connected layers
        x = x.flatten(start_dim=1)
        x = self.dropout(self.activation(self.fc1(x)))
        return self.fc2(x)


In [12]:
class ForexTransformer(nn.Module): 
    def __init__(self, input_dim, seq_length, num_heads, num_layers, output_dim):
        super(ForexTransformer, self).__init__()
        self.embed_dim = 128  # Increased embedding dimension
        self.embedding = nn.Linear(input_dim, self.embed_dim)
        self.pos_encoder = PositionalEncoding(self.embed_dim, max_len=seq_length)
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.dropout = nn.Dropout(0.4)  # Increased dropout rate
        self.fc = nn.Linear(self.embed_dim * seq_length, output_dim)

    def forward(self, x):
        x = self.embedding(x)  # Linear embedding
        x = self.pos_encoder(x)  # Add positional encoding
        x = self.transformer(x)  # Transformer encoder
        x = self.dropout(x)  # Apply dropout
        x = x.flatten(start_dim=1)  # Flatten for fully connected layer
        x = self.fc(x)  # Final fully connected layer
        return x

In [13]:


# Instantiate the model with the updated architecture
model = ForexTransformer(
    input_dim=len(COLUMNS), 
    seq_length=SEQ_LENGTH, 
    num_heads=NUM_HEADS, 
    num_layers=NUM_LAYERS, 
    output_dim=3  # Number of output classes
).to(DEVICE)

# Update loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)



In [17]:
# Loss and optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=EPOCHS)
class_counts = data[LABEL].value_counts().to_dict()
total_samples = sum(class_counts.values())
class_weights = {cls: total_samples / count for cls, count in class_counts.items()}
weights = torch.tensor([class_weights[i] for i in range(3)], dtype=torch.float32).to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=weights)

In [None]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

model.apply(init_weights)

In [None]:
# Updated training loop
for epoch in range(EPOCHS):
    model.train()
    train_loss, train_preds, train_labels = 0, [], []
    for features, labels in train_loader:
        features, labels = features.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
        train_labels.extend(labels.cpu().numpy())

    train_acc = accuracy_score(train_labels, train_preds)
    train_f1 = f1_score(train_labels, train_preds, average='weighted')

    model.eval()
    test_preds, test_labels = [], []
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(DEVICE), labels.to(DEVICE)
            outputs = model(features)
            test_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
            test_labels.extend(labels.cpu().numpy())

    test_acc = accuracy_score(test_labels, test_preds)
    test_f1 = f1_score(test_labels, test_preds, average='weighted')

    print(f"Epoch {epoch + 1}/{EPOCHS}, "
          f"Train Loss: {train_loss:.4f}, "
          f"Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}, "
          f"Test Acc: {test_acc:.4f}, Test F1: {test_f1:.4f}")


Epoch 1/50, Train Loss: 284.3134, Train Acc: 0.4407, Train F1: 0.3835, Test Acc: 0.4997, Test F1: 0.3330
Epoch 2/50, Train Loss: 167.0966, Train Acc: 0.4863, Train F1: 0.3549, Test Acc: 0.2550, Test F1: 0.1036
Epoch 3/50, Train Loss: 166.6782, Train Acc: 0.4865, Train F1: 0.3552, Test Acc: 0.4997, Test F1: 0.3330
Epoch 4/50, Train Loss: 164.6240, Train Acc: 0.4999, Train F1: 0.3380, Test Acc: 0.4997, Test F1: 0.3330
Epoch 5/50, Train Loss: 163.7677, Train Acc: 0.5020, Train F1: 0.3356, Test Acc: 0.4997, Test F1: 0.3330
Epoch 6/50, Train Loss: 163.5504, Train Acc: 0.5020, Train F1: 0.3356, Test Acc: 0.4997, Test F1: 0.3330
Epoch 7/50, Train Loss: 163.3975, Train Acc: 0.5020, Train F1: 0.3356, Test Acc: 0.4997, Test F1: 0.3330
Epoch 8/50, Train Loss: 163.2229, Train Acc: 0.5020, Train F1: 0.3356, Test Acc: 0.4997, Test F1: 0.3330
Epoch 9/50, Train Loss: 163.4268, Train Acc: 0.5020, Train F1: 0.3356, Test Acc: 0.4997, Test F1: 0.3330
Epoch 10/50, Train Loss: 163.2729, Train Acc: 0.5020, T