In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.cluster import KMeans
from datetime import datetime
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
FILE_PATH = "./Dataset/EURUSD/EURUSD_M30_with_features+label3.csv"
# FILE_PATH = "./Dataset/EURUSD/EURUSD_M30_features+label_v.2.1.csv"
data = pd.read_csv(FILE_PATH)

In [3]:
# Select features and labels
features = ['SMA100', 'RSI14', 'Close']
# features = ['Open', 'High', 'Low', 'Close', 'Volume', 'SMA200', 'SMA100', 'SMA50', 'SMA13', 'RSI14']
# features = ['Close', 'SMA200', 'SMA50', 'RSI14']
labels = 'signal'
X = data[features].values
y = data[labels].values

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Convert labels to integers
num_classes = len(set(y))
y = torch.tensor(y, dtype=torch.long)

In [4]:
num_classes

4

In [5]:
# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=13)

In [6]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

  y_train_tensor = torch.tensor(y_train, dtype=torch.long)
  y_val_tensor = torch.tensor(y_val, dtype=torch.long)


In [7]:
seq_length = 15
num_layers = 2
num_heads = 4

In [8]:
def create_sequences(data, labels, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(data) - seq_length + 1):
        X_seq.append(data[i:i + seq_length])
        y_seq.append(labels[i + seq_length - 1])  # Directly append the label without .item()
    return torch.stack(X_seq), torch.tensor(y_seq, dtype=torch.long)

X_train_seq, y_train_seq = create_sequences(X_train_tensor, y_train_tensor, seq_length)
X_val_seq, y_val_seq = create_sequences(X_val_tensor, y_val_tensor, seq_length)

In [9]:
# Positional Encoding class for the Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(1)]

In [10]:
class ForexTransformer(nn.Module):
    def __init__(self, input_dim, seq_length, num_heads, num_layers, output_dim):
        super(ForexTransformer, self).__init__()
        self.embed_dim = 128  # Increased embedding dimension
        self.embedding = nn.Linear(input_dim, self.embed_dim)
        self.pos_encoder = PositionalEncoding(self.embed_dim, max_len=seq_length)
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=num_heads)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.dropout = nn.Dropout(0.4)  # Increased dropout rate
        self.fc = nn.Linear(self.embed_dim * seq_length, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer(x)
        x = self.dropout(x)  # Apply dropout after transformer
        x = x.flatten(start_dim=1)  # Flatten for the fully connected layer
        x = self.fc(x)
        return x

In [11]:
X_train_seq.shape

torch.Size([159906, 15, 3])

In [12]:
# Initialize model, loss, and optimizer
model = ForexTransformer(input_dim=X_train_seq.shape[2], seq_length=seq_length, num_heads=num_heads, num_layers=num_layers, output_dim=num_classes).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)



In [13]:
# MODEL_PATH = "D:/Programing/AI Trader/Model/transformerModel+dropout_Labeled_layers-heads 4-8_Val-Loss 0.8701, Val-Accuracy 0.5773_at 20241216-054736.model"
# model.load_state_dict(torch.load(MODEL_PATH))
# model.eval()
# print()
# print(f"Model: {MODEL_PATH.split('/')[-1]} is loaded.")
# print()

In [14]:
# Training loop
def train_model(model, X_train_seq, y_train_seq, X_val_seq, y_val_seq, epochs=100, batch_size=1024):
    train_dataset = torch.utils.data.TensorDataset(X_train_seq, y_train_seq)
    val_dataset = torch.utils.data.TensorDataset(X_val_seq, y_val_seq)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to('cuda'), batch_y.to('cuda')
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to('cuda'), batch_y.to('cuda')
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_accuracy = correct / total
        
        
        
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        
        print(f"Train-Loss: {train_loss:.4f} ; Val-Loss: {val_loss:.4f} ; Val-Accuracy: {val_accuracy:.4f} ; @ {timestamp}\n")
        
        MODEL_SAVEPATH = f"./Model/transformerModelv.3.0_layers-heads {num_layers}-{num_heads}_Val-Loss {val_loss:.4f}, Val-Accuracy {val_accuracy:.4f}_at {timestamp}.model"
        
        torch.save(model.state_dict(), MODEL_SAVEPATH)

train_model(model, X_train_seq, y_train_seq, X_val_seq, y_val_seq)


Training Epochs:   1%|          | 1/100 [00:26<43:21, 26.28s/it]

Train-Loss: 1.1270 ; Val-Loss: 1.0568 ; Val-Accuracy: 0.4462 ; @ 20241222-015224



Training Epochs:   2%|▏         | 2/100 [00:52<42:44, 26.17s/it]

Train-Loss: 1.0608 ; Val-Loss: 1.0474 ; Val-Accuracy: 0.4559 ; @ 20241222-015250



Training Epochs:   3%|▎         | 3/100 [01:18<42:16, 26.15s/it]

Train-Loss: 1.0556 ; Val-Loss: 1.0487 ; Val-Accuracy: 0.4511 ; @ 20241222-015316



Training Epochs:   4%|▍         | 4/100 [01:44<41:54, 26.19s/it]

Train-Loss: 1.0475 ; Val-Loss: 1.0379 ; Val-Accuracy: 0.4518 ; @ 20241222-015343



Training Epochs:   5%|▌         | 5/100 [02:11<41:34, 26.25s/it]

Train-Loss: 1.0316 ; Val-Loss: 1.0154 ; Val-Accuracy: 0.4905 ; @ 20241222-015409

