In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.cluster import KMeans
from datetime import datetime
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [30]:
FILE_PATH = "./Dataset/EURUSD/EURUSD_M30_features+label_v.2.1.csv"
# FILE_PATH = "./Dataset/EURUSD/EURUSD_M30_with_features+label3.csv"
data = pd.read_csv(FILE_PATH)

In [31]:
# Select features and labels
# features = ['SMA200', 'SMA100', 'SMA50', 'SMA13', 'RSI14', 'Close']
features = ['SMA100', 'RSI14', 'Close']
labels = 'signal'
X = data[features].values
y = data[labels].values

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
# X_scaled = X


# Convert labels to integers
num_classes = len(set(y))
y = torch.tensor(y, dtype=torch.long)

In [32]:
# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.25, random_state=13)

In [33]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

  y_train_tensor = torch.tensor(y_train, dtype=torch.long)
  y_val_tensor = torch.tensor(y_val, dtype=torch.long)


In [34]:
seq_length = 10
num_layers = 4
num_heads = 8

In [35]:
def create_sequences(data, labels, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(data) - seq_length + 1):
        X_seq.append(data[i:i + seq_length])
        y_seq.append(labels[i + seq_length - 1])  # Directly append the label without .item()
    return torch.stack(X_seq), torch.tensor(y_seq, dtype=torch.long)

X_train_seq, y_train_seq = create_sequences(X_train_tensor, y_train_tensor, seq_length)
X_val_seq, y_val_seq = create_sequences(X_val_tensor, y_val_tensor, seq_length)

In [36]:
# Positional Encoding class for the Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(1)]

In [37]:
class ForexTransformer(nn.Module):
    def __init__(self, input_dim, seq_length, num_heads, num_layers, output_dim):
        super(ForexTransformer, self).__init__()
        self.embed_dim = 128  # Increased embedding dimension
        self.embedding = nn.Linear(input_dim, self.embed_dim)
        self.pos_encoder = PositionalEncoding(self.embed_dim, max_len=seq_length)
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=num_heads)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.dropout = nn.Dropout(0.3)  # Increased dropout rate
        
        self.fc = nn.Linear(self.embed_dim * seq_length, output_dim)
        
        self.fc1 = nn.Linear(self.embed_dim * seq_length, 128)  # First fully connected layer
        self.fc2 = nn.Linear(128, output_dim)  # Second fully connected layer

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer(x)
        x = self.dropout(x)  # Apply dropout after transformer
        x = x.flatten(start_dim=1)  # Flatten for the fully connected layer
        
        x = self.fc(x)
        
        # x = self.fc1(x)  # First fully connected layer
        # x = torch.relu(x)  # Activation function
        # x = self.fc2(x)  # Second fully connected layer
        
        return x

In [38]:
class ForexTransformerWithDecoder(nn.Module):
    def __init__(self, input_dim, seq_length, num_heads, num_layers, output_dim):
        super(ForexTransformerWithDecoder, self).__init__()
        self.embed_dim = 128  # Increased embedding dimension
        self.embedding = nn.Linear(input_dim, self.embed_dim)
        self.pos_encoder = PositionalEncoding(self.embed_dim, max_len=seq_length)
        
        # Encoder layers
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        # Decoder layers
        decoder_layer = nn.TransformerDecoderLayer(d_model=self.embed_dim, nhead=num_heads)
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
        
        self.batch_norm = nn.BatchNorm1d(self.embed_dim * seq_length)
        
        self.dropout = nn.Dropout(0.3)  # Dropout for regularization
        self.fc = nn.Linear(self.embed_dim * seq_length, output_dim)  
        self.fc1 = nn.Linear(self.embed_dim * seq_length, 128)  # First fully connected layer
        self.fc2 = nn.Linear(128, output_dim)  # Second fully connected layer

    def forward(self, x):
        x = self.embedding(x)  # Input embedding
        x = self.pos_encoder(x)  # Add positional encoding
        
        # Encoder output
        memory = self.transformer_encoder(x)
        
        # Decoder input (using encoder output as memory)
        decoder_input = torch.zeros_like(x)  # Start with zeros or input as needed
        output = self.transformer_decoder(decoder_input, memory)
        
        output = self.dropout(output)  # Apply dropout
        output = output.flatten(start_dim=1)  # Flatten for the fully connected layer
        
        output = self.fc(output)  # Alone Fully Connected Layer
        
        # output = self.fc1(output)  # First fully connected layer
        # output = torch.relu(output)  # Activation function
        # output = self.fc2(output)  # Second fully connected layer
        
        return output


In [39]:
X_train_seq.shape

torch.Size([149841, 10, 3])

In [40]:
num_classes

3

In [41]:
# Initialize model, loss, and optimizer
model = ForexTransformerWithDecoder(input_dim=X_train_seq.shape[2], seq_length=seq_length, num_heads=num_heads, num_layers=num_layers, output_dim=num_classes).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)



RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [14]:
# # Initialize model, loss, and optimizer
# model = ForexTransformer(input_dim=X_train_seq.shape[2], seq_length=seq_length, num_heads=num_heads, num_layers=num_layers, output_dim=num_classes).to('cuda')
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.005)

In [15]:
learning_rates = [0.01, 0.001, 0.0001, 0.00001]

In [16]:
# MODEL_PATH = "D:/Programing/AI Trader/Model/transformerModelv.3.0/Modelv.3.0_layers-heads 4-8_Val-Loss 0.8897, Val-Accuracy 0.5899_at 20241219-062117.save"
# model.load_state_dict(torch.load(MODEL_PATH))
# model.eval()
# print()
# print(f"Model: {MODEL_PATH.split('/')[-1]} is loaded.")
# print()

In [14]:
# Training loop
def train_model(model, X_train_seq, y_train_seq, X_val_seq, y_val_seq, epochs=100, batch_size=1024):
    train_dataset = torch.utils.data.TensorDataset(X_train_seq, y_train_seq)
    val_dataset = torch.utils.data.TensorDataset(X_val_seq, y_val_seq)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to('cuda'), batch_y.to('cuda')
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to('cuda'), batch_y.to('cuda')
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_accuracy = correct / total
        
        
        
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        
        print(f"Train-Loss: {train_loss:.3f} ; Val-Loss: {val_loss:.3f} ; Val-Accuracy: {val_accuracy:.3f} ; @ {timestamp}\n")
        
        MODEL_SAVEPATH = f"./Model/transformerModelv.3.0/Modelv.3.1_layers-heads {num_layers}-{num_heads}_Val-Loss {val_loss:.3f}, Val-Accuracy {val_accuracy:.3f}_at {timestamp}.save"
        
        torch.save(model.state_dict(), MODEL_SAVEPATH)

In [42]:
train_model(model, X_train_seq, y_train_seq, X_val_seq, y_val_seq)

Training Epochs:   0%|          | 0/100 [00:00<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# for learning_rate in learning_rates:

#     # Initialize model, loss, and optimizer
#     model = ForexTransformer(input_dim=X_train_seq.shape[2], seq_length=seq_length, num_heads=num_heads, num_layers=num_layers, output_dim=num_classes).to('cuda')
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#     train_model(model, X_train_seq, y_train_seq, X_val_seq, y_val_seq)

In [None]:
print(data['signal'].value_counts())


In [None]:
print("Sample Input:", X_train_seq[:5])
print("Sample Label:", y_train_seq[:5])


In [None]:
print(data[['SMA200', 'SMA100', 'SMA50', 'SMA13', 'RSI14', 'Close']].describe())
