In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Define the Transformer model
class TransformerModel(nn.Module):
    """Transformer encoder that maps a feature sequence to per-step outputs.

    Inputs are first projected from ``input_dim`` to ``hidden_dim`` so that the
    attention width is independent of the number of raw features (a univariate
    series has ``input_dim == 1``, which cannot be split across several heads).
    A sinusoidal positional encoding is added because self-attention alone is
    blind to the order of the time steps.

    Args:
        input_dim: Number of features per time step in the input.
        output_dim: Number of values produced per time step.
        num_layers: Number of stacked encoder layers.
        hidden_dim: Width of the encoder (``d_model``); must be divisible by
            ``n_heads``.
        n_heads: Number of attention heads.
        dropout_prob: Dropout probability used inside each encoder layer.

    Raises:
        ValueError: If ``hidden_dim`` is not divisible by ``n_heads``.
    """

    def __init__(self, input_dim, output_dim, num_layers, hidden_dim, n_heads, dropout_prob):
        super().__init__()
        if hidden_dim % n_heads != 0:
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be divisible by n_heads ({n_heads})"
            )
        self.hidden_dim = hidden_dim
        # Lift the raw features into the encoder width.
        self.input_projection = nn.Linear(input_dim, hidden_dim)
        # batch_first=True: tensors are (batch, seq_len, features), so attention
        # runs across time steps within a sample rather than across samples.
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=n_heads,
            dropout=dropout_prob,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=num_layers
        )
        self.linear = nn.Linear(hidden_dim, output_dim)

    def _positional_encoding(self, seq_len, device, dtype):
        """Return a (seq_len, hidden_dim) sinusoidal position table."""
        width = self.hidden_dim
        positions = torch.arange(seq_len, device=device, dtype=torch.float32).unsqueeze(1)
        frequencies = torch.exp(
            torch.arange(0, width, 2, device=device, dtype=torch.float32)
            * (-torch.log(torch.tensor(10000.0)) / width)
        )
        table = torch.zeros(seq_len, width, device=device, dtype=torch.float32)
        table[:, 0::2] = torch.sin(positions * frequencies)
        # For an odd width there is one fewer cosine column than sine columns.
        table[:, 1::2] = torch.cos(positions * frequencies)[:, : width // 2]
        return table.to(dtype)

    def forward(self, src):
        """Encode ``src`` and return one output vector per time step.

        Args:
            src: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, output_dim)``.
        """
        embedded = self.input_projection(src)
        seq_len = embedded.size(-2)
        embedded = embedded + self._positional_encoding(
            seq_len, embedded.device, embedded.dtype
        )
        encoded = self.transformer_encoder(embedded)
        return self.linear(encoded)

# Preprocess data
def preprocess_data(data, seq_length):
    """Slice a series into sliding input windows and next-step targets.

    For every start index ``s`` in ``range(len(data) - seq_length)`` the input
    window is ``data[s:s + seq_length]`` and the target is
    ``data[s + seq_length]``.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array or list).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the windows and targets.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

In [None]:
# Load data
# Assumes the CSV holds stock market data with a 'Close' price column.
# Replace 'your_data.csv' with your actual data file
prices_df = pd.read_csv('your_data.csv')
data = prices_df['Close'].values.reshape(-1, 1)  # (num_days, 1), as StandardScaler expects 2D

# Hyperparameters
input_dim = 1  # Dimensionality of input sequence
output_dim = 1  # Dimensionality of output sequence
seq_length = 30  # Length of input sequence
num_layers = 2  # Number of transformer layers
hidden_dim = 64  # Hidden dimension of transformer model
n_heads = 4  # Number of attention heads
dropout_prob = 0.1  # Dropout probability

# Seed PyTorch so weight initialisation and dropout are repeatable across runs
torch.manual_seed(42)

# Chronological split of the window start indices (no shuffling for time series)
window_starts = np.arange(len(data) - seq_length)
train_idx, test_idx = train_test_split(window_starts, test_size=0.2, shuffle=False)

# Fit the scaler only on prices the training windows can see (their inputs and
# targets), so test-period statistics do not leak into training.
scaler = StandardScaler()
scaler.fit(data[:train_idx[-1] + seq_length + 1])
data_normalized = scaler.transform(data)

# Preprocess data
X, y = preprocess_data(data_normalized, seq_length)  # X: (n, seq_length, 1), y: (n, 1)

# Split data into train and test sets
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Convert data to PyTorch tensors.
# The windows already carry a trailing feature axis of size 1, so no extra
# dimension is added here.
X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32)

# Instantiate the model
model = TransformerModel(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
                         hidden_dim=hidden_dim, n_heads=n_heads, dropout_prob=dropout_prob)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (full-batch: the whole training set in one forward pass per epoch)
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    # The model emits one value per time step; the next-step forecast is the
    # output at the last position, which has the same (n, 1) shape as the targets.
    loss = criterion(output[:, -1, :], y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)[:, -1, :]  # (num_test, 1)

# Inverse transform predictions and actual values (both are 2D (n, 1) arrays)
y_pred = scaler.inverse_transform(y_pred.numpy())
y_test_unscaled = scaler.inverse_transform(y_test)

# Calculate evaluation metrics in the original price units
mae = np.mean(np.abs(y_pred - y_test_unscaled))
mse = np.mean((y_pred - y_test_unscaled) ** 2)
print(f'MAE: {mae:.2f}, MSE: {mse:.2f}')