<a href="https://colab.research.google.com/github/ManjuRama/FinMath/blob/main/TImeSeries_TFR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error

# Seed NumPy, the stdlib `random` module and PyTorch with one shared value
# so repeated runs start from the same RNG state.
SEED = 100
for seed_rng in (np.random.seed, random.seed, torch.manual_seed):
    seed_rng(SEED)

# Model hyperparameters
embedding_dim = 16  # width of the learned per-ticker embedding vector
num_heads = 4       # attention heads inside the multi-head attention layer
ff_dim = 256        # hidden width of the transformer's feed-forward sub-network

# Size of the ticker vocabulary = number of distinct encoded ticker values.
# NOTE(review): assumes the codes are contiguous in 0..num_tickers-1, since they
# are used as nn.Embedding indices — TODO confirm against the encoder.
ticker_codes = df['Ticker_encoded'].unique()
num_tickers = len(ticker_codes)

# Define the Transformer model in PyTorch
class TransformerModel(nn.Module):
    """Transformer-style regressor over one ticker id plus scalar features.

    Each sample is turned into a single token: the ticker id is embedded, the
    scalar features are concatenated to that embedding, and the result is
    linearly projected to ``embedding_dim`` so its width matches the attention,
    layer-norm and feed-forward layers. The token then goes through one
    attention block and one feed-forward block (each followed by a residual
    add and layer normalization) and a final linear layer that emits one
    regression value per sample.

    Because the sequence length is 1, each sample attends only to itself.

    Args:
        num_tickers: Size of the ticker vocabulary (rows of the embedding table).
        embedding_dim: Width of the ticker embedding and of the transformer
            block. Must be divisible by ``num_heads``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-network.
        num_numeric_features: Number of scalar feature columns that follow the
            ticker id in the input tuple. Defaults to 17.

    Raises:
        ValueError: If ``num_numeric_features`` is negative.
    """

    def __init__(self, num_tickers, embedding_dim, num_heads, ff_dim, num_numeric_features=17):
        super().__init__()

        if num_numeric_features < 0:
            raise ValueError("num_numeric_features must be non-negative")
        self.num_numeric_features = num_numeric_features

        # Embedding for ticker
        self.ticker_embedding = nn.Embedding(num_tickers, embedding_dim)

        # The concatenated token is (embedding_dim + num_numeric_features) wide,
        # which does not match the transformer width (and is generally not
        # divisible by num_heads). Project it down to embedding_dim first.
        self.input_projection = nn.Linear(embedding_dim + num_numeric_features, embedding_dim)

        # Multi-Head Attention layer
        self.multihead_attn = nn.MultiheadAttention(
            embed_dim=embedding_dim, num_heads=num_heads, batch_first=True
        )

        # Feed Forward Network
        self.ffn = nn.Sequential(
            nn.Linear(embedding_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embedding_dim),
        )

        # Layer normalization (one per residual connection)
        self.layer_norm1 = nn.LayerNorm(embedding_dim)
        self.layer_norm2 = nn.LayerNorm(embedding_dim)

        # Final output layer for regression ('Adj. Close')
        self.output_layer = nn.Linear(embedding_dim, 1)

    def forward(self, inputs):
        """Compute one regression value per sample.

        Args:
            inputs: Sequence of ``1 + num_numeric_features`` tensors. The first
                holds the ticker ids, shaped ``(batch, 1)`` or ``(batch,)``;
                integer or float dtype is accepted. Every remaining tensor is
                one scalar feature column shaped ``(batch, 1)``.

        Returns:
            Tensor of shape ``(batch, 1)``.

        Raises:
            ValueError: If the number of feature tensors does not match
                ``num_numeric_features``.
        """
        ticker_input, *numeric_inputs = inputs
        if len(numeric_inputs) != self.num_numeric_features:
            raise ValueError(
                f"expected {self.num_numeric_features} numeric feature tensors, "
                f"got {len(numeric_inputs)}"
            )

        # Callers that pack every column into one float tensor hand the ticker
        # ids over as floats; nn.Embedding needs integer indices. Small integer
        # ids survive the float32 round trip exactly (up to 2**24).
        ticker_ids = ticker_input.long().reshape(-1)
        ticker_embedded = self.ticker_embedding(ticker_ids)  # (batch, embedding_dim)

        # Concatenate the embedding with every scalar feature column
        feature_dtype = ticker_embedded.dtype
        combined_input = torch.cat(
            [ticker_embedded, *(feature.to(feature_dtype) for feature in numeric_inputs)],
            dim=1,
        )  # (batch, embedding_dim + num_numeric_features)

        # Bring the token to the transformer width, then add the sequence axis:
        # (batch, seq_len=1, embedding_dim)
        token = self.input_projection(combined_input).unsqueeze(1)

        # Multi-head self-attention, then add & normalize
        attn_output, _ = self.multihead_attn(token, token, token)
        attn_output = self.layer_norm1(attn_output + token)

        # Feed-forward network, then add & normalize
        ffn_output = self.ffn(attn_output)
        transformer_output = self.layer_norm2(ffn_output + attn_output)

        # Drop the length-1 sequence axis and map to the regression target
        transformer_output_flat = transformer_output.squeeze(1)
        return self.output_layer(transformer_output_flat)

# Build the network from the hyperparameters defined above
model = TransformerModel(
    num_tickers=num_tickers,
    embedding_dim=embedding_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
)

# Adam over all model parameters, with mean-squared error as the objective
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# (model input name, df_train column) for every float-valued feature, listed in
# the order in which the model's forward() unpacks them after the ticker id.
train_feature_columns = (
    ('volume_input', 'Volume'),
    ('eps_input', 'EPS'),
    ('pe_ratio_input', 'PE_Ratio'),
    ('revenue_growth_input', 'Revenue Growth'),
    ('operating_margin_input', 'Operating Margin'),
    ('total_equity_input', 'Total Equity'),
    ('debt_levels_input', 'Debt Levels'),
    ('roe_input', 'ROE'),
    ('net_cash_input', 'Net Cash from Operating Activities'),
    ('unemployment_rate_input', 'Unemployment Rate'),
    ('gdp_input', 'GDP'),
    ('month_sin_input', 'Month_sin'),
    ('month_cos_input', 'Month_cos'),
    ('day_of_week_sin_input', 'Day_of_Week_sin'),
    ('day_of_week_cos_input', 'Day_of_Week_cos'),
    ('day_of_year_sin_input', 'Day_of_Year_sin'),
    ('day_of_year_cos_input', 'Day_of_Year_cos'),
)

# Training inputs: every column becomes a (num_rows, 1) tensor. The ticker id
# is stored as int64 and goes first; all remaining columns are float32.
X_train = {
    'ticker_input': torch.tensor(df_train['Ticker_encoded'].values).long().unsqueeze(1),
}
X_train.update(
    (input_name, torch.tensor(df_train[column].values).float().unsqueeze(1))
    for input_name, column in train_feature_columns
)

# Regression target as a (num_rows, 1) float tensor
y_train = torch.tensor(df_train['Adj. Close'].values).float().unsqueeze(1)

# Pack all input columns side by side into one 2-D tensor (dict order is kept),
# pair it with the targets, and serve shuffled mini-batches of 32 rows.
train_dataset = TensorDataset(torch.cat(list(X_train.values()), dim=1), y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Module called with a tuple of single-column tensors (the batch
            input split along ``dim=1``) and returning predictions.
        train_loader: Iterable yielding ``(inputs, targets)`` batches, where
            ``inputs`` is a 2-D tensor with one column per model input. It is
            iterated once per epoch and does not need to support ``len()``.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer holding the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported through ``print``.
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, targets in train_loader:
            # Forward pass
            optimizer.zero_grad()
            inputs_split = torch.split(inputs, 1, dim=1)  # one (batch, 1) tensor per column
            outputs = model(inputs_split)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches while iterating instead of calling len(train_loader):
        # loaders without __len__ still work, and an empty loader reports nan
        # rather than raising ZeroDivisionError.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# Fit the model on the training loader for ten epochs
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

# (model input name, df_test column) for every float-valued feature, listed in
# the order in which the model's forward() unpacks them after the ticker id.
test_feature_columns = (
    ('volume_input', 'Volume'),
    ('eps_input', 'EPS'),
    ('pe_ratio_input', 'PE_Ratio'),
    ('revenue_growth_input', 'Revenue Growth'),
    ('operating_margin_input', 'Operating Margin'),
    ('total_equity_input', 'Total Equity'),
    ('debt_levels_input', 'Debt Levels'),
    ('roe_input', 'ROE'),
    ('net_cash_input', 'Net Cash from Operating Activities'),
    ('unemployment_rate_input', 'Unemployment Rate'),
    ('gdp_input', 'GDP'),
    ('month_sin_input', 'Month_sin'),
    ('month_cos_input', 'Month_cos'),
    ('day_of_week_sin_input', 'Day_of_Week_sin'),
    ('day_of_week_cos_input', 'Day_of_Week_cos'),
    ('day_of_year_sin_input', 'Day_of_Year_sin'),
    ('day_of_year_cos_input', 'Day_of_Year_cos'),
)

# Test inputs: every column becomes a (num_rows, 1) tensor. The ticker id is
# stored as int64 and goes first; all remaining columns are float32.
X_test = {
    'ticker_input': torch.tensor(df_test['Ticker_encoded'].values).long().unsqueeze(1),
}
X_test.update(
    (input_name, torch.tensor(df_test[column].values).float().unsqueeze(1))
    for input_name, column in test_feature_columns
)

# Held-out regression target as a (num_rows, 1) float tensor
y_test = torch.tensor(df_test['Adj. Close'].values).float().unsqueeze(1)

# Predict: switch to inference mode and run the whole test set in one forward
# pass, feeding the model one single-column tensor per input.
model.eval()
X_test_tensor = torch.cat(list(X_test.values()), dim=1)
X_test_split = torch.split(X_test_tensor, 1, dim=1)
with torch.no_grad():
    y_pred = model(X_test_split)

# Mean squared error between the held-out targets and the predictions
mse = mean_squared_error(y_test.numpy(), y_pred.numpy())
print("Test Loss (MSE):", mse)
