In [None]:
"""
'Get rich quick' scheme, a 5-day prediction of FTSE/TWSE TW50 Index stock prices.

Caveats:

1. We use historical data from 2020-01-02 through 2024-03-15
2. We will be predicting the price index
3. We write everything in Python
4. We compare several models and will only submit one result which we think is the best

Hard requirements:

1. The dataset is the FTSE/TWSE TW-50 index history from 2020-01-02 to 2024-03-15
"""

In [12]:
# ===== Imports =====
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# ===== Data loading and plotting =====

dataset = pd.read_csv('dataset/FTSE TWSE Taiwan 50 Index.csv')

# Only the price index is predicted, so the other value columns are dropped.
# `drop` returns a new frame, which leaves the raw `dataset` untouched.
df = dataset.drop(columns=['Total Return Index', 'Change', '%Change'])
df['Date'] = pd.to_datetime(df['Date'])

# plt.subplots returns a (Figure, Axes) pair. Unpack it so that `fig` really is
# the Figure, and draw through the Axes instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(16, 5))
ax.plot(df['Date'], df['Price Index'])
ax.set_title('TW-50 Price Index', fontsize=20)
ax.set_xlabel('Date', fontsize=15)
ax.set_ylabel('Price', fontsize=15)
# tick_params styles the axis itself, so the rotation and size also apply to
# ticks that get regenerated when the limits change below.
ax.tick_params(axis='x', labelrotation=30, labelsize=15)
# Trailing semicolon keeps the notebook from echoing the returned limits.
ax.set_xlim(pd.Timestamp('2020-01-02'), pd.Timestamp('2024-03-15'));

In [6]:
# ===== Predicting stock prices using Transformers =====

import torch
import torch.nn as nn
import torch.nn.functional as F

class Transformer(nn.Module):

    """
    Pre-norm transformer encoder block for stock price prediction.

    The block applies two residual sub-layers in sequence:

    1. layer norm -> multi-head self-attention -> residual add
    2. layer norm -> feed-forward perceptron   -> residual add

    Args:
        attention_dim: Width of the input features; used as ``embed_dim`` of
            the attention layer and as the size of both layer norms.
        attention_dropout: Dropout probability passed to the attention layer.
        attention_heads: Number of attention heads.
        hidden_dim: Width of the inner layer of the perceptron.
        perceptron_dim: Input/output width of the perceptron. Must equal
            ``attention_dim`` because the perceptron output is added back onto
            the input.
        perceptron_dropout: Dropout probability used inside the perceptron.

    Raises:
        ValueError: If ``perceptron_dim`` differs from ``attention_dim``.

    Note:
        The attention layer is created with PyTorch's default
        ``batch_first=False``, so a batched 3-D input is interpreted as
        ``(seq_len, batch, attention_dim)``.
        NOTE(review): confirm that callers pass tensors in that layout.
    """

    def __init__(self,
                 attention_dim: int,
                 attention_dropout: float,
                 attention_heads: int,
                 hidden_dim: int,
                 perceptron_dim: int,
                 perceptron_dropout: float):
        super().__init__()

        # The perceptron output is summed with the attention-width residual
        # stream in forward(); fail here instead of deep inside a matmul.
        if perceptron_dim != attention_dim:
            raise ValueError(
                f"perceptron_dim ({perceptron_dim}) must equal attention_dim "
                f"({attention_dim}) for the residual connection to be valid"
            )

        self.attention_heads = attention_heads
        self.attention_dim = attention_dim
        self.attention_dropout = attention_dropout
        self.hidden_dim = hidden_dim
        self.perceptron_dim = perceptron_dim
        self.perceptron_dropout = perceptron_dropout

        self.attention = nn.MultiheadAttention(
            embed_dim=self.attention_dim,
            num_heads=self.attention_heads,
            dropout=self.attention_dropout,
        )

        # Both norms act on the residual stream, whose last dimension is
        # attention_dim (not hidden_dim). They carry no learnable parameters,
        # so resizing them does not affect saved state dicts.
        self.norm1 = nn.LayerNorm(self.attention_dim, elementwise_affine=False, eps=1e-6)
        self.norm2 = nn.LayerNorm(self.attention_dim, elementwise_affine=False, eps=1e-6)

        # Feed-forward sub-layer: expand to hidden_dim, then project back.
        self.perceptron = nn.Sequential(
            nn.Linear(self.perceptron_dim, self.hidden_dim),
            nn.ReLU(),
            nn.Dropout(self.perceptron_dropout),
            nn.LayerNorm(self.hidden_dim),
            nn.Linear(self.hidden_dim, self.perceptron_dim),
            nn.Dropout(self.perceptron_dropout)
        )

    def forward(self, x):
        """
        Apply the attention and perceptron sub-layers with residual adds.

        Args:
            x: Tensor whose last dimension is ``attention_dim``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Self-attention: the same normalised tensor is query, key and value.
        # (Unpacking the tensor into three names would split it along dim 0.)
        normed = self.norm1(x)
        # Only the attention output is used, so skip computing the weights.
        attended = self.attention(normed, normed, normed, need_weights=False)[0]

        x = x + attended
        x = x + self.perceptron(self.norm2(x))

        return x
        

# # ===== Attention tester =====
# attention_embedding_size = 128
# attention_heads = 2
# attention_dropout = 0.1
# attention_hidden_dim = 128
# perceptron_dim = 128
# perceptron_dropout = 0.1
# 
# x = torch.rand((3, 784, 128))
# 
# attention = Transformer(attention_embedding_size, attention_dropout, attention_heads, attention_hidden_dim, perceptron_dim, perceptron_dropout)
# context = attention(x)
# print("Context final shape: ", context.shape)

In [ ]:
import pytorch_lightning as pl

class Model(pl.LightningModule):
    def __init__(self, model_kwargs, lr):
        """
        Wrap a ``Transformer`` in a LightningModule.

        Args:
            model_kwargs: Keyword arguments forwarded to ``Transformer``.
            lr: Learning rate; read back as ``self.hparams.lr`` when the
                optimizer is configured.
        """
        super().__init__()
        # Register the constructor arguments on self.hparams. Without this,
        # self.hparams.lr does not exist and configure_optimizers fails.
        self.save_hyperparameters()
        self.model = Transformer(**model_kwargs)
    
    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        prediction = self.model(x)
        return prediction
    
    def configure_optimizers(self):
        """
        Build the optimizer and its learning-rate schedule.

        Uses AdamW over the wrapped model's parameters with the learning rate
        taken from ``self.hparams.lr``, plus a MultiStepLR schedule that
        multiplies the rate by 0.1 at milestones 50 and 100.

        Returns:
            A pair ``([optimizer], [scheduler])``.
        """
        trainable = self.model.parameters()
        adamw = torch.optim.AdamW(trainable, lr=self.hparams.lr)
        decay = torch.optim.lr_scheduler.MultiStepLR(adamw, gamma=0.1, milestones=[50, 100])
        return [adamw], [decay]
    
    def calculate_loss(self, batch, mode='train'):