Implement a transformer architecture, then improve and analyze its various aspects rather than focusing on a particular paper.

Use percentage changes as data source instead of prices

A naive predictor that simply guesses that the next day's price will be the same as today's price would already have better than 95% accuracy.

Guess whether the next day's price will go up or down, i.e. treat the task as a classification problem.

In [None]:
from google.colab import drive
import numpy as np
import pandas as pd
import torch
from torchsummary import summary
# Mount Google Drive inside the Colab runtime so the project folder is reachable.
drive.mount("/content/drive")
# IPython magic: switch the working directory to the project folder so that the
# relative imports and paths used in later cells (`data.get_dataset`,
# `./data/SPXDailyData.csv`, `models`, `utils`) resolve.
%cd '/content/drive/MyDrive/CS7643 Final Project/'  

In [None]:
from data.get_dataset import GetDataset, StockData

# ---- Experiment switches ----------------------------------------------------
FAKE_DATA = False               # with REPEAT_ONE_SMALL_BATCH: overwrite the rows with synthetic values
REPEAT_ONE_SMALL_BATCH = False  # debug: keep only the first 10 rows
NUM_DAYS = 5                    # passed to StockData as num_days and to the model as seq_len
USE_TRANSFORMER = True
USE_ALT_TRANSFORMER = True
KEEP_CLOSE_ONLY = False         # debug to drop other columns
USE_CPU = False                 # force CPU even when CUDA is available
FUTURE_DAYS = 5                 # argument of GetDataset.get_data2 (presumably the trend horizon - confirm)
DROP_FIRST_N_DAYS = 10000       # number of leading rows to discard
USE_TREND_PREDICTION = True

device = torch.device('cuda' if torch.cuda.is_available() and not USE_CPU else 'cpu')

csv = './data/SPXDailyData.csv'
df = GetDataset(csv)
dataset = df.get_data2(FUTURE_DAYS) if USE_TREND_PREDICTION else df.get_data()
# Discard ~first half. Take an explicit copy so the debug edits below write
# into `dataset` itself instead of a slice of the loader's frame.
# NOTE(review): assumes `dataset` is a pandas DataFrame (it is used with
# `.drop(columns=...)` and `.to_numpy()` below).
dataset = dataset.iloc[DROP_FIRST_N_DAYS:].copy()
if REPEAT_ONE_SMALL_BATCH:
    dataset = dataset.iloc[0:10].copy()
    if FAKE_DATA:
        # Positional `.iloc` writes replace the previous chained indexing
        # (`dataset[:][0:5] = ...`, `dataset['Next_Day_Change'][0:5] = ...`),
        # which assigns through temporary objects and is not guaranteed to
        # reach `dataset` (it never does under pandas Copy-on-Write).
        target_col = dataset.columns.get_loc('Next_Day_Change')
        dataset.iloc[0:5] = .6
        dataset.iloc[5:] = .4
        dataset.iloc[0:5, target_col] = 0
        dataset.iloc[5:, target_col] = 1

# Split into 3 contiguous, order-preserving parts: train / validation / test.
valid_frac, test_frac = 0.2, 0.2
train_sz = int(dataset.shape[0]*(1-(valid_frac+test_frac)))
valid_sz = int(dataset.shape[0]*(valid_frac))
df_train = dataset.iloc[:train_sz]
df_valid = dataset.iloc[train_sz:train_sz+valid_sz]
df_test = dataset.iloc[train_sz+valid_sz:]

if KEEP_CLOSE_ONLY:  # see in case additional info acts like a noise
    # Rebind to new frames instead of `inplace=True`: the splits are slices of
    # `dataset`, and in-place edits on slices raise SettingWithCopy warnings.
    extra_cols = ['Open', 'High', 'Low']
    df_train = df_train.drop(columns=extra_cols)
    df_valid = df_valid.drop(columns=extra_cols)
    df_test = df_test.drop(columns=extra_cols)

# Convert to sequence data and make the datasets.
train_dataset = StockData(df_train.to_numpy(), num_days=NUM_DAYS)
valid_dataset = StockData(df_valid.to_numpy(), num_days=NUM_DAYS)
test_dataset = StockData(df_test.to_numpy(), num_days=NUM_DAYS)


In [None]:
#print(train_dataset[:][0].shape)
#print(train_dataset.num_samples)
#print(dataset)
#print(train_dataset[:])

In [None]:
from models.my_transformer import TransformerModelImpl
from torch.utils.data import DataLoader
class hyperparameters:
    """Settings handed to ``TransformerModelImpl``; ``lr``, ``n_epochs`` and
    ``batch_size`` are also read by the optimizer, training loop and loaders."""
    device = device
    n_layers = 1
    num_heads = 1
    # Number of features. Derived from the data (same expression as
    # `params.d_model`) instead of a hard-coded 4, so the value stays correct
    # when KEEP_CLOSE_ONLY drops columns.
    model_dim = df_train.shape[1] - 1
    forward_dim = 64
    output_dim = 1
    dropout = 3e-4
    n_epochs = 10
    # Was `0.001 if USE_TRANSFORMER else 0.001`: both branches were identical.
    lr = 0.001
    batch_size = 64
modelT = TransformerModelImpl(hyperparameters).to(device)
#summary(modelT, (NUM_DAYS, train_dataset[:][0].shape[2]))

# In single-small-batch debug mode each loader serves its whole dataset as one
# batch; otherwise both use the configured mini-batch size.
if REPEAT_ONE_SMALL_BATCH:
    train_bs = train_dataset.num_samples
    valid_bs = valid_dataset.num_samples
else:
    train_bs = valid_bs = hyperparameters.batch_size

# TODO: shuffle is set to False while debugging.
train_loader = DataLoader(train_dataset, batch_size=train_bs, shuffle=False)
valid_loader = DataLoader(valid_dataset, batch_size=valid_bs, shuffle=False)

In [None]:
from models.my_transformer import TransformerModelImpl2
class params:
    """Constructor settings for ``TransformerModelImpl2``."""
    device = device
    num_layers = 1
    nhead = 1
    # Number of features: every column of the training frame except one.
    d_model = df_train.shape[1] - 1
    dim_feedforward = 64
    d_output = 1
    dropout = 3e-4
    # Same NUM_DAYS value that StockData receives as num_days.
    seq_len = NUM_DAYS


modelT2 = TransformerModelImpl2(params).to(device)
#summary(modelT2, (NUM_DAYS, train_dataset[:][0].shape[2]))


In [None]:
from torch import nn
from torch.nn import functional as F
class FCNet(nn.Module):
    """Fully connected baseline network.

    Two ``Linear -> ReLU -> BatchNorm1d`` stages (widths ``4 * in_shape`` and
    ``8 * in_shape``) followed by a single-output linear head.

    Args:
        in_shape: Number of values per sample after the input is flattened.
    """

    def __init__(self, in_shape):
        super().__init__()
        width1, width2 = in_shape * 4, in_shape * 8
        self.fc_layer1 = nn.Linear(in_shape, width1)
        self.bn_layer1 = nn.BatchNorm1d(width1)
        self.fc_layer2 = nn.Linear(width1, width2)
        self.bn_layer2 = nn.BatchNorm1d(width2)
        self.fc_layer3 = nn.Linear(width2, 1)

    def forward(self, x):
        """Flatten everything after the batch axis and return ``(batch, 1)`` outputs."""
        flat = x.flatten(start_dim=1)
        hidden = self.bn_layer1(F.relu(self.fc_layer1(flat)))
        hidden = self.bn_layer2(F.relu(self.fc_layer2(hidden)))
        out = self.fc_layer3(hidden)
        return out.reshape(out.shape[0], -1)
#print(train_dataset[:][0].shape[2])
# The flattened FCNet input holds NUM_DAYS times the size of the sample
# tensor's last axis (presumably features per day - confirm against StockData).
last_axis_size = train_dataset[:][0].shape[2]
modelFC = FCNet(last_axis_size * NUM_DAYS).to(device)
#summary(modelFC, (NUM_DAYS, train_dataset[:][0].shape[2]))

## Training

In [None]:
from utils import plot_curves
from utils import train
from utils import evaluate
from sklearn.exceptions import UndefinedMetricWarning
import copy

import warnings

# Silence only scikit-learn's UndefinedMetricWarning (already imported above)
# instead of replacing `warnings.warn` with a no-op, which hid every warning
# from every library for the rest of the session.
# NOTE(review): if other warnings become too noisy, add further targeted
# `filterwarnings` entries rather than patching `warnings.warn`.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# Select the model under test from the experiment switches.
if USE_TRANSFORMER and USE_ALT_TRANSFORMER:
    model = modelT2
elif USE_TRANSFORMER:
    model = modelT
else:
    model = modelFC
#minitial = copy.deepcopy(model)
# The models emit one raw logit per sample; BCEWithLogitsLoss applies the sigmoid itself.
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters.lr)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=1)#constant
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
#scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

LOG_EVERY = 50  # print metrics every LOG_EVERY epochs (plus the final epoch)

avg_train_loss, avg_train_acc, avg_valid_loss, avg_valid_acc = [], [], [], []
model.float()
model.to(device)
n_epochs = hyperparameters.n_epochs
for epoch in range(n_epochs):
    # `train` / `evaluate` come from utils; each returns a 3-tuple whose last
    # two items are used here as average loss and average accuracy.
    train_loss, atl, ata = train(model, train_loader, optimizer, criterion, device)
    #scheduler.step(train_loss)
    _, avl, ava = evaluate(model, valid_loader, criterion, device)
    # Previously `epoch % 50 == 1`, which with 10 epochs printed only
    # "Epoch 2" and never the first or final metrics.
    if epoch % LOG_EVERY == 0 or epoch == n_epochs - 1:
        print("Epoch %d: Training Loss: %.4f. Training Acc: %.4f. Validation Loss: %.4f. Validation Acc: %.4f." % (epoch+1, atl, ata, avl, ava))
    avg_train_loss.append(atl.item())
    avg_train_acc.append(ata)
    avg_valid_loss.append(avl.item())
    avg_valid_acc.append(ava)
plot_curves(avg_train_loss, avg_train_acc, avg_valid_loss, avg_valid_acc, info='', save=False)
#mfinal = copy.deepcopy(model)
 

In [None]:

# Debug aid: show the shapes of the two tensors returned by the training
# dataset, then dump the second one in full.
a, b = train_dataset[:]
print(a.shape, b.shape, sep="\n")
print(b)

## Validating Predictions

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
# Score the selected model on the held-out test split.
model.eval()
features, targets = test_dataset[:]
features, targets = features.to(device), targets.to(device)
# Inference only: skip autograd bookkeeping so no graph is built over the
# whole test set.
with torch.no_grad():
    predictions = model(features.float())
# Logits -> probabilities -> hard 0/1 labels (threshold 0.5 via rounding).
pred = torch.round(torch.sigmoid(predictions)).long()

# Convert to NumPy once and reuse for both reports.
y_true = targets.detach().cpu().numpy()
y_pred = pred.detach().cpu().numpy()
# First the weighted-average F1 on its own, then the full per-class report.
print(classification_report(y_true, y_pred, output_dict=True)['weighted avg']['f1-score'])
print(classification_report(y_true, y_pred))

## TODO

In [None]:
# 1. Review/Fix Transformer Implementation
# 2. Review/Fix Training and Testing
# 3. Review/Fix Predictions with Tensors
# 4. Implement Accuracy
# 5. Implement Charting
# 6. Experimentation