In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import models
from matplotlib.lines import Line2D

import pandas as pd
import matplotlib.pyplot as plt


import random
import math
import time

# Single seed shared by every random number generator used in this notebook.
SEED = 1234

# Seed PyTorch (CPU and CUDA), NumPy and the stdlib generator with the same value.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Ask cuDNN to restrict itself to deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
class CustomStockData(Dataset):
    """Dataset backed by a pickled pandas table of stock samples.

    Every row is read through its ``src``, ``trg``, ``initopen`` and ``name``
    columns and handed out as a plain dict.
    """

    def __init__(self, csv_file, root_dir=None, transform=None):
        """Load the pickled table stored at ``csv_file``.

        ``root_dir`` and ``transform`` are accepted but not used.
        """
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        self.csv = pd.read_pickle(csv_file)

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return len(self.csv)

    def __getitem__(self, idx):
        """Return the row at position ``idx`` as a dict.

        The dict has the keys ``'src'``, ``'trg'``, ``'initprice'`` (taken from
        the ``initopen`` column) and ``'symbol'`` (taken from the ``name``
        column). Tensor indices are converted with ``tolist()`` first.
        """
        position = idx.tolist() if torch.is_tensor(idx) else idx
        row = self.csv.iloc[position]

        return {
            'src': row.src,
            'trg': row.trg,
            'initprice': row.initopen,
            # Item access on purpose: ``row.name`` is the pandas index label.
            'symbol': row['name'],
        }


# Every sample of the pickled dataset.
data = CustomStockData(
    'techcompdata/5yrs/srctrg_set75_45VOC_trgpad_augment.pkl')

# Options shared by the three loaders below.
loader_options = dict(batch_size=32, shuffle=True, num_workers=4)

# First split: 80% of all samples go to train(+validation), the rest to test.
trainlen = int(0.8 * len(data))
testlen = len(data) - trainlen
train, test = torch.utils.data.random_split(data, [trainlen, testlen])

testloader = DataLoader(test, **loader_options)
print(f'number of test set : {len(testloader.dataset)}')

# Second split: 80% of the remaining samples for training, 20% for validation.
trainlen = int(0.8 * len(train))
vallen = len(train) - trainlen
train_set, val_set = torch.utils.data.random_split(train, [trainlen, vallen])

trainloader = DataLoader(train_set, **loader_options)
valloader = DataLoader(val_set, **loader_options)

print(f"number of train set : {len(trainloader.dataset)}")
print(f"number of val set:  {len(valloader.dataset)}")

number of test set : 5888
number of train set : 18840
number of val set:  4711


In [3]:
# Build the encoder and decoder and wrap them in the full model on `device`.
# NOTE(review): the leading 3 is presumably the number of features
# (open / close / volume) - confirm against `models`.
Encode = models.TransformerEncoderV1(3,device,dmodel=256,layers=4,drop=.05)
Decode = models.TransformerDecoderV1(3,device,dmodel=256,layers=4,drop=.05)
Transformer = models.TransformerV1(Encode,Decode,3,dmodel=256).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU can still be loaded when `device` fell back to the CPU.
Transformer.load_state_dict(
    torch.load('TransformerV4_trgpad75_45.pt', map_location=device))

<All keys matched successfully>

In [4]:
def _draw_line_normalized(src, trg, stockname, prediction, row, label):
    """Plot one row of ``src`` and ``trg`` together with ``prediction``.

    ``src[row]`` is drawn on x = 0 .. src.shape[1]-1; ``trg[row]`` and
    ``prediction`` are both drawn on the x positions that follow, so
    ``prediction`` must have ``trg.shape[1]`` entries.

    The function only draws on the current axes. Displaying the figure
    (``plt.show()``) is left to the caller, which lets several of these
    plots share one figure through ``plt.subplot``.

    Args:
        src: 2-D array-like indexed as ``src[row]``.
        trg: 2-D array-like indexed as ``trg[row]``.
        stockname: text inserted into the plot title.
        prediction: 1-D sequence plotted over the target x range.
        row: index of the row of ``src``/``trg`` to plot.
        label: suffix appended to the title (e.g. ``'Open'``).
    """
    srclen = src.shape[1]
    trglen = trg.shape[1]
    target_steps = range(srclen, srclen + trglen)

    plt.plot(range(srclen), src[row], 'b', label='src', alpha=.6)
    plt.plot(target_steps, trg[row], 'r', label='trg')
    plt.plot(target_steps, prediction, 'g', label='pred')
    plt.title(f'Stock : {stockname} {label}')
    plt.legend()
    plt.grid()


def draw_line_normalized_open(src,trg,stockname,prediction,):
    """Plot row 0 of ``src``/``trg`` against ``prediction``, titled 'Open'.

    The caller is responsible for calling ``plt.show()``.
    """
    _draw_line_normalized(src, trg, stockname, prediction, 0, 'Open')
    # Kept from the original: this changes the default figure size, which
    # only applies to figures created after this call.
    plt.rcParams["figure.figsize"] = (13,4)


def draw_line_normalized_close(src,trg,stockname,prediction,):
    """Plot row 1 of ``src``/``trg`` against ``prediction``, titled 'Close'.

    The caller is responsible for calling ``plt.show()``.
    """
    _draw_line_normalized(src, trg, stockname, prediction, 1, 'Close')


def draw_line_normalized_vol(src,trg,stockname,prediction,):
    """Plot row 2 of ``src``/``trg`` against ``prediction``, titled 'Volume'.

    The caller is responsible for calling ``plt.show()``.
    """
    _draw_line_normalized(src, trg, stockname, prediction, 2, 'Volume')

In [5]:
def eval_model_trgpadd(model, loss_func, src, trg, trgpadsize):
    """Evaluate ``model`` on one batch and return ``loss_func``'s result.

    The target sequence without its last step is fed to the model together
    with ``src``; the loss is computed only on the steps after the first
    ``trgpadsize`` target steps.

    Args:
        model: module called as ``model(src, trg_input)``; it is switched to
            eval mode.
        loss_func: callable ``loss_func(prediction, target)``.
        src: source batch tensor. The last axis is moved to the front before
            the model call - assumed layout (batch, features, time), TODO
            confirm against the data pipeline.
        trg: target batch tensor with the same layout as ``src``.
        trgpadsize: number of leading target steps excluded from the loss;
            must be at least 1.

    Returns:
        Whatever ``loss_func`` returns for the aligned prediction / target.

    Raises:
        ValueError: if ``trgpadsize`` is smaller than 1, because the
            prediction and target slices below would no longer line up.
    """
    if trgpadsize < 1:
        raise ValueError(
            f'trgpadsize must be >= 1, got {trgpadsize}')

    # Run on the device that holds the model weights instead of relying on a
    # module-level `device` variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else src.device

    model.eval()

    with torch.no_grad():

        src = src.clone().detach().to(run_device).permute(2, 0, 1)
        # Model input: every target step except the last one.
        trg_input = trg.clone().detach().to(run_device).permute(
            2, 0, 1)[:-1, :, :]
        # Reference values: every target step after the first `trgpadsize`.
        trg_no_pad = trg.clone().detach().to(
            run_device).permute(2, 0, 1)[trgpadsize:, :, :]

        prediction = model(src, trg_input)
        # The input is one step shorter than the target, so dropping
        # `trgpadsize - 1` leading outputs gives the same length as trg_no_pad.
        prediction = prediction[trgpadsize-1:, :, :]
        # Use the loss that was passed in, not a global.
        loss = loss_func(prediction, trg_no_pad)

    return loss


def model_predict_padtrg(model, src, trg, trgpadsize):
    """Roll out a step-by-step prediction for a single sample.

    The first ``trgpadsize`` steps of ``trg`` seed the decoder input. The
    model is then called once per remaining target step, and the last step
    of each output is appended to the decoder input for the next call.

    Args:
        model: module called as ``model(src, tgt)``; it is switched to eval
            mode.
        src: 2-D NumPy array; reshaped below from (rows, steps) to
            (steps, 1, rows).
        trg: 2-D NumPy array with the same layout as ``src``.
        trgpadsize: number of leading target steps used as the seed.

    Returns:
        Tensor whose first ``trgpadsize`` steps are copied from ``trg`` and
        whose remaining steps are model outputs; it has ``trg.shape[1]`` steps
        along its first axis.
    """
    # Run on the device that holds the model weights instead of relying on a
    # module-level `device` variable.
    first_param = next(model.parameters(), None)
    run_device = (first_param.device if first_param is not None
                  else torch.device('cpu'))

    # (rows, steps) -> (rows, steps, 1) -> (steps, 1, rows)
    src = torch.from_numpy(src).to(run_device).unsqueeze(2).permute(1, 2, 0)
    trg = torch.from_numpy(trg).to(run_device).unsqueeze(2).permute(1, 2, 0)
    steps_to_predict = trg.shape[0] - trgpadsize

    # Switching to eval mode once is enough; it does not change per step.
    model.eval()

    with torch.no_grad():

        tgt = trg.clone().detach()[:trgpadsize, :, :]

        for _ in range(steps_to_predict):
            # Keep only the newest step of the output and feed it back in.
            newest = model(src, tgt)[-1:, :, :]
            tgt = torch.cat((tgt, newest), dim=0)

    return tgt

In [8]:
criterion = nn.MSELoss(reduction='sum')

# Number of leading target steps that are excluded from the loss.
TRG_PAD_SIZE = 15


def _loss_per_sample(loader):
    """Sum the batch losses over ``loader`` and divide by its sample count.

    NOTE(review): ``criterion`` sums the squared error over every element of
    a batch and the total is divided by the number of samples, so the square
    root printed below is taken of the summed squared error per sample, not
    of a per-element mean - confirm this is the intended "RMSE".
    """
    total = 0.0
    for sample in loader:
        batch_loss = eval_model_trgpadd(
            Transformer, criterion, sample['src'], sample['trg'], TRG_PAD_SIZE)
        total += batch_loss.item()
    return total / len(loader.dataset)


# Train split
trainloss = _loss_per_sample(trainloader)
print('RMSE for train set: ',math.sqrt(trainloss))

# Validation split
valloss = _loss_per_sample(valloader)
print('RMSE for val set: ',math.sqrt(valloss))

# Test split
testloss = _loss_per_sample(testloader)
print('RMSE for test set: ',math.sqrt(testloss))

RMSE for train set:  0.058194553597528585
RMSE for val set:  0.0585941185464256
RMSE for test set:  0.05839861967914145


In [None]:
# Plot the predicted feature 0 ("Open") for 10 randomly drawn validation samples.
length = len(valloader.dataset)
idx=np.random.randint(0,length,10)

for i in idx:

    # Fetch the sample once and read everything from it.
    sample = valloader.dataset[i]
    src = sample['src']
    trg = sample['trg']

    # `i` is a position inside the validation subset, so the symbol has to
    # come from the same sample; `data[i]` would be a different row.
    stockname = sample['symbol']

    prediction = model_predict_padtrg(Transformer, src, trg,15)[:, :, 0]
    prediction = prediction.squeeze(1)
    prediction = prediction.detach().cpu().numpy()
    
    draw_line_normalized_open(src, trg, stockname, prediction)
    plt.show()

In [None]:
# Plot the predicted feature 1 ("Close") for 10 randomly drawn validation samples.
length = len(valloader.dataset)
idx=np.random.randint(0,length,10)

for i in idx:

    # Fetch the sample once and read everything from it.
    sample = valloader.dataset[i]
    src = sample['src']
    trg = sample['trg']

    # `i` is a position inside the validation subset, so the symbol has to
    # come from the same sample; `data[i]` would be a different row.
    stockname = sample['symbol']

    prediction = model_predict_padtrg(Transformer, src, trg,15)[:, :, 1]
    prediction = prediction.squeeze(1)
    prediction = prediction.detach().cpu().numpy()
    
    draw_line_normalized_close(src, trg, stockname, prediction)
    plt.show()

In [None]:
# Plot the predicted feature 2 ("Volume") for 10 randomly drawn validation samples.
length = len(valloader.dataset)
idx=np.random.randint(0,length,10)

for i in idx:

    # Fetch the sample once and read everything from it.
    sample = valloader.dataset[i]
    src = sample['src']
    trg = sample['trg']

    # `i` is a position inside the validation subset, so the symbol has to
    # come from the same sample; `data[i]` would be a different row.
    stockname = sample['symbol']

    prediction = model_predict_padtrg(Transformer, src, trg,15)[:, :, 2]
    prediction = prediction.squeeze(1)
    prediction = prediction.detach().cpu().numpy()
    
    draw_line_normalized_vol(src, trg, stockname, prediction)
    plt.show()

In [None]:
# For 30 randomly drawn validation samples, plot all three predicted features
# side by side in one figure.
length = len(valloader.dataset)
idx=np.random.randint(0,length,30)

for i in idx:
    
    # Fetch the sample once and read everything from it.
    sample = valloader.dataset[i]
    src = sample['src']
    trg = sample['trg']
    # `i` is a position inside the validation subset, so the symbol has to
    # come from the same sample; `data[i]` would be a different row.
    stockname = sample['symbol']

    # One rollout yields all features; the last axis selects the feature.
    prediction = model_predict_padtrg(Transformer, src, trg,15)
    prediction = prediction.detach().cpu().numpy()
    plt.subplot(131)
    draw_line_normalized_open(src, trg, stockname, prediction[:, :, 0].squeeze(1))
    plt.subplot(132)
    draw_line_normalized_close(src, trg, stockname, prediction[:, :, 1].squeeze(1))
    plt.subplot(133)
    draw_line_normalized_vol(src, trg, stockname, prediction[:, :, 2].squeeze(1))
    
    plt.show()