In [1]:
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch import optim
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import random

In [2]:
data_ = pd.read_csv("combined.csv", sep=";")

In [3]:
ericsson_data = data_[data_["stock"]=="Ericsson_A"]
all_data = ericsson_data[["publication_time", "price", "volume"]].iloc[::-1].to_numpy()

In [11]:
#Predict 5min into future, only if up or down
def splitDataIntoTrainDevTestDataSets(data, input_size, train_ratio, dev_ratio):
    xs = []
    ys = []

    for i in range(len(data)-input_size):
        x = data[i:i+input_size]
        xs.append(x[:,(1,2)].ravel())

        y_time = x[-1,0] + 5*60
        y_price = data[data[:,0] < y_time][-1][1]
        
        if(y_price < x[-1,1]):
            y = 0
        elif(y_price == x[-1,1]):
            y = 1
        else:
            y = 2
        ys.append(y)
    
    combined = list(zip(xs, ys))
    random.shuffle(combined)
    xs[:], ys[:] = zip(*combined)
    
    xs = torch.tensor(xs, dtype=torch.float32)
    ys = torch.tensor(ys, dtype=torch.float32)
    
    train_len = round(len(data)*train_ratio)
    dev_len = round(len(data)*dev_ratio)
    test_len = len(data) - train_len - dev_len
    
    train_dataset = TensorDataset(xs[:train_len], ys[:train_len])
    dev_dataset = TensorDataset(xs[train_len:train_len+dev_len], ys[train_len:train_len+dev_len])
    test_dataset = TensorDataset(xs[train_len+dev_len:], ys[train_len+dev_len:])
    
    
    return train_dataset, dev_dataset, test_dataset

In [43]:
input_size=20
batch_size=8
nbr_epochs=10

In [44]:
train_data, dev_data, test_data = splitDataIntoTrainDevTestDataSets(all_data, input_size, 0.8, 0.1)

print("Length train data: " + str(len(train_data)))
print("Length dev data: " + str(len(dev_data)))
print("Length test data: " + str(len(test_data)))

train_data_loader = DataLoader(train_data, batch_size=batch_size)
dev_data_loader = DataLoader(dev_data, batch_size=batch_size)

Length train data: 14865
Length dev data: 1858
Length test data: 1838


In [54]:
class StockModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, input_size*2)
        self.fc2 = nn.Linear(input_size*2, input_size*2)
        self.fc3 = nn.Linear(input_size*2, 20)
        self.fc4 = nn.Linear(20, 10)
        self.fc5 = nn.Linear(10, 3)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        return F.log_softmax(self.fc5(x), dim=-1)
    
    def eval(self, x):
        with torch.no_grad():
            y_ = self.forward(x)
            print(y_)
            return y_.argmax().item()

In [55]:
def evaluate_model(dev_data_loader, model, loss_fn):
    losses = []
    n_correct = 0
    with torch.no_grad():
        for x, y in dev_data_loader:
            y = y.long()
            pred = model(x)
            loss = loss_fn(pred, y)
            losses.append(loss.item())
            n_correct += torch.sum(pred.argmax(dim=1) == y).item()
            
        dev_avg_loss = sum(losses)/len(losses)    
        dev_acc = n_correct/len(dev_data)
    return dev_avg_loss, dev_acc

In [56]:
def train_model(model, train_data_loader, dev_data_loader, loss_fn, optimizer, epochrange, batchsize):
    for epoch in range(epochrange):
        losses = []
        n_correct = 0
        for x, y in train_data_loader:
            y = y.long()
            pred = model(x)
            
            loss = loss_fn(pred, y)
        
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()    
            
            n_correct += torch.sum(pred.argmax(dim=1) == y).item()
            #n_correct += torch.sum((pred > 0.5) == y.bool()).item()
            
        # Compute accuracy and loss in the entire training set
        train_avg_loss = sum(losses)/len(losses)    
        
        dev_avg_loss, dec_acc = evaluate_model(dev_data_loader, model, loss_fn)
        
        train_acc = n_correct/len(train_data)
        
        # Display metrics
        display_str = 'Epoch {} '
        display_str += '\tLoss (train): {:.3f} '
        display_str += '\tAcc (train): {:.3f}% '
        display_str += '\tLoss (dev): {:.3f}'
        display_str += '\tAcc (dev): {:.3f}%'
        print(display_str.format(epoch, train_avg_loss, train_acc*100, dev_avg_loss, dec_acc*100))

In [57]:
model = StockModel(input_size*2)
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00005)
train_model(model, train_data_loader, dev_data_loader, loss_fn, optimizer, nbr_epochs, batch_size)

Epoch 0 	Loss (train): 1.198 	Acc (train): 43.545% 	Loss (dev): 1.068	Acc (dev): 46.179%
Epoch 1 	Loss (train): 1.083 	Acc (train): 45.510% 	Loss (dev): 1.059	Acc (dev): 47.417%
Epoch 2 	Loss (train): 1.069 	Acc (train): 46.364% 	Loss (dev): 1.058	Acc (dev): 47.955%
Epoch 3 	Loss (train): 1.058 	Acc (train): 46.667% 	Loss (dev): 1.056	Acc (dev): 48.116%
Epoch 4 	Loss (train): 1.053 	Acc (train): 46.862% 	Loss (dev): 1.057	Acc (dev): 48.278%
Epoch 5 	Loss (train): 1.051 	Acc (train): 47.077% 	Loss (dev): 1.052	Acc (dev): 47.901%
Epoch 6 	Loss (train): 1.045 	Acc (train): 47.265% 	Loss (dev): 1.057	Acc (dev): 47.524%
Epoch 7 	Loss (train): 1.043 	Acc (train): 47.386% 	Loss (dev): 1.057	Acc (dev): 48.062%
Epoch 8 	Loss (train): 1.041 	Acc (train): 47.420% 	Loss (dev): 1.064	Acc (dev): 48.332%
Epoch 9 	Loss (train): 1.038 	Acc (train): 47.582% 	Loss (dev): 1.068	Acc (dev): 48.116%


In [79]:
torch.save(model, "../python-docker/models/Ericsson_A.model")