In [24]:
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch import optim
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import random
from sklearn.preprocessing import MinMaxScaler
np.set_printoptions(suppress=True)

In [3]:
data_ = pd.read_csv("combined.csv", sep=";")

In [71]:
ericsson_data = data_[data_["stock"]=="Ericsson_A"]
all_data = ericsson_data[["publication_time", "price", "volume"]].iloc[::-1].to_numpy()

In [72]:
def normalize_data(data, cols, scaler):
    for col in cols:
        data[:,col] = np.squeeze(scaler.fit_transform(data[:,col].reshape(-1, 1)))
    return data

In [73]:
scaler = MinMaxScaler(feature_range=(0, 1))
normalize_data(all_data,[1,2],scaler)

#Predict 5min into future, only if up or down
def preprocces(data, input_size, train_ratio, dev_ratio):
    xs = []
    ys = []
    
    for i in range(len(data)-input_size):
        x = data[i:i+input_size]
        
        xs.append(x[:,(1,2)].ravel())

        y_time = x[-1,0] + 5*60
        y_price = data[data[:,0] < y_time][-1][1]
        
        if(y_price < x[-1,1]):
            y = 0
        elif(y_price == x[-1,1]):
            y = 1
        else:
            y = 2
        ys.append(y)
    
    combined = list(zip(xs, ys))
    random.shuffle(combined)
    xs[:], ys[:] = zip(*combined)
    
    xs = torch.tensor(xs, dtype=torch.float32)
    ys = torch.tensor(ys, dtype=torch.float32)
    
    train_len = round(len(data)*train_ratio)
    dev_len = round(len(data)*dev_ratio)
    test_len = len(data) - train_len - dev_len
    
    train_dataset = TensorDataset(xs[:train_len], ys[:train_len])
    dev_dataset = TensorDataset(xs[train_len:train_len+dev_len], ys[train_len:train_len+dev_len])
    test_dataset = TensorDataset(xs[train_len+dev_len:], ys[train_len+dev_len:])
    
    
    return train_dataset, dev_dataset, test_dataset

In [91]:
input_size=50
batch_size=8
nbr_epochs=20

In [92]:
train_data, dev_data, test_data = preprocces(all_data, input_size, 0.8, 0.1)

print("Length train data: " + str(len(train_data)))
print("Length dev data: " + str(len(dev_data)))
print("Length test data: " + str(len(test_data)))

train_data_loader = DataLoader(train_data, batch_size=batch_size)
dev_data_loader = DataLoader(dev_data, batch_size=batch_size)

Length train data: 14865
Length dev data: 1858
Length test data: 1808


In [93]:
class StockModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, input_size*2)
        self.fc2 = nn.Linear(input_size*2, input_size*2)
        self.fc3 = nn.Linear(input_size*2, 20)
        self.fc4 = nn.Linear(20, 10)
        self.fc5 = nn.Linear(10, 3)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        return F.log_softmax(self.fc5(x), dim=-1)
    
    def eval(self, x):
        with torch.no_grad():
            y_ = self.forward(x)
            print(y_)
            return y_.argmax().item()

In [94]:
def evaluate_model(dev_data_loader, model, loss_fn):
    losses = []
    n_correct = 0
    with torch.no_grad():
        for x, y in dev_data_loader:
            y = y.long()
            pred = model(x)
            loss = loss_fn(pred, y)
            losses.append(loss.item())
            n_correct += torch.sum(pred.argmax(dim=1) == y).item()
            
        dev_avg_loss = sum(losses)/len(losses)    
        dev_acc = n_correct/len(dev_data)
    return dev_avg_loss, dev_acc

In [1]:
def train_model(model, train_data_loader, dev_data_loader, loss_fn, optimizer, epochrange, batchsize):
    for epoch in range(epochrange):
        losses = []
        n_correct = 0
        for x, y in train_data_loader:
            y = y.long()
            pred = model(x)
            
            loss = loss_fn(pred, y)
        
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()    
            
            n_correct += torch.sum(pred.argmax(dim=1) == y).item()
            #n_correct += torch.sum((pred > 0.5) == y.bool()).item()
            
        # Compute accuracy and loss in the entire training set
        train_avg_loss = sum(losses)/len(losses)    
        
        dev_avg_loss, dec_acc = evaluate_model(dev_data_loader, model, loss_fn)
        
        train_acc = n_correct/len(train_data)
        
        # Display metrics
        display_str = 'Epoch {} '
        display_str += '\tLoss (train): {:.3f} '
        display_str += '\tAcc (train): {:.3f}% '
        display_str += '\tLoss (dev): {:.3f}'
        display_str += '\tAcc (dev): {:.3f}%'
        print(display_str.format(epoch, train_avg_loss, train_acc*100, dev_avg_loss, dec_acc*100))

In [2]:
model = StockModel(input_size*2)
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
train_model(model, train_data_loader, dev_data_loader, loss_fn, optimizer, nbr_epochs, batch_size)

NameError: name 'StockModel' is not defined

In [99]:
torch.save(model, "../python-docker/models/Ericsson_A_Norm.model")