In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import sys
from infer_functions import *


In [17]:
# Force a fresh import of the listed helper modules so that edits made on disk are
# picked up without restarting the kernel, then re-export their public names here.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry (if any) makes the import below re-execute the module.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # Copy every attribute whose name has no leading underscore into this namespace.
    globals().update(
        (attr, getattr(module, attr))
        for attr in dir(module)
        if attr[:1] != '_'
    )

Data formation

In [21]:
# Build the dataset that the following cells inspect.
# NOTE(review): create_tensor_dataset is not defined in this notebook; presumably it
# comes from the star import of infer_functions -- confirm. The meaning of "static",
# 4 and limit=10 cannot be determined from this cell -- TODO document them here.
tensor_dataset = create_tensor_dataset("static", 4, limit=10)

Catting AAPL.csv
Catting AMZN.csv
Catting LLY.csv
Catting MSFT.csv
Catting NVDA.csv
Catting TSLA.csv


In [23]:
# Preview the first 10 samples.
# Pairing the dataset with range(10) ends the loop after ten items, so the rest of the
# dataset is never iterated or copied into a list just to be sliced away. range() is
# listed first on purpose: zip stops as soon as it is exhausted, without pulling an
# eleventh sample from the dataset.
for i, thing in zip(range(10), tensor_dataset):
    print(thing)

((tensor([[ 1.2210,  0.5435, 14.1786,  4.3501],
        [ 0.8461,  0.0850,  8.3076,  3.3166],
        [ 1.8592,  9.8285,  1.1807,  0.1624],
        [ 5.8709,  1.6466,  0.7817, 13.4769],
        [ 4.1467,  0.9799,  0.1090,  7.9394],
        [ 3.7399,  1.8592,  9.3302,  1.0426],
        [ 0.1160,  5.5375,  1.4010,  0.8101],
        [10.7563,  3.1795,  0.6423,  0.0979],
        [ 5.5810,  2.9437,  1.3449,  7.5768],
        [ 0.9569,  0.0000,  5.1753,  1.0377],
        [ 0.6079,  9.6039,  3.4001,  0.6616],
        [ 0.0818,  3.8718,  2.0255,  0.7869],
        [ 6.2037,  0.7700,  0.0000,  5.7321]], dtype=torch.float64), tensor([0., 1., 4.], dtype=torch.float64)), tensor(29.1300, dtype=torch.float64))
((tensor([[ 1.2210,  0.5435, 14.1786,  4.3501],
        [ 0.8461,  0.0850,  8.3076,  3.3166],
        [ 1.8592,  9.8285,  1.1807,  0.1624],
        [ 5.8709,  1.6466,  0.7817, 13.4769],
        [ 4.1467,  0.9799,  0.1090,  7.9394],
        [ 3.7399,  1.8592,  9.3302,  1.0426],
        [ 0.1160,

Static model

In [None]:
# class SlidingWindowDataset(Dataset):
#     def __init__(self, dataset, sequence_length):
#         self.dataset = dataset
#         self.sequence_length = sequence_length

#     def __len__(self):
#         return len(self.dataset) - self.sequence_length

#     def __getitem__(self, idx):


# class SlidingWindowDatasetTEST(Dataset):
#     def __init__(self, dataset, sequence_length):
#         self.dataset = dataset
#         self.sequence_length = sequence_length

#     def __len__(self):
#         return len(self.dataset) - self.sequence_length

#     def __getitem__(self, idx):
#         return self.dataset[idx:idx+self.sequence_length]


# --- Hyperparameters for the static model -------------------------------------------
BATCH_SIZE = 400
HIDDEN_SIZE = 200
EMBED_SIZE = 30
LAYERS = 8
SEQUENCE_LENGTH = 20


# Use the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Moving the tensors onto `device` is currently disabled:
# train_tensor = train_tensor.to(device)
# train_label_tensor = train_label_tensor.to(device)
# test_tensor = test_tensor.to(device)
# validate_tensor = validate_tensor.to(device)
# validate_label_tensor = validate_label_tensor.to(device)

# NOTE(review): the *_tensor inputs and the SlidingWindowDataset* classes are not
# defined by any active code visible in this notebook -- confirm where they come from.

# Training split: (features, labels) pairs wrapped by SlidingWindowDataset.
train_dataset = SlidingWindowDataset(
    torch.utils.data.TensorDataset(train_tensor, train_label_tensor),
    SEQUENCE_LENGTH,
)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

# Test split: features only, wrapped by the label-free TEST variant.
###problem here###
test_dataset = SlidingWindowDatasetTEST(
    torch.utils.data.TensorDataset(test_tensor),
    SEQUENCE_LENGTH,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

# Validation split: built the same way as the training split.
validate_dataset = SlidingWindowDataset(
    torch.utils.data.TensorDataset(validate_tensor, validate_label_tensor),
    SEQUENCE_LENGTH,
)
validate_dataloader = torch.utils.data.DataLoader(
    validate_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
)


class LSTMSales(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence.

    The input sequence is fed to the LSTM as-is; the output of the last timestep is
    projected to a single scalar. The embedding tables created in ``__init__`` are
    not used by ``forward`` at the moment (the lookups there are commented out).

    Args:
        embedding_dim: Width of each embedding table.
        hidden_dim: Size of the LSTM hidden state.
        batch_size: Batch size used for the stored zero state (see ``hidden_reset``).
        layers: Number of stacked LSTM layers.
        input_size: Number of features per timestep expected by the LSTM. Defaults
            to 120, the value that was previously hard-coded.
    """

    def __init__(self, embedding_dim, hidden_dim, batch_size, layers, input_size=120):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.layers_num = layers

        # Embedding layers (currently unused by forward(); kept so that existing
        # checkpoints and the parameter initialisation order stay unchanged).
        self.store_number = nn.Embedding(54, embedding_dim)
        self.family = nn.Embedding(33, embedding_dim)
        self.store_type = nn.Embedding(5, embedding_dim)
        self.cluster = nn.Embedding(17, embedding_dim)
        self.holiday = nn.Embedding(5, embedding_dim)

        # batch_first=True: inputs are (batch, sequence, input_size).
        self.lstm = nn.LSTM(input_size, hidden_dim, num_layers=layers, batch_first=True)
        self.hidden = self._zero_state(batch_size)

        # normalize outputs
        # self.bn1 = nn.BatchNorm1d(out_features)

        self.hidden2sales = nn.Linear(hidden_dim, 1)

    def _zero_state(self, batch_size):
        """Return an all-zero (h, c) pair for ``batch_size`` sequences.

        The tensors are created on the device and with the dtype of the LSTM weights
        so they stay compatible with the module after ``.to(...)`` / ``.double()``.
        """
        reference = self.lstm.weight_ih_l0
        shape = (self.layers_num, batch_size, self.hidden_dim)
        return (
            torch.zeros(shape, device=reference.device, dtype=reference.dtype),
            torch.zeros(shape, device=reference.device, dtype=reference.dtype),
        )

    def forward(self, batch_tensor):
        """Predict one value per sequence.

        Args:
            batch_tensor: Tensor of shape (batch, sequence, input_size).

        Returns:
            Tensor of shape (batch, 1) computed from the last timestep's LSTM output.
        """
        # Original author's note: trying to predict the log of (the sales + 1).
        # Disabled embedding path, kept for reference:
        # store_number =self.store_number(batch_tensor[:,:,0].long())
        # family = self.family(batch_tensor[:,:,1].long())
        # store_type = self.store_type(batch_tensor[:,:,3].long())
        # cluster = self.cluster(batch_tensor[:,:,4].long())
        # holiday = self.holiday(batch_tensor[:,:,7].long())
        # batch_tensor = batch_tensor[:,:,[2,5,6,8,9,10]]
        # input = torch.cat([batch_tensor,store_number,family,store_type,cluster,holiday],dim=2)

        # No initial state is passed, so the LSTM starts every call from zeros.
        lstm_out, final_state = self.lstm(batch_tensor)
        # Keep the final state for inspection only; detaching it prevents the stored
        # tensors from keeping this batch's autograd graph alive until the next call.
        self.hidden = tuple(state.detach() for state in final_state)

        last_timestep_output = lstm_out[:, -1, :]
        return self.hidden2sales(last_timestep_output)

    def hidden_reset(self):
        """Reset the stored hidden and cell state to zeros for the configured batch size."""
        self.hidden = self._zero_state(self.batch_size)

    def batch_reset(self, batch_size):
        """Reset the stored hidden and cell state to zeros for a different batch size."""
        self.hidden = self._zero_state(batch_size)

    def flatten_parameters(self):
        """Delegate to ``nn.LSTM.flatten_parameters`` on the wrapped LSTM."""
        self.lstm.flatten_parameters()

Dynamic model

Indicator model