In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import sys
import json
from infer_functions import *


In [81]:
# Re-import the helper module(s) from disk and re-export their public names
# into the notebook namespace, so edits to the .py file are picked up without
# restarting the kernel.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry makes the following import execute the file again.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # Build the full mapping first, then publish it in one step.
    globals().update(
        {
            public_name: getattr(module, public_name)
            for public_name in dir(module)
            if not public_name.startswith('_')
        }
    )

Data formation

In [49]:
# Build the "static" dataset with the project helper `create_tensor_dataset`.
# NOTE(review): the meaning of the positional `4` and of `limit` / `averages`
# is defined by that helper, which is not visible here - confirm there.
static_tensor_dataset = create_tensor_dataset(
    "static",
    4,
    limit=100,
    categories=0,
    averages=True,
)

In [44]:
# Build the "dynamic" dataset with the project helper `create_tensor_dataset`.
# NOTE(review): `use_profile` / `use_multiples` semantics live in that helper,
# which is not visible here - confirm there.
dynamic_tensor_dataset = create_tensor_dataset(
    "dynamic",
    4,
    limit=100,
    categories=0,
    averages=True,
    use_profile=True,
    use_multiples=False,
)

In [72]:
# Sanity check: dataset size followed by (at most) the first 100 samples.
print(len(dynamic_tensor_dataset))
# zip() against a bounded range stops after 100 samples, so the rest of the
# dataset is never materialised just to be sliced away.
for _sample_index, thing in zip(range(100), dynamic_tensor_dataset):
    print(thing)

1977
((tensor([[0.2252, 0.1755, 0.1971, 0.0896, 0.1821, 0.1689, 0.2123, 0.3795, 0.2637,
         0.2047, 0.2331, 0.1043, 0.2182, 0.2030, 0.2500, 0.4400],
        [0.3063, 0.2748, 0.3243, 0.2011, 0.3614, 0.3632, 0.3194, 0.3509, 0.2384,
         0.2131, 0.2549, 0.1557, 0.2878, 0.2900, 0.2500, 0.2704],
        [0.2359, 0.2748, 0.2610, 0.4673, 0.2596, 0.2721, 0.2442, 0.0723, 0.2401,
         0.2786, 0.2683, 0.4730, 0.2704, 0.2842, 0.2500, 0.0728],
        [0.2326, 0.2748, 0.2175, 0.2420, 0.1970, 0.1958, 0.2241, 0.1973, 0.2579,
         0.3036, 0.2437, 0.2670, 0.2235, 0.2228, 0.2500, 0.2167]],
       dtype=torch.float64), tensor([0.0000e+00, 1.0000e+00, 4.0000e+00, 1.2730e+11], dtype=torch.float64)), tensor(0.2713, dtype=torch.float64))
((tensor([[0.2938, 0.2500, 0.2771, 0.1835, 0.2953, 0.3338, 0.2922, 0.3992, 0.2498,
         0.2093, 0.2385, 0.1489, 0.2571, 0.2867, 0.2500, 0.3520],
        [0.2263, 0.2500, 0.2230, 0.4264, 0.2122, 0.2501, 0.2234, 0.0822, 0.2516,
         0.2737, 0.2510, 0.4

In [50]:
# Load the measure definitions; the number of "static" / "dynamic" entries is
# later passed to the models as the LSTM input size.
# Forward slashes keep the path portable (the raw backslash form only resolves
# on Windows) and match the checkpoint paths used by the training cells.
with open("../other_pickle/measures.json", "r") as file:
    measures = json.load(file)

static_size = len(measures["static"])
dynamic_size = len(measures["dynamic"])

# Prefer the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

STATIC_BATCH_SIZE = 20
DYNAMIC_BATCH_SIZE = 5

# Static batches are shuffled; dynamic batches keep the dataset order.
static_train_dataloader = DataLoader(static_tensor_dataset, batch_size=STATIC_BATCH_SIZE, shuffle=True)
dynamic_train_dataloader = DataLoader(dynamic_tensor_dataset, batch_size=DYNAMIC_BATCH_SIZE, shuffle=False)

cpu


Static model

In [21]:
# Hidden-state width and number of stacked layers passed to StaticLSTM below.
HIDDEN_SIZE, LAYERS = 200, 8

class StaticLSTM(nn.Module):
    """Stacked LSTM whose last-timestep output is mapped through a linear head.

    Args:
        hidden_dim: Size of the hidden state of every LSTM layer.
        batch_size: Default batch size used by ``hidden_reset`` when building
            the zeroed state.
        layers: Number of stacked LSTM layers.
        input: Number of features per timestep (the LSTM ``input_size``).
        categories: Width of the output layer; any value <= 0 selects a
            single-output head.

    Attributes:
        hidden: ``(h, c)`` tuple. After a forward pass it holds the LSTM's
            final state, detached from the autograd graph. It is kept for
            inspection only: ``forward`` never feeds it back into the LSTM, so
            every call starts from the LSTM's default zero state.
    """

    def __init__(self, hidden_dim, batch_size, layers, input, categories=0):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.layers_num = layers

        # input is all the embedding vectors plus all the other variables
        self.lstm = nn.LSTM(input, hidden_dim, num_layers=layers, batch_first=True)
        self.hidden = self._zero_state(batch_size)

        # Squeeze the hidden features into the requested number of outputs
        # (a single output when no categories are requested).
        self.hidden2label = nn.Linear(hidden_dim, categories if categories > 0 else 1)

    def _zero_state(self, batch_size):
        """Return a zeroed ``(h, c)`` pair matching the LSTM's dtype and device."""
        reference = next(self.lstm.parameters())
        shape = (self.layers_num, batch_size, self.hidden_dim)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))

    def forward(self, batch_tensor):
        """Run the LSTM over a batch-first tensor and project its last timestep.

        Returns:
            Tensor with one row per batch element and one column per output.
        """
        lstm_out, final_state = self.lstm(batch_tensor)
        # Detach before storing so the previous batch's autograd graph is not
        # kept alive through this attribute.
        self.hidden = tuple(state.detach() for state in final_state)
        last_timestep_output = lstm_out[:, -1, :]
        return self.hidden2label(last_timestep_output)

    def hidden_reset(self):
        """Reset the stored hidden and cell state to zeros (default batch size)."""
        self.hidden = self._zero_state(self.batch_size)

    def batch_reset(self, batch_size):
        """Reset the stored hidden and cell state to zeros for ``batch_size`` rows."""
        self.hidden = self._zero_state(batch_size)

    def flatten_parameters(self):
        """Delegate to the wrapped LSTM's ``flatten_parameters``."""
        self.lstm.flatten_parameters()

# The static dataset above is built with categories=0 and the training cell
# uses a regression loss (L1), so the model needs the single-output head.
# NOTE(review): the previous value was categories=6, which yields a 6-wide
# output; restore it only together with categorical labels and a matching loss.
static_model = StaticLSTM(HIDDEN_SIZE, STATIC_BATCH_SIZE, LAYERS, static_size, categories=0)
# Cast parameters to float64.
static_model = static_model.double()

Static grid search

In [None]:
# Hyper-parameter search for the static model.
# `split_tensor_dataset` and `grid_search` are not defined in this notebook;
# presumably they come from the `infer_functions` star import - confirm there
# what the split returns and which defaults (trials, categories) apply.
x_train, y_train = split_tensor_dataset(static_tensor_dataset)
grid_search(StaticLSTM, x_train, y_train)

In [None]:
# Train the static model with an L1 loss and checkpoint the weights whenever
# an epoch beats the best average loss seen so far.
loss_function = nn.L1Loss()
loss_scores = [float('inf')]
learning_rate = 0.01
epochs = 2
static_model = static_model.to(device)
optimizer = optim.Adam(static_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    # The evaluation cell switches the model to eval mode; switch back in case
    # this cell is re-run afterwards.
    static_model.train()
    static_model.hidden_reset()
    epoch_loss = 0
    for i, batch in enumerate(static_train_dataloader):
        # The second element of the input pair is not used by this model.
        # (Named `features` rather than `input` to avoid shadowing the builtin.)
        (features, _unused_features), label = batch
        if features.shape[0] != STATIC_BATCH_SIZE:
            static_model.batch_reset(features.shape[0])
            print("Reset triggered due to batch size mismatch")

        features, label = features.to(device), label.to(device)
        optimizer.zero_grad()
        # Drop only the trailing singleton output dimension; a bare squeeze()
        # would also collapse the batch dimension of a size-1 final batch.
        output = static_model(features).squeeze(-1)

        # Ensure output and label shapes are compatible for the loss function
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 50 == 49:
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(static_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Compare with the best epoch so far (not merely the previous one), so a
    # partial recovery after a bad epoch cannot overwrite a better checkpoint.
    if average_epoch_loss < min(loss_scores):
        torch.save(static_model.state_dict(), "../other_pickle/Static_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

In [None]:
# Reload the saved checkpoint and print predictions for up to 10 batches.
# map_location lets a checkpoint saved on another device load on this one.
static_model.load_state_dict(torch.load("../other_pickle/Static_Model.pth", map_location=device))
static_model = static_model.to(device)
static_model.eval()
# Inference only: no autograd graph is needed.
with torch.no_grad():
    # zip() against a bounded range stops after 10 batches instead of
    # iterating the whole dataloader and slicing afterwards.
    for _batch_index, batch in zip(range(10), static_train_dataloader):
        (features, _unused_features), label = batch
        output = static_model(features.to(device)).squeeze(-1)
        print(output)

Dynamic model

In [51]:
# Hidden-state width and number of stacked layers passed to DynamicLSTM below.
HIDDEN_SIZE, LAYERS = 200, 8

class DynamicLSTM(nn.Module):
    """Stacked LSTM whose last-timestep output is mapped through a linear head.

    Args:
        hidden_dim: Size of the hidden state of every LSTM layer.
        batch_size: Default batch size used by ``hidden_reset`` when building
            the zeroed state.
        layers: Number of stacked LSTM layers.
        input: Number of features per timestep (the LSTM ``input_size``).
        categories: Width of the output layer; any value <= 0 selects a
            single-output head.

    Attributes:
        hidden: ``(h, c)`` tuple. After a forward pass it holds the LSTM's
            final state, detached from the autograd graph. It is kept for
            inspection only: ``forward`` never feeds it back into the LSTM, so
            every call starts from the LSTM's default zero state.
    """

    def __init__(self, hidden_dim, batch_size, layers, input, categories=0):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.layers_num = layers

        # input is all the embedding vectors plus all the other variables
        self.lstm = nn.LSTM(input, hidden_dim, num_layers=layers, batch_first=True)
        self.hidden = self._zero_state(batch_size)

        # Squeeze the hidden features into the requested number of outputs
        # (a single output when no categories are requested).
        self.hidden2label = nn.Linear(hidden_dim, categories if categories > 0 else 1)

    def _zero_state(self, batch_size):
        """Return a zeroed ``(h, c)`` pair matching the LSTM's dtype and device."""
        reference = next(self.lstm.parameters())
        shape = (self.layers_num, batch_size, self.hidden_dim)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))

    def forward(self, batch_tensor):
        """Run the LSTM over a batch-first tensor and project its last timestep.

        Returns:
            Tensor with one row per batch element and one column per output.
        """
        lstm_out, final_state = self.lstm(batch_tensor)
        # Detach before storing so the previous batch's autograd graph is not
        # kept alive through this attribute.
        self.hidden = tuple(state.detach() for state in final_state)
        last_timestep_output = lstm_out[:, -1, :]
        return self.hidden2label(last_timestep_output)

    def hidden_reset(self):
        """Reset the stored hidden and cell state to zeros (default batch size)."""
        self.hidden = self._zero_state(self.batch_size)

    def batch_reset(self, batch_size):
        """Reset the stored hidden and cell state to zeros for ``batch_size`` rows."""
        self.hidden = self._zero_state(batch_size)

    def flatten_parameters(self):
        """Delegate to the wrapped LSTM's ``flatten_parameters``."""
        self.lstm.flatten_parameters()

# Single-output head (categories=0), with parameters cast to float64.
dynamic_model = DynamicLSTM(
    HIDDEN_SIZE,
    DYNAMIC_BATCH_SIZE,
    LAYERS,
    dynamic_size,
    categories=0,
).double()

Dynamic grid search

In [83]:
# Hyper-parameter search for the dynamic model (4 trials, single-output head).
# `split_tensor_dataset` and `grid_search` are not defined in this notebook;
# presumably they come from the `infer_functions` star import.
# NOTE(review): the logged output of this cell reports the trial with the
# LARGEST value as "best"; if the objective is a loss, verify the search
# direction inside `grid_search`.
x_train, y_train = split_tensor_dataset(dynamic_tensor_dataset)
grid_search(DynamicLSTM, x_train, y_train, n_trials=4, categories=0)

[I 2024-06-04 20:06:37,755] A new study created in memory with name: no-name-a1dc585b-2418-40c3-b277-90d2863df4b3


[I 2024-06-04 20:06:48,447] Trial 0 finished with value: 0.09828905505854968 and parameters: {'hidden_dim': 132, 'layers': 7, 'batch_size': 16, 'lr': 0.0186934322468508, 'epochs': 1}. Best is trial 0 with value: 0.09828905505854968.
[I 2024-06-04 20:07:03,262] Trial 1 finished with value: 0.1302109209630663 and parameters: {'hidden_dim': 196, 'layers': 4, 'batch_size': 16, 'lr': 0.0478294250501504, 'epochs': 2}. Best is trial 1 with value: 0.1302109209630663.
[I 2024-06-04 20:07:08,327] Trial 2 finished with value: 0.09698954866825463 and parameters: {'hidden_dim': 168, 'layers': 2, 'batch_size': 32, 'lr': 0.048065380151088105, 'epochs': 3}. Best is trial 1 with value: 0.1302109209630663.
[W 2024-06-04 20:07:28,975] Trial 3 failed with parameters: {'hidden_dim': 188, 'layers': 6, 'batch_size': 16, 'lr': 0.0849610272822293, 'epochs': 2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\ficak\AppData\Local\Programs\Python\Python311\L

KeyboardInterrupt: 

In [None]:
# Train the dynamic model with a Huber loss and checkpoint the weights
# whenever an epoch beats the best average loss seen so far.
loss_function = nn.HuberLoss(delta=1.0)
loss_scores = [float('inf')]
learning_rate = 0.01
epochs = 1
dynamic_model = dynamic_model.to(device)
optimizer = optim.Adam(dynamic_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    # The evaluation cell switches the model to eval mode; switch back in case
    # this cell is re-run afterwards.
    dynamic_model.train()
    dynamic_model.hidden_reset()
    epoch_loss = 0
    for i, batch in enumerate(dynamic_train_dataloader):
        # The second element of the input pair is not used by this model.
        # (Named `features` rather than `input` to avoid shadowing the builtin.)
        (features, _unused_features), label = batch
        if features.shape[0] != DYNAMIC_BATCH_SIZE:
            dynamic_model.batch_reset(features.shape[0])
            print("Reset triggered due to batch size mismatch")

        features, label = features.to(device), label.to(device)
        optimizer.zero_grad()
        # Drop only the trailing singleton output dimension; a bare squeeze()
        # would also collapse the batch dimension of a size-1 final batch.
        output = dynamic_model(features).squeeze(-1)

        # Ensure output and label shapes are compatible for the loss function
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 10 == 9:
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(dynamic_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Compare with the best epoch so far (not merely the previous one), so a
    # partial recovery after a bad epoch cannot overwrite a better checkpoint.
    if average_epoch_loss < min(loss_scores):
        torch.save(dynamic_model.state_dict(), "../other_pickle/Dynamic_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

In [None]:
# Reload the saved checkpoint and print (prediction, label) pairs for up to
# 100 batches.
# map_location lets a checkpoint saved on another device load on this one.
dynamic_model.load_state_dict(torch.load("../other_pickle/Dynamic_Model.pth", map_location=device))
dynamic_model = dynamic_model.to(device)
dynamic_model.eval()
# Inference only: no autograd graph is needed.
with torch.no_grad():
    # zip() against a bounded range stops after 100 batches instead of
    # iterating the whole dataloader and slicing afterwards.
    for _batch_index, batch in zip(range(100), dynamic_train_dataloader):
        (features, _unused_features), label = batch
        output = dynamic_model(features.to(device)).squeeze(-1)
        print(output, label)

Indicator model