In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import sys
import json
from infer_functions import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Force a fresh import of the listed helper modules so that edits made on disk
# are picked up without restarting the kernel, then re-export their public
# attributes into the notebook namespace.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry makes the import below re-execute the module.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # Copy every attribute whose name does not start with an underscore.
    globals().update(
        (attr, getattr(module, attr))
        for attr in dir(module)
        if not attr.startswith('_')
    )

Data preparation

In [49]:
# Build the static and dynamic tensor datasets with the same base settings;
# the dynamic one additionally switches on `use_profile` and `use_multiples`.
# NOTE(review): the meaning of the positional `4` is defined by
# create_tensor_dataset in infer_functions — confirm before changing it.
dataset_kwargs = dict(limit=100, categories=0, averages=True)

static_tensor_dataset = create_tensor_dataset("static", 4, **dataset_kwargs)
dynamic_tensor_dataset = create_tensor_dataset(
    "dynamic",
    4,
    use_profile=True,
    use_multiples=True,
    **dataset_kwargs,
)


tensor(False) tensor(False) tensor(False)


In [73]:
# Peek at the first sample of the dynamic dataset and print the shape of its
# first input tensor (the recorded run printed torch.Size([4, 16])).
first_sample = next(iter(dynamic_tensor_dataset), None)
if first_sample is not None:
    inputs, targets = first_sample
    print(inputs[0].shape)

torch.Size([4, 16])


In [4]:
# Load the measure lists. Forward slashes are used so the path resolves on every
# platform (the previous backslash form only worked on Windows) and matches the
# checkpoint paths used by the training cells.
MEASURES_PATH = "../other_pickle/measures.json"
with open(MEASURES_PATH, "r") as file:
    measures = json.load(file)

# Number of entries listed under each model family in measures.json.
static_size = len(measures["static"])
dynamic_size = len(measures["dynamic"])

# Prefer the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

STATIC_BATCH_SIZE = 20
DYNAMIC_BATCH_SIZE = 5

# The static loader must exist because the static training and evaluation cells
# iterate over `static_train_dataloader`; leaving it commented out makes them
# fail with NameError on a fresh kernel.
static_train_dataloader = torch.utils.data.DataLoader(
    static_tensor_dataset, batch_size=STATIC_BATCH_SIZE, shuffle=True
)
dynamic_train_dataloader = torch.utils.data.DataLoader(
    dynamic_tensor_dataset, batch_size=DYNAMIC_BATCH_SIZE, shuffle=False
)

cpu


Static model

In [21]:
# Hyperparameters for the hand-configured StaticLSTM instance created after the
# class definition: hidden-state width and number of stacked LSTM layers.
HIDDEN_SIZE = 200
LAYERS = 8

class StaticLSTM(nn.Module):
    """Stacked LSTM whose last time-step output is fed to a linear head.

    Args:
        hidden_dim: Size of the LSTM hidden state.
        batch_size: Default batch size used when (re)building ``self.hidden``.
        layers: Number of stacked LSTM layers.
        input: Number of features per time step (the name shadows the builtin
            but is kept so existing keyword callers continue to work).
        categories: When greater than 0 the head emits that many values per
            sample; otherwise it emits a single value.

    Attributes:
        hidden: ``(h, c)`` tuple. It holds zeros after construction or a reset,
            and the detached final LSTM state after a forward pass. It is never
            passed back into the LSTM, so every forward call starts from the
            LSTM's default zero state.
    """

    def __init__(self, hidden_dim, batch_size, layers, input, categories=0):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.layers_num = layers

        # `input` is the per-time-step feature width; batch_first=True means the
        # LSTM consumes tensors laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input, hidden_dim, num_layers=layers, batch_first=True)

        # Project the hidden vector to `categories` outputs, or to one value.
        out_features = categories if categories > 0 else 1
        self.hidden2label = nn.Linear(hidden_dim, out_features)

        self.hidden = self._zero_state(batch_size)

    def _zero_state(self, batch_size):
        """Return a zero ``(h, c)`` pair matching the LSTM's dtype and device."""
        reference = self.lstm.weight_ih_l0
        shape = (self.layers_num, batch_size, self.hidden_dim)
        return (
            torch.zeros(shape, dtype=reference.dtype, device=reference.device),
            torch.zeros(shape, dtype=reference.dtype, device=reference.device),
        )

    def forward(self, batch_tensor):
        """Run the LSTM over ``batch_tensor`` and project its last time step.

        Returns:
            Tensor with one row per batch element and ``categories`` columns
            (or one column when ``categories`` is 0).
        """
        lstm_out, (h_n, c_n) = self.lstm(batch_tensor)
        # Keep only the values of the final state. Storing the graph-attached
        # tensors would keep the autograd graph alive and would make
        # copy.deepcopy(model) fail after a forward pass.
        self.hidden = (h_n.detach(), c_n.detach())
        last_timestep_output = lstm_out[:, -1, :]
        return self.hidden2label(last_timestep_output)

    def hidden_reset(self):
        """Reset the stored state to zeros using the constructor batch size."""
        self.hidden = self._zero_state(self.batch_size)

    def batch_reset(self, batch_size):
        """Reset the stored state to zeros for an explicit ``batch_size``."""
        self.hidden = self._zero_state(batch_size)

    def flatten_parameters(self):
        """Delegate to ``nn.LSTM.flatten_parameters`` on the wrapped LSTM."""
        self.lstm.flatten_parameters()

# Instantiate the static model and convert its parameters to float64.
# NOTE(review): categories=6 gives the head six outputs, while the datasets in
# this notebook are built with categories=0 and the static training cell uses
# nn.L1Loss — confirm that the six-way head is intended.
static_model = StaticLSTM(
    HIDDEN_SIZE,
    STATIC_BATCH_SIZE,
    LAYERS,
    static_size,
    categories=6,
).double()

Static grid search

In [None]:
# Split the static dataset (presumably into inputs and targets — confirm against
# split_tensor_dataset) and run the hyperparameter search for StaticLSTM with
# grid_search's default settings.
# NOTE(review): the return value is not stored here, whereas the dynamic search
# keeps its result in `best_parameters` — confirm whether it should be captured.
x_train, y_train = split_tensor_dataset(static_tensor_dataset)
grid_search(StaticLSTM, x_train, y_train)

In [None]:
# Train the static model and checkpoint it whenever an epoch reaches a new best
# average training loss.
loss_function = nn.L1Loss()
# The infinity sentinel guarantees that the first epoch counts as an improvement.
loss_scores = [float('inf')]
learning_rate = 0.01
epochs = 2
static_model = static_model.to(device)
# The evaluation cell switches the model to eval(); re-enable training mode so
# this cell can be re-run afterwards.
static_model.train()
optimizer = optim.Adam(static_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    static_model.hidden_reset()
    epoch_loss = 0
    for i, batch in enumerate(static_train_dataloader):
        # Each batch is ((model_input, unused_input), label); only the first
        # input tensor is fed to the model.
        (features, unused_features), label = batch
        if features.shape[0] != STATIC_BATCH_SIZE:
            # A batch of a different size (e.g. the last one) was received.
            static_model.batch_reset(features.shape[0])
            print("Reset triggered due to batch size mismatch")

        features, label = features.to(device), label.to(device)
        optimizer.zero_grad()
        output = static_model(features).squeeze()

        # Surface shape mismatches, which would otherwise broadcast in the loss.
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 50 == 49:
            # Running mean of the loss over the batches seen so far this epoch.
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(static_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Compare against the best loss seen so far, not just the previous epoch;
    # otherwise a partial recovery after a bad epoch would overwrite a better
    # checkpoint.
    if average_epoch_loss < min(loss_scores):
        torch.save(static_model.state_dict(), "../other_pickle/Static_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

In [None]:
# Reload the saved checkpoint and print predictions for the first 10 batches.
# map_location lets a checkpoint saved on one device load on whichever device
# the model currently lives on.
model_device = next(static_model.parameters()).device
static_model.load_state_dict(
    torch.load("../other_pickle/Static_Model.pth", map_location=model_device)
)
static_model.eval()

# Inference only: skip autograd bookkeeping, and stop after 10 batches instead
# of materializing the whole DataLoader first.
with torch.no_grad():
    for i, batch in enumerate(static_train_dataloader):
        if i >= 10:
            break
        (features, unused_features), label = batch
        # Inputs must be on the same device as the model.
        output = static_model(features.to(model_device)).squeeze()
        # loss = loss_function(output, label)
        print(output)

Dynamic grid search

In [62]:
# Force a fresh import of the listed helper modules so that edits made on disk
# are picked up without restarting the kernel, then re-export their public
# attributes into the notebook namespace.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry makes the import below re-execute the module.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # Copy every attribute whose name does not start with an underscore.
    globals().update(
        (attr, getattr(module, attr))
        for attr in dir(module)
        if not attr.startswith('_')
    )

In [5]:
# Split the dynamic dataset and run a 4-trial hyperparameter search for
# DynamicLSTM. This overwrites the x_train / y_train produced by the static
# search cell. The result is kept in `best_parameters`, which the dynamic
# training cell indexes by "hidden_dim", "batch_size", "layers", "lr" and
# "epochs".
x_train, y_train = split_tensor_dataset(dynamic_tensor_dataset)
best_parameters = grid_search(DynamicLSTM, x_train, y_train, n_trials=4, categories=0)

[I 2024-06-09 10:09:37,695] A new study created in memory with name: no-name-8d8a7744-7b1c-4aef-bf19-67483b5b2803
[I 2024-06-09 10:09:45,016] Trial 0 finished with value: 0.09690393981455006 and parameters: {'hidden_dim': 279, 'layers': 2, 'batch_size': 16, 'lr': 0.003951547225549357, 'epochs': 1}. Best is trial 0 with value: 0.09690393981455006.
[I 2024-06-09 10:09:47,856] Trial 1 finished with value: 0.09667784677979192 and parameters: {'hidden_dim': 109, 'layers': 2, 'batch_size': 32, 'lr': 0.0028993792081484773, 'epochs': 2}. Best is trial 1 with value: 0.09667784677979192.
[I 2024-06-09 10:10:01,571] Trial 2 finished with value: 0.09688430104057269 and parameters: {'hidden_dim': 245, 'layers': 3, 'batch_size': 16, 'lr': 0.006709062114691532, 'epochs': 2}. Best is trial 1 with value: 0.09667784677979192.
[I 2024-06-09 10:10:10,386] Trial 3 finished with value: 0.09692684511053036 and parameters: {'hidden_dim': 192, 'layers': 2, 'batch_size': 16, 'lr': 0.008198602914504749, 'epochs'

In [67]:
# Show the hyperparameter set selected by the dynamic grid search.
print(best_parameters)

{'hidden_dim': 288, 'layers': 3, 'batch_size': 32, 'lr': 0.0014625720043832183, 'epochs': 2}


In [80]:
# Train a DynamicLSTM with the tuned hyperparameters and checkpoint it whenever
# an epoch reaches a new best average training loss.
# loss_function = nn.CrossEntropyLoss()
loss_function = nn.HuberLoss(delta=1.0)
# The infinity sentinel guarantees that the first epoch counts as an improvement.
loss_scores = [float('inf')]
learning_rate = best_parameters["lr"]
# Hoisted out of the f-string below: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
dynamic_epochs = best_parameters["epochs"]

# The model's input width is the last dimension of the first sample's first
# input tensor.
inputs, targets = next(iter(dynamic_tensor_dataset))
input_size = inputs[0].shape[-1]

# NOTE(review): the model receives the tuned batch_size, but the DataLoader and
# the mismatch check below use DYNAMIC_BATCH_SIZE — confirm which is intended.
dynamic_model = DynamicLSTM(
    best_parameters["hidden_dim"],
    best_parameters["batch_size"],
    best_parameters["layers"],
    input=input_size,
)
dynamic_model = dynamic_model.to(device).double()
optimizer = optim.Adam(dynamic_model.parameters(), lr=learning_rate)

for epoch in range(dynamic_epochs):
    print(f"Epoch {epoch+1}/{dynamic_epochs}")
    dynamic_model.hidden_reset()
    epoch_loss = 0
    for i, batch in enumerate(dynamic_train_dataloader):
        # Each batch is ((model_input, unused_input), label); only the first
        # input tensor is fed to the model.
        (features, unused_features), label = batch
        if features.shape[0] != DYNAMIC_BATCH_SIZE:
            # A batch of a different size (e.g. the last one) was received.
            dynamic_model.batch_reset(features.shape[0])
            print("Reset triggered due to batch size mismatch")

        features, label = features.to(device), label.to(device)
        optimizer.zero_grad()
        output = dynamic_model(features).squeeze()

        # Surface shape mismatches, which would otherwise broadcast in the loss.
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 10 == 9:
            # Running mean of the loss over the batches seen so far this epoch.
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(dynamic_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Compare against the best loss seen so far, not just the previous epoch;
    # otherwise a partial recovery after a bad epoch would overwrite a better
    # checkpoint.
    if average_epoch_loss < min(loss_scores):
        torch.save(dynamic_model.state_dict(), "../other_pickle/Dynamic_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

Epoch 1/1
Batch 10, Loss: 0.02233407294922456
Batch 20, Loss: 0.013681545790981168
Batch 30, Loss: 0.010033784893149598
Batch 40, Loss: 0.008817536557563121
Batch 50, Loss: 0.008975304180772047
Batch 60, Loss: 0.011723222644648446
Batch 70, Loss: 0.01268501901288315
Batch 80, Loss: 0.012406697902072116
Batch 90, Loss: 0.012234159717179074
Batch 100, Loss: 0.01190799550741154
Batch 110, Loss: 0.011952834011252
Batch 120, Loss: 0.011468859347362682
Batch 130, Loss: 0.011435107446323868
Batch 140, Loss: 0.011485700943334155
Batch 150, Loss: 0.011086994175080552
Batch 160, Loss: 0.010751847390329127
Batch 170, Loss: 0.010340846027805047
Batch 180, Loss: 0.010031015179254022
Batch 190, Loss: 0.009697709659195974
Batch 200, Loss: 0.009455440548242038
Batch 210, Loss: 0.009289782499273105
Batch 220, Loss: 0.009041836779820024
Batch 230, Loss: 0.00891227074619809
Batch 240, Loss: 0.008789619473348189
Batch 250, Loss: 0.009316966459515168
Batch 260, Loss: 0.009134369367044053
Batch 270, Loss: 0

In [None]:
# Reload the saved checkpoint and print predictions next to their labels for
# the first 100 batches. map_location lets a checkpoint saved on one device
# load on whichever device the model currently lives on.
model_device = next(dynamic_model.parameters()).device
dynamic_model.load_state_dict(
    torch.load("../other_pickle/Dynamic_Model.pth", map_location=model_device)
)
dynamic_model.eval()

# Inference only: skip autograd bookkeeping, and stop after 100 batches instead
# of materializing the whole DataLoader first.
with torch.no_grad():
    for i, batch in enumerate(dynamic_train_dataloader):
        if i >= 100:
            break
        (features, unused_features), label = batch
        # Inputs must be on the same device as the model.
        output = dynamic_model(features.to(model_device)).squeeze()
        # loss = loss_function(output, label)
        # print(F.softmax(output, dim=1), label)
        print(output, label)

Indicator model