In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import sys
import json
from infer_functions import *

In [32]:
# Force a fresh import of the local helper modules so that edits made on disk
# are picked up without restarting the kernel, then copy every name that does
# not start with an underscore into the notebook namespace.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry (if any) makes the import below re-execute the module.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # The mapping is fully built before it is published into globals().
    globals().update(
        dict(
            (attr, getattr(module, attr))
            for attr in dir(module)
            if not attr.startswith('_')
        )
    )

Data formation

In [49]:
# Build the "static" and "dynamic" datasets, each capped with limit=100.
# NOTE(review): create_tensor_dataset presumably comes from infer_functions;
# the meaning of the second positional argument (4) is not visible here — confirm.
static_tensor_dataset = create_tensor_dataset(
    "static",
    4,
    limit=100,
    categories=0,
    averages=True,
)
dynamic_tensor_dataset = create_tensor_dataset(
    "dynamic",
    4,
    limit=100,
    categories=0,
    averages=True,
    use_profile=True,
    use_multiples=True,
)

In [46]:
# Load the measure definitions and build the combined ("together") dataset.
# The path is assembled with os.path.join so the cell works on every OS; the
# previous hard-coded backslashes only resolved on Windows.
measures_path = os.path.join("..", "other_pickle", "measures.json")
with open(measures_path, "r") as file:
    measures = json.load(file)

# verbose=True makes the helper log progress (the "Catting <ticker>.csv" lines below).
# NOTE(review): the semantics of the positional 5 and of ghost=True are defined
# in infer_functions and are not visible from this notebook — confirm.
combined_tensor_dataset = create_tensor_dataset(
    "together",
    5,
    measures,
    limit=800,
    categories=0,
    verbose=True,
    averages=False,
    use_profile=True,
    use_multiples=False,
    ghost=True,
)

Catting AAPL.csv
Catting ABT.csv
Catting AEP.csv
Catting AMD.csv
Catting AMGN.csv
Catting AMZN.csv
Catting APH.csv
Catting BA.csv
Catting BKR.csv
Catting BMY.csv
Catting BSX.csv
Catting CDW.csv
Catting CI.csv
Catting CL.csv
Catting COP.csv
Catting COR.csv
Catting CPNG.csv
Catting CSGP.csv
Catting CTSH.csv
Catting CVS.csv
Catting DHR.csv
Catting ECL.csv
Catting ED.csv
Catting EFX.csv
Catting EL.csv
Catting ET.csv
Catting FAST.csv
Catting FIS.csv
Catting GILD.csv
Catting HES.csv
Catting HSY.csv
Catting IBM.csv
Catting INTC.csv
Catting IR.csv
Catting KDP.csv
Catting KLAC.csv
Catting KMB.csv
Catting KMI.csv
Catting KO.csv
Catting LHX.csv
Catting LIN.csv
Catting LRCX.csv
Catting LYB.csv
Catting MA.csv
Catting MCK.csv
Catting MCO.csv
Catting MLM.csv
Catting MMM.csv
Catting MPC.csv
Catting MSFT.csv
Catting MSI.csv
Catting NEM.csv
Catting NOC.csv
Catting NSC.csv
Catting NUE.csv
Catting OKE.csv
Catting ON.csv
Catting ORLY.csv
Catting OTIS.csv
Catting PFE.csv
Catting PG.csv
Catting PH.csv
Cattin

In [None]:
# Sanity check: walk the combined dataset and print the shape of the first
# element of each sample's inputs (the training loop below unpacks the inputs
# as a pair and only feeds the first element to the model).
# Note: this prints one line per sample, so the output can be long.
for inputs, targets in combined_tensor_dataset:
    print(inputs[0].shape)

In [41]:
# Notebook configuration: measure definitions, feature counts, device, batch sizes.
# The path is assembled with os.path.join so the cell works on every OS; the
# previous hard-coded backslashes only resolved on Windows.
measures_path = os.path.join("..", "other_pickle", "measures.json")
with open(measures_path, "r") as file:
    measures = json.load(file)

# Number of entries listed under each measure group.
static_size = len(measures["static"])
dynamic_size = len(measures["dynamic"])

# Prefer the GPU when one is visible to PyTorch.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

STATIC_BATCH_SIZE = 20
DYNAMIC_BATCH_SIZE = 5
COMBINED_BATCH_SIZE = 32

# Loader definitions kept for reference (currently disabled).
# static_train_dataloader = torch.utils.data.DataLoader(static_tensor_dataset, batch_size=STATIC_BATCH_SIZE, shuffle=True)
# dynamic_train_dataloader = torch.utils.data.DataLoader(dynamic_tensor_dataset, batch_size=DYNAMIC_BATCH_SIZE, shuffle=False)
# combined_train_dataloader = torch.utils.data.DataLoader(combined_tensor_dataset, batch_size=COMBINED_BATCH_SIZE, shuffle=False)


cpu


In [21]:
HIDDEN_SIZE = 200
LAYERS = 8

# Instantiate the static model and cast it to float64 in a single expression.
# NOTE(review): categories=6 here while the datasets above are built with
# categories=0 — confirm this is intended.
static_model = StaticLSTM(
    HIDDEN_SIZE,
    STATIC_BATCH_SIZE,
    LAYERS,
    static_size,
    categories=6,
).double()

Static Grid Search

In [None]:
# Split the static dataset into inputs/targets and run the hyper-parameter
# search for StaticLSTM. The return value of grid_search is not stored here;
# as the last expression of the cell it is only displayed.
# NOTE(review): split_tensor_dataset and grid_search presumably come from
# infer_functions; their defaults (e.g. number of trials) are not visible here.
x_train, y_train = split_tensor_dataset(static_tensor_dataset)
grid_search(StaticLSTM, x_train, y_train)

In [None]:
# Train the static model with an L1 objective and checkpoint the best epoch.
loss_function = nn.L1Loss()
loss_scores = [float('inf')]
learning_rate = 0.01
epochs = 2

# Build the loader in this cell so the loop does not rely on leftover kernel state.
static_train_dataloader = DataLoader(static_tensor_dataset, batch_size=STATIC_BATCH_SIZE, shuffle=True)

static_model = static_model.to(device)
optimizer = optim.Adam(static_model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    static_model.hidden_reset()
    # Make sure training mode is active even if an evaluation cell ran before.
    static_model.train()
    epoch_loss = 0
    for i, batch in enumerate(static_train_dataloader):
        # Each batch is ((primary inputs, secondary inputs), labels); only the
        # primary inputs are fed to this model.
        (batch_inputs, _unused_inputs), label = batch
        if batch_inputs.shape[0] != STATIC_BATCH_SIZE:
            # Typically the final, smaller batch of the epoch.
            static_model.batch_reset(batch_inputs.shape[0])
            print("Reset triggered due to batch size mismatch")

        batch_inputs, label = batch_inputs.to(device), label.to(device)
        optimizer.zero_grad()
        # NOTE(review): squeeze() without a dim also removes a batch dimension
        # of size 1 — confirm a final batch of one sample cannot occur.
        output = static_model(batch_inputs).squeeze()

        # Surface shape mismatches, which would otherwise broadcast silently in the loss.
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 50 == 49:
            # Running mean of the loss over the batches seen so far this epoch.
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(static_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Checkpoint only when this epoch beats the best loss seen so far, so a
    # later, worse epoch can never overwrite a better saved model.
    if average_epoch_loss < min(loss_scores):
        torch.save(static_model.state_dict(), "../other_pickle/Static_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

In [None]:
# Reload the saved checkpoint and print predictions for the first 10 batches.
# map_location lets a checkpoint saved on one device load on another.
static_model.load_state_dict(torch.load("../other_pickle/Static_Model.pth", map_location=device))
static_model.eval()

# Inference only: skip gradient tracking.
with torch.no_grad():
    # zip with range(10) stops after 10 batches without materializing the whole loader.
    for i, batch in zip(range(10), static_train_dataloader):
        (batch_inputs, _unused_inputs), label = batch
        # The model lives on `device`, so the inputs must be moved there as well.
        output = static_model(batch_inputs.to(device)).squeeze()
        print(output)

Dynamic Grid Search

In [36]:
# Re-import the local helper modules from disk and copy every name that does
# not start with an underscore into the notebook namespace, so that the cells
# below use the latest definitions.
modules_to_reload = ["infer_functions"]
for module_name in modules_to_reload:
    # Dropping the cached entry (if any) makes the import below re-execute the module.
    sys.modules.pop(module_name, None)
    __import__(module_name)
    module = sys.modules[module_name]
    # The mapping is fully built before it is published into globals().
    globals().update(
        dict(
            (attr, getattr(module, attr))
            for attr in dir(module)
            if not attr.startswith('_')
        )
    )

In [37]:
# Split the combined dataset into inputs/targets and run a 10-trial
# hyper-parameter search for DynamicLSTM. The returned dict (hidden_dim,
# layers, batch_size, lr, epochs — see the printed value below) drives the
# training cell further down.
# NOTE(review): this overwrites the x_train / y_train produced by the static
# grid-search cell.
x_train, y_train = split_tensor_dataset(combined_tensor_dataset)
best_parameters = grid_search(DynamicLSTM, x_train, y_train, n_trials=10, categories=0)

[I 2024-06-13 00:30:45,479] A new study created in memory with name: no-name-e34ca1de-900c-4d97-9128-2ef98a39edd0
[I 2024-06-13 00:32:03,103] Trial 0 finished with value: 0.10038082953418623 and parameters: {'hidden_dim': 437, 'layers': 4, 'batch_size': 16, 'lr': 0.003038154129716284, 'epochs': 2}. Best is trial 0 with value: 0.10038082953418623.
[I 2024-06-13 00:32:05,642] Trial 1 finished with value: 0.10009098962899576 and parameters: {'hidden_dim': 165, 'layers': 2, 'batch_size': 64, 'lr': 0.00740147025891154, 'epochs': 1}. Best is trial 1 with value: 0.10009098962899576.
[I 2024-06-13 00:32:19,027] Trial 2 finished with value: 0.5972372979827296 and parameters: {'hidden_dim': 331, 'layers': 2, 'batch_size': 16, 'lr': 0.009466467695336572, 'epochs': 1}. Best is trial 1 with value: 0.10009098962899576.
[I 2024-06-13 00:33:34,037] Trial 3 finished with value: 0.20244354824980024 and parameters: {'hidden_dim': 328, 'layers': 4, 'batch_size': 16, 'lr': 0.007598251470058156, 'epochs': 3

In [67]:
# Show the hyper-parameters selected by the search above.
print(best_parameters)

{'hidden_dim': 288, 'layers': 3, 'batch_size': 32, 'lr': 0.0014625720043832183, 'epochs': 2}


In [49]:
# Train the combined model with the hyper-parameters found by the grid search
# and checkpoint the best epoch.
# loss_function = nn.CrossEntropyLoss()  # alternative objective, currently disabled
combined_train_dataloader = DataLoader(combined_tensor_dataset, batch_size=best_parameters["batch_size"], shuffle=False)
loss_function = nn.HuberLoss(delta=1.0)
loss_scores = [float('inf')]
learning_rate = best_parameters["lr"]
# NOTE(review): input_size is not defined in any visible cell; presumably it is
# provided by infer_functions — confirm before a Restart & Run All.
combined_model = DynamicLSTM(best_parameters["hidden_dim"], best_parameters["batch_size"], best_parameters["layers"], input=input_size)
combined_model = combined_model.to(device).double()
optimizer = optim.Adam(combined_model.parameters(), lr=learning_rate)

# The tuned value is best_parameters["epochs"]; this run deliberately uses a
# fixed count. The same variable drives the loop and the progress header so the
# log can no longer read "Epoch 2/1".
epochs = 2
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    combined_model.hidden_reset()
    combined_model.train()
    epoch_loss = 0
    for i, batch in enumerate(combined_train_dataloader):
        # Each batch is ((primary inputs, secondary inputs), labels); only the
        # primary inputs are fed to this model.
        (batch_inputs, _unused_inputs), label = batch
        if batch_inputs.shape[0] != best_parameters["batch_size"]:
            # Typically the final, smaller batch of the epoch.
            combined_model.batch_reset(batch_inputs.shape[0])
            print("Reset triggered due to batch size mismatch")

        batch_inputs, label = batch_inputs.to(device), label.to(device)
        optimizer.zero_grad()
        # NOTE(review): squeeze() without a dim also removes a batch dimension
        # of size 1 — confirm a final batch of one sample cannot occur.
        output = combined_model(batch_inputs).squeeze()

        # Surface shape mismatches, which would otherwise broadcast silently in the loss.
        if output.shape != label.shape:
            print(f"Output shape: {output.shape}, Label shape: {label.shape}")

        loss = loss_function(output, label)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i % 10 == 9:
            # Running mean of the loss over the batches seen so far this epoch.
            print(f"Batch {i+1}, Loss: {epoch_loss / (i+1)}")

    average_epoch_loss = epoch_loss / len(combined_train_dataloader)
    print(f"Average loss for epoch {epoch+1}: {average_epoch_loss}")

    # Checkpoint only when this epoch beats the best loss seen so far, so a
    # later, worse epoch can never overwrite a better saved model.
    if average_epoch_loss < min(loss_scores):
        torch.save(combined_model.state_dict(), "../other_pickle/Combined_Model.pth")
        print("Model saved")
    loss_scores.append(average_epoch_loss)

Epoch 1/1
Batch 10, Loss: 0.7624322595160065
Batch 20, Loss: 0.386967732861853
Batch 30, Loss: 0.2613999674617259
Batch 40, Loss: 0.1975048067858554
Batch 50, Loss: 0.1597081456687775
Batch 60, Loss: 0.13536079461954834
Batch 70, Loss: 0.11740619922441936
Batch 80, Loss: 0.1038365854815854
Batch 90, Loss: 0.09343729232504773
Batch 100, Loss: 0.08504591803135755
Batch 110, Loss: 0.07825082887840876
Batch 120, Loss: 0.07402816352052223
Reset triggered due to batch size mismatch
Average loss for epoch 1: 0.07182994614602504
Model saved
Epoch 2/1
Batch 10, Loss: 0.014380470689310521
Batch 20, Loss: 0.011746353080158343
Batch 30, Loss: 0.010758333578077409
Batch 40, Loss: 0.009818680875900012
Batch 50, Loss: 0.009923647432612622
Batch 60, Loss: 0.01098262778718081
Batch 70, Loss: 0.011287995123552563
Batch 80, Loss: 0.011420591634955174
Batch 90, Loss: 0.011205457416902365
Batch 100, Loss: 0.010976625398587904
Batch 110, Loss: 0.010882718150887172
Batch 120, Loss: 0.012331125424967263
Reset

In [None]:
# Reload the saved checkpoint and print predictions next to the labels for the
# first 100 batches.
# map_location lets a checkpoint saved on one device load on another.
combined_model.load_state_dict(torch.load("../other_pickle/Combined_Model.pth", map_location=device))
combined_model.eval()

# Inference only: skip gradient tracking.
with torch.no_grad():
    # zip with range(100) stops after 100 batches without materializing the whole loader.
    for i, batch in zip(range(100), combined_train_dataloader):
        (batch_inputs, _unused_inputs), label = batch
        # The model lives on `device`, so the inputs must be moved there as well.
        output = combined_model(batch_inputs.to(device)).squeeze()
        # For a classification head, print F.softmax(output, dim=1) instead.
        print(output, label)

Indicator model