In [1]:
# load libraries
import lstm
import torch
import queue
import threading
import numpy as np 
import pandas as pd
import torch.nn as nn
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset, random_split

print("Libraries loaded successfully")

Libraries loaded successfully


In [2]:
x_train = pd.read_csv('../../../data/pecan/dataset4/x_train_05_27_24.csv')
y_train = pd.read_csv('../../../data/pecan/dataset4/y_train_05_27_24.csv')
x_test = pd.read_csv('../../../data/pecan/dataset4/x_test_05_27_24.csv')
y_test = pd.read_csv('../../../data/pecan/dataset4/y_test_05_27_24.csv')

In [3]:
X = torch.tensor(x_train.values, dtype=torch.float32)
Y = torch.tensor(y_train.values, dtype=torch.float32)

sequence_length = 1  # xrbitrary number chosen


def create_sequences(X, Y, seq_length):
    xs, ys = [], []
    for i in range(len(X) - seq_length):
        x = X[i:i+seq_length]
        y = Y[i+seq_length]
        xs.append(x)
        ys.append(y)
    return torch.stack(xs), torch.stack(ys)

X, y = create_sequences(X, Y, sequence_length)


print(X.shape, y.shape) 


torch.Size([15, 1, 122976]) torch.Size([15, 1])


In [4]:
dataset = TensorDataset(X, y)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
new_val_size = int(0.8 * val_size)
test_size = val_size - new_val_size

generator = torch.Generator().manual_seed(42)

train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=generator)
val_dataset, test_dataset = random_split(val_dataset, [new_val_size, test_size], generator=generator)

print(f'Train Dataset size: {len(train_dataset)} \nValidation Dataset size {len(val_dataset)}\nTest Dataset size {len(test_dataset)}')

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Train Dataset size: 12 
Validation Dataset size 2
Test Dataset size 1


In [7]:
# load libraries
import json
import torch
import queue
import threading
import numpy as np 
import pandas as pd
import torch.nn as nn
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset, random_split




class LSTM(nn.Module):
    def __init__(self, input_size=122976, hidden_size=1280, num_layers=20, patience=10, min_delta=10):
        super(LSTM, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
        self.activations = []
        #member vars for the early stopper
        self.patience = patience
        self.min_delta = min_delta
        self.min_validation_loss = float('inf')

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        self.activations.append(out.squeeze())
        return out.squeeze()

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss - self.min_delta):
            self.counter +=1
            if self.counter >= self.patience:
                return True
        return False

class Trainer():
    def __init__(self, model, train_loader, validation_loader, device, criterion=nn.MSELoss(), lr=0.001, num_epochs=10000):
        self.model = model
        self.train_loader = train_loader
        self.validation_loader = validation_loader
        self.criterion = criterion
        self.lr = lr
        self.num_epochs = num_epochs
        self.device = device
        self.optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)
        self.train_losses = []
        self.val_losses = []
        self.loss = None

    def train_one_epoch(self):
        self.model.train()
        epoch_loss = 0
        for data, targets in self.train_loader:
            data = data.to(self.device)
            targets = targets.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(data)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / len(self.train_loader)

    def val_one_epoch(self):
        self.model.eval()
        epoch_loss = 0
        with torch.no_grad():
            for data, targets in self.validation_loader:
                data = data.to(self.device)
                targets = targets.to(self.device)
                outputs = self.model(data)
                loss = self.criterion(outputs, targets)
                epoch_loss += loss.item()
        return epoch_loss / len(self.validation_loader)
        
    def train(self):
        for epoch in np.arange(self.num_epochs):
            train_loss = self.train_one_epoch()
            val_loss = self.val_one_epoch()
            
            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)

            if(epoch % 100 == 0):
                print(f'Epoch: {epoch} Train Loss: {train_loss} Validation Loss: {val_loss}')

            if self.model.early_stop(val_loss):
                break
            
        print('#'*100)
        print(f'Final Epoch: {epoch} Train Loss: {train_loss} Validation Loss: {val_loss}')
        return self.model, self.train_losses, self.val_losses


In [8]:
# set device 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device set to:", device)

model = LSTM().to(device)
trainer = Trainer(model, train_loader, val_loader, device)
model, train_losses, val_losses = trainer.train()

Device set to: cuda
Epoch: 0 Train Loss: 3275211.25 Validation Loss: 4533117.0
Epoch: 100 Train Loss: 2893961.25 Validation Loss: 4071949.0
Epoch: 200 Train Loss: 2585181.0 Validation Loss: 3697886.0
Epoch: 300 Train Loss: 2306242.75 Validation Loss: 3356271.0
Epoch: 400 Train Loss: 2052863.625 Validation Loss: 3042342.25
Epoch: 500 Train Loss: 1822772.75 Validation Loss: 2753684.0
Epoch: 600 Train Loss: 1614237.125 Validation Loss: 2488499.75
Epoch: 700 Train Loss: 1425753.75 Validation Loss: 2245259.5
Epoch: 800 Train Loss: 1255948.875 Validation Loss: 2022578.0
Epoch: 900 Train Loss: 1103534.75 Validation Loss: 1819166.125
Epoch: 1000 Train Loss: 967290.25 Validation Loss: 1633807.75
Epoch: 1100 Train Loss: 846046.3125 Validation Loss: 1465344.5
Epoch: 1200 Train Loss: 738680.75 Validation Loss: 1312666.875
Epoch: 1300 Train Loss: 644113.5 Validation Loss: 1174710.375
Epoch: 1400 Train Loss: 561302.9375 Validation Loss: 1050451.125
Epoch: 1500 Train Loss: 489245.21875 Validation Los