In [1]:
from slstm import sLSTM
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import time

def _run_epoch(model, loader, loss_fn, device, optim=None):
    """Run one full pass over ``loader`` and return sqrt(mean batch loss).

    When ``optim`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. The returned value is an RMSE when ``loss_fn`` is a
    mean-squared-error loss and all batches have the same size.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    training = optim is not None
    model.train(training)  # train(False) is equivalent to eval()

    total_loss = 0.0
    num_batches = 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            loss = loss_fn(model(x), y)
            if training:
                optim.zero_grad()
                loss.backward()
                optim.step()
            total_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError('data loader produced no batches; cannot compute an average loss')
    return np.sqrt(total_loss / num_batches)


def _write_results(path, train_losses, val_losses, best_train, best_val):
    """Write the best-so-far summary and the per-epoch loss history to ``path``.

    ``best_train`` and ``best_val`` are ``(loss, epoch)`` pairs.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f'Best Training RMSE Loss: {best_train[0]:.4f} at epoch {best_train[1]}\n')
        f.write(f'Best Validation RMSE Loss: {best_val[0]:.4f} at epoch {best_val[1]}\n')
        f.write('\nEpoch-wise losses:\n')
        for epoch_idx, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), 1):
            f.write(f'Epoch {epoch_idx}: Train RMSE = {train_loss:.4f}, Val RMSE = {val_loss:.4f}\n')


def trainer(model, epochs, train_loader, val_loader, loss_fn, optim,
            device=None, results_path='test_2_round_3_training_results.txt'):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: module to train; called as ``model(x)`` on each batch.
        epochs: number of epochs to run (epochs are numbered from 1).
        train_loader: iterable of ``(x, y)`` training batches.
        val_loader: iterable of ``(x, y)`` validation batches.
        loss_fn: callable ``loss_fn(output, y)`` returning a scalar tensor.
        optim: optimizer stepped once per training batch.
        device: device batches are moved to. When ``None`` it is taken from
            the model's first parameter, falling back to ``'cuda'`` for a
            model without parameters.
        results_path: text file rewritten after every epoch with the best
            losses so far and the full per-epoch history, so partial results
            survive an interrupted run.

    Returns:
        Tuple ``(train_losses, val_losses, best_train_loss, best_val_loss,
        best_train_epoch, best_val_epoch)`` where the loss lists hold one
        value per epoch.

    Raises:
        ValueError: if either loader yields no batches.
    """
    if device is None:
        # Follow the model instead of assuming a GPU is present.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cuda')

    train_losses = []
    val_losses = []
    best_train_loss = float('inf')
    best_val_loss = float('inf')
    best_train_epoch = 0
    best_val_epoch = 0

    for epoch in range(1, epochs + 1):
        avg_train_loss = _run_epoch(model, train_loader, loss_fn, device, optim)
        train_losses.append(avg_train_loss)
        if avg_train_loss < best_train_loss:
            best_train_loss = avg_train_loss
            best_train_epoch = epoch

        avg_val_loss = _run_epoch(model, val_loader, loss_fn, device)
        val_losses.append(avg_val_loss)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_val_epoch = epoch

        print(f'Epoch {epoch} Train RMSE Loss: {avg_train_loss:.4f}, Val RMSE Loss: {avg_val_loss:.4f}')

        _write_results(
            results_path,
            train_losses,
            val_losses,
            (best_train_loss, best_train_epoch),
            (best_val_loss, best_val_epoch),
        )

    return train_losses, val_losses, best_train_loss, best_val_loss, best_train_epoch, best_val_epoch


# Load the pre-built training and validation datasets.
train = torch.load('../../../../data/cleaned/train.pt')
val = torch.load('../../../../data/cleaned/val.pt')

batch_size = 32
epochs = 1000

train_loader = DataLoader(train, batch_size, shuffle=True, drop_last=True)
# Validation is not shuffled: with drop_last=True, shuffling would drop a
# different set of samples each epoch and make the per-epoch validation
# losses incomparable. drop_last is kept so every batch has equal weight in
# the per-batch average that trainer computes.
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)

model = sLSTM(hidden_size=512)
model.to('cuda')
loss_fn = torch.nn.MSELoss()
optim = torch.optim.Adam(params=model.parameters(), lr=0.001)

# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
losses = trainer(model, epochs, train_loader, val_loader, loss_fn, optim)
end = time.perf_counter()

# Report the epoch count that was actually run rather than a hard-coded number.
print(f'\nTotal training time on {epochs} epochs: {end-start}')

Epoch 1 Train RMSE Loss: 286.7381, Val RMSE Loss: 91.5900
Epoch 2 Train RMSE Loss: 283.0928, Val RMSE Loss: 143.7112
Epoch 3 Train RMSE Loss: 281.9990, Val RMSE Loss: 146.5732
Epoch 4 Train RMSE Loss: 278.8869, Val RMSE Loss: 148.4795
Epoch 5 Train RMSE Loss: 277.0889, Val RMSE Loss: 167.8414
Epoch 6 Train RMSE Loss: 275.4948, Val RMSE Loss: 137.2697
Epoch 7 Train RMSE Loss: 272.6938, Val RMSE Loss: 172.8381
Epoch 8 Train RMSE Loss: 268.8677, Val RMSE Loss: 152.7639
Epoch 9 Train RMSE Loss: 271.4347, Val RMSE Loss: 97.6947
Epoch 10 Train RMSE Loss: 269.8340, Val RMSE Loss: 134.2257
Epoch 11 Train RMSE Loss: 276.0935, Val RMSE Loss: 133.5127
Epoch 12 Train RMSE Loss: 269.8443, Val RMSE Loss: 176.5813
Epoch 13 Train RMSE Loss: 254.1788, Val RMSE Loss: 192.2868
Epoch 14 Train RMSE Loss: 249.7128, Val RMSE Loss: 207.3008
Epoch 15 Train RMSE Loss: 239.8882, Val RMSE Loss: 397.4180
Epoch 16 Train RMSE Loss: 250.3219, Val RMSE Loss: 251.1933
Epoch 17 Train RMSE Loss: 252.6751, Val RMSE Loss: 