In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models
from config import load_data

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
import torch.nn as nn
import torch

class LSTM(nn.Module):
    """Sequence-to-one regressor: Linear -> BatchNorm1d -> stacked LSTM -> Linear.

    Args:
        input_size: number of features per time step of the input.
        hidden_size: width of the input projection and of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values predicted per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        # Projects every time step from input_size to hidden_size features.
        self.linear_in = nn.Linear(input_size, hidden_size)

        # batch_first=True: the LSTM consumes (batch, seq_len, hidden_size).
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Normalises the projected features (applied channel-wise, see forward).
        self.batch_norm = nn.BatchNorm1d(hidden_size)

        # Maps the last LSTM output to the prediction.
        self.linear_out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict one output per input sequence.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch,) when output_size == 1, otherwise
            (batch, output_size). The batch dimension is always kept, even
            for a batch containing a single sequence.
        """
        x = self.linear_in(x)

        # BatchNorm1d expects the channel axis at dim 1, so swap the sequence
        # and feature axes for the normalisation and swap them back afterwards.
        x = self.batch_norm(x.transpose(1, 2)).transpose(1, 2)

        # No explicit (h0, c0): nn.LSTM starts from zero states on the input's
        # device when none are supplied.
        lstm_out, _ = self.lstm(x)

        # Only the output at the last time step is used for the prediction.
        out = self.linear_out(lstm_out[:, -1, :])

        # Drop only the trailing output dimension. A bare squeeze() would also
        # remove the batch dimension for a batch of one sample and yield a
        # 0-d tensor that no longer lines up with the target.
        return out.squeeze(-1)

In [3]:
def fit(model, loss_function, optimizer, data_loader, num_epochs, mode, use_amp=False):
    """Train `model` and record the per-epoch train/validation loss.

    Every epoch runs a training pass followed by a validation pass. On the
    last epoch a test pass is added when `data_loader` has a "test" entry.

    Args:
        model: the network to optimise; called as `model(X_batch.float())`.
        loss_function: callable `loss_function(output, target)` returning a scalar tensor.
        optimizer: optimizer bound to the parameters of `model`.
        data_loader: mapping with "train" and "val" iterables of
            `(X_batch, y_batch)` pairs and, optionally, a "test" iterable.
        num_epochs: number of epochs to run.
        mode: mapping whose "device" entry is the device batches are moved to.
        use_amp: enable CUDA mixed precision (autocast + gradient scaling).

    Returns:
        dict: `{"train": {"loss": [...], "mae": []}, "val": {"loss": [...], "mae": []}}`
        with one sample-weighted mean loss per epoch. The "mae" lists are
        kept for compatibility but are never filled by this function. The
        loss of an empty loader is recorded as NaN.
    """
    history = {"train": {"loss": [], "mae": []}, "val": {"loss": [], "mae": []}}
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)  # Mixed-precision support for compatible GPUs
    device = mode["device"]
    print("\nTraining the model:")
    for epoch in range(num_epochs):
        print("\nEpoch", epoch+1)
        keys = ["train", "val"]
        # The test split is evaluated once, after the final epoch, and only
        # when the caller actually supplied one.
        if epoch == num_epochs - 1 and "test" in data_loader:
            keys.append("test")
        for key in keys:
            is_training = key == "train"
            model.train(is_training)  # train(False) is equivalent to eval()
            dataset_size = 0
            dataset_loss = 0.0
            for X_batch, y_batch in tqdm(data_loader[key]):
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                if is_training:
                    # Clear gradients BEFORE the backward pass so that anything
                    # left on the parameters before this step cannot leak into it.
                    optimizer.zero_grad()
                with torch.set_grad_enabled(is_training):  # Autograd activated only during training
                    with torch.cuda.amp.autocast(enabled=use_amp):  # Mixed-precision support for compatible GPUs
                        batch_output = model(X_batch.float())
                        # NOTE(review): assumes y_batch has the same shape as
                        # batch_output; otherwise the loss may broadcast -- confirm
                        # against the dataloader.
                        batch_loss = loss_function(batch_output, y_batch)
                    if is_training:
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                # Weight each batch by its size so a smaller last batch does
                # not skew the epoch mean.
                dataset_size += y_batch.shape[0]
                dataset_loss += y_batch.shape[0] * batch_loss.item()
            # An empty loader has no defined mean; report NaN instead of
            # failing with ZeroDivisionError.
            dataset_loss = dataset_loss / dataset_size if dataset_size else float("nan")
            if key in ["train", "val"]:
                history[key]["loss"].append(dataset_loss)
            else:
                print("\nEvaluating the model:")
            print(key, "loss:", dataset_loss)
    return history

In [4]:
def print_history(history):
    """Plot the training and validation loss curves against the epoch number.

    Args:
        history: mapping with `history["train"]["loss"]` and
            `history["val"]["loss"]`, one value per epoch (epochs are
            numbered from 1 on the x-axis).
    """
    train_losses = history["train"]["loss"]
    epochs = np.arange(1, len(train_losses) + 1)

    plt.figure()
    plt.suptitle("Training history")
    plt.subplot(121)
    plt.title("Loss history")
    # One curve per split, drawn on the same axes.
    for split, legend_label in (("train", "Train"), ("val", "Validation")):
        plt.plot(epochs, history[split]["loss"], label=legend_label)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

In [5]:
from config import load_data

def train_model(data_dir, sequence_length=25, batch_size=256, num_epochs=100,
                lr=1e-4, weight_decay=0, hidden_size=64, num_layers=2,
                target_variable='Q_Kalltveit', vars_to_lag=None):
    """Load the data, train an LSTM regressor on it and plot the loss history.

    Args:
        data_dir: directory handed to `load_data`.
        sequence_length: window size of the lagged input sequences.
        batch_size: batch size used for all three dataloaders.
        num_epochs: number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        hidden_size: hidden width of the LSTM model.
        num_layers: number of stacked LSTM layers.
        target_variable: name of the target passed to `load_data`.
        vars_to_lag: forwarded to `create_lagged_matrix` (None keeps the
            loader's default).

    Raises:
        ValueError: if the training dataloader yields no batch.

    Returns:
        None. The loss curves are displayed via `print_history`.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mode = {"name": device.type, "device": device}

    # Data preparation. The train/val/test proportions are whatever
    # `split_data` applies; they are not configured here.
    ld = load_data(data_dir=data_dir, target_variable=target_variable)
    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=vars_to_lag)
    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y)

    train_dataloader = ld.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True)
    # Evaluation splits do not need shuffling; keeping them ordered makes the
    # evaluation passes deterministic.
    val_dataloader = ld.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=False)
    test_dataloader = ld.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False)

    # Peek at one real batch: its last dimension is the per-time-step feature
    # width the model will actually receive, which is not necessarily
    # X_train.shape[-1].
    first_batch = next(iter(train_dataloader), None)
    if first_batch is None:
        raise ValueError("Training dataloader is empty; check data_dir and the data split.")
    sample_X = first_batch[0]
    print(sample_X.shape)

    # Model inputs
    input_size = sample_X.shape[-1]
    output_size = 1

    net = LSTM(input_size, hidden_size, num_layers, output_size)
    net.to(mode["device"])

    data_loader = {
        "train": train_dataloader,
        "val": val_dataloader,
        "test": test_dataloader,
    }

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    hist = fit(net, loss_function, optimizer, data_loader, num_epochs, mode)

    print_history(hist)


In [6]:
# Run the full training pipeline on the data stored under ./data/.
data_dir = "./data/"
train_model(data_dir)
# TODO: training stopped working after recent changes to load_data.py -- investigate.

test
torch.Size([256, 25, 1])


KeyboardInterrupt: 

In [None]:
# Stand-alone sanity check of the data pipeline: rebuild the three dataloaders
# outside of train_model and print the shape of the first test batch.
sequence_length = 25
batch_size = 256

ld = load_data(data_dir=data_dir, target_variable='Q_Kalltveit')
X, y = ld.create_lagged_matrix(window_size=sequence_length)
X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y)

train_dataloader = ld.create_dataloader(
    X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True
)
val_dataloader = ld.create_dataloader(
    X_val, y_val, sequence_length, batch_size=batch_size, shuffle=True
)
test_dataloader = ld.create_dataloader(
    X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False
)

# Only the first batch is inspected.
for X_batch, y_batch in test_dataloader:
    print(X_batch.shape)
    break

torch.Size([256, 25, 1])
