In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import mlflow
from mlflow.tracking import MlflowClient

In [2]:
# Reproducibility: seed every random number generator used by the notebook
# with the same value.
seed = 123
np.random.seed(seed)              # NumPy global generator
torch.manual_seed(seed)           # PyTorch default generator
torch.cuda.manual_seed(seed)      # current CUDA device
torch.cuda.manual_seed_all(seed)  # all CUDA devices
# Request deterministic cuDNN kernels and disable the cuDNN autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Create a new MLflow experiment named "<exp_base_name>_<i>", using the first
# index i in 0..99 for which creation succeeds, then prepare a directory with
# the same name for the model weights.
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Feature_extraction"

created = 0
for i in range(100):
    try:
        exp_name = exp_base_name+"_{}".format(i)
        experiment_id = client.create_experiment(exp_name)
        created=1
        break
    except (TypeError, mlflow.exceptions.MlflowException):
        # Creation failed for this name (presumably it is already taken);
        # move on to the next index.
        continue

if not created:
    print("ERROR: Try new experiment name.")
    sys.exit(1)

weights_root = "./model_weights/"
weights_dir = weights_root+exp_name+'/'
# makedirs (unlike mkdir) also creates weights_root when it is missing, and
# exist_ok tolerates a directory left behind by an earlier run.
os.makedirs(weights_dir, exist_ok=True)

In [4]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, use_amp=False):
	"""Train ``net`` and report the mean loss of every pass to Ray Tune.

	Each epoch runs a "train" pass followed by a "val" pass; the last epoch
	additionally runs a "test" pass. After every pass the sample-weighted
	mean loss is sent through ``tune.report`` as ``train_loss``,
	``val_loss`` or ``test_loss``.

	Args:
		net: model, called as ``net(X_batch.float())``.
		loss_function: callable used as ``loss_function(output, target)``;
			its result must support ``.backward()`` and ``.item()``.
		optimizer: optimizer stepped once per training batch.
		data_loader: mapping with "train", "val" and "test" entries, each an
			iterable of ``(X_batch, y_batch)`` pairs.
		num_epochs: number of epochs to run.
		mode: mapping whose "device" entry is the device batches are moved to.
		use_amp: when True, enables CUDA autocast and gradient scaling.

	Returns:
		The same ``net`` object that was passed in.
	"""
	scaler = torch.cuda.amp.GradScaler(enabled=use_amp) # Mixed-precision support for compatible GPUs
	print("\nTraining the model:")
	for epoch in range(num_epochs):
		print("\nEpoch", epoch+1)
		# The test pass runs only once, in the final epoch.
		if epoch < num_epochs - 1:
			keys = ["train", "val"]
		else:
			keys = ["train", "val", "test"]
		for key in keys:
			dataset_size = 0
			dataset_loss = 0.0
			if key == "train":
				net.train()
			else:
				net.eval()
			for X_batch, y_batch in tqdm(data_loader[key]):
				X_batch, y_batch = X_batch.to(mode["device"]), y_batch.to(mode["device"])
				num_samples = y_batch.shape[0]
				with torch.set_grad_enabled(mode=(key=="train")): # Autograd activated only during training
					with torch.cuda.amp.autocast(enabled=use_amp): # Mixed-precision support for compatible GPUs
						batch_output = net(X_batch.float())
						# A (N,) target against a (N, 1) output would be broadcast
						# to (N, N) inside the loss. When both hold the same number
						# of elements, give the target the output's shape instead.
						if y_batch.shape != batch_output.shape and y_batch.numel() == batch_output.numel():
							y_batch = y_batch.reshape(batch_output.shape)
						batch_loss = loss_function(batch_output, y_batch)
					if key == "train":
						scaler.scale(batch_loss).backward()
						scaler.step(optimizer)
						scaler.update()
						optimizer.zero_grad()
				# Weight each batch by its size so a smaller last batch does
				# not skew the mean.
				dataset_size += num_samples
				dataset_loss += num_samples * batch_loss.item()
			dataset_loss /= dataset_size
			if key == "test":
				print("\nEvaluating the model:")
				print(key, "loss:", dataset_loss)
			# Reported as train_loss / val_loss / test_loss.
			tune.report(**{key + "_loss": dataset_loss})
	return net

In [5]:
from sklearn.model_selection import train_test_split

def create_lagged_matrix(data, window_size, target_col='Q_Kalltveit'):
    """Build lag features for one column and return them with the target.

    For every ``i`` in ``1..window_size`` a column ``lag_i`` containing
    ``data[target_col].shift(i)`` is appended (``lag_1`` first). Rows that
    contain any missing value are then dropped. ``data`` itself is not
    modified.

    Args:
        data: DataFrame containing ``target_col``.
        window_size: number of lag columns to create.
        target_col: name of the column to lag and to use as target.
            Defaults to ``'Q_Kalltveit'``.

    Returns:
        Tuple ``(X, y)``: ``X`` holds every column except ``target_col``
        (including the new lag columns), ``y`` is the ``target_col`` series.
        Both share the same index.
    """
    df = data.copy()
    target = df[target_col]
    for lag in range(1, window_size + 1):
        df[f'lag_{lag}'] = target.shift(lag)
    # The first rows have no complete lag history and are removed here.
    df = df.dropna()
    X = df.drop(target_col, axis=1)
    y = df[target_col]
    return X, y

def split_data(X, y, train_size=.6, val_size=.2, test_size=.2):
    """Split ``X`` and ``y`` into train, validation and test parts in row order.

    No shuffling is done: the last ``test_size`` fraction of the rows becomes
    the test set, and the remaining rows are divided between train and
    validation in the proportion ``train_size : val_size``.

    Args:
        X: features, anything accepted by ``train_test_split``.
        y: targets with the same number of rows as ``X``.
        train_size: relative share of the training part.
        val_size: relative share of the validation part.
        test_size: fraction of all rows used for the test part.

    Returns:
        ``X_train, y_train, X_val, y_val, X_test, y_test``.
    """
    # random_state is not passed: it has no effect when shuffle=False, and
    # passing the notebook-global seed made this function fail without it.
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=False)
    # Validation share relative to what is left after removing the test part.
    val_fraction = val_size / (train_size + val_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=val_fraction, shuffle=False)
    return X_train, y_train, X_val, y_val, X_test, y_test

def rolling_statistics(data):
    """Return a copy of ``data`` extended with rolling means and stds.

    Six columns are added from ``Q_Kalltveit``: ``rolling_means_*`` and
    ``rolling_stds_*`` for windows of 24, 24*7 and 24*30 rows (labelled
    24h, 7d and 30d, i.e. assuming one row per hour). Rows with any missing
    value, which includes the rows before the longest window is full, are
    dropped. ``Q_Kalltveit`` itself is kept.
    """
    df = data.copy()
    flow = df['Q_Kalltveit']
    windows = (('24h', 24), ('7d', 24*7), ('30d', 24*30))
    # All means first, then all stds, to keep the column order stable.
    for prefix, reducer in (('rolling_means', 'mean'), ('rolling_stds', 'std')):
        for label, n_rows in windows:
            rolled = getattr(flow.rolling(n_rows), reducer)()
            df[f'{prefix}_{label}'] = rolled.values
    df.dropna(inplace=True)
    return df

In [6]:
def get_lagged_matrix(data, seq_len):
    """Create lagged features, split them and return float tensors.

    The lag matrix built by ``create_lagged_matrix`` is split with
    ``split_data``; each part is converted to a float tensor and the three
    feature tensors get a trailing axis of size 1.

    Returns:
        ``X_train, y_train, X_val, y_val, X_test, y_test``.
    """
    lagged_matrix, target = create_lagged_matrix(data, seq_len)
    splits = split_data(lagged_matrix, target)
    X_train, y_train, X_val, y_val, X_test, y_test = (
        torch.tensor(np.array(part)).float() for part in splits
    )
    # Append the feature axis expected for univariate input:
    # (N, seq_len, input_size) with input_size == 1.
    # NOTE(review): create_lagged_matrix appends lag_1, lag_2, ... in that
    # order, so the most recent value appears to come first along the
    # sequence axis - confirm the models expect that ordering.
    X_train, X_val, X_test = (
        features.unsqueeze(-1) for features in (X_train, X_val, X_test)
    )
    return X_train, y_train, X_val, y_val, X_test, y_test

def get_feature_extracted_matrix(data, seq_len):
    """Combine lag features with rolling statistics and return split tensors.

    The lag matrix from ``create_lagged_matrix`` is joined on the index with
    the output of ``rolling_statistics``. ``Q_Kalltveit`` is used as target
    and removed from the features. The result is split with ``split_data``
    and every part is converted to a float tensor.

    The rolling statistics are shifted by one row before the join so that
    the features of a row are computed only from earlier rows.

    Returns:
        ``X_train, y_train, X_val, y_val, X_test, y_test``; the feature
        tensors are 2-D (rows, features).
    """
    lagged_matrix, _ = create_lagged_matrix(data, seq_len)
    feature_matrix = rolling_statistics(data)

    # A rolling window that ends at row t includes Q_Kalltveit[t], which is
    # also the target of row t. Used as-is the statistics would leak the
    # target into the features, so they are moved one row forward.
    stat_cols = [col for col in feature_matrix.columns if col not in data.columns]
    feature_matrix[stat_cols] = feature_matrix[stat_cols].shift(1)
    feature_matrix = feature_matrix.dropna()

    new_matrix = pd.merge(lagged_matrix, feature_matrix, left_index=True, right_index=True)

    target = new_matrix['Q_Kalltveit']
    new_matrix = new_matrix.drop('Q_Kalltveit', axis=1)

    splits = split_data(new_matrix, target)
    X_train, y_train, X_val, y_val, X_test, y_test = (
        torch.tensor(np.array(part)).float() for part in splits
    )
    return X_train, y_train, X_val, y_val, X_test, y_test

In [7]:
def create_dataloader(X, y, sequence_length, batch_size, shuffle):
    """Wrap ``X`` and ``y`` in a ``DataLoader``, windowing 2-D features first.

    When ``X`` is 2-D with more than one column, it is turned into
    overlapping windows of ``sequence_length`` consecutive rows: the sample
    for row ``i`` is ``X[i - sequence_length:i]`` and is paired with
    ``y[i]``. Rows ``i < sequence_length`` have no complete window and are
    left out, so the dataset has ``len(X) - sequence_length`` samples.
    Any other ``X`` is used unchanged.

    Args:
        X: feature tensor. For the windowed case a 2-D tensor or NumPy array
            of shape (rows, features).
        y: target tensor with one entry per row of ``X``.
        sequence_length: number of rows per window (windowed case only).
        batch_size: batch size passed to the ``DataLoader``.
        shuffle: whether the ``DataLoader`` shuffles the samples.

    Returns:
        A ``DataLoader`` over ``TensorDataset(X, y)``.

    Raises:
        ValueError: in the windowed case, if ``sequence_length`` is not
            between 1 and ``len(X) - 1``.
    """
    if X.ndim == 2 and X.shape[-1] != 1:
        num_rows = X.shape[0]
        if not 0 < sequence_length < num_rows:
            raise ValueError(
                "sequence_length must be between 1 and {} for {} rows, got {}".format(
                    num_rows - 1, num_rows, sequence_length))
        features = torch.as_tensor(X, dtype=torch.float32)
        # unfold yields (rows - L + 1, features, L); the last window would
        # need a target one step past the end of y, so it is discarded.
        windows = features.unfold(0, sequence_length, 1)[:-1]
        # Reorder to (samples, sequence length, features).
        X = windows.permute(0, 2, 1).contiguous()
        # Keep only the targets that have a complete window; previously the
        # first sequence_length targets were paired with all-zero windows.
        y = y[sequence_length:]

    # create a PyTorch dataset and dataloader
    dataset = TensorDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataloader

In [8]:
# Read the Q_Kalltveit series from CSV, using the 'Datetime' column as index.
data = pd.read_csv(
    './data/clean_data/univariate/Q_Kalltveit/Q_Kalltveit.csv',
    index_col='Datetime',
)

In [9]:
# Number of lagged values per sample and number of samples per batch.
sequence_length, batch_size = 25, 256

In [10]:
# Lagged train/validation/test tensors built from the loaded series.
X_train, y_train, X_val, y_val, X_test, y_test = get_lagged_matrix(data, seq_len=sequence_length)

# One DataLoader per split; only the test loader keeps the row order.
train_dataloader = create_dataloader(
    X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True,
)
val_dataloader = create_dataloader(
    X_val, y_val, sequence_length, batch_size=batch_size, shuffle=True,
)
test_dataloader = create_dataloader(
    X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False,
)

In [11]:
# Show the features of the first test batch only, as a quick sanity check.
for first_features, _ in test_dataloader:
    print(first_features)
    break

tensor([[[6.0980],
         [6.2147],
         [6.3280],
         ...,
         [9.8355],
         [9.8740],
         [9.7970]],

        [[5.9943],
         [6.0980],
         [6.2147],
         ...,
         [9.6865],
         [9.8355],
         [9.8740]],

        [[5.8748],
         [5.9943],
         [6.0980],
         ...,
         [9.4810],
         [9.6865],
         [9.8355]],

        ...,

        [[8.3181],
         [8.3960],
         [8.5756],
         ...,
         [9.5001],
         [9.2343],
         [9.0038]],

        [[8.3044],
         [8.3181],
         [8.3960],
         ...,
         [9.4050],
         [9.5001],
         [9.2343]],

        [[8.2678],
         [8.3044],
         [8.3181],
         ...,
         [9.0976],
         [9.4050],
         [9.5001]]])


In [12]:
from config import load_data

def train_model(config):
    """Ray Tune trainable: build, train, evaluate and save one model.

    Data is loaded through ``load_data`` for target ``'Q_Kalltveit'``,
    lagged by ``config['lags']``, split, and wrapped in data loaders. The
    model class named by ``config['arch']`` is taken from ``new_models`` and
    trained with Adam and MSE loss. The learning rate is halved by
    ``ReduceLROnPlateau`` after 5 validation passes without improvement.

    Every epoch runs a "train" and a "val" pass; the last epoch also runs a
    "test" pass. The sample-weighted mean loss of each pass is reported via
    ``tune.report`` as ``train_loss``, ``val_loss`` or ``test_loss``.
    Finally the state dict is saved under
    ``config['cwd']/config['weights_dir']`` with a file name built from the
    remaining config entries.

    Args:
        config: dict with the keys 'arch', 'lags', 'batch_size',
            'num_epochs', 'lr', 'weigth_decay' (spelled this way),
            'hidden_size', 'num_layers', 'cwd' and 'weights_dir'.

    Raises:
        ValueError: if ``config['arch']`` is not a supported architecture.
    """
    # Validate the architecture first; previously an unknown name left `net`
    # undefined and only failed later with an UnboundLocalError.
    supported_archs = ("FCN", "FCNTemporalAttention", "LSTM", "LSTMTemporalAttention")
    arch = config['arch']
    if arch not in supported_archs:
        raise ValueError(
            "Unsupported arch {!r}; expected one of {}".format(arch, supported_archs))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Hyperparameters
    sequence_length = config['lags']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    weight_decay = config['weigth_decay']

    ld = load_data(data_dir = "./data/",
    target_variable = 'Q_Kalltveit')
    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=None)
    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y)

    data_loader = {
        "train": create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True),
        "val": create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=True),
        "test": create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False),
    }

    # Model inputs
    input_size = X_train.shape[-1]
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    # The supported names are exactly the class names in new_models.
    net = getattr(new_models, arch)(input_size, hidden_size, num_layers, output_size)
    net.to(device)

    loss_function = nn.MSELoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Halve the learning rate when the validation loss stops improving.
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    use_amp = False  # mixed precision is switched off for this trainable
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp) # Mixed-precision support for compatible GPUs
    for epoch in range(num_epochs):
        print("\nEpoch", epoch+1)
        # The test pass runs only once, in the final epoch.
        if epoch < num_epochs - 1:
            keys = ["train", "val"]
        else:
            keys = ["train", "val", "test"]
        for key in keys:
            dataset_size = 0
            dataset_loss = 0.0
            if key == "train":
                net.train()
            else:
                net.eval()
            for X_batch, y_batch in tqdm(data_loader[key]):
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                num_samples = y_batch.shape[0]
                with torch.set_grad_enabled(mode=(key=="train")): # Autograd activated only during training
                    with torch.cuda.amp.autocast(enabled=use_amp): # Mixed-precision support for compatible GPUs
                        batch_output = net(X_batch.float())
                        # A (N,) target against a (N, 1) output would be
                        # broadcast to (N, N) inside MSELoss. When both hold
                        # the same number of elements, give the target the
                        # output's shape instead.
                        if y_batch.shape != batch_output.shape and y_batch.numel() == batch_output.numel():
                            y_batch = y_batch.reshape(batch_output.shape)
                        batch_loss = loss_function(batch_output, y_batch)
                    if key == "train":
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                        optimizer.zero_grad()
                # Weight each batch by its size so a smaller last batch does
                # not skew the mean.
                dataset_size += num_samples
                dataset_loss += num_samples * batch_loss.item()
            dataset_loss /= dataset_size
            if key == "train":
                tune.report(train_loss=dataset_loss)
            elif key == "val":
                val_loss = dataset_loss
                lr_scheduler.step(metrics=val_loss)
                tune.report(val_loss=val_loss)
            else:
                print("\nEvaluating the model:")
                print(key, "loss:", dataset_loss)
                tune.report(test_loss=dataset_loss)

    # File name "<key>-<value>_<key>-<value>....pth" from every config entry
    # except the two path entries.
    out_name = "_".join(
        '{}-{}'.format(k, v) for k, v in config.items() if k not in ('weights_dir', 'cwd')
    )
    torch.save(net.state_dict(), os.path.join(config['cwd'], config['weights_dir'], out_name + '.pth'))

In [13]:
# Search space for Ray Tune. The first three entries are fixed values that
# train_model needs; the rest are sampled or grid-searched.
config = dict(
    mlflow_experiment_id=experiment_id,
    weights_dir=weights_dir,
    cwd=cwd,
    num_epochs=tune.grid_search([150]),
    num_layers=tune.choice([2, 3, 4]),
    lr=tune.loguniform(1e-4, 1e-1),
    weigth_decay=tune.choice([0]),
    batch_size=tune.choice([256]),
    lags=tune.grid_search([25]),
    hidden_size=tune.grid_search([64]),
    arch=tune.grid_search(["LSTM", "LSTMTemporalAttention"]), # "FCN", "FCNTemporalAttention",
    #"preprocessing": tune.grid_search(["lagged_matrix", "add_rolling_stats"]) #
)

# Run the search and log every trial to the MLflow experiment created above.
analysis = tune.run(
    train_model,
    config=config,
    num_samples=3,
    resources_per_trial={"cpu": 12, "gpu": 1},
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

0,1
Current time:,2023-02-22 19:46:08
Running for:,00:00:14.31
Memory:,18.0/31.9 GiB

Trial name,status,loc,arch,batch_size,hidden_size,lags,lr,num_epochs,num_layers,weigth_decay,iter,total time (s),train_loss
train_model_22d56_00000,RUNNING,127.0.0.1:8004,FCN,256,64,25,0.0137678,150,4,0,17.0,10.5898,39.922
train_model_22d56_00001,PENDING,,FCNTemporalAttention,256,64,25,0.000479241,150,4,0,,,
train_model_22d56_00002,PENDING,,LSTM,256,64,25,0.0143698,150,4,0,,,
train_model_22d56_00003,PENDING,,LSTMTemporalAtt_9990,256,64,25,0.00185917,150,4,0,,,
train_model_22d56_00004,PENDING,,FCN,256,64,25,0.00170912,150,4,0,,,
train_model_22d56_00005,PENDING,,FCNTemporalAttention,256,64,25,0.00277202,150,3,0,,,
train_model_22d56_00006,PENDING,,LSTM,256,64,25,0.00159607,150,3,0,,,
train_model_22d56_00007,PENDING,,LSTMTemporalAtt_9990,256,64,25,0.0153868,150,2,0,,,
train_model_22d56_00008,PENDING,,FCN,256,64,25,0.00015102,150,4,0,,,
train_model_22d56_00009,PENDING,,FCNTemporalAttention,256,64,25,0.00604933,150,2,0,,,
