In [19]:
import config.load_data as load_data
from models import model

from tqdm import tqdm
import os.path
import sys
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import mutual_info_regression
from sklearn.model_selection import train_test_split

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler

import mlflow
from mlflow.tracking import MlflowClient

#### Creating Mlflow experiment

In [20]:
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Univariate_experiment"

created = 0
for i in range(100):
    try:
        exp_name = exp_base_name+"_{}".format(i)
        experiment_id = client.create_experiment(exp_name)
        created=1
        break
    except (TypeError, mlflow.exceptions.MlflowException):
        continue

if not created:
    print("ERROR: Try new experiment name.")
    sys.exit(1)

weights_root = "./model_weights/"
weights_dir = weights_root+exp_name+'/'
os.mkdir(weights_dir)

#### Data preparation

In [21]:
def split_data(data, shuffle_train=False, train_size=0.7, test_size=0.3):
    train, temp = train_test_split(data, train_size=train_size, shuffle=shuffle_train)
    val, test = train_test_split(temp, test_size=test_size, shuffle=False)
    return train, val, test

def sliding_windows(data, seq_length):
    x = []
    y = []
    for i in range(len(data)-seq_length-1):
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length]
        x.append(_x)
        y.append(_y)
    return np.array(x).reshape(len(x), -1), np.array(y)


In [22]:
ld = load_data()
    
data = ld.get_univariate_data()
train, val, test = split_data(data)

In [23]:
def get_lagged_data(sequence_length, mi):
    X_train, y_train = sliding_windows(train.values, sequence_length)
    X_val, y_val = sliding_windows(val.values, sequence_length)
    X_test, y_test = sliding_windows(test.values, sequence_length)

    if mi:
        
        X = pd.DataFrame(X_train)
        y = pd.DataFrame(y_train)

        discrete_features = X.dtypes == int
        mi_scores = mutual_info_regression(X, y, discrete_features=discrete_features)
        mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)
        mi_scores = mi_scores.sort_values(ascending=False)
        selected_dimention = mi_scores[mi_scores.values >= 2]

        X_train = X_train[:, selected_dimention.index]
        X_val = X_val[:, selected_dimention.index]
        X_test = X_test[:, selected_dimention.index]

    X_train, y_train = torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()
    X_val, y_val = torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float()
    X_test, y_test = torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()

    return torch.utils.data.TensorDataset(X_train, y_train), torch.utils.data.TensorDataset(X_val, y_val), torch.utils.data.TensorDataset(X_test, y_test)


#### Training and testing method

In [24]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, use_amp=False):
	history = {"train": {"loss": [], "mae": []}, "val": {"loss": [], "mae": []}}
	scaler = torch.cuda.amp.GradScaler(enabled=use_amp) # Mixed-precision support for compatible GPUs
	print("\nTraining the model:")
	for epoch in range(num_epochs):
		print("\nEpoch", epoch+1)
		if epoch < num_epochs - 1:
			keys = ["train", "val"]
		else:
			keys = ["train", "val", "test"]
		for key in keys:
			dataset_size = 0
			dataset_loss = 0.0
			if key == "train":
				net.train()
			else:
				net.eval()
			for X_batch, y_batch in tqdm(data_loader[key]):
				X_batch, y_batch = X_batch.to(mode["device"]), y_batch.to(mode["device"])
				with torch.set_grad_enabled(mode=(key=="train")): # Autograd activated only during training
					with torch.cuda.amp.autocast(enabled=use_amp): # Mixed-precision support for compatible GPUs
						batch_output = net(X_batch.float())
						batch_loss = loss_function(batch_output, y_batch)
					if key == "train":
						scaler.scale(batch_loss).backward()
						scaler.step(optimizer) 	
						scaler.update()
						optimizer.zero_grad()
				dataset_size += y_batch.shape[0]
				dataset_loss += y_batch.shape[0] * batch_loss.item()
			dataset_loss /= dataset_size
			if key in ["train", "val"]:
				history[key]["loss"].append(dataset_loss)
				if key == "train":
					tune.report(train_loss=dataset_loss)
				else:
					tune.report(val_loss=dataset_loss)
			else:
				print("\nEvaluating the model:")
				print(key, "loss:", dataset_loss)
				tune.report(test_loss=dataset_loss)
	return net

In [25]:
def train_model(config):
    use_GPU = torch.cuda.is_available()
    if use_GPU:
        mode = {"name": "cuda", "device": torch.device("cuda")}
    else:
        mode = {"name": "cpu", "device": torch.device("cpu")}

    num_epochs = 50
    batch_size = 512 #config['batch_size']
    
    lr = config['lr']
    
    sequence_length = 6
    in_dim = 72
    mi = 0

    train_, val_, test_ = get_lagged_data(in_dim, mi)

    
    lstm_in_dim = 12
    lstm_hidden_dim = 64 #config['hidden_dim']
    out_dim = 1

    if config['arch'] == "FCN":
        net = model.FCN(in_dim,
                        sequence_length,
                        lstm_in_dim,
                        lstm_hidden_dim,
                        out_dim,
                        mode,)
    elif config['arch'] == "LSTM":
        net = model.LSTM(in_dim,
                        sequence_length,
                        lstm_in_dim,
                        lstm_hidden_dim,
                        out_dim,
                        mode,)
    elif config['arch'] == "TA_LSTM":
        net = model.TA_LSTM(in_dim,
                        sequence_length,
                        lstm_in_dim,
                        lstm_hidden_dim,
                        out_dim,
                        mode,) 

    train_dataloader = torch.utils.data.DataLoader(train_,
                                           batch_size = batch_size,
                                           shuffle = True)
    val_dataloader = torch.utils.data.DataLoader(val_,
                                            batch_size = batch_size,
                                            shuffle = False)
    test_dataloader = torch.utils.data.DataLoader(test_,
                                            batch_size = batch_size,
                                            shuffle = False)

    data_loader = {
    "train": train_dataloader,
    "val": val_dataloader,
    "test": test_dataloader,
    }
    
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr)
                                           
    best_trained_model = fit(net, loss_function, optimizer, data_loader, num_epochs, mode)
    out_name = ""
    for k, v in config.items():
        if not k in ['weights_dir', 'cwd']:
            out_name += '{}-{}_'.format(k, v)
    torch.save(best_trained_model.state_dict(), os.path.join(config['cwd'], config['weights_dir'], out_name[:-1] + '.pth'))


#### Hyperparameter tuning with ray tuning

In [26]:
import os
os.environ['TUNE_DISABLE_STRICT_METRIC_CHECKING'] = "1"

In [27]:
config = {
    "mlflow_experiment_id": experiment_id,
    "weights_dir": weights_dir,
    "cwd": cwd,
    "lr": tune.loguniform(1e-4, 1e-1),
    #"batch_size": tune.choice([128*2, 128*3, 128*4]),
    #"mi": tune.grid_search(['True', 'False']),
    #"in_dim": tune.grid_search([24, 24*2, 24*3]),
    "arch": tune.grid_search(["FCN", "LSTM", "TA_LSTM"]),
    #"hidden_dim": tune.choice([32, 64, 128])
}
"""scheduler = ASHAScheduler(
        metric='val_loss',
        mode="min",
        max_t=100,
        grace_period=1,
        reduction_factor=2,
)"""

analysis = tune.run(
    train_model,
    config=config,
    resources_per_trial={"cpu": 12, "gpu": 1},
    num_samples=3,
    # scheduler=scheduler,
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

0,1
Current time:,2023-02-16 18:54:53
Running for:,00:07:33.13
Memory:,13.5/31.9 GiB

Trial name,status,loc,arch,lr,iter,total time (s),test_loss
train_model_f650c_00000,TERMINATED,127.0.0.1:35396,FCN,0.0495508,201,35.4572,1.75788
train_model_f650c_00001,TERMINATED,127.0.0.1:7876,LSTM,0.00243087,201,47.4349,0.313283
train_model_f650c_00002,TERMINATED,127.0.0.1:3044,TA_LSTM,0.000197517,201,55.2875,6.96543
train_model_f650c_00003,TERMINATED,127.0.0.1:37416,FCN,0.000333819,201,35.7544,6.60987
train_model_f650c_00004,TERMINATED,127.0.0.1:30500,LSTM,0.0686043,201,47.411,4.78717
train_model_f650c_00005,TERMINATED,127.0.0.1:27772,TA_LSTM,0.0006966,201,55.9771,1.06579
train_model_f650c_00006,TERMINATED,127.0.0.1:27276,FCN,0.00515184,201,36.722,0.502742
train_model_f650c_00007,TERMINATED,127.0.0.1:31440,LSTM,0.0828753,201,47.9711,16.0248
train_model_f650c_00008,TERMINATED,127.0.0.1:30044,TA_LSTM,0.000977645,201,57.3268,0.637691


2023-02-16 18:54:54,060	INFO tune.py:763 -- Total run time: 453.29 seconds (453.11 seconds for the tuning loop).
