### Testing Hyper-param tuning with Ray and Tune on a Simple CNN.
https://docs.ray.io/en/latest/tune/examples/tune-pytorch-cifar.html

In [1]:
# Ensure the necessary imports
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from ray import tune
from ray.tune.schedulers import ASHAScheduler

from src.engine import simpleCNN_engine
from src.model_simpleCNN import SimpleCNN
from src.dataset import SignalsDataset
from src.preprocess import Prep_data_for_CNN

In [2]:
import pandas as pd
import numpy as np

In [3]:
windowed_df = Prep_data_for_CNN().get_window_df_from_dir('./data/sample_sub_segmented_data_112.csv',
                                                         prep_for_FMAPextract=False)

In [4]:
X, y, uniq = Prep_data_for_CNN().prep_input_for_CNN(windowed_df)

In [5]:
uniq, np.unique(y)

(array(['Not_driving', 'User1', 'User2'], dtype='<U11'),
 array([0, 1, 2], dtype=int64))

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [25]:
# Perform a train-test split
np.random.seed(1)
X_tr, X_test, y_tr, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
X_train, X_valid, y_train, y_valid = train_test_split(X_tr, y_tr, test_size=0.2, stratify=y_tr)

class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32)
class_weights = class_weights.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# normalizing X
mean = X_train.mean(axis=(0, 2), keepdims=True)
std = X_train.std(axis=(0, 2), keepdims=True)

X_train = (X_train - mean) / std
X_valid = (X_valid - mean) / std
X_test = (X_test - mean) / std

In [26]:
# Create datasets for each feature
feature_indices = {'long_acc': 0, 'tranv_acc': 1, 'ang_vel': 2}

train_datasets = {feature: SignalsDataset(X_train, y_train, idx) for feature, idx in feature_indices.items()}
valid_datasets = {feature: SignalsDataset(X_valid, y_valid, idx) for feature, idx in feature_indices.items()}
test_datasets = {feature: SignalsDataset(X_test, y_test, idx) for feature, idx in feature_indices.items()}

## dataset for extracting feature maps from data
all_dataset = {feature: SignalsDataset((X-mean)/std, y, idx) for feature, idx in feature_indices.items()}

In [27]:
np.random.seed(1)

batch_size = 8 
train_dl_long = DataLoader(train_datasets['long_acc'], batch_size, shuffle=True)
train_dl_tranv = DataLoader(train_datasets['tranv_acc'], batch_size, shuffle=True)
train_dl_angvel = DataLoader(train_datasets['ang_vel'], batch_size, shuffle=True)

valid_dl_long = DataLoader(valid_datasets['long_acc'], batch_size, shuffle=False)
valid_dl_tranv = DataLoader(valid_datasets['tranv_acc'], batch_size, shuffle=False)
valid_dl_angvel = DataLoader(valid_datasets['ang_vel'], batch_size, shuffle=False)

test_dl_long = DataLoader(test_datasets['long_acc'], batch_size, shuffle=False)
test_dl_tranv = DataLoader(test_datasets['tranv_acc'], batch_size, shuffle=False)
test_dl_angvel = DataLoader(test_datasets['ang_vel'], batch_size, shuffle=False)

Define Hyperparameter search space

In [28]:
# Enhanced Hyperparameter search space
config = {
    "optimizer": tune.choice(["adam", "sgd"]),
    "lr": tune.loguniform(1e-5, 1e-1),
    "scheduler": tune.choice(["step", "exp", "cos"]),
    "step_size": tune.choice([10, 20, 30, 40, 50]),
    "gamma": tune.uniform(0.1, 0.9),
    "weight_decay": tune.choice([0.0, 1e-4, 1e-3, 1e-2]),
    "epochs": tune.choice([10, 20, 30]),
    "batch_size": tune.choice([8, 16, 32, 64])
}

Define a training function that integrates with Ray Tune

In [29]:
import ray
import tempfile
from ray import train, tune
from ray.train import Checkpoint
from ray.tune.schedulers import ASHAScheduler

In [30]:
# Define a training function that integrates with Ray Tune
def train_simpl_model(config, train_datasets, valid_datasets):
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      model = SimpleCNN().to(device)

      if config["optimizer"] == "adam":
            optimizer = optim.Adam(model.parameters(), lr=config["lr"])
      elif config["optimizer"] == "sgd":
            optimizer = optim.SGD(model.parameters(), lr=config["lr"])

      if config["scheduler"] == "step":
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["step_size"], gamma=config["gamma"])
      elif config["scheduler"] == "exp":
            scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=config["gamma"])
      elif config["scheduler"] == "cos":
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config["epochs"])

      # Load existing checkpoint through `get_checkpoint()` API.
      if train.get_checkpoint():
            loaded_checkpoint = train.get_checkpoint()
            with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
                  model_state, optimizer_state, scheduler_state = torch.load(
                  os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
                  )
                  model.load_state_dict(model_state)
                  optimizer.load_state_dict(optimizer_state)
                  scheduler.load_state_dict(scheduler_state)
      
      train_loader = DataLoader(train_datasets, batch_size=int(config["batch_size"]), 
                                shuffle=True, num_workers=4)
      val_loader = DataLoader(valid_datasets, batch_size=int(config["batch_size"]), 
                                shuffle=True, num_workers=4)
      
      criterion = nn.CrossEntropyLoss(weight=class_weights)
      engine = simpleCNN_engine(model, optimizer, scheduler, criterion, device)
      
      for epoch in range(config['epochs']):
            train_loss, train_acc = engine.train(train_loader)
            val_loss, val_acc, _ = engine.validate(val_loader)

            
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                  path = os.path.join(temp_checkpoint_dir, "checkpoint.pt")
                  torch.save(
                  (model.state_dict(), optimizer.state_dict(), scheduler.state_dict()), path
                  )
                  checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)
                  train.report(
                  {"loss": val_loss, "accuracy": val_acc},
                  checkpoint=checkpoint,
                  )
            
            engine.scheduler.step()

      print('Finished Training')

In [31]:
def test_simpl_model(best_result, test_dataset):
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      best_trained_model = SimpleCNN().to(device)

      checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

      if best_result.config["optimizer"] == "adam":
            optimizer = optim.Adam(best_trained_model.parameters(), lr=best_result.config["lr"])
      elif best_result.config["optimizer"] == "sgd":
            optimizer = optim.SGD(best_trained_model.parameters(), lr=best_result.config["lr"])

      if best_result.config["scheduler"] == "step":
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=best_result.config["step_size"], gamma=config["gamma"])
      elif best_result.config["scheduler"] == "exp":
            scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=best_result.config["gamma"])
      elif best_result.config["scheduler"] == "cos":
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=best_result.config["epochs"])

      model_state, optimizer_state, scheduler_state = torch.load(checkpoint_path)
      best_trained_model.load_state_dict(model_state)


      test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)
      
      criterion = nn.CrossEntropyLoss(weight=class_weights)
      engine = simpleCNN_engine(best_trained_model, optimizer, scheduler, criterion, device)
      
      test_acc = engine.test(test_loader)

In [32]:
def custom_trial_dirname_creator(trial):
    '''A  function that generates shorter directory names for the trials'''
    return f"trial_{trial.trial_id}"

In [33]:
def main(config, train_datasets, valid_datasets, test_dataset, num_samples=10, max_num_epochs=10, gpus_per_trial=1):
      def custom_trial_dirname_creator(trial):
            '''A  function that generates shorter directory names for the trials'''
            return f"trial_{trial.trial_id}"

      scheduler = ASHAScheduler(
            max_t=max_num_epochs,
            grace_period=1,
            reduction_factor=2)
      #train_simpl_model(config, train_datasets, valid_datasets)
      tuner = tune.Tuner(
            tune.with_resources(
                  tune.with_parameters(train_simpl_model, 
                                       train_datasets=train_datasets,
                                       valid_datasets=valid_datasets),
                  resources={"cpu": 2, "gpu": gpus_per_trial}
            ),
            tune_config=tune.TuneConfig(
                  metric="loss",
                  mode="min",
                  scheduler=scheduler,
                  num_samples=num_samples,
                  trial_dirname_creator=custom_trial_dirname_creator
            ),
            param_space=config
      )
      results = tuner.fit()
      
      best_result = results.get_best_result("loss", "min")

      print("Best trial config: {}".format(best_result.config))
      print("Best trial final validation loss: {}".format(
            best_result.metrics["loss"]))
      print("Best trial final validation accuracy: {}".format(
            best_result.metrics["accuracy"]))

      test_simpl_model(best_result, test_dataset)

In [34]:
from IPython.display import display, HTML

# Load custom CSS file
css = HTML('<link rel="stylesheet" type="text/css" href="./custom.css">')  # Ensure the path is correct
display(css)

In [35]:
main(config, train_datasets['long_acc'], valid_datasets['long_acc'], test_datasets['long_acc'], num_samples=10, max_num_epochs=10, gpus_per_trial=1)

0,1
Current time:,2024-07-22 00:33:11
Running for:,00:21:42.94
Memory:,11.3/13.9 GiB

Trial name,status,loc,batch_size,epochs,gamma,lr,optimizer,scheduler,step_size,weight_decay,iter,total time (s),loss,accuracy
train_simpl_model_8f736_00000,TERMINATED,127.0.0.1:10112,8,10,0.341866,0.0974404,sgd,step,20,0.01,10,477.823,1.08482,73.913
train_simpl_model_8f736_00001,TERMINATED,127.0.0.1:21432,32,20,0.531053,0.000241125,sgd,cos,20,0.001,1,46.6047,1.10245,13.0435
train_simpl_model_8f736_00002,TERMINATED,127.0.0.1:22076,16,20,0.83117,0.0325439,adam,exp,30,0.01,8,376.591,1.07869,78.2609
train_simpl_model_8f736_00003,TERMINATED,127.0.0.1:29936,16,30,0.742206,0.0129884,adam,exp,20,0.0001,1,47.9842,1.10018,8.69565
train_simpl_model_8f736_00004,TERMINATED,127.0.0.1:6060,32,10,0.235864,0.0207294,adam,step,40,0.0001,1,47.7005,1.56344,13.0435
train_simpl_model_8f736_00005,TERMINATED,127.0.0.1:23040,32,10,0.653502,0.0678513,adam,exp,40,0.0001,1,49.4842,8.7399,39.1304
train_simpl_model_8f736_00006,TERMINATED,127.0.0.1:10056,64,30,0.698533,1.36989e-05,sgd,cos,10,0.01,1,45.76,1.10952,23.1884
train_simpl_model_8f736_00007,TERMINATED,127.0.0.1:22208,8,20,0.334891,0.000618834,sgd,exp,40,0.01,1,46.9355,1.1075,78.2609
train_simpl_model_8f736_00008,TERMINATED,127.0.0.1:31668,64,20,0.312437,0.00519209,sgd,step,10,0.01,1,47.9172,1.22699,8.69565
train_simpl_model_8f736_00009,TERMINATED,127.0.0.1:21100,16,20,0.526759,0.000110641,adam,step,10,0.0001,1,47.3713,1.10961,57.971


2024-07-22 00:33:11,754	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/LEGION/ray_results/train_simpl_model_2024-07-22_00-11-28' in 0.0153s.
2024-07-22 00:33:11,769	INFO tune.py:1041 -- Total run time: 1302.97 seconds (1302.92 seconds for the tuning loop).


Best trial config: {'optimizer': 'adam', 'lr': 0.03254391115088484, 'scheduler': 'exp', 'step_size': 30, 'gamma': 0.8311696196633863, 'weight_decay': 0.01, 'epochs': 20, 'batch_size': 16}
Best trial final validation loss: 1.0786944150924682
Best trial final validation accuracy: 78.26086956521739
Test Accuracy: 0.7906976744186046
