### Testing hyperparameter tuning with Ray Tune on a simple CNN
https://docs.ray.io/en/latest/tune/examples/tune-pytorch-cifar.html

In [1]:
# Ensure the necessary imports
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from ray import tune, train
from ray.tune.schedulers import ASHAScheduler

from src.engine import simpleCNN_engine
from src.model_simpleCNN import SimpleCNN
from src.dataset import SignalsDataset
from src.preprocess import Prep_data_for_CNN
from src.hyperparam import RayTuning

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build the windowed dataframe from the segmented sample CSV.
# prep_for_FMAPextract is switched off for this tuning run.
segmented_csv_path = './data/sample_sub_segmented_data_112.csv'
windowed_df = Prep_data_for_CNN().get_window_df_from_dir(
    segmented_csv_path,
    prep_for_FMAPextract=False,
)

In [4]:
# Turn the windowed dataframe into model inputs (X), labels (y) and the
# label names (uniq).
prepared_inputs = Prep_data_for_CNN().prep_input_for_CNN(windowed_df)
X, y, uniq = prepared_inputs

In [5]:
# Show the label names next to the distinct integer codes present in y.
label_codes = np.unique(y)
uniq, label_codes

(array(['Not_driving', 'User1', 'User2'], dtype='<U11'),
 array([0, 1, 2], dtype=int64))

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [7]:
# Stratified hold-out: 20% of the data for testing, then 20% of the remainder
# for validation. Seeding NumPy's global RNG makes both splits repeatable,
# because train_test_split falls back on it when no random_state is passed.
np.random.seed(1)
X_tr, X_test, y_tr, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
X_train, X_valid, y_train, y_valid = train_test_split(X_tr, y_tr, test_size=0.2, stratify=y_tr)

# 'balanced' class weights are derived from the training labels only and moved
# to the GPU when one is available, otherwise kept on the CPU.
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32)
class_weights = class_weights.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Normalise X with statistics computed over axes 0 and 2 of the training split
# only, so nothing from the validation/test data leaks into the scaling.
# NOTE(review): assumes X is a NumPy array laid out as (samples, channels, time) - confirm.
mean = X_train.mean(axis=(0, 2), keepdims=True)
std = X_train.std(axis=(0, 2), keepdims=True)
# A channel that is constant in the training split has std == 0; dividing by it
# would fill that channel with NaN/inf. Scale such channels by 1 instead.
std[std == 0] = 1.0

X_train = (X_train - mean) / std
X_valid = (X_valid - mean) / std
X_test = (X_test - mean) / std

In [8]:
# Signal name -> index handed to SignalsDataset
# (presumably the channel position inside X - TODO confirm).
feature_indices = {'long_acc': 0, 'tranv_acc': 1, 'ang_vel': 2}


def _build_feature_datasets(inputs, labels):
    """Return one SignalsDataset per entry of feature_indices, keyed by signal name."""
    return {
        signal_name: SignalsDataset(inputs, labels, signal_idx)
        for signal_name, signal_idx in feature_indices.items()
    }


train_datasets = _build_feature_datasets(X_train, y_train)
valid_datasets = _build_feature_datasets(X_valid, y_valid)
test_datasets = _build_feature_datasets(X_test, y_test)

# Full (unsplit) data, normalised with the training statistics; kept for
# extracting feature maps from the data.
all_dataset = _build_feature_datasets((X - mean) / std, y)

In [9]:
# Kept for backward compatibility with any later cell that relies on NumPy's
# global RNG state. Note that it does NOT influence DataLoader shuffling:
# the shuffling sampler draws from torch's RNG, not NumPy's.
np.random.seed(1)

batch_size = 8

# Each shuffled loader gets its own explicitly seeded torch.Generator so that
# the shuffle order is reproducible regardless of the global RNG state.
train_dl_long = DataLoader(train_datasets['long_acc'], batch_size=batch_size, shuffle=True,
                           generator=torch.Generator().manual_seed(1))
train_dl_tranv = DataLoader(train_datasets['tranv_acc'], batch_size=batch_size, shuffle=True,
                            generator=torch.Generator().manual_seed(1))
train_dl_angvel = DataLoader(train_datasets['ang_vel'], batch_size=batch_size, shuffle=True,
                             generator=torch.Generator().manual_seed(1))

# Validation and test loaders iterate in dataset order, so no seeding is needed.
valid_dl_long = DataLoader(valid_datasets['long_acc'], batch_size=batch_size, shuffle=False)
valid_dl_tranv = DataLoader(valid_datasets['tranv_acc'], batch_size=batch_size, shuffle=False)
valid_dl_angvel = DataLoader(valid_datasets['ang_vel'], batch_size=batch_size, shuffle=False)

test_dl_long = DataLoader(test_datasets['long_acc'], batch_size=batch_size, shuffle=False)
test_dl_tranv = DataLoader(test_datasets['tranv_acc'], batch_size=batch_size, shuffle=False)
test_dl_angvel = DataLoader(test_datasets['ang_vel'], batch_size=batch_size, shuffle=False)

Define Hyperparameter search space

In [10]:
# Objects shared by every trial: the model class, its engine, the class-weighted
# loss, and the directory name handed to the tuner.
model = SimpleCNN
modelType = 'SimpleCNN'
engine = simpleCNN_engine
criterion = nn.CrossEntropyLoss(weight=class_weights)
save_dir = 'test'

# Broad search space: Ray Tune samples one value per key for each trial.
config = dict(
    optimizer=tune.choice(["adam", "sgd"]),
    lr=tune.loguniform(1e-5, 1e-1),
    scheduler=tune.choice(["step", "exp", "cos"]),
    step_size=tune.choice([10, 20, 30, 40, 50]),
    gamma=tune.uniform(0.1, 0.9),
    weight_decay=tune.choice([0.0, 1e-4, 1e-3, 1e-2]),
    epochs=tune.choice([10, 20, 30]),
    batch_size=tune.choice([8, 16, 32, 64]),
)

Create the `RayTuning` helper that integrates training with Ray Tune

In [11]:
# Bundle the search space, output directory, loss, model class, model label
# and engine into a single tuner object.
mytuner = RayTuning(
    config,
    save_dir,
    criterion,
    model,
    modelType,
    engine,
)

In [12]:
from IPython.display import display, HTML

# Inject the notebook's custom stylesheet; the href is relative, so
# custom.css has to be reachable from the notebook's location.
stylesheet_link = '<link rel="stylesheet" type="text/css" href="./custom.css">'
css = HTML(stylesheet_link)
display(css)

In [13]:
# Run the search on the 'long_acc' signal, passing its train, validation and
# test datasets in that order.
tuned_signal = 'long_acc'
mytuner.main(
    train_datasets[tuned_signal],
    valid_datasets[tuned_signal],
    test_datasets[tuned_signal],
)

0,1
Current time:,2024-07-22 17:27:10
Running for:,00:22:24.05
Memory:,9.3/13.9 GiB

Trial name,status,loc,batch_size,epochs,gamma,lr,optimizer,scheduler,step_size,weight_decay,iter,total time (s),loss,accuracy
train_model_1d5b1_00000,TERMINATED,127.0.0.1:12104,8,10,0.341866,0.0974404,sgd,step,20,0.01,10,540.055,1.11497,78.2609
train_model_1d5b1_00001,TERMINATED,127.0.0.1:13072,32,20,0.531053,0.000241125,sgd,cos,20,0.001,1,57.2564,1.10892,20.2899
train_model_1d5b1_00002,TERMINATED,127.0.0.1:8280,16,20,0.83117,0.0325439,adam,exp,30,0.01,1,57.2614,1.10047,14.4928
train_model_1d5b1_00003,TERMINATED,127.0.0.1:13780,16,30,0.742206,0.0129884,adam,exp,20,0.0001,1,55.4,1.19581,40.5797
train_model_1d5b1_00004,TERMINATED,127.0.0.1:26460,32,10,0.235864,0.0207294,adam,step,40,0.0001,2,111.021,1.09778,13.0435
train_model_1d5b1_00005,TERMINATED,127.0.0.1:15256,32,10,0.653502,0.0678513,adam,exp,40,0.0001,1,55.0703,60.9487,20.2899
train_model_1d5b1_00006,TERMINATED,127.0.0.1:2032,64,30,0.698533,1.36989e-05,sgd,cos,10,0.01,2,110.884,1.0962,44.9275
train_model_1d5b1_00007,TERMINATED,127.0.0.1:23236,8,20,0.334891,0.000618834,sgd,exp,40,0.01,2,111.245,1.1025,66.6667
train_model_1d5b1_00008,TERMINATED,127.0.0.1:29476,64,20,0.312437,0.00519209,sgd,step,10,0.01,2,112.432,1.24011,14.4928
train_model_1d5b1_00009,TERMINATED,127.0.0.1:2068,16,20,0.526759,0.000110641,adam,step,10,0.0001,1,56.5806,1.17898,42.029


[36m(train_model pid=12104)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Science Research Project (2023)/workspace/ray_results/test/trial_1d5b1_00000/checkpoint_000000)
[36m(train_model pid=12104)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Science Research Project (2023)/workspace/ray_results/test/trial_1d5b1_00000/checkpoint_000001)
[36m(train_model pid=12104)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Science Research Project (2023)/workspace/ray_results/test/trial_1d5b1_00000/checkpoint_000002)
[36m(train_model pid=12104)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Scie

Best trial config: {'optimizer': 'sgd', 'lr': 1.3698885785105936e-05, 'scheduler': 'cos', 'step_size': 10, 'gamma': 0.6985325235038715, 'weight_decay': 0.01, 'epochs': 30, 'batch_size': 64}
Best trial final validation loss: 1.0962030291557312
Best trial final validation accuracy: 44.9275%
Test Accuracy: 0.4418604651162791
Best trial test set accuracy: 0.4419%


Decision: go with the Adam optimizer, the exponential LR scheduler (`exp`), and a batch size of 32.


In [14]:
# Narrowed search space: optimizer, scheduler, epochs and batch size are pinned
# to a single value each, so only the learning rate and gamma are searched.
config = {
    "optimizer": tune.choice(["adam"]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "scheduler": tune.choice(["exp"]),
    # "step_size" and "weight_decay" are intentionally left out of this search.
    "gamma": tune.uniform(0.1, 0.9),
    "epochs": tune.choice([30]),
    # Batch size 32 matches the decision noted above this cell and the recorded
    # trial table below (the previous value of 16 contradicted both).
    "batch_size": tune.choice([32])
}

In [2]:
# Scratch check: echo the literal 1e-4 (the lower bound of the `lr` range in
# the narrowed search space) to see its decimal form.
1e-4

0.0001

In [15]:
# Rebuild the tuner so that it picks up the current `config`; the other
# arguments are the same objects as before.
mytuner = RayTuning(
    config,
    save_dir,
    criterion,
    model,
    modelType,
    engine,
)

In [16]:
# Launch the tuner on the 'long_acc' signal with its train, validation and
# test datasets, in that order.
tuned_signal = 'long_acc'
mytuner.main(
    train_datasets[tuned_signal],
    valid_datasets[tuned_signal],
    test_datasets[tuned_signal],
)

0,1
Current time:,2024-07-22 17:41:47
Running for:,00:02:32.07
Memory:,10.1/13.9 GiB

Trial name,status,loc,batch_size,epochs,gamma,lr,optimizer,scheduler,iter,total time (s),loss,accuracy
train_model_eea09_00000,RUNNING,127.0.0.1:4416,32,30,0.528717,0.000141208,adam,exp,2.0,110.616,1.15031,44.9275
train_model_eea09_00001,PENDING,,32,30,0.855676,0.00350483,adam,exp,,,,
train_model_eea09_00002,PENDING,,32,30,0.20998,0.0513104,adam,exp,,,,
train_model_eea09_00003,PENDING,,32,30,0.418141,0.0264347,adam,exp,,,,
train_model_eea09_00004,PENDING,,32,30,0.378213,0.0606074,adam,exp,,,,
train_model_eea09_00005,PENDING,,32,30,0.806645,0.0150659,adam,exp,,,,
train_model_eea09_00006,PENDING,,32,30,0.379119,0.0178989,adam,exp,,,,
train_model_eea09_00007,PENDING,,32,30,0.442473,0.0487145,adam,exp,,,,
train_model_eea09_00008,PENDING,,32,30,0.597357,0.00977968,adam,exp,,,,
train_model_eea09_00009,PENDING,,32,30,0.45993,0.0705452,adam,exp,,,,


[36m(train_model pid=4416)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Science Research Project (2023)/workspace/ray_results/test/trial_eea09_00000/checkpoint_000000)
[36m(train_model pid=4416)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=c:/Users/LEGION/Documents/Msc Data Science - Uni of Exeter/ECMM451 - Data Science Research Project (2023)/workspace/ray_results/test/trial_eea09_00000/checkpoint_000001)


: 