<a href="https://colab.research.google.com/github/jbsdoki/Squishy_Robots_Quant_Models/blob/main/Squish_Robot_Quant_Model_4_VGN_w_Batch_Pool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sources:
### Hyperparameter Tuning with Optuna:
https://medium.com/@taeefnajib/hyperparameter-tuning-using-optuna-c46d7b29a3e

https://optuna.org/#code_examples
### Multi-Modal ML Models
https://www.nature.com/articles/s41598-025-14901-4

### Next Models to test:
VideoGasNet:
https://www.sciencedirect.com/science/article/pii/S0360544221017643

GasVit: https://www.sciencedirect.com/science/article/pii/S1568494623011560?via%3Dihub#sec3

In [1]:
pip install optuna #Hyperparameter Optimizer

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.5.0


In [2]:
import os
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from collections import defaultdict
from collections import Counter

from sklearn.model_selection import train_test_split

import optuna


In [3]:
!unzip -q Final_Dataset.zip

## Print out the structure of the data

In [4]:
# Load one known sample to sanity-check the on-disk array layout.
# GasVid synthetic processed dataset should be 2 channels, 240x320 in dimension.
file_path = './Final_Dataset/data/class_0/1237_frame_1004_class_0.npy'
sample_data = np.load(file_path)
print(
    f"Shape of preprocessed sample data: {sample_data.shape}",
    f"Data type of preprocessed sample data: {sample_data.dtype}",
    sep="\n",
)

Shape of preprocessed sample data: (2, 240, 320)
Data type of preprocessed sample data: float32


## Load and preprocess the data

In [5]:
# Assuming the data is in 'Final_Dataset/data' and class folders are named 'class_0' ... 'class_7'
data_dir = 'Final_Dataset/data'

# Keep only real class directories. Notebook environments can leave stray entries
# (e.g. '.ipynb_checkpoints') next to the data, and every name in `classes` is
# turned into a label index when the files are enumerated.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if entry.startswith('class_') and os.path.isdir(os.path.join(data_dir, entry))
)
print(f"Classes: {classes}")

Classes: ['class_0', 'class_1', 'class_2', 'class_3', 'class_4', 'class_5', 'class_6', 'class_7']


In [6]:
# NOTE(review): file_paths / labels are not filled in by this cell; kept in case
# other cells still reference them.
file_paths = []
labels = []

# Group by video ID only (not by video+class): each video ID maps to a list of
# (file path, class index) pairs, where the class index is the position of the
# folder name in `classes`.
video_to_files = defaultdict(list)
for i, class_name in enumerate(classes):
    class_dir = os.path.join(data_dir, class_name)
    # os.listdir returns entries in arbitrary order. Sorting keeps the insertion
    # order of video_to_files (and the order of each file list) identical across
    # machines and filesystems.
    for file_name in sorted(os.listdir(class_dir)):
        if file_name.endswith('.npy'):
            # File names start with the video ID, e.g. '1237_frame_1004_class_0.npy'
            video_id = file_name.split('_')[0]
            video_to_files[video_id].append((os.path.join(class_dir, file_name), i))

print(f"Number of videos: {len(video_to_files)}")

Number of videos: 28


In [7]:

# Read a single .npy sample from disk and make sure it is float32
def load_and_preprocess_npy(filepath):
    """Load the array stored at `filepath` and return it as float32.

    An array that is already float32 is returned exactly as loaded; any other
    dtype is converted with `astype(np.float32)`.
    """
    loaded = np.load(filepath)
    return loaded if loaded.dtype == np.float32 else loaded.astype(np.float32)

# Standardize a sample using its own mean and standard deviation
def normalize_data(data):
    """Return `data` shifted by its mean and scaled by its standard deviation.

    Both statistics are taken over the whole array (`data.mean()` /
    `data.std()` with no axis). A small constant is added to the standard
    deviation so a constant array does not divide by zero.
    """
    eps = 1e-5
    centered = data - data.mean()
    return centered / (data.std() + eps)

In [8]:
# Split by video IDs, then gather the files of each side, so every file of a
# given video ends up in exactly one of the two sets.
# The IDs are sorted first so the split depends only on random_state and not on
# the order in which the files happened to be discovered on disk.
video_ids = sorted(video_to_files)
train_vids, test_vids = train_test_split(video_ids, test_size=0.2, random_state=42)


def _collect_samples(vids):
    """Return parallel lists (file paths, labels) for every file of the given video IDs."""
    paths, targets = [], []
    for vid in vids:
        for filepath, label in video_to_files[vid]:
            paths.append(filepath)
            targets.append(label)
    return paths, targets


train_files, train_labels = _collect_samples(train_vids)
test_files, test_labels = _collect_samples(test_vids)

In [9]:
print(f"Training videos: {sorted(train_vids)}")
print(f"Testing videos:  {sorted(test_vids)}")

# Make sure no video appears on both sides of the split
overlap = set(train_vids) & set(test_vids)
print(f"Video overlap: {overlap if overlap else 'None'}")

# Make sure classes are distributed
print(f"\nTraining classes: {dict(sorted(Counter(train_labels).items()))}")
print(f"Testing classes:  {dict(sorted(Counter(test_labels).items()))}")

# Make sure both sets contain every class. Labels are the enumerate() indices of
# `classes`, so the expected label set follows from its length rather than from a
# hard-coded count.
num_classes = len(classes)
expected_classes = set(range(num_classes))
train_classes = set(train_labels)
test_classes = set(test_labels)
missing_train = expected_classes - train_classes
missing_test = expected_classes - test_classes

if missing_train:
    print(f"WARNING: Training missing classes {missing_train}")
if missing_test:
    print(f"WARNING: Testing missing classes {missing_test}")
if not missing_train and not missing_test:
    print(f"Both train and test have all {num_classes} classes")

Training videos: ['1237', '1238', '1239', '1240', '1241', '1242', '1467', '1469', '1470', '1471', '1472', '2559', '2560', '2562', '2563', '2566', '2567', '2568', '2569', '2571', '2578', '2579']
Testing videos:  ['1468', '2561', '2564', '2580', '2581', '2583']
Video overlap: None

Training classes: {0: 4249, 1: 4236, 2: 4230, 3: 4247, 4: 4230, 5: 4249, 6: 4226, 7: 4244}
Testing classes:  {0: 1146, 1: 1157, 2: 1163, 3: 1150, 4: 1152, 5: 1162, 6: 1154, 7: 1162}
Both train and test have all 8 classes


## Create a dataset and dataloader

In [10]:


class NpyDataset(Dataset):
    """Dataset that loads one normalized .npy sample per index.

    Args:
        file_paths: sequence of paths to .npy files.
        labels: sequence of class labels, parallel to `file_paths`.
        transform: optional callable applied to the normalized NumPy array. It
            may return either a NumPy array or a torch tensor.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per file path)."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load, normalize and optionally transform sample `idx`.

        Returns:
            A `(data, label)` pair of tensors; the label tensor is int64.
        """
        filepath = self.file_paths[idx]
        label = self.labels[idx]

        # Files are read on every access; nothing is cached in memory.
        data = load_and_preprocess_npy(filepath)
        data = normalize_data(data)

        if self.transform is not None:
            data = self.transform(data)

        # as_tensor wraps an ndarray without copying (like from_numpy) and also
        # accepts a tensor, so a transform that already returns a tensor works.
        # The label dtype is explicit so NumPy integer labels also come out int64.
        return torch.as_tensor(data), torch.as_tensor(label, dtype=torch.long)


# Build one dataset per side of the video-level split
train_dataset = NpyDataset(file_paths=train_files, labels=train_labels)
test_dataset = NpyDataset(file_paths=test_files, labels=test_labels)

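## Define the CNN model

The CNN (`VideoGasNet`) is defined inside the Optuna objective function below, so that its hidden size and dropout rates can be set from each trial's hyperparameters.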

## Define the Optuna Objective Function

This function will be called by Optuna for each trial. It will:
1. Suggest hyperparameters using the trial object.
2. Build and train the CNN model with the suggested hyperparameters.
3. Evaluate the model on the held-out test split, which is used here as the validation set.
4. Return the validation accuracy, which the study maximizes.

In [15]:
def objective(trial):
    """Optuna objective: train a VideoGasNet with sampled hyperparameters and score it.

    The model is trained on `train_dataset` for the sampled number of epochs.
    After every epoch its accuracy on `test_dataset` is reported to the trial so
    that a pruner configured on the study can stop unpromising trials early.

    NOTE: `test_dataset` doubles as the validation set here, so the best value
    found by the study is an optimistic estimate of generalization.

    Args:
        trial: the Optuna trial used to sample hyperparameters and to report
            intermediate accuracies.

    Returns:
        Accuracy (fraction of correct predictions) on `test_dataset` after the
        final epoch.

    Raises:
        optuna.TrialPruned: if the study's pruner decides to stop this trial.
        ValueError: if the sampled optimizer name has no matching branch.
    """

    #############################
    # All Hyperparameters Tested
    #############################
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD', 'Adadelta', 'AdamW'])
    # Momentum is only sampled for the optimizers that accept it
    momentum = trial.suggest_float('momentum', 0.0, 0.99) if optimizer_name in ['SGD', 'RMSprop'] else 0.0
    weight_decay = trial.suggest_float('weight_decay', 0.0, 0.01)
    hidden_size = trial.suggest_int('hidden_size', 64, 256)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    num_epochs = trial.suggest_int('num_epochs', 5, 10)
    fc_drop_rate = trial.suggest_float('fc_drop_rate', 0.2, 0.6)
    cnn_drop_rate = trial.suggest_float('cnn_drop_rate', 0.0, 0.3)

    #####################
    # Define the Model
    #####################
    class VideoGasNet(nn.Module):
        """Three conv/batch-norm/ReLU/max-pool/dropout blocks followed by a two-layer classifier.

        Expects input with 2 channels and 240x320 spatial size (the flatten
        size below is computed for exactly that) and outputs 8 class logits.
        """

        def __init__(self, hidden_size, fc_drop_rate=0.3, cnn_drop_rate=0.3):
            super(VideoGasNet, self).__init__()

            self.conv1    = nn.Conv2d(2, 32, kernel_size=3, padding=1)
            self.bn1      = nn.BatchNorm2d(32)
            self.relu1    = nn.ReLU()
            self.pool1    = nn.MaxPool2d(kernel_size=2, stride=2)
            self.dropout1 = nn.Dropout2d(cnn_drop_rate)

            self.conv2    = nn.Conv2d(32, 64, kernel_size=3, padding=1)
            self.bn2      = nn.BatchNorm2d(64)
            self.relu2    = nn.ReLU()
            self.pool2    = nn.MaxPool2d(kernel_size=2, stride=2)
            self.dropout2 = nn.Dropout2d(cnn_drop_rate)

            self.conv3    = nn.Conv2d(64, 128, kernel_size=3, padding=1)
            self.bn3      = nn.BatchNorm2d(128)
            self.relu3    = nn.ReLU()
            self.pool3    = nn.MaxPool2d(kernel_size=2, stride=2)
            self.dropout3 = nn.Dropout2d(cnn_drop_rate)

            # The padded 3x3 convs keep H and W; each of the three 2x2 pools
            # halves them, so a 240x320 input leaves (240 // 8) x (320 // 8).
            flatten_size = 128 * (240 // 8) * (320 // 8)
            self.fc1 = nn.Linear(flatten_size, hidden_size)
            self.bn4 = nn.BatchNorm1d(hidden_size)
            self.relu4 = nn.ReLU()
            self.dropout4 = nn.Dropout(fc_drop_rate)
            self.fc2 = nn.Linear(hidden_size, 8)

        def forward(self, x):
            # Convolutional Blocks
            x = self.dropout1(self.pool1(self.relu1(self.bn1(self.conv1(x)))))
            x = self.dropout2(self.pool2(self.relu2(self.bn2(self.conv2(x)))))
            x = self.dropout3(self.pool3(self.relu3(self.bn3(self.conv3(x)))))

            # Fully Connected Blocks (Neural Network)
            x = x.view(x.size(0), -1)  # flatten
            x = self.relu4(self.bn4(self.fc1(x)))
            x = self.dropout4(x)
            x = self.fc2(x)

            return x

    model = VideoGasNet(hidden_size, fc_drop_rate=fc_drop_rate, cnn_drop_rate=cnn_drop_rate)

    # Move the model to its device before the optimizer is built, so the
    # optimizer is created against the parameters on their final device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    ###############################
    # Define optimizer
    ###############################
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == "Muon":
        # NOTE(review): 'Muon' is not in the list of suggested optimizers above,
        # so this branch is currently unreachable. optim.Muon presumably needs a
        # recent PyTorch release and may not accept non-2D parameters such as
        # conv weights and biases -- confirm before adding it to the search space.
        optimizer = optim.Muon(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        raise ValueError(f"Unknown optimizer name: {optimizer_name}")

    criterion = nn.CrossEntropyLoss()

    ##########################################
    # Create DataLoaders with trial batch_size
    ##########################################
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    def evaluate():
        """Return the model's accuracy on test_loader; leaves the model in eval mode."""
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return correct / total

    ###############################
    # Train the model
    ###############################
    print(f"\n{'='*70}")
    print(f"Trial {trial.number} | lr={lr:.6f} | optimizer={optimizer_name} | "
          f"batch={batch_size} | hidden={hidden_size}")
    print(f"{'='*70}")

    accuracy = 0.0
    for epoch in range(num_epochs):
        # evaluate() switches to eval mode, so training mode is restored every epoch
        model.train()
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Calculate training accuracy
            predicted = outputs.argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        train_accuracy = train_correct / train_total

        # Print training accuracy for this epoch
        print(f"Epoch [{epoch+1:2d}/{num_epochs} ] Train Acc: {train_accuracy:.4f}")

        #####################
        # Evaluate the model
        #####################
        # Report the per-epoch accuracy so the study's pruner has intermediate
        # values to compare; without this call a configured pruner never acts.
        accuracy = evaluate()
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    # `accuracy` now holds the evaluation made after the final epoch
    print(f"Validation Acc: {accuracy:.4f}")
    print(f"{'='*70}\n")

    return accuracy


## Run the Optuna Study

Now we will create an Optuna study and run the optimization process.

In [None]:
# Study setup: accuracy is maximized, and a MedianPruner (5 start-up trials,
# 5 warm-up steps) is attached to the study.
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
study = optuna.create_study(direction='maximize', pruner=pruner)

# Run the search
study.optimize(objective, n_trials=30)

# Report the best trial: its hyperparameters first, then its accuracy
print("Best hyperparameters: ", study.best_params)
print("Best accuracy: ", study.best_value)

# Show which hyperparameters mattered most
importance_fig = optuna.visualization.plot_param_importances(study)
importance_fig.show()

# To extend the search with more trials on the same study:
# study.optimize(objective, n_trials=20)

[I 2025-10-31 20:53:09,601] A new study created in memory with name: no-name-fb1e39db-2b2b-4877-8c4f-e3ab9266eb3d



Trial 0 | lr=0.010904 | optimizer=Adam | batch=64 | hidden=188
Epoch [ 1/9 ] Train Acc: 0.1940
Epoch [ 2/9 ] Train Acc: 0.2704
Epoch [ 3/9 ] Train Acc: 0.3473
Epoch [ 4/9 ] Train Acc: 0.3707
Epoch [ 5/9 ] Train Acc: 0.3848
Epoch [ 6/9 ] Train Acc: 0.3952
Epoch [ 7/9 ] Train Acc: 0.3996
Epoch [ 8/9 ] Train Acc: 0.4064
Epoch [ 9/9 ] Train Acc: 0.4041


[I 2025-10-31 21:36:24,445] Trial 0 finished with value: 0.11886221068570192 and parameters: {'lr': 0.01090414001346956, 'optimizer': 'Adam', 'weight_decay': 0.0035262288800592535, 'hidden_size': 188, 'batch_size': 64, 'num_epochs': 9, 'fc_drop_rate': 0.4781178856570385, 'cnn_drop_rate': 0.27034617733123645}. Best is trial 0 with value: 0.11886221068570192.


Validation Acc: 0.1189


Trial 1 | lr=0.000054 | optimizer=Adadelta | batch=16 | hidden=73
Epoch [ 1/5 ] Train Acc: 0.1434
Epoch [ 2/5 ] Train Acc: 0.1662
Epoch [ 3/5 ] Train Acc: 0.1831
Epoch [ 4/5 ] Train Acc: 0.1964
Epoch [ 5/5 ] Train Acc: 0.2084


[I 2025-10-31 22:00:25,573] Trial 1 finished with value: 0.11615833874107723 and parameters: {'lr': 5.4145460445979695e-05, 'optimizer': 'Adadelta', 'weight_decay': 0.009605348799595192, 'hidden_size': 73, 'batch_size': 16, 'num_epochs': 5, 'fc_drop_rate': 0.45985078294144655, 'cnn_drop_rate': 0.18543232289034708}. Best is trial 0 with value: 0.11886221068570192.


Validation Acc: 0.1162


Trial 2 | lr=0.000057 | optimizer=SGD | batch=128 | hidden=76
Epoch [ 1/7 ] Train Acc: 0.1697
Epoch [ 2/7 ] Train Acc: 0.2345
Epoch [ 3/7 ] Train Acc: 0.2734
Epoch [ 4/7 ] Train Acc: 0.3098
Epoch [ 5/7 ] Train Acc: 0.3410
Epoch [ 6/7 ] Train Acc: 0.3681
Epoch [ 7/7 ] Train Acc: 0.3950


[I 2025-10-31 22:35:00,296] Trial 2 finished with value: 0.17737399956738048 and parameters: {'lr': 5.65965865422462e-05, 'optimizer': 'SGD', 'momentum': 0.7045415440540381, 'weight_decay': 0.002267909254625704, 'hidden_size': 76, 'batch_size': 128, 'num_epochs': 7, 'fc_drop_rate': 0.4620733847679902, 'cnn_drop_rate': 0.1590496973501304}. Best is trial 2 with value: 0.17737399956738048.


Validation Acc: 0.1774


Trial 3 | lr=0.000028 | optimizer=SGD | batch=64 | hidden=166
Epoch [ 1/8 ] Train Acc: 0.1469
Epoch [ 2/8 ] Train Acc: 0.1785
Epoch [ 3/8 ] Train Acc: 0.1946
Epoch [ 4/8 ] Train Acc: 0.2112
Epoch [ 5/8 ] Train Acc: 0.2263
Epoch [ 6/8 ] Train Acc: 0.2410
Epoch [ 7/8 ] Train Acc: 0.2504
Epoch [ 8/8 ] Train Acc: 0.2683


[I 2025-10-31 23:13:31,290] Trial 3 finished with value: 0.12372918018602638 and parameters: {'lr': 2.7794255581077293e-05, 'optimizer': 'SGD', 'momentum': 0.04767685465637174, 'weight_decay': 0.0009668584287904303, 'hidden_size': 166, 'batch_size': 64, 'num_epochs': 8, 'fc_drop_rate': 0.4213510225678576, 'cnn_drop_rate': 0.25498715973921343}. Best is trial 2 with value: 0.17737399956738048.


Validation Acc: 0.1237


Trial 4 | lr=0.000018 | optimizer=SGD | batch=32 | hidden=81
