# Model training with CONNIE image dataset

In [None]:
%run ./notebook_init.py

import os
import torch
import sys

from collections import Counter
from itertools import product
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import random_split

from core import DATA_FOLDER

from scripts.connie_training_utils import ModelTraining, TransformedSubset, \
    Seed, get_test_transform, IMG_SIZE,\
    get_train_transform, calculate_mean_std

Set the path to the processed data, select the computation device (GPU if available, otherwise CPU), and initialize the random seed.

In [None]:
# Folder (under DATA_FOLDER) that holds the processed PNG images.
processed_data_folder = os.path.join(DATA_FOLDER, "png_processed_data")

# Use the first CUDA device when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Project helper; later cells draw the split generator from it.
seed = Seed()

Compute dataset mean and standard deviation, then define training and test transforms with normalization

In [None]:
# Minimal pipeline used only to estimate the normalization statistics:
# resize to IMG_SIZE and convert to a tensor.
basic_transform = transforms.Compose(
    [transforms.Resize(IMG_SIZE), transforms.ToTensor()]
)

# Load the whole image folder with only the basic (resize + tensor) transform.
# NOTE(review): the statistics below are computed over every image in
# processed_data_folder, which includes the images later held out as the
# test split -- confirm this is intended.
full_dataset_transform = datasets.ImageFolder(
    root=processed_data_folder, transform=basic_transform
)

# Dataset statistics, then the evaluation and training transforms built on them.
mean, std = calculate_mean_std(full_dataset_transform)
test_transform = get_test_transform(mean, std)
train_transform = get_train_transform(mean, std)

Split the dataset into a train + validation set (85%) and a test set (15%)

In [None]:
# Reload the image folder without any transform; transforms are attached
# per subset afterwards.
full_dataset = datasets.ImageFolder(processed_data_folder)

# 85% of the samples go to train + validation, the remainder to test.
total_len = len(full_dataset)
trainval_len = int(0.85 * total_len)
test_len = total_len - trainval_len

# The split draws its randomness from the generator supplied by `seed`.
trainval_set, test_set = random_split(
    dataset=full_dataset,
    lengths=[trainval_len, test_len],
    generator=seed.generator(),
)

# Wrap the test subset so that test_transform is applied to it.
test_set = TransformedSubset(test_set, test_transform)



Binarize the labels: muons get label 1 and all other classes get label 0

In [None]:
# Look up the class index that the image folder assigned to the 'Muon' class.
class_idx_map = full_dataset.class_to_idx
print("Classes index:  ", class_idx_map)
muon_idx = class_idx_map['Muon']

# Collapse the labels to binary: 1 for muons, 0 for every other class.
# NOTE(review): this rewrites only the `targets` attribute, and it runs after
# the split above. torchvision's ImageFolder appears to yield labels from
# `samples`, not `targets` -- confirm that the training code (which receives
# muon_idx) performs the binarization it needs.
full_dataset.targets = [int(target == muon_idx) for target in full_dataset.targets]
print("Label distribution:", Counter(full_dataset.targets))

## Training with K-fold

In [None]:
# Define your parameter grid
# Candidate values for each hyperparameter explored by the grid search.
param_grid = {
    "learning_rate": [5e-4],
    "weight_decay": [5e-5],
    "step_size": [10],
    "gamma": [0.5],
}

# Cartesian product of the candidate lists. The key order is spelled out
# because each resulting tuple is unpacked positionally as
# (learning_rate, weight_decay, step_size, gamma).
grid = list(
    product(
        *(
            param_grid[name]
            for name in ("learning_rate", "weight_decay", "step_size", "gamma")
        )
    )
)

In [None]:
# Grid search: train with K-fold for every hyperparameter combination in
# `grid` and keep the combination with the highest validation accuracy.
k_folds = 5
num_epochs = 100

# Best candidate seen so far.
best_acc = 0.0
best_model = None
best_params = None
# One ((lr, wd, step_size, gamma), val_acc) entry per evaluated combination.
all_results = []

model_training = ModelTraining()

for i, (lr, wd, step_size, gamma) in enumerate(grid):
    print(f"\nGrid Search {i+1}/{len(grid)} — LR={lr}, WD={wd}, Step={step_size}, Gamma={gamma}")
    hyperparam = {"lr": lr, "wd": wd, "step": step_size, "gamma": gamma}

    # Returns a model and its validation accuracy for this combination.
    model, val_acc = model_training.train_model_kfold(device, trainval_set,
                                                      num_epochs, train_transform,
                                                      test_transform, k_folds, muon_idx,
                                                      seed, hyperparam)
    all_results.append(((lr, wd, step_size, gamma), val_acc))

    # Always accept the first candidate: otherwise a run in which no
    # accuracy exceeds the initial 0.0 leaves best_model/best_params as None
    # and the summary cell fails when it indexes best_params.
    if best_params is None or val_acc > best_acc:
        best_acc = val_acc
        best_model = model
        best_params = (lr, wd, step_size, gamma)

In [None]:
# Summarize the outcome of the grid search.
# best_params is a (learning_rate, weight_decay, step_size, gamma) tuple, or
# None when the search selected nothing (e.g. an empty grid).
print("Best Hyperparameters:")
if best_params is None:
    # Report the situation instead of failing with a TypeError on None[0].
    print("No hyperparameter combination was selected.")
else:
    print(f"Learning Rate: {best_params[0]}")
    print(f"Weight Decay:  {best_params[1]}")
    print(f"Step Size:     {best_params[2]}")
    print(f"Gamma:         {best_params[3]}")
    print(f"CV Accuracy:   {best_acc:.4f}")