### Configuration

In [None]:
import os
# Pin this process to physical GPU 3. The variable is set through os.environ
# (and before any CUDA call is made) so that it applies to this kernel itself.
# NOTE: an IPython `!export ...` line would not help here - `!` commands run in
# a temporary subshell, so an exported variable never reaches the kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
# Ask PyTorch's CUDA caching allocator to use expandable segments
# (an allocator option aimed at reducing memory fragmentation).
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import warnings
import torch
from optimization.experiments import run_experiment, plot_experiment, calc_sharpness
from utils.config_loader import load_config
from utils.plots import plot_loss_curves
from utils.misc import set_seed, create_loaders

# Suppress warnings
# Silence every UserWarning: with no message pattern the filter matches all messages.
warnings.filterwarnings("ignore", category=UserWarning)

# Silence only the FutureWarning whose text mentions torch.backends.cuda.sdp_kernel.
# The pattern is a regular expression, so it is written as a raw string:
# "\." is not a valid escape sequence in an ordinary string literal.
SDP_KERNEL_WARNING_PATTERN = r".*torch\.backends\.cuda\.sdp_kernel.*"
warnings.filterwarnings("ignore", category=FutureWarning, message=SDP_KERNEL_WARNING_PATTERN)

# Read the base experiment configuration from its YAML file
config = load_config("config/base.yaml")

# Run on the GPU when CUDA is available to this process, otherwise on the CPU
if torch.cuda.is_available():
    config.device = torch.device("cuda")
else:
    config.device = torch.device("cpu")

# Fix the seed (via the project's helper) so runs are repeatable
set_seed(42)

# Epoch budget handed to every experiment in this notebook
EPOCHS = 60

# Create the output directories up front if they don't exist:
#   exp_results - receives the saved model state dicts and results dicts
#   exp_plots   - the save location passed to the plotting helpers
os.makedirs("exp_results", exist_ok=True)
os.makedirs("exp_plots", exist_ok=True)

### Experiments

#### 1) Batch Size

In [None]:
# Release unused cached GPU memory before starting this sweep
torch.cuda.empty_cache()

# Batch-size sweep: identical SGD settings, only the batch size differs
experiments = [
    {
        'optimizer_class': torch.optim.SGD,
        'lr': 0.001,
        'batch_size': bs,
        'shuffle_mode': 'random',
        'weight_decay': 0.0005,
    }
    for bs in (4, 24)
]

# Accumulators for the runs: results dicts, trained models,
# and the paths of the saved model state dicts
results_list, models_list, models_names = [], [], []
    
# Train one model per experiment spec; keep each outcome in memory and on disk
for exp in experiments:
    print(f"\nRunning experiment with {exp['optimizer_class'].__name__}, lr={exp['lr']}, bs={exp['batch_size']}, shuffle={exp['shuffle_mode']}, wd: {exp['weight_decay']}")

    # config and EPOCHS are shared by all runs; everything else comes from the spec
    results, model = run_experiment(
        config=config,
        epochs=EPOCHS,
        optimizer_class=exp['optimizer_class'],
        lr=exp['lr'],
        batch_size=exp['batch_size'],
        shuffle_mode=exp['shuffle_mode'],
        weight_decay=exp['weight_decay'],
    )
    results_list.append(results)
    models_list.append(model)

    # Tag derived from the hyper-parameters, shared by both files of this run
    fname = f"{exp['optimizer_class'].__name__}_{exp['lr']}_{exp['batch_size']}_{exp['shuffle_mode']}_{exp['weight_decay']}"
    model_path = f"exp_results/model_params_{fname}.pt"

    # Persist the weights and remember where they were written
    torch.save(model.state_dict(), model_path)
    models_names.append(model_path)

    # Persist the results dict next to the weights
    torch.save(results, f"exp_results/results_{fname}.pt")

# Plot the loss curves. A single run is named after its hyper-parameters;
# several runs share one generic file name.
if len(experiments) != 1:
    fname = "multiple_experiments"
else:
    exp = experiments[0]
    fname = f"{exp['optimizer_class'].__name__}_{exp['lr']}_{exp['batch_size']}_{exp['shuffle_mode']}_{exp['weight_decay']}"

plot_loss_curves(results_list, save_path="exp_plots", fname=fname)

# Print experiment summary: one three-line entry per finished run
print("\nExperiments Summary:")
for res in results_list:
    summary_lines = (
        f"\nOptimizer: {res['optimizer']}",
        f"LR: {res['lr']}, Shuffle: {res['shuffle_mode']}, Batch Size: {res['batch_size']}, Weight Decay: {res['weight_decay']}",
        f"Final Train Loss: {res['train_loss']:.4f}, Val Loss: {res['val_loss']:.4f}",
    )
    print(*summary_lines, sep="\n")


In [None]:
# Create loaders for plotting (the third value returned by create_loaders is not needed here)
train_loader, val_loader, _ = create_loaders(config)

# Define paths for plotting
PATH = './exp_plots/'
# Make sure the plot directory exists before anything is written into it
os.makedirs(PATH, exist_ok=True)

# models_names holds checkpoint paths of the form "exp_results/<name>.pt".
# Keep only the bare file stem, so that neither the directory part nor the
# extension ends up inside the plot file names.
stems = [os.path.splitext(os.path.basename(name))[0] for name in models_names[:2]]
paths = [
    os.path.join(PATH, f"segment_{stems[0]}_{stems[1]}.png"),
    os.path.join(PATH, f"2D_plot_{stems[0]}.png"),
    os.path.join(PATH, f"2D_plot_{stems[1]}.png"),
]

# Plot the experiment results for the first two trained models
plot_experiment(models_list[0], models_list[1], train_loader, val_loader, config.device, paths=paths)

In [None]:
# Compute and report the sharpness metrics of each trained model on the training loader
for i, model in enumerate(models_list):
    name = models_names[i]
    print(f"\nCalculating sharpness for model {i + 1} ({name})...")
    sharpness, hv_norm = calc_sharpness(model, train_loader, config.device)
    print(f"Model: {name}, Sharpness: {sharpness:.4f}%, HV Norm: {hv_norm:.4f} \n")