In [None]:
!pip3 install -r requirements.txt
#!pip install torch --upgrade
!pip install accelerate==0.21.0
!pip install optuna tensorboard

In [None]:
from pickle import TRUE
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import models
import matplotlib.pyplot as plt
import pandas as pd


from modules.dataset import IntelImageClassificationDataset
from modules.trainer_2 import Trainer
from modules.utility import InferenceSession, Evaluator
from modules.profiler import TorchProfiler
from optuna_optimizer import OptunaTuner
from resolution_tuner import ResolutionTuner
import shutil
import os


from torch.optim.lr_scheduler import CosineAnnealingLR


# ==== CONFIGURATION ====
# Scenario switches: each flag enables one experiment independently of the others.
RUN_BASELINE = False
RUN_RESOLUTION_TUNING = False
RUN_OPTUNA_HP_TUNING = True

# Detailed profiler traces are meant for diagnostics of a specific scenario.
# For plain comparison runs, set this to False to avoid profiler overhead.
PERFORM_DETAILED_PROFILING = True

# Single seed shared by torch RNGs, the train/val split and the Optuna study.
GLOBAL_SEED = 42

torch.manual_seed(GLOBAL_SEED)

# Seed every CUDA device and select the compute device in one place.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(GLOBAL_SEED)
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# ==== MODEL FACTORY ====
def build_model(num_classes=6):
    """Build a pretrained SqueezeNet 1.1 with a fresh classification head.

    The final 1x1 convolution of the classifier is replaced by a new,
    randomly initialised convolution that outputs ``num_classes`` channels.

    Args:
        num_classes: Number of output classes. Defaults to 6, the value this
            notebook has always used, so zero-argument calls are unchanged.

    Returns:
        The torchvision SqueezeNet model with the replaced head.
    """
    model = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.DEFAULT)
    old_head = model.classifier[1]
    # Keep the original head's input width and kernel size; only the number of
    # output channels changes.
    model.classifier[1] = nn.Conv2d(old_head.in_channels, num_classes, old_head.kernel_size)
    # Keep the model's own class count in sync with the new head.
    model.num_classes = num_classes
    return model

# Shared dataset for the baseline and HP-tuning scenarios, built a single time.
# (The resolution scenario constructs its own dataset at the tuned size.)
initial_resize_val = 150  # Resolution used until/unless resolution tuning overrides it
dataset_for_baseline = IntelImageClassificationDataset(
    resize=(initial_resize_val, initial_resize_val),
)


# --- Data Storage for Plotting ---
# One per-epoch history list per scenario; a list stays empty if its scenario is skipped.
no_opt_history, resolution_opt_history, optuna_hp_history = [], [], []


# ==== SCENARIO 1: NO OPTIMIZATION (BASELINE) ====
# Trains the model with fixed hyperparameters (Adam, lr=1e-3, batch 32, 10 epochs)
# to provide the reference curve for the other scenarios.
if RUN_BASELINE:
    # "\n" (not "\\n") so a real blank line is printed instead of a literal backslash-n.
    print("\n--- Running Scenario 1: No Optimization (Baseline) ---")
    baseline_model = build_model()
    baseline_dataloader = DataLoader(
        dataset_for_baseline.train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
        pin_memory=False,
    )
    baseline_optimizer = torch.optim.Adam(baseline_model.parameters(), lr=0.001)
    baseline_epochs = 10

    baseline_trainer = Trainer(model=baseline_model, device=DEVICE)

    # Remove stale traces BEFORE the profiler is constructed, so nothing the
    # profiler may set up in its output directory is deleted afterwards.
    shutil.rmtree('./logs/baseline_profiler', ignore_errors=True)
    # Detailed tracing is controlled by the global PERFORM_DETAILED_PROFILING flag.
    baseline_profiler = TorchProfiler(
        use_profiler=True,
        output_dir="./logs/baseline_profiler",
        detailed_profiling=PERFORM_DETAILED_PROFILING,
    )

    with baseline_profiler as prof:
        no_opt_history = baseline_trainer.train(
            baseline_dataloader,
            epochs=baseline_epochs,
            optimizer=baseline_optimizer,
            profiler=prof,
            silent=False,
        )
    print(f"Baseline Run History (last epoch): {no_opt_history[-1] if no_opt_history else 'N/A'}")
else:
    print("\n--- Skipping Scenario 1: No Optimization (Baseline) ---")


# ==== SCENARIO 2: RESOLUTION OPTIMIZATION ====
# Searches for the best input resolution, then retrains once at that resolution
# so a full per-epoch history is available for the comparison plots.
best_resolution = initial_resize_val  # Fallback if tuning is skipped
if RUN_RESOLUTION_TUNING:
    # "\n" (not "\\n") so a real blank line is printed instead of a literal backslash-n.
    print("\n--- Running Scenario 2: Resolution Optimization ---")
    res_tuner = ResolutionTuner(model_fn=build_model, device=DEVICE)
    print("🔍 Starting Resolution Tuning...")
    resolution_study = res_tuner.run(n_trials=7)  # Adjust n_trials as needed
    best_resolution = resolution_study.best_params["resolution"]
    print(f"📏 Best resolution found by Optuna: {best_resolution}")

    # Retrain with the best resolution to capture full training history for plotting
    print(f"--- Retraining with Best Resolution ({best_resolution}) for History ---")
    resolution_dataset = IntelImageClassificationDataset(resize=(best_resolution, best_resolution))
    resolution_model = build_model()
    # Same batch size / lr / epoch count as the baseline run
    resolution_dataloader = DataLoader(
        resolution_dataset.train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
        pin_memory=False,
    )
    resolution_optimizer = torch.optim.Adam(resolution_model.parameters(), lr=0.001)  # You might want to tune this LR as well
    resolution_epochs = 10  # Default epochs for retraining (match baseline for fair comparison)

    resolution_trainer = Trainer(model=resolution_model, device=DEVICE)

    # Remove stale traces BEFORE the profiler is constructed, so nothing the
    # profiler may set up in its output directory is deleted afterwards.
    shutil.rmtree('./logs/res_opt_profiler', ignore_errors=True)
    # Honour the global profiling switch, like the baseline scenario does,
    # instead of always forcing detailed profiling on.
    resolution_profiler = TorchProfiler(
        use_profiler=True,
        output_dir="./logs/res_opt_profiler",
        detailed_profiling=PERFORM_DETAILED_PROFILING,
    )

    # Cosine learning-rate schedule spanning the whole retraining run
    resolution_scheduler = CosineAnnealingLR(resolution_optimizer, T_max=resolution_epochs)

    with resolution_profiler as prof:
        resolution_opt_history = resolution_trainer.train(
            resolution_dataloader,
            epochs=resolution_epochs,
            optimizer=resolution_optimizer,
            profiler=prof,
            silent=False,
            scheduler=resolution_scheduler,
        )
    print(f"Resolution Optimized Run History (last epoch): {resolution_opt_history[-1] if resolution_opt_history else 'N/A'}")
else:
    print("\n--- Skipping Scenario 2: Resolution Optimization ---")


# ==== SCENARIO 3: OPTUNA HYPERPARAMETER OPTIMIZATION ====
# Tunes lr / batch_size / epochs on an 80/20 train/validation split, then
# retrains once with the best parameters to record a full per-epoch history.
if RUN_OPTUNA_HP_TUNING:
    # "\n" (not "\\n") so a real blank line is printed instead of a literal backslash-n.
    print("\n--- Running Scenario 3: Optuna Hyperparameter Optimization ---")
    # Use the baseline dataset for HP tuning as resolution tuning is a separate step
    train_size = int(0.8 * len(dataset_for_baseline.train_dataset))
    val_size = len(dataset_for_baseline.train_dataset) - train_size
    # A dedicated seeded generator keeps the split identical across runs.
    train_subset, val_subset = random_split(
        dataset_for_baseline.train_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(GLOBAL_SEED),
    )

    hp_tuner = OptunaTuner(model_fn=build_model, train_dataset=train_subset, val_dataset=val_subset, device=DEVICE)
    print("🔍 Starting Hyperparameter Tuning...")
    hp_study = hp_tuner.run(n_trials=30, seed=GLOBAL_SEED)  # Adjust n_trials for more comprehensive search
    best_params = hp_study.best_trial.params
    print(f"✨ Best Hyperparameters found by Optuna: {best_params}")

    # Retrain with best hyperparameters to capture full training history for plotting
    print("--- Retraining with Best Hyperparameters for History ---")
    optuna_hp_model = build_model()
    # Pinned host memory only helps host-to-GPU copies, so enable it only on CUDA.
    optuna_hp_dataloader = DataLoader(
        train_subset,
        batch_size=best_params["batch_size"],
        shuffle=True,
        num_workers=2,
        pin_memory=(DEVICE.type == "cuda"),
    )
    optuna_hp_optimizer = torch.optim.Adam(optuna_hp_model.parameters(), lr=best_params["lr"])
    optuna_hp_epochs = best_params["epochs"]

    optuna_hp_trainer = Trainer(model=optuna_hp_model, device=DEVICE)

    # Remove stale traces BEFORE the profiler is constructed, so nothing the
    # profiler may set up in its output directory is deleted afterwards.
    shutil.rmtree('./logs/optuna_hp_profiler', ignore_errors=True)
    # Honour the global profiling switch, like the baseline scenario does,
    # instead of always forcing detailed profiling on.
    optuna_hp_profiler = TorchProfiler(
        use_profiler=True,
        output_dir="./logs/optuna_hp_profiler",
        detailed_profiling=PERFORM_DETAILED_PROFILING,
    )

    # Cosine learning-rate schedule spanning the whole retraining run
    optuna_hp_scheduler = CosineAnnealingLR(optuna_hp_optimizer, T_max=optuna_hp_epochs)

    with optuna_hp_profiler as prof:
        optuna_hp_history = optuna_hp_trainer.train(
            optuna_hp_dataloader,
            epochs=optuna_hp_epochs,
            optimizer=optuna_hp_optimizer,
            profiler=prof,
            silent=False,
            scheduler=optuna_hp_scheduler,
        )
    print(f"Optuna HP Run History (last epoch): {optuna_hp_history[-1] if optuna_hp_history else 'N/A'}")
else:
    print("\n--- Skipping Scenario 3: Optuna Hyperparameter Optimization ---")


# ==== PLOTTING RESULTS ====
# Draws one loss figure and one accuracy figure comparing every scenario that
# produced a history. Each history is a list of per-epoch dicts with the keys
# 'epoch', 'loss' and 'accuracy'.


def _history_to_series(history, key):
    """Return the values of ``key`` from ``history`` as a Series indexed by epoch.

    The Series is reindexed to the contiguous range 1..max(epoch), so any
    missing epoch shows up as NaN.
    """
    epochs_list = [entry['epoch'] for entry in history]
    values = [entry[key] for entry in history]
    full_epochs = range(1, max(epochs_list) + 1)
    return pd.Series(values, index=epochs_list).reindex(full_epochs)


def _plot_metric(frame, ylabel, title, filename, saved_message):
    """Plot every non-empty column of ``frame`` against its epoch index, save and show it."""
    plt.figure(figsize=(10, 6))
    for col in frame.columns:
        # Only plot if there's data for this column
        if not frame[col].isnull().all():
            plt.plot(frame.index, frame[col], label=col, marker='o', markersize=4)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(filename)
    print(saved_message)
    plt.show()


# "\n" (not "\\n") so a real blank line is printed instead of a literal backslash-n.
print("\n--- Plotting Results ---")

histories = {
    "No Optimization": no_opt_history,
    "Resolution Optimized": resolution_opt_history,
    "Optuna HP Optimized": optuna_hp_history
}

# Remove empty histories if their respective scenarios were skipped
histories = {k: v for k, v in histories.items() if v}

if not histories:
    print("No histories collected to plot. Ensure at least one scenario flag (RUN_BASELINE, RUN_RESOLUTION_TUNING, RUN_OPTUNA_HP_TUNING) is True.")
else:
    # Build each frame from a dict of Series so pandas aligns the columns on
    # the UNION of their epoch indexes. Assigning columns one by one into an
    # empty DataFrame would instead clip every later run to the epoch range of
    # the first column, silently truncating longer runs.
    df_loss = pd.DataFrame(
        {name: _history_to_series(history, 'loss') for name, history in histories.items()}
    )
    df_accuracy = pd.DataFrame(
        {name: _history_to_series(history, 'accuracy') for name, history in histories.items()}
    )

    # Plot Loss
    _plot_metric(
        df_loss,
        "Loss",
        "Training Loss Comparison",
        "training_loss_comparison.png",
        "Loss plot saved as training_loss_comparison.png",
    )
    # Plot Accuracy
    _plot_metric(
        df_accuracy,
        "Accuracy",
        "Training Accuracy Comparison",
        "training_accuracy_comparison.png",
        "Accuracy plot saved as training_accuracy_comparison.png",
    )

# ==== SAVE MODEL (example, can be moved/modified) ====
# Saves the state_dict of exactly one trained model to model_path.
model_path = "./intel_model.pt"
final_model_to_save = None
# Prioritize saving the model from Optuna HP if run, else Resolution, else Baseline.
# Each model name is only referenced inside the branch whose scenario ran and
# produced a history, so models of skipped scenarios are never touched.
if RUN_OPTUNA_HP_TUNING and optuna_hp_history:
    final_model_to_save = optuna_hp_model
elif RUN_RESOLUTION_TUNING and resolution_opt_history:
    final_model_to_save = resolution_model
elif RUN_BASELINE and no_opt_history:
    final_model_to_save = baseline_model
else:
    print("No model trained in a runnable scenario to save.")


# Compare against the None sentinel explicitly: truthiness of a module is not a
# reliable "was a model selected" test (container modules define __len__).
if final_model_to_save is not None:
    torch.save(final_model_to_save.state_dict(), model_path)
    print(f"✅ Model saved to {model_path}")

[I 2025-07-09 17:36:19,102] A new study created in memory with name: no-name-2e6c155f-1d87-48fa-ae29-4fd824d24335
  lr = trial.suggest_loguniform("lr", 1e-5, 2e-3)
Positional arguments ['self', 'name', 'low', 'high', 'step', 'log'] in suggest_int() have been deprecated since v3.5.0. They will be replaced with the corresponding keyword arguments in v5.0.0, so please use the keyword specification instead. See https://github.com/optuna/optuna/releases/tag/v3.5.0 for details.
  epochs = trial.suggest_int("epochs", 20, 100, 10)
Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth


\n--- Skipping Scenario 1: No Optimization (Baseline) ---
\n--- Skipping Scenario 2: Resolution Optimization ---
\n--- Running Scenario 3: Optuna Hyperparameter Optimization ---
🔍 Starting Hyperparameter Tuning...


100%|██████████| 4.73M/4.73M [00:00<00:00, 78.5MB/s]
[I 2025-07-09 17:46:06,653] Trial 0 finished with value: 0.9120057000356252 and parameters: {'lr': 7.274917088027814e-05, 'batch_size': 516, 'epochs': 30}. Best is trial 0 with value: 0.9120057000356252.
[I 2025-07-09 18:12:07,683] Trial 1 finished with value: 0.8952618453865336 and parameters: {'lr': 2.2853255256339197e-05, 'batch_size': 1024, 'epochs': 80}. Best is trial 0 with value: 0.9120057000356252.
[I 2025-07-09 18:21:49,036] Trial 2 finished with value: 0.8507303170644817 and parameters: {'lr': 1.1152328125494341e-05, 'batch_size': 516, 'epochs': 30}. Best is trial 0 with value: 0.9120057000356252.
[I 2025-07-09 18:35:07,857] Trial 3 finished with value: 0.8820805130032062 and parameters: {'lr': 2.642526057549916e-05, 'batch_size': 1024, 'epochs': 40}. Best is trial 0 with value: 0.9120057000356252.
[I 2025-07-09 19:00:42,115] Trial 4 finished with value: 0.9045244032775205 and parameters: {'lr': 0.0002557948896094734, 'batc

In [None]:
# ==== TESTING PHASE ====
# Reloads the saved weights into a fresh model and reports accuracy on the
# test split of the baseline-resolution dataset.
print("\n=== TESTING ===")
model_loaded = build_model().to(DEVICE)
model_loaded.load_state_dict(torch.load(model_path, map_location=DEVICE))
model_loaded.eval()

session = InferenceSession(model_loaded)
# Walk the test dataset ONCE and keep (input, target) pairs; iterating it
# separately for inputs and targets would load every sample twice.
test_samples = [(item[0], item[1]) for item in dataset_for_baseline.test_dataset]
all_inputs = torch.stack([sample_input for sample_input, _ in test_samples])
all_targets = torch.tensor([sample_target for _, sample_target in test_samples])
# Evaluation only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    output = session(all_inputs)

acc = Evaluator.acc(output, all_targets).item()
print(f"📊 Test Accuracy: {acc:.4f}")

In [None]:
# ==== TESTING PHASE (For Resolution Optimized Scenario ONLY) ====
# Evaluates the resolution-optimized model on a test set resized to the
# resolution it was trained with.


print("\n=== TESTING RESOLUTION OPTIMIZED MODEL ===")

# best_resolution is assigned even when tuning is skipped (fallback value), so
# the presence of resolution_model is what shows the scenario actually ran.
if 'resolution_model' in globals() and 'best_resolution' in globals():
    try:
        # Recreate the model structure
        model_loaded = build_model().to(DEVICE)

        # Copy the weights straight from the in-memory resolution_model.
        # './intel_model.pt' is NOT used here: the save step prefers the Optuna
        # model when that scenario also ran, so the file may hold a different
        # model's weights than the one this cell claims to test.
        model_loaded.load_state_dict(resolution_model.state_dict())
        model_loaded.eval()

        session = InferenceSession(model_loaded)

        # IMPORTANT: Create the test dataset with the BEST RESOLUTION found for this model
        test_dataset_for_resolution_model = IntelImageClassificationDataset(resize=(best_resolution, best_resolution))

        # Walk the test dataset ONCE and keep (input, target) pairs; iterating
        # it separately for inputs and targets would load every sample twice.
        test_samples = [(item[0], item[1]) for item in test_dataset_for_resolution_model.test_dataset]
        all_inputs = torch.stack([sample_input for sample_input, _ in test_samples])
        all_targets = torch.tensor([sample_target for _, sample_target in test_samples])
        # Evaluation only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            output = session(all_inputs)

        acc = Evaluator.acc(output, all_targets).item()
        print(f"📊 Resolution Optimized Model Test Accuracy (Res: {best_resolution}): {acc:.4f}")

    except Exception as e:
        # Best-effort notebook cell: report the failure instead of aborting the run.
        print(f"⚠️ An unexpected error occurred during testing: {e}")
else:
    print("--- Resolution Optimized Model was not trained or its variables are not accessible for testing. ---")


=== TESTING RESOLUTION OPTIMIZED MODEL ===
📊 Resolution Optimized Model Test Accuracy (Res: 160): 0.9177
