imports + config

In [1]:
import subprocess
import itertools
import re
import pandas as pd
from pathlib import Path
from IPython.display import display

# Executable name used to launch every training run; it is looked up on PATH,
# so it is not necessarily the interpreter running this kernel.
PYTHON_BIN = "python"

# Absolute path of the training script. The relative name is resolved against
# the working directory the notebook is started from.
SCRIPT_PATH = str(Path("block2_group4.py").resolve())

hyperparameter grids

In [2]:
# Candidate values for each command-line flag of the training script.
# The coarse sweep runs the full Cartesian product of these lists.

# Training length and optimizer settings (--train_epochs, --lr, --weight_decay).
train_epochs_list = [9.9, 10.9]
lr_list = [11.5, 13.0]
weight_decay_list = [0.0153, 0.01]

# Loss setting (--label_smoothing).
label_smoothing_list = [0.2, 0.15, 0.1]

# Values forwarded to --block1 / --block2.
block1_list = [48, 64]
block2_list = [128, 192, 256]

# Repetitions per configuration; kept small so the coarse search stays quick.
n_runs_coarse = 3

run_experiment helper

In [3]:
def run_experiment(
    train_epochs,
    lr,
    weight_decay,
    label_smoothing,
    block1,
    block2,
    n_runs,
    exp_name=None,
):
    """Launch the training script for one configuration and parse its summary.

    The script at ``SCRIPT_PATH`` is started with ``PYTHON_BIN`` as a
    subprocess. Every argument is forwarded as a command-line flag of the same
    name (``--train_epochs``, ``--lr``, ``--weight_decay``,
    ``--label_smoothing``, ``--block1``, ``--block2``, ``--n_runs``,
    ``--exp_name``).

    Args:
        train_epochs, lr, weight_decay, label_smoothing, block1, block2:
            Hyperparameter values, converted with ``str()`` for the command line.
        n_runs: Number of repetitions requested from the script.
        exp_name: Experiment label. When ``None`` a name is derived from the
            hyperparameter values.

    Returns:
        dict: The input settings plus ``mean_acc``, ``std_acc``, ``mean_time``
        and ``std_time`` parsed from the script's stdout. A metric pair is
        ``None`` when its summary line is not found in the output.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. The captured stdout is printed before re-raising.
    """
    if exp_name is None:
        exp_name = (
            f"te{train_epochs}_lr{lr}_wd{weight_decay}_ls{label_smoothing}"
            f"_b1{block1}_b2{block2}"
        )

    cmd = [
        PYTHON_BIN, SCRIPT_PATH,
        "--train_epochs", str(train_epochs),
        "--lr", str(lr),
        "--weight_decay", str(weight_decay),
        "--label_smoothing", str(label_smoothing),
        # The flags are --block1 / --block2 (not --block1_width / --block2_width).
        "--block1", str(block1),
        "--block2", str(block2),
        "--n_runs", str(n_runs),
        "--exp_name", exp_name,
    ]

    print("Running:", " ".join(cmd))

    try:
        out = subprocess.check_output(cmd, text=True)
    except subprocess.CalledProcessError as err:
        # The captured stdout is otherwise hidden inside the exception object,
        # which makes a failed run hard to diagnose from the notebook.
        if err.output:
            print(
                f"Run {exp_name!r} failed with exit status {err.returncode}; "
                f"captured output:\n{err.output}"
            )
        raise

    # Accept plain decimals as well as signed and scientific notation. The
    # previous character class [0-9.]+ would read "2e-05" as "2".
    number = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
    m_acc = re.search(rf"Mean accuracy:\s+{number}\s+Std:\s+{number}", out)
    m_time = re.search(
        rf"Mean training time \(s\):\s+{number}\s+Std:\s+{number}", out
    )

    if m_acc is None or m_time is None:
        # Keep the best-effort behaviour (None metrics) but make it visible.
        print(
            f"WARNING: could not parse all summary metrics for {exp_name!r}; "
            "missing values are recorded as None."
        )

    mean_acc, std_acc = (
        (float(m_acc.group(1)), float(m_acc.group(2))) if m_acc else (None, None)
    )
    mean_time, std_time = (
        (float(m_time.group(1)), float(m_time.group(2))) if m_time else (None, None)
    )

    return {
        "train_epochs": train_epochs,
        "lr": lr,
        "weight_decay": weight_decay,
        "label_smoothing": label_smoothing,
        "block1": block1,
        "block2": block2,
        "n_runs": n_runs,
        "exp_name": exp_name,
        "mean_acc": mean_acc,
        "std_acc": std_acc,
        "mean_time": mean_time,
        "std_time": std_time,
    }


Coarse sweep: run every combination of the hyperparameter grids, then rank the results by mean accuracy

In [4]:
# One grid per run_experiment keyword. The insertion order fixes the nesting of
# the sweep: the first axis varies slowest, the last axis fastest.
sweep_axes = {
    "train_epochs": train_epochs_list,
    "lr": lr_list,
    "weight_decay": weight_decay_list,
    "label_smoothing": label_smoothing_list,
    "block1": block1_list,
    "block2": block2_list,
}

# Run every combination once and collect one result dict per configuration.
coarse_results = []
for combo in itertools.product(*sweep_axes.values()):
    settings = dict(zip(sweep_axes, combo))
    coarse_results.append(run_experiment(n_runs=n_runs_coarse, **settings))

# One row per configuration, shown best accuracy first.
results_coarse = pd.DataFrame(coarse_results)
display(results_coarse.sort_values("mean_acc", ascending=False))

# Same ranking restricted to the runs whose mean time is below 3.2 seconds.
print("\nTop configs with mean_time < 3.2s:")
is_fast = results_coarse["mean_time"] < 3.2
display(
    results_coarse[is_fast].sort_values("mean_acc", ascending=False).head(10)
)


Running: python /workspace/ESE3060-Final-Project/models/block2_group4.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 48 --block2 128 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b148_b2128
Running: python /workspace/ESE3060-Final-Project/models/block2_group4.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 48 --block2 192 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b148_b2192
Running: python /workspace/ESE3060-Final-Project/models/block2_group4.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 48 --block2 256 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b148_b2256
Running: python /workspace/ESE3060-Final-Project/models/block2_group4.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 64 --block2 128 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b164_b2128
Running: python /workspace/ESE3060-Final-Project/models/block2_group4.py

Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
95,10.9,11.5,0.0100,0.20,64,256,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2256,0.9387,0.0023,4.0119,0.0013
77,10.9,11.5,0.0153,0.20,64,256,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2256,0.9376,0.0019,4.0357,0.0218
107,10.9,11.5,0.0100,0.10,64,256,3,te10.9_lr11.5_wd0.01_ls0.1_b164_b2256,0.9373,0.0013,4.0453,0.0435
110,10.9,13.0,0.0153,0.20,48,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b148_b2256,0.9367,0.0006,3.8995,0.0115
113,10.9,13.0,0.0153,0.20,64,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2256,0.9367,0.0013,4.0547,0.0319
...,...,...,...,...,...,...,...,...,...,...,...,...
66,9.9,13.0,0.0100,0.10,48,128,3,te9.9_lr13.0_wd0.01_ls0.1_b148_b2128,0.9254,0.0007,2.7165,0.0056
120,10.9,13.0,0.0153,0.10,48,128,3,te10.9_lr13.0_wd0.0153_ls0.1_b148_b2128,0.9254,0.0023,2.9951,0.0023
24,9.9,11.5,0.0100,0.15,48,128,3,te9.9_lr11.5_wd0.01_ls0.15_b148_b2128,0.9251,0.0029,2.7158,0.0025
54,9.9,13.0,0.0100,0.20,48,128,3,te9.9_lr13.0_wd0.01_ls0.2_b148_b2128,0.9244,0.0006,2.7103,0.0019



Top configs with mean_time < 3.2s:


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
123,10.9,13.0,0.0153,0.1,64,128,3,te10.9_lr13.0_wd0.0153_ls0.1_b164_b2128,0.9326,0.0018,3.0978,0.0
129,10.9,13.0,0.01,0.2,64,128,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9315,0.0029,3.0961,0.0016
99,10.9,11.5,0.01,0.15,64,128,3,te10.9_lr11.5_wd0.01_ls0.15_b164_b2128,0.9314,0.0008,3.105,0.0406
45,9.9,13.0,0.0153,0.15,64,128,3,te9.9_lr13.0_wd0.0153_ls0.15_b164_b2128,0.9306,0.0004,2.8026,0.0013
105,10.9,11.5,0.01,0.1,64,128,3,te10.9_lr11.5_wd0.01_ls0.1_b164_b2128,0.9306,0.0007,3.0803,0.0008
111,10.9,13.0,0.0153,0.2,64,128,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2128,0.9304,0.0011,3.0759,0.0044
87,10.9,11.5,0.0153,0.1,64,128,3,te10.9_lr11.5_wd0.0153_ls0.1_b164_b2128,0.9303,0.0007,3.0844,0.0028
117,10.9,13.0,0.0153,0.15,64,128,3,te10.9_lr13.0_wd0.0153_ls0.15_b164_b2128,0.9301,0.001,3.1086,0.0199
141,10.9,13.0,0.01,0.1,64,128,3,te10.9_lr13.0_wd0.01_ls0.1_b164_b2128,0.93,0.0015,3.0936,0.0078
135,10.9,13.0,0.01,0.15,64,128,3,te10.9_lr13.0_wd0.01_ls0.15_b164_b2128,0.9298,0.0033,3.0925,0.0036


In [5]:
import pandas as pd
from pathlib import Path

# 1) Lift pandas' display limits so complete tables render in the notebook.
#    set_option is global, so this also affects every later cell.
for option_name, option_value in (
    ("display.max_rows", None),
    ("display.max_columns", None),
    ("display.width", 0),
):
    pd.set_option(option_name, option_value)

# Rank once and reuse the ranking for the full table and the top-5 view.
ranked = results_coarse.sort_values("mean_acc", ascending=False)

print("=== Full coarse sweep (sorted by mean_acc) ===")
display(ranked)

# 2) Write the results (in sweep order, without the index) to a CSV file so
#    they can be opened as a spreadsheet.
log_dir = Path("../logs/block2_group4")   # adjust if your path is different
log_dir.mkdir(parents=True, exist_ok=True)
coarse_path = log_dir / "coarse_sweep_results_full.csv"
results_coarse.to_csv(coarse_path, index=False)
print(f"\nSaved full coarse sweep to: {coarse_path}")

# 3) (Optional) best configurations overall and under a time budget.
time_budget = 3.2

best_overall = ranked.head(5)
print("\n=== Top 5 configs by accuracy (no time constraint) ===")
display(best_overall)

within_budget = results_coarse["mean_time"] < time_budget
best_under_time = (
    results_coarse.loc[within_budget]
    .sort_values("mean_acc", ascending=False)
    .head(5)
)
print(f"\n=== Top 5 configs with mean_time < {time_budget}s ===")
display(best_under_time)


=== Full coarse sweep (sorted by mean_acc) ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
95,10.9,11.5,0.01,0.2,64,256,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2256,0.9387,0.0023,4.0119,0.0013
77,10.9,11.5,0.0153,0.2,64,256,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2256,0.9376,0.0019,4.0357,0.0218
107,10.9,11.5,0.01,0.1,64,256,3,te10.9_lr11.5_wd0.01_ls0.1_b164_b2256,0.9373,0.0013,4.0453,0.0435
110,10.9,13.0,0.0153,0.2,48,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b148_b2256,0.9367,0.0006,3.8995,0.0115
113,10.9,13.0,0.0153,0.2,64,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2256,0.9367,0.0013,4.0547,0.0319
83,10.9,11.5,0.0153,0.15,64,256,3,te10.9_lr11.5_wd0.0153_ls0.15_b164_b2256,0.9367,0.0011,4.0341,0.0199
41,9.9,13.0,0.0153,0.2,64,256,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2256,0.9366,0.0002,3.6681,0.0225
59,9.9,13.0,0.01,0.2,64,256,3,te9.9_lr13.0_wd0.01_ls0.2_b164_b2256,0.9366,0.0002,3.6803,0.0423
125,10.9,13.0,0.0153,0.1,64,256,3,te10.9_lr13.0_wd0.0153_ls0.1_b164_b2256,0.9364,0.0013,4.0434,0.0014
119,10.9,13.0,0.0153,0.15,64,256,3,te10.9_lr13.0_wd0.0153_ls0.15_b164_b2256,0.9363,0.0032,4.0694,0.0415



Saved full coarse sweep to: ../logs/block2_group4/coarse_sweep_results_full.csv

=== Top 5 configs by accuracy (no time constraint) ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
95,10.9,11.5,0.01,0.2,64,256,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2256,0.9387,0.0023,4.0119,0.0013
77,10.9,11.5,0.0153,0.2,64,256,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2256,0.9376,0.0019,4.0357,0.0218
107,10.9,11.5,0.01,0.1,64,256,3,te10.9_lr11.5_wd0.01_ls0.1_b164_b2256,0.9373,0.0013,4.0453,0.0435
110,10.9,13.0,0.0153,0.2,48,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b148_b2256,0.9367,0.0006,3.8995,0.0115
113,10.9,13.0,0.0153,0.2,64,256,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2256,0.9367,0.0013,4.0547,0.0319



=== Top 5 configs with mean_time < 3.2s ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
123,10.9,13.0,0.0153,0.1,64,128,3,te10.9_lr13.0_wd0.0153_ls0.1_b164_b2128,0.9326,0.0018,3.0978,0.0
129,10.9,13.0,0.01,0.2,64,128,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9315,0.0029,3.0961,0.0016
99,10.9,11.5,0.01,0.15,64,128,3,te10.9_lr11.5_wd0.01_ls0.15_b164_b2128,0.9314,0.0008,3.105,0.0406
45,9.9,13.0,0.0153,0.15,64,128,3,te9.9_lr13.0_wd0.0153_ls0.15_b164_b2128,0.9306,0.0004,2.8026,0.0013
105,10.9,11.5,0.01,0.1,64,128,3,te10.9_lr11.5_wd0.01_ls0.1_b164_b2128,0.9306,0.0007,3.0803,0.0008
