imports + config

In [1]:
import subprocess
import itertools
import re
import pandas as pd
from pathlib import Path
from IPython.display import display

# Absolute path to the training script. The relative name is resolved against
# the current working directory at the moment this cell runs, so the notebook
# must be started from the directory that contains width_rebalance.py.
SCRIPT_PATH = str(Path("width_rebalance.py").resolve())
# Interpreter used to launch the script; a bare "python" is looked up on PATH.
# NOTE(review): this may not be the interpreter the kernel itself runs under --
# confirm the environment, or pin it explicitly if the two can differ.
PYTHON_BIN = "python"

hyperparameter grids

In [2]:
# Two candidate values each -- these axes ARE part of the sweep
train_epochs_list     = [9.9, 10.9]
weight_decay_list     = [0.0153, 0.01]
# Single value: label smoothing is the only axis actually held fixed
label_smoothing_list  = [0.2]

# Remaining swept axes. Together with the lists above the full Cartesian grid
# has 2 * 3 * 2 * 1 * 2 * 2 = 48 configurations.
lr_list      = [11.5, 13.0, 15.0]
block1_list  = [48, 64]
block2_list  = [128, 192]

n_runs_coarse = 3   # value passed as --n_runs per config; kept small for a quick coarse search


run_experiment helper

In [3]:
def run_experiment(
    train_epochs,
    lr,
    weight_decay,
    label_smoothing,
    block1,
    block2,
    n_runs,
    exp_name=None,
):
    """Launch the training script once as a subprocess and parse its summary.

    Every argument is stringified and forwarded to the script as the
    command-line flag of the same name (``--train_epochs``, ``--lr``,
    ``--weight_decay``, ``--label_smoothing``, ``--block1``, ``--block2``,
    ``--n_runs``, ``--exp_name``).

    Parameters
    ----------
    train_epochs, lr, weight_decay, label_smoothing, block1, block2, n_runs
        Hyperparameter values for this configuration. Their meaning is
        defined by the training script, not by this helper.
    exp_name : str, optional
        Experiment label. When omitted, one is derived from the
        hyperparameter values.

    Returns
    -------
    dict
        The inputs plus ``exp_name`` and the parsed ``mean_acc``,
        ``std_acc``, ``mean_time`` and ``std_time``. A statistic whose
        summary line is absent from the script output is ``None`` (a
        warning is printed so the gap is not silent).

    Raises
    ------
    subprocess.CalledProcessError
        If the script exits with a non-zero status.
    """
    if exp_name is None:
        exp_name = (
            f"te{train_epochs}_lr{lr}_wd{weight_decay}_ls{label_smoothing}"
            f"_b1{block1}_b2{block2}"
        )

    cmd = [
        PYTHON_BIN, SCRIPT_PATH,
        "--train_epochs", str(train_epochs),
        "--lr", str(lr),
        "--weight_decay", str(weight_decay),
        "--label_smoothing", str(label_smoothing),
        "--block1", str(block1),
        "--block2", str(block2),
        "--n_runs", str(n_runs),
        "--exp_name", exp_name,
    ]

    print("Running:", " ".join(cmd))

    out = subprocess.check_output(cmd, text=True)

    mean_acc, std_acc = _parse_mean_std(out, r"Mean accuracy:")
    mean_time, std_time = _parse_mean_std(out, r"Mean training time \(s\):")

    # Stay best-effort (None is returned, the sweep keeps going) but make the
    # missing data visible instead of letting it surface later as a stray NaN.
    missing = [
        label
        for label, value in (("accuracy", mean_acc), ("training time", mean_time))
        if value is None
    ]
    if missing:
        print(
            f"WARNING: could not parse {' / '.join(missing)} summary "
            f"for {exp_name!r}; storing None"
        )

    return {
        "train_epochs": train_epochs,
        "lr": lr,
        "weight_decay": weight_decay,
        "label_smoothing": label_smoothing,
        "block1": block1,
        "block2": block2,
        "n_runs": n_runs,
        "exp_name": exp_name,
        "mean_acc": mean_acc,
        "std_acc": std_acc,
        "mean_time": mean_time,
        "std_time": std_time,
    }


# One captured float: plain decimal, optional exponent, or nan/inf.
# The previous "[0-9.]+" pattern stopped at the "e" of "1e-04" and therefore
# parsed it as 1.0; it also accepted malformed tokens such as "0.93." which
# then crashed float().
_NUMBER = r"([-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf))"


def _parse_mean_std(output, label):
    """Return (mean, std) from a '<label> <mean> Std: <std>' line, else (None, None).

    ``label`` is a regular-expression fragment matching the text that
    precedes the mean value.
    """
    match = re.search(label + r"\s+" + _NUMBER + r"\s+Std:\s+" + _NUMBER, output)
    if match is None:
        return None, None
    return float(match.group(1)), float(match.group(2))


coarse sweep

In [4]:
# Coarse grid search: one run_experiment call per point of the Cartesian
# product of the hyperparameter lists, in the same axis order as before.
sweep_axes = {
    "train_epochs": train_epochs_list,
    "lr": lr_list,
    "weight_decay": weight_decay_list,
    "label_smoothing": label_smoothing_list,
    "block1": block1_list,
    "block2": block2_list,
}

coarse_results = []
for combo in itertools.product(*sweep_axes.values()):
    # Pair each axis name with its value for this grid point.
    params = dict(zip(sweep_axes, combo))
    coarse_results.append(run_experiment(n_runs=n_runs_coarse, **params))

results_coarse = pd.DataFrame(coarse_results)

# Every configuration, best mean accuracy first
ranked_coarse = results_coarse.sort_values("mean_acc", ascending=False)
display(ranked_coarse.reset_index(drop=True))

print("\nTop configs with mean_time < 3.2s:")
# Keep only the runs under the time cut-off, then rank those by accuracy.
is_fast = results_coarse["mean_time"] < 3.2
fast_top10 = (
    results_coarse[is_fast]
    .sort_values("mean_acc", ascending=False)
    .head(10)
    .reset_index(drop=True)
)
display(fast_top10)


Running: python /workspace/ESE3060-Final-Project/models/width_rebalance.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 48 --block2 128 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b148_b2128
Running: python /workspace/ESE3060-Final-Project/models/width_rebalance.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 48 --block2 192 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b148_b2192
Running: python /workspace/ESE3060-Final-Project/models/width_rebalance.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 64 --block2 128 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b164_b2128
Running: python /workspace/ESE3060-Final-Project/models/width_rebalance.py --train_epochs 9.9 --lr 11.5 --weight_decay 0.0153 --label_smoothing 0.2 --block1 64 --block2 192 --n_runs 3 --exp_name te9.9_lr11.5_wd0.0153_ls0.2_b164_b2192
Running: python /workspace/ESE3060-Final-Project/models/width_re

Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
0,10.9,13.0,0.01,0.2,64,192,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2192,0.9387,0.0001,3.8233,0.0072
1,9.9,15.0,0.0153,0.2,64,192,3,te9.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9387,0.0027,3.5011,0.0392
2,10.9,11.5,0.0153,0.2,64,192,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2192,0.9383,0.0007,3.8175,0.0013
3,10.9,15.0,0.0153,0.2,64,192,3,te10.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9382,0.0013,3.8333,0.0064
4,9.9,13.0,0.0153,0.2,64,192,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2192,0.938,0.0007,3.5017,0.0512
5,10.9,15.0,0.0153,0.2,48,192,3,te10.9_lr15.0_wd0.0153_ls0.2_b148_b2192,0.9378,0.0004,3.6883,0.0041
6,10.9,13.0,0.0153,0.2,64,192,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2192,0.9377,0.0011,3.8432,0.0324
7,9.9,13.0,0.01,0.2,64,192,3,te9.9_lr13.0_wd0.01_ls0.2_b164_b2192,0.9376,0.0006,3.5181,0.0329
8,10.9,15.0,0.01,0.2,64,192,3,te10.9_lr15.0_wd0.01_ls0.2_b164_b2192,0.9372,0.0008,3.8585,0.0317
9,10.9,11.5,0.01,0.2,64,192,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2192,0.9371,0.0003,3.8509,0.0505



Top configs with mean_time < 3.2s:


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
0,10.9,13.0,0.01,0.2,64,128,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9355,0.0006,3.128,0.0086
1,10.9,11.5,0.0153,0.2,64,128,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2128,0.9339,0.0008,3.1295,0.0051
2,9.9,13.0,0.01,0.2,64,128,3,te9.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9338,0.0004,2.8933,0.0476
3,9.9,13.0,0.0153,0.2,64,128,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2128,0.9332,0.0002,2.8486,0.0004
4,10.9,15.0,0.0153,0.2,48,128,3,te10.9_lr15.0_wd0.0153_ls0.2_b148_b2128,0.933,0.0013,3.0399,0.0045
5,10.9,13.0,0.0153,0.2,64,128,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2128,0.9329,0.0006,3.1335,0.0028
6,9.9,11.5,0.0153,0.2,64,128,3,te9.9_lr11.5_wd0.0153_ls0.2_b164_b2128,0.9328,0.0016,2.8205,0.0046
7,10.9,15.0,0.01,0.2,64,128,3,te10.9_lr15.0_wd0.01_ls0.2_b164_b2128,0.9325,0.0004,3.1322,0.0008
8,10.9,11.5,0.01,0.2,64,128,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2128,0.9325,0.0009,3.1383,0.0167
9,10.9,15.0,0.01,0.2,48,128,3,te10.9_lr15.0_wd0.01_ls0.2_b148_b2128,0.9323,0.0018,3.0389,0.004


In [5]:
import pandas as pd
from pathlib import Path

# 1) Lift pandas' display limits so complete tables render in the notebook
for option_name, option_value in (
    ("display.max_rows", None),
    ("display.max_columns", None),
    ("display.width", 0),
):
    pd.set_option(option_name, option_value)

print("=== Full coarse sweep (sorted by mean_acc) ===")
# Sorted once here and reused below for the unconstrained top-5.
ranked_by_acc = results_coarse.sort_values("mean_acc", ascending=False)
display(ranked_by_acc)

# 2) Write the unsorted results to CSV so they can be opened as a spreadsheet
log_dir = Path("../logs/block2_group4")   # adjust if your path is different
log_dir.mkdir(parents=True, exist_ok=True)
coarse_path = log_dir / "coarse_sweep_results_full.csv"

results_coarse.to_csv(coarse_path, index=False)
print(f"\nSaved full coarse sweep to: {coarse_path}")

# 3) (Optional) best configurations, with and without a time budget
time_budget = 3.2

best_overall = ranked_by_acc.head(5)
print("\n=== Top 5 configs by accuracy (no time constraint) ===")
display(best_overall)

# Filter first, then rank, so only runs strictly under the budget compete.
within_budget = results_coarse["mean_time"] < time_budget
best_under_time = (
    results_coarse[within_budget]
    .sort_values("mean_acc", ascending=False)
    .head(5)
)
print(f"\n=== Top 5 configs with mean_time < {time_budget}s ===")
display(best_under_time)


=== Full coarse sweep (sorted by mean_acc) ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
39,10.9,13.0,0.01,0.2,64,192,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2192,0.9387,0.0001,3.8233,0.0072
19,9.9,15.0,0.0153,0.2,64,192,3,te9.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9387,0.0027,3.5011,0.0392
27,10.9,11.5,0.0153,0.2,64,192,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2192,0.9383,0.0007,3.8175,0.0013
43,10.9,15.0,0.0153,0.2,64,192,3,te10.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9382,0.0013,3.8333,0.0064
11,9.9,13.0,0.0153,0.2,64,192,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2192,0.938,0.0007,3.5017,0.0512
41,10.9,15.0,0.0153,0.2,48,192,3,te10.9_lr15.0_wd0.0153_ls0.2_b148_b2192,0.9378,0.0004,3.6883,0.0041
35,10.9,13.0,0.0153,0.2,64,192,3,te10.9_lr13.0_wd0.0153_ls0.2_b164_b2192,0.9377,0.0011,3.8432,0.0324
15,9.9,13.0,0.01,0.2,64,192,3,te9.9_lr13.0_wd0.01_ls0.2_b164_b2192,0.9376,0.0006,3.5181,0.0329
47,10.9,15.0,0.01,0.2,64,192,3,te10.9_lr15.0_wd0.01_ls0.2_b164_b2192,0.9372,0.0008,3.8585,0.0317
31,10.9,11.5,0.01,0.2,64,192,3,te10.9_lr11.5_wd0.01_ls0.2_b164_b2192,0.9371,0.0003,3.8509,0.0505



Saved full coarse sweep to: ../logs/block2_group4/coarse_sweep_results_full.csv

=== Top 5 configs by accuracy (no time constraint) ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
39,10.9,13.0,0.01,0.2,64,192,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2192,0.9387,0.0001,3.8233,0.0072
19,9.9,15.0,0.0153,0.2,64,192,3,te9.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9387,0.0027,3.5011,0.0392
27,10.9,11.5,0.0153,0.2,64,192,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2192,0.9383,0.0007,3.8175,0.0013
43,10.9,15.0,0.0153,0.2,64,192,3,te10.9_lr15.0_wd0.0153_ls0.2_b164_b2192,0.9382,0.0013,3.8333,0.0064
11,9.9,13.0,0.0153,0.2,64,192,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2192,0.938,0.0007,3.5017,0.0512



=== Top 5 configs with mean_time < 3.2s ===


Unnamed: 0,train_epochs,lr,weight_decay,label_smoothing,block1,block2,n_runs,exp_name,mean_acc,std_acc,mean_time,std_time
38,10.9,13.0,0.01,0.2,64,128,3,te10.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9355,0.0006,3.128,0.0086
26,10.9,11.5,0.0153,0.2,64,128,3,te10.9_lr11.5_wd0.0153_ls0.2_b164_b2128,0.9339,0.0008,3.1295,0.0051
14,9.9,13.0,0.01,0.2,64,128,3,te9.9_lr13.0_wd0.01_ls0.2_b164_b2128,0.9338,0.0004,2.8933,0.0476
10,9.9,13.0,0.0153,0.2,64,128,3,te9.9_lr13.0_wd0.0153_ls0.2_b164_b2128,0.9332,0.0002,2.8486,0.0004
40,10.9,15.0,0.0153,0.2,48,128,3,te10.9_lr15.0_wd0.0153_ls0.2_b148_b2128,0.933,0.0013,3.0399,0.0045
