In [None]:
# ============================================================
# Jigsaw — SCRATCH Hyperparameter Tuning (Grid/Random, Resumable)
# * Windows/VS Code/Jupyter safe DataLoader (num_workers=0 on Windows)
# * tqdm console progress bars (no ipywidgets errors)
# * Early stopping
# * TensorBoard logging (train loss, val AUC/ACC, hparams snapshot)
# * Skips combos already logged in trial_results_scratch.csv
# * Optional per-trial checkpoints
# * Best trial of the session writes submission_scratch.(csv|xlsx)
# ============================================================

import os, re, json, time, random, hashlib, platform
from datetime import datetime, timezone
from itertools import product
from typing import Dict, Any, List

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

# ------------------- Paths & switches -------------------
# Input files; the load step below requires all three to exist in the working directory.
TRAIN_PATH = "train.csv"
TEST_PATH  = "test.csv"
SUB_PATH   = "sample_submission.csv"

RESULTS_CSV = "trial_results_scratch.csv"    # resumable log: one appended row per trial
CHECKPOINT_DIR = "checkpoints_scratch"       # per-trial .pt files, named by the combo key
SAVE_CHECKPOINTS = True                      # save each successful trial's best state_dict

# TensorBoard
ENABLE_TENSORBOARD = True                    # turn on/off (passed as enable_tb by the __main__ call)
TB_LOGDIR_BASE = "tb_scratch_tune"           # tensorboard --logdir tb_scratch_tune
TB_WRITE_HPARAMS = True                      # also write an HParams summary at the end of each run

EARLY_STOP_PATIENCE = 3                      # epochs without AUC gain before stopping (0 to disable)
MAX_TRIALS_PER_RUN  = 10                     # safety cap per session (NOTE: the __main__ call passes max_trials=200 instead)
SAVE_BEST_SESSION_SUBMISSION = True          # write SUBMISSION_CSV/XLSX from the best trial of the session
SAVE_EVERY_TRIAL_SUBMISSION = True  # per-trial submission files; see run_param_space for when they are written
SUBMISSION_CSV  = "submission_scratch.csv"
SUBMISSION_XLSX = "submission_scratch.xlsx"

# --- IO / dataloader runtime safety (Windows/Jupyter safe) ---
IS_WINDOWS = (os.name == "nt")
NUM_WORKERS = 0 if IS_WINDOWS else 2         # KEY: avoid multiprocessing on Windows
PERSISTENT_WORKERS = False                   # only forwarded to DataLoader when NUM_WORKERS > 0
PIN_MEMORY = torch.cuda.is_available()       # only forwarded to DataLoader when CUDA is available
LOG_EVERY_N = 50                              # fallback batch logging if tqdm unavailable

# Progress bars: force console (no ipywidgets)
# `tqdm` ends up as either the console tqdm callable or None; the training loop
# checks `tqdm is None` and then prints the loss every LOG_EVERY_N batches instead.
FORCE_CONSOLE_TQDM = True
if FORCE_CONSOLE_TQDM:
    os.environ["TQDM_NOTEBOOK"] = "0"
    try:
        from tqdm import tqdm  # console bar
    except Exception:
        # Best effort: any import problem just disables the progress bar.
        tqdm = None
else:
    tqdm = None

# Single device string used for the model and every batch.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ------------------- Load data -------------------
# Fail fast with a real exception: an `assert` disappears under `python -O`,
# and the message now says which file is actually missing.
_missing_inputs = [p for p in (TRAIN_PATH, TEST_PATH, SUB_PATH) if not os.path.exists(p)]
if _missing_inputs:
    raise FileNotFoundError(
        "Place train.csv, test.csv, sample_submission.csv in the working directory. "
        f"Missing: {_missing_inputs}"
    )

# Free-text columns that get normalised below (only those present in a frame).
TEXT_COLS = ['body','rule','subreddit','positive_example_1','positive_example_2','negative_example_1','negative_example_2']
train_df = pd.read_csv(TRAIN_PATH)
test_df  = pd.read_csv(TEST_PATH)

# Normalise text in place: missing values become "", everything is a stripped str.
for df in [train_df, test_df]:
    for c in TEXT_COLS:
        if c in df.columns:
            df[c] = df[c].fillna("").astype(str).str.strip()

def build_input_template(row):
    """Render one example as a single tagged string.

    `row` is anything indexable by column name (e.g. a DataFrame row). Each
    field is prefixed with its tag and the pieces are joined with " [SEP] ";
    the subreddit is additionally prefixed with "r/".
    """
    tagged_fields = (
        ("[COMMENT]", row['body']),
        ("[RULE]", row['rule']),
        ("[POS_EX_1]", row['positive_example_1']),
        ("[POS_EX_2]", row['positive_example_2']),
        ("[NEG_EX_1]", row['negative_example_1']),
        ("[NEG_EX_2]", row['negative_example_2']),
        ("[SUBREDDIT]", f"r/{row['subreddit']}"),
    )
    return " [SEP] ".join(f"{tag} {value}" for tag, value in tagged_fields)

# Add the combined text column to each frame that lacks it. The check is done
# per frame: previously only train_df was inspected, so test_df could be left
# without the column (or have an existing one overwritten).
for _frame in (train_df, test_df):
    if "input_text" not in _frame.columns:
        _frame["input_text"] = _frame.apply(build_input_template, axis=1)

# ------------------- Utils -------------------
def set_seed(seed:int=42):
    """Seed Python's `random`, NumPy and PyTorch; also every CUDA device when CUDA is available."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

def now_iso():
    """Return the current time in UTC as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()

def combo_key(params:Dict[str,Any])->str:
    """Return a stable identifier for a parameter dict.

    The dict is serialised to JSON with its keys in sorted order, so two dicts
    with the same items hash identically regardless of insertion order. The
    result is the MD5 hex digest of that JSON text.
    """
    ordered = {name: params[name] for name in sorted(params)}
    canonical = json.dumps(ordered, sort_keys=True)
    digest = hashlib.md5(canonical.encode("utf-8"))
    return digest.hexdigest()

def load_done_keys(path:str)->set:
    """Return the set of combo keys already logged in the results CSV at `path`.

    Returns an empty set when the file does not exist, cannot be parsed, or has
    no "key" column. Every logged row counts; no filtering on other columns is
    applied here.
    """
    if not os.path.exists(path):
        return set()
    try:
        # Read everything as text: with inferred dtypes a key that happens to
        # look numeric (e.g. "00123" or "1e5") would be parsed as a number and
        # no longer match the hash it was written from.
        df = pd.read_csv(path, dtype=str, keep_default_na=False)
    except Exception as exc:
        # Still best-effort, but no longer silent: an unreadable log means every
        # combo will be treated as not yet run.
        print(f"Warning: could not read {path} ({exc}); treating no trials as completed.")
        return set()
    if "key" not in df.columns:
        return set()
    return set(df["key"].tolist())

def append_result_row(row:Dict[str,Any], path=RESULTS_CSV):
    """Append one result row to the CSV log at `path`, creating it if needed.

    When the row's columns match the file's header exactly, the row is simply
    appended. When they differ (e.g. hyper-parameters were added or removed
    since the file was started), a header-less append would shift values under
    the wrong columns, so the file is rewritten with the union of columns
    instead; cells that a row does not have are left empty.
    """
    new_row = pd.DataFrame([row], columns=list(row.keys()))

    if not os.path.exists(path) or os.path.getsize(path) == 0:
        new_row.to_csv(path, index=False)
        return

    try:
        header = list(pd.read_csv(path, nrows=0).columns)
        if header == list(new_row.columns):
            new_row.to_csv(path, mode="a", header=False, index=False)
            return
        # Schema drift: reload as text so existing cells are written back verbatim.
        existing = pd.read_csv(path, dtype=str, keep_default_na=False)
    except (pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
        # The log is already unreadable; keep the old best-effort behaviour
        # rather than losing the result of a finished trial.
        print(f"Warning: {path} could not be parsed ({exc}); appending without header check.")
        new_row.to_csv(path, mode="a", header=False, index=False)
        return

    merged = pd.concat([existing, new_row], ignore_index=True, sort=False)
    merged.to_csv(path, index=False)

# Create the output folders up front. exist_ok=True makes this a no-op when
# they already exist, so re-running the cell is safe. Both are created
# unconditionally, regardless of SAVE_CHECKPOINTS / ENABLE_TENSORBOARD.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(TB_LOGDIR_BASE, exist_ok=True)

# ------------------- Tokenizer/Vocab -------------------
# A token is a maximal run of ASCII letters, digits, underscores or apostrophes.
TOKEN_RE = re.compile(r"[A-Za-z0-9_']+")

def tokenize(s):
    """Lower-case `s` and return its tokens in order; falsy input (None, "") yields []."""
    text = s if s else ""
    return TOKEN_RE.findall(text.lower())

# Vocabularies already built, keyed by vocab_size only (the frame is not part of the key).
VOCAB_CACHE: Dict[int, Dict[str,int]] = {}

def build_vocab(df:pd.DataFrame, vocab_size:int=30000)->Dict[str,int]:
    """Return a token -> id mapping built from the "body" and "rule" columns of `df`.

    Ids 0 and 1 are reserved for "<pad>" and "<unk>"; the remaining ids (from 2)
    go to the `vocab_size - 2` most frequent tokens, most frequent first. The
    result is memoised in VOCAB_CACHE per `vocab_size`, so a later call with the
    same size returns the cached dict without looking at `df`.
    """
    try:
        return VOCAB_CACHE[vocab_size]
    except KeyError:
        pass

    from collections import Counter
    freq = Counter(
        tok
        for col in ("body", "rule")
        for text in df[col].tolist()
        for tok in tokenize(text)
    )

    vocab = {"<pad>":0, "<unk>":1}
    ranked = freq.most_common(vocab_size-2)
    vocab.update({tok: idx for idx, (tok, _) in enumerate(ranked, start=2)})

    VOCAB_CACHE[vocab_size] = vocab
    return vocab

def encode_text(s, vocab, max_len):
    """Turn text into a fixed-length int64 id array.

    Tokens missing from `vocab` map to 1; the sequence is cut to `max_len`
    and right-padded with 0 when shorter.
    """
    unk_id, pad_id = 1, 0
    clipped = [vocab.get(tok, unk_id) for tok in tokenize(s)][:max_len]
    padded = clipped + [pad_id] * (max_len - len(clipped))
    return np.array(padded, dtype=np.int64)

class ScratchDataset(Dataset):
    """Dataset that encodes each row's "body" and "rule" text into one id sequence.

    Each field gets `seq_len // 2` positions; the two encodings are concatenated
    body-first. With `with_labels=True` an item is `(ids, label)` where the
    label is the row's "rule_violation" as float32; otherwise just `ids`.
    """

    def __init__(self, df, vocab, seq_len, with_labels=True):
        # Re-index from 0 so that positional item indices work with .loc below.
        self.df = df.reset_index(drop=True)
        self.vocab = vocab
        self.seq_len = seq_len
        self.with_labels = with_labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.loc[i]
        per_field = self.seq_len // 2
        encoded = [encode_text(row[col], self.vocab, per_field) for col in ("body", "rule")]
        ids = np.concatenate(encoded)
        if not self.with_labels:
            return torch.tensor(ids, dtype=torch.long)
        label = int(row["rule_violation"])
        return torch.tensor(ids, dtype=torch.long), torch.tensor(label, dtype=torch.float32)

def make_dataloader(ds: Dataset, batch_size: int, shuffle: bool) -> DataLoader:
    """Build a DataLoader from the module-level runtime settings.

    Worker-only options (prefetch_factor, persistent_workers) are passed only
    when NUM_WORKERS > 0, and pin_memory only when CUDA is available.
    """
    loader_args = {"batch_size": batch_size, "shuffle": shuffle, "num_workers": NUM_WORKERS}
    uses_workers = NUM_WORKERS > 0
    if uses_workers:
        loader_args.update(prefetch_factor=2, persistent_workers=PERSISTENT_WORKERS)
    if torch.cuda.is_available():
        loader_args["pin_memory"] = PIN_MEMORY
    return DataLoader(ds, **loader_args)

# ------------------- Model -------------------
def parse_kernel_sizes(spec:str):
    """Parse a dash-separated kernel spec such as "3-5-7" into a list of ints.

    Parts that are not positive decimal integers are skipped; if nothing valid
    remains the default [3, 5] is returned.
    """
    sizes = []
    for part in str(spec).split("-"):
        part = part.strip()
        # isdecimal() rather than isdigit(): the latter accepts characters such
        # as "²" that int() cannot parse. Zero is dropped because a kernel of
        # size 0 is not a valid convolution.
        if part.isdecimal() and int(part) > 0:
            sizes.append(int(part))
    return sizes or [3,5]

def _growth_factor(growth):
    """Parse "x<number>" (e.g. "x1.5", "x2", "x2.0") into a float; None for anything else."""
    text = str(growth).strip().lower()
    if not text.startswith("x"):
        return None
    try:
        factor = float(text[1:])
    except ValueError:
        return None
    # Reject zero, negative, infinite and NaN multipliers.
    return factor if 0 < factor < float("inf") else None


def channel_schedule(start:int, blocks:int, growth:str):
    """Return the output-channel count for each conv block.

    The first block has `start` channels. Each later block multiplies the
    previous count by the factor encoded in `growth` ("x1.2", "x1.5", "x2",
    "x2.0", ...), rounded to an int and never below 1. Any `growth` value that
    is not of the form "x<number>" keeps the width constant. The list always
    contains at least one entry, even when `blocks` is less than 1.
    """
    factor = _growth_factor(growth)
    chs = [start]
    for _ in range(1, blocks):
        prev = chs[-1]
        chs.append(prev if factor is None else max(1, int(round(prev * factor))))
    return chs

class TextCNN(nn.Module):
    """Stacked 1-D CNN text classifier that outputs one logit per example.

    Pipeline: embedding (index 0 is the padding index) -> `conv_blocks` blocks
    of Conv1d + optional BatchNorm1d + ReLU -> global pooling over the sequence
    axis -> dropout -> linear layer to a single logit. Block `i` uses the i-th
    entry of the parsed kernel spec (the last entry is reused once the spec
    runs out) and the i-th width from `channel_schedule`. `pooling="avg"`
    selects average pooling; any other value selects max pooling.
    """

    def __init__(self, vocab_size, emb_dim, conv_blocks, channels_start,
                 channel_growth, kernel_sizes_spec, use_batchnorm=True,
                 pooling="max", dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        kernel_sizes = parse_kernel_sizes(kernel_sizes_spec)
        widths = channel_schedule(channels_start, conv_blocks, channel_growth)
        last_kernel = len(kernel_sizes) - 1

        self.blocks = nn.ModuleList()
        prev_width = emb_dim
        for depth in range(conv_blocks):
            width = widths[depth]
            k = kernel_sizes[min(depth, last_kernel)]
            # padding=k//2 keeps the sequence length roughly unchanged per block
            layers = [nn.Conv1d(prev_width, width, kernel_size=k, padding=k//2)]
            layers.append(nn.BatchNorm1d(width) if use_batchnorm else nn.Identity())
            layers.append(nn.ReLU())
            self.blocks.append(nn.Sequential(*layers))
            prev_width = width

        self.pooling = pooling
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(prev_width, 1)

    def forward(self, x):
        feats = self.emb(x).transpose(1,2)   # [B,E,L] for a [B,L] batch of token ids
        for block in self.blocks:
            feats = block(feats)
        pool = F.adaptive_avg_pool1d if self.pooling == "avg" else F.adaptive_max_pool1d
        pooled = pool(feats, 1).squeeze(-1)
        return self.fc(self.drop(pooled)).squeeze(-1)

# ------------------- Loss/Optim/Val -------------------
class BCEWithLS(nn.Module):
    """Binary cross-entropy on logits with optional label smoothing.

    With `smoothing = s > 0` each target t is replaced by `t*(1-s) + 0.5*s`
    before the loss is computed; with s == 0 the targets are used unchanged.
    """

    def __init__(self, smoothing=0.0):
        super().__init__()
        self.s = smoothing

    def forward(self, logits, targets):
        smoothed = targets*(1-self.s)+0.5*self.s if self.s > 0 else targets
        return F.binary_cross_entropy_with_logits(logits, smoothed)

class FocalLoss(nn.Module):
    """Binary focal loss on logits with optional label smoothing.

    loss = -t * (1-p)^gamma * log(p) - (1-t) * p^gamma * log(1-p), averaged
    over all elements, where p = sigmoid(logits). With gamma == 0 this is plain
    binary cross-entropy. With `smoothing = s > 0` each target t is first
    replaced by `t*(1-s) + 0.5*s`.
    """

    def __init__(self, gamma=2.0, smoothing=0.0):
        super().__init__()
        self.g = gamma
        self.s = smoothing

    def forward(self, logits, targets):
        if self.s > 0:
            targets = targets*(1-self.s)+0.5*self.s
        p = torch.sigmoid(logits)
        # Log-probabilities via logsigmoid: numerically stable, no clamping.
        # The previous `torch.clamp(1-p, 1.0-1e-8)` passed its bound as *min*,
        # which pinned log(1-p) to ~0 and wiped out the negative-class term.
        log_p = F.logsigmoid(logits)         # log(p)
        log_not_p = F.logsigmoid(-logits)    # log(1 - p)
        loss_pos = -targets * ((1-p)**self.g) * log_p
        loss_neg = -(1-targets) * (p**self.g) * log_not_p
        return (loss_pos+loss_neg).mean()

def get_loss(name, smoothing):
    """Return the loss module for `name`.

    "focal" and "focal_loss" (case-insensitive) select FocalLoss with gamma=2;
    every other name, including "bce_logits", selects BCEWithLS.
    """
    if str(name).strip().lower() in ("focal", "focal_loss"):
        return FocalLoss(2.0, smoothing)
    return BCEWithLS(smoothing)

def make_optimizer(model, name, lr, weight_decay):
    """Create an optimizer over all of `model`'s parameters.

    Supported names (case-insensitive): "adamw", "adam", "sgd" (momentum 0.9)
    and "rmsprop". For "adam" and "rmsprop" `weight_decay` is the classic L2
    penalty; "adamw" applies decoupled weight decay.

    Raises:
        ValueError: if `name` is not one of the supported optimizers.
    """
    key = str(name).strip().lower()
    params = model.parameters()
    if key == "adamw":
        return torch.optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    if key == "adam":
        return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    if key == "sgd":
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    if key == "rmsprop":
        return torch.optim.RMSprop(params, lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Unknown optimizer: {name}")

def _epoch_validate(model, dl, device="cpu"):
    """Score `model` on every batch of `dl` without tracking gradients.

    Puts the model in eval mode (it is not switched back here) and returns
    `(auc, acc, preds, ys)`: ROC-AUC of the sigmoid probabilities, accuracy at
    a 0.5 threshold, and the concatenated probability / label arrays.
    """
    model.eval()
    prob_chunks, label_chunks = [], []
    with torch.no_grad():
        for inputs, labels in dl:
            inputs = inputs.to(device)
            labels = labels.to(device)
            probs = torch.sigmoid(model(inputs))
            prob_chunks.append(probs.detach().cpu().numpy())
            label_chunks.append(labels.detach().cpu().numpy())
    preds = np.concatenate(prob_chunks)
    ys = np.concatenate(label_chunks)
    auc = roc_auc_score(ys, preds)
    hard_preds = (preds >= 0.5).astype(int)
    acc = accuracy_score(ys.astype(int), hard_preds)
    return auc, acc, preds, ys

# ------------------- Train one combo -------------------
def train_eval_once_with_best(params:dict, enable_tb:bool=False):
    """Train one hyper-parameter combo and return its best validation snapshot.

    The training frame is split 80/20 (stratified on "rule_violation", seeded
    by params["seed"]). After each epoch the model is validated; the weights of
    the epoch with the highest AUC are kept. Training stops early once
    EARLY_STOP_PATIENCE consecutive epochs bring no AUC gain (0 disables this).

    When params["class_weighting"] == "balanced", the loss is pos-weighted BCE
    on logits and params["loss_fn"] / label smoothing are not used.

    Args:
        params: coerced hyper-parameter dict (see `coerce_one`).
        enable_tb: when True, log train loss and validation metrics to a
            per-run TensorBoard directory under TB_LOGDIR_BASE.

    Returns:
        (best_auc, best_acc, best_state, vocab) where `best_state` is a CPU
        copy of the best epoch's state_dict (None if no epoch ran).
    """
    set_seed(int(params["seed"]))
    vocab = build_vocab(train_df, int(params["vocab_size"]))
    seq_len = int(params["seq_len"])
    tr, va = train_test_split(train_df, test_size=0.2, random_state=int(params["seed"]),
                              stratify=train_df["rule_violation"])
    ds_tr = ScratchDataset(tr, vocab, seq_len, True)
    ds_va = ScratchDataset(va, vocab, seq_len, True)
    dl_tr = make_dataloader(ds_tr, int(params["batch_size"]), True)
    dl_va = make_dataloader(ds_va, int(params["batch_size"]), False)

    model = TextCNN(
        vocab_size=len(vocab),
        emb_dim=int(params["emb_dim"]),
        conv_blocks=int(params["conv_blocks"]),
        channels_start=int(params["channels_start"]),
        channel_growth=str(params["channel_growth"]),
        kernel_sizes_spec=str(params["kernel_sizes"]),
        use_batchnorm=bool(params["use_batchnorm"]),
        pooling=str(params["pooling"]),
        dropout=float(params["dropout"])
    ).to(DEVICE)

    opt = make_optimizer(model, str(params["optimizer"]), float(params["learning_rate"]), float(params["weight_decay"]))
    loss_fn = get_loss(str(params["loss_fn"]), float(params["label_smoothing"]))
    grad_clip = float(params["grad_clip"])
    epochs = int(params["epochs"])

    pos_weight = None
    if str(params["class_weighting"])=="balanced":
        # negatives / positives; float32 so the loss stays in the logits' dtype
        n_pos = float(tr["rule_violation"].sum())
        pos_weight = torch.tensor([(len(tr)-n_pos)/(n_pos+1e-6)], dtype=torch.float32, device=DEVICE)

    tb = None
    tb_run_dir = None
    if enable_tb:
        try:
            from torch.utils.tensorboard import SummaryWriter
            tag = (
                f"emb{params['emb_dim']}_cb{params['conv_blocks']}_ch{params['channels_start']}"
                f"_lr{params['learning_rate']}_bs{params['batch_size']}"
            )
            tb_run_dir = os.path.join(TB_LOGDIR_BASE, f"{tag}_{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%S')}")
            tb = SummaryWriter(log_dir=tb_run_dir)
            tb.add_text("hparams/json", json.dumps(params, indent=2))
        except Exception as e:
            # Logging is optional: training proceeds without TensorBoard.
            print("TensorBoard unavailable:", e)
            tb = None

    best_auc, best_acc, best_state = -1.0, 0.0, None
    global_step = 0
    no_improve = 0

    try:
        for ep in range(epochs):
            model.train()
            iterator = dl_tr if tqdm is None else tqdm(dl_tr, leave=False, desc=f"Epoch {ep+1}/{epochs}")
            for i, (xb, yb) in enumerate(iterator):
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                opt.zero_grad()
                logits = model(xb)
                loss = (F.binary_cross_entropy_with_logits(logits, yb, pos_weight=pos_weight)
                        if pos_weight is not None else loss_fn(logits, yb))
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
                opt.step()
                if tqdm is None and (i % LOG_EVERY_N == 0):
                    print(f"  batch {i:>4}/{len(dl_tr)}  loss={float(loss.item()):.4f}")
                if tb:
                    tb.add_scalar("train/loss", float(loss.item()), global_step)
                global_step += 1

            auc, acc, _, _ = _epoch_validate(model, dl_va, device=DEVICE)
            improved = auc > best_auc + 1e-5
            if improved:
                best_auc, best_acc = auc, acc
                # clone() is essential: on CPU `.detach().cpu()` returns tensors that
                # share storage with the live parameters, so without a copy the
                # "best" snapshot would be overwritten by later optimizer steps.
                best_state = {k: v.detach().cpu().clone() for k,v in model.state_dict().items()}
                no_improve = 0
            else:
                no_improve += 1

            print(f"[SCRATCH] Epoch {ep+1}/{epochs} AUC={auc:.5f} ACC={acc:.4f} "
                  f"(best {best_auc:.5f}, patience {no_improve}/{EARLY_STOP_PATIENCE})")
            if tb:
                tb.add_scalar("val/auc", float(auc), ep)
                tb.add_scalar("val/accuracy", float(acc), ep)

            if EARLY_STOP_PATIENCE and no_improve >= EARLY_STOP_PATIENCE:
                print("Early stopping: no improvement.")
                break

        if tb:
            # Snapshot final best metrics + (optional) hparams summary
            tb.add_scalar("val/best_auc", float(best_auc))
            tb.add_scalar("val/best_acc", float(best_acc))
            if TB_WRITE_HPARAMS:
                try:
                    # TensorBoard HParams (writes to a separate event file inside this run)
                    from torch.utils.tensorboard.summary import hparams
                    metric_dict = {"hparam/best_auc": float(best_auc), "hparam/best_acc": float(best_acc)}
                    tb.file_writer.add_summary(hparams(params, metric_dict))
                except Exception as e:
                    # Still non-fatal, but say so instead of failing silently.
                    print("Note: could not write TensorBoard hparams:", e)
    finally:
        # Close the writer even when training raises, so its event file is flushed.
        if tb:
            tb.close()

    return best_auc, best_acc, best_state, vocab

# ------------------- Predict test with a state -------------------
def predict_test_with_state(best_state, params, vocab, out_csv="submission_scratch.csv"):
    """Score the test frame with a saved state_dict and write a submission CSV.

    A TextCNN is rebuilt from `params`, loaded with `best_state` and run in
    eval mode over `test_df`. The output has the columns "row_id" and
    "rule_violation" (probabilities clipped to [0, 1]).

    Row ids are taken from `test_df` itself when it has a "row_id" column, so
    each prediction is paired with the row it was computed from. Only when the
    test frame has no ids is the sample submission used as the template.

    Args:
        best_state: state_dict (tensors on any device) compatible with `params`.
        params: coerced hyper-parameter dict used to rebuild the model.
        vocab: token -> id mapping the state was trained with.
        out_csv: destination path.

    Returns:
        `out_csv`.

    Raises:
        ValueError: if the sample-submission fallback is needed but its row
            count differs from the number of predictions.
    """
    seq_len = int(params["seq_len"])

    # Same encoding path as training/validation, just without labels.
    test_ds = ScratchDataset(test_df, vocab, seq_len, with_labels=False)
    test_dl = make_dataloader(test_ds, int(params["batch_size"]), False)

    model = TextCNN(
        vocab_size=len(vocab),
        emb_dim=int(params["emb_dim"]),
        conv_blocks=int(params["conv_blocks"]),
        channels_start=int(params["channels_start"]),
        channel_growth=str(params["channel_growth"]),
        kernel_sizes_spec=str(params["kernel_sizes"]),
        use_batchnorm=bool(params["use_batchnorm"]),
        pooling=str(params["pooling"]),
        dropout=float(params["dropout"])
    ).to(DEVICE)
    model.load_state_dict({k: v.to(DEVICE) for k,v in best_state.items()})
    model.eval()

    preds = []
    with torch.no_grad():
        for xb in test_dl:
            xb = xb.to(DEVICE)
            p = torch.sigmoid(model(xb)).detach().cpu().numpy()
            preds.append(p)
    preds = np.clip(np.concatenate(preds).reshape(-1), 0, 1)

    if "row_id" in test_df.columns:
        sub = pd.DataFrame({"row_id": test_df["row_id"].to_numpy(), "rule_violation": preds})
    else:
        sub = pd.read_csv(SUB_PATH).copy()
        if len(sub) != len(preds):
            raise ValueError(
                f"{SUB_PATH} has {len(sub)} rows but {len(preds)} predictions were produced."
            )
        if "row_id" not in sub.columns:
            sub["row_id"] = np.arange(len(preds))
        sub["rule_violation"] = preds

    sub.to_csv(out_csv, index=False)
    print(f"✅ Wrote {out_csv} (rows={len(sub)})")
    return out_csv

# ------------------- Param space handling -------------------
# Fallback values merged *under* each combo by coerce_one: a key supplied by the
# combo (or by the caller's constants) overrides the default here.
CONSTANTS_DEFAULT = {
    "vocab_size": 30000,
    "use_batchnorm": True,
    "pooling": "max",
    "optimizer": "adamw",
    "grad_clip": 1.0,
    "scheduler": "none",        # catalog only: recorded with the trial, not read by the training loop
    "class_weighting": "none",
    "seed": 42,
}
# Keys that have no default and must therefore be present in every combo
# (coerce_one raises KeyError otherwise).
REQ = ['seq_len','emb_dim','conv_blocks','channels_start','channel_growth','kernel_sizes',
       'dropout','weight_decay','label_smoothing','learning_rate','batch_size','epochs','loss_fn']
# Per-type key lists used by coerce_one to cast values. "vocab_size" is in none
# of them; the training code applies int() to it at the point of use.
INTS   = ["seq_len","emb_dim","conv_blocks","channels_start","batch_size","epochs","seed"]
FLOATS = ["dropout","weight_decay","label_smoothing","learning_rate","grad_clip"]
STRS   = ["channel_growth","kernel_sizes","pooling","optimizer","scheduler","loss_fn","class_weighting"]
BOOLS  = ["use_batchnorm"]

def coerce_one(p:Dict[str,Any])->Dict[str,Any]:
    """Merge one combo over CONSTANTS_DEFAULT and cast the known keys to their types.

    Keys listed in INTS / FLOATS / STRS are cast with int / float / str. Keys in
    BOOLS accept real booleans or the strings "true", "1", "yes", "y"
    (case-insensitive, surrounding whitespace ignored) as True. Keys that appear
    in none of the lists are passed through untouched.

    Raises:
        KeyError: if any key from REQ is absent after merging.
    """
    merged = {**CONSTANTS_DEFAULT, **p}

    missing = [k for k in REQ if k not in merged]
    if missing:
        raise KeyError(f"Missing required param(s): {missing}")

    for names, caster in ((INTS, int), (FLOATS, float), (STRS, str)):
        for name in names:
            merged[name] = caster(merged[name])

    truthy = ("true","1","yes","y")
    for name in BOOLS:
        raw = merged[name]
        if isinstance(raw, str):
            merged[name] = raw.strip().lower() in truthy
        else:
            merged[name] = bool(raw)
    return merged

def expand_grid(space:Dict[str,List[Any]], shuffle=True, seed=42, max_combos:int=100_000)->List[Dict[str,Any]]:
    """Expand a parameter space into a list of combo dicts.

    Each value of `space` is a list/tuple of choices (a bare value counts as a
    single choice). While the full Cartesian product has at most `max_combos`
    entries it is returned in full, in `itertools.product` order or shuffled
    with `random.Random(seed)`.

    A larger product is never materialised (a couple of dozen dimensions easily
    reach billions of combos). Instead `max_combos` distinct combos are drawn
    uniformly at random, reproducibly for a given `seed`, and a notice is
    printed. With `shuffle=False` the sampled combos are returned in product
    order.

    Args:
        space: mapping of parameter name to its choices.
        shuffle: shuffle (or leave randomly ordered) the returned combos.
        seed: seed for shuffling / sampling.
        max_combos: size cap described above; must be non-negative. Pass None
            to always build the full product.

    Returns:
        A list of dicts, each mapping every key of `space` to one choice.
    """
    from itertools import product
    keys = list(space.keys())
    vals = [list(space[k]) if isinstance(space[k], (list, tuple)) else [space[k]] for k in keys]

    total = 1
    for choices in vals:
        total *= len(choices)

    if max_combos is None or total <= max_combos:
        combos = [dict(zip(keys, tup)) for tup in product(*vals)]
        if shuffle:
            rnd = random.Random(seed); rnd.shuffle(combos)
        return combos

    print(f"expand_grid: full grid has {total:,} combos; "
          f"sampling {max_combos:,} of them without building the full grid.")
    rnd = random.Random(seed)
    picked = rnd.sample(range(total), max_combos)   # distinct flat indices
    if not shuffle:
        picked.sort()

    combos = []
    for flat in picked:
        chosen = {}
        # Mixed-radix decode; the last key varies fastest, as in itertools.product.
        for k, choices in zip(reversed(keys), reversed(vals)):
            flat, pos = divmod(flat, len(choices))
            chosen[k] = choices[pos]
        combos.append({k: chosen[k] for k in keys})
    return combos

# ------------------- Tuner (grid/random + resume) -------------------
def _csv_to_xlsx(csv_path, xlsx_path):
    """Copy a CSV into a one-sheet XLSX ("submission"); return None on success, else the exception."""
    try:
        sub_df = pd.read_csv(csv_path)
        with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as w:
            sub_df.to_excel(w, sheet_name="submission", index=False)
    except Exception as e:
        return e
    return None


def run_param_space(space:Dict[str,List[Any]],
                    constants:Dict[str,Any]=None,
                    mode:str="grid",        # "grid" or "random"
                    n_samples:int=None,     # only for mode="random"
                    max_trials:int=10,
                    enable_tb:bool=False,
                    save_best_submission:bool=True):
    """Run up to `max_trials` not-yet-logged combos from `space` and log each result.

    The grid is expanded in a shuffled order seeded by constants["seed"]
    (default 42); with mode="random" and `n_samples` set, only the first
    `n_samples` combos of that order are considered. Combos whose key already
    appears in RESULTS_CSV are skipped. Every attempted trial, including one
    that raised, is appended to RESULTS_CSV and will therefore be skipped by
    later sessions.

    Side effects per successful trial: an optional checkpoint
    (SAVE_CHECKPOINTS) and, when SAVE_EVERY_TRIAL_SUBMISSION is on, a per-trial
    submission CSV/XLSX. At the end, the best trial of this session is written
    to SUBMISSION_CSV/SUBMISSION_XLSX when `save_best_submission` is True.

    Args:
        space: parameter name -> list of choices.
        constants: values applied to every combo; they override `space` on clashes.
        mode: "grid" or "random" (see above).
        n_samples: number of combos to consider in "random" mode.
        max_trials: maximum number of trials to run this session (<= 0 runs none).
        enable_tb: forward TensorBoard logging to each trial.
        save_best_submission: write the session-best submission at the end.
    """
    constants = constants or {}
    grid = expand_grid(space, shuffle=True, seed=int(constants.get("seed", 42)))
    if mode == "random" and n_samples is not None:
        grid = grid[:n_samples]  # shuffled already

    done = load_done_keys(RESULTS_CSV)
    print(f"Total combos: {len(grid)} | Completed in CSV: {len(done)}")

    best_auc = -1.0
    best_payload = None
    ran = 0
    t0 = time.time()

    for idx, raw in enumerate(grid):
        # Checked before starting a trial so that max_trials <= 0 runs nothing.
        if ran >= max_trials:
            break

        params = coerce_one({**raw, **constants})
        key = combo_key(params)
        if key in done:
            continue

        print(f"\n=== Trial {ran+1}/{max_trials} | idx={idx} ===")
        print({k: params[k] for k in REQ})

        t1 = time.time()
        try:
            auc, acc, state, vocab = train_eval_once_with_best(params, enable_tb)
            status = "ok"
            if SAVE_CHECKPOINTS and state is not None:
                torch.save({"state_dict": state, "params": params},
                           os.path.join(CHECKPOINT_DIR, f"{key}.pt"))
        except Exception as e:
            # One bad combo must not end the session; it is logged with its error.
            auc, acc = float("nan"), float("nan")
            state, vocab = None, None
            status = f"error: {e}"
            print("❌", e)
        dur = time.time() - t1

        row_out = {
            "timestamp": now_iso(),
            "key": key,
            "mode": "scratch",
            "device": DEVICE,
            "python": platform.python_version(),
            "grid_idx": idx,
            "val_auc": auc,
            "val_acc": acc,
            "runtime_sec": round(dur,2),
            "status": status,
            **{f"hp/{k}": params[k] for k in sorted(params)}
        }
        append_result_row(row_out, RESULTS_CSV)
        ran += 1

        if status != "ok":
            continue

        if auc > best_auc:
            best_auc = auc
            best_payload = (state, params, vocab)

        # Save a submission after every successful trial (if enabled), not only
        # after trials that improve on the session best.
        if SAVE_EVERY_TRIAL_SUBMISSION and state is not None:
            trial_submission_csv = f"submission_trial_{ran:03d}_auc_{auc:.4f}.csv"
            trial_submission_xlsx = f"submission_trial_{ran:03d}_auc_{auc:.4f}.xlsx"
            try:
                predict_test_with_state(state, params, vocab, out_csv=trial_submission_csv)
            except Exception as e:
                # An optional artefact must not abort the remaining trials.
                print(f"Note: could not write submission for trial {ran}:", e)
            else:
                err = _csv_to_xlsx(trial_submission_csv, trial_submission_xlsx)
                if err is None:
                    print(f"✅ Wrote trial submission: {trial_submission_csv} and {trial_submission_xlsx}")
                else:
                    print(f"Note: could not write XLSX for trial {ran}:", err)

    print(f"\nSession done. Ran {ran} trial(s) in {round(time.time()-t0,2)}s.")
    if best_payload and save_best_submission:
        state, params, vocab = best_payload
        predict_test_with_state(state, params, vocab, out_csv=SUBMISSION_CSV)
        err = _csv_to_xlsx(SUBMISSION_CSV, SUBMISSION_XLSX)
        if err is None:
            print(f"✅ Wrote {SUBMISSION_XLSX}")
        else:
            print("Note: could not write XLSX submission:", err)
    else:
        print("No submission written this session.")

# ============================================================
# DEFINE YOUR PARAM SPACE HERE (laptop-safe; resume lets you add more)
# 
# HOW TO ADD MORE PARAMETERS:
# 1. Add new parameter to PARAM_SPACE with a list of values to try
# 2. Update your model/training code to use the new parameter
# 3. Add parameter to REQ list if it's required for model creation
# 4. The system will automatically generate all combinations
#
# EXAMPLES:
# - Add new optimizers: optimizer=["adam", "adamw", "sgd", "rmsprop"]
# - Add new architectures: model_type=["cnn", "transformer", "lstm"]
# - Add new data augmentation: augmentation=["none", "backtranslation", "paraphrase"]
# ============================================================
# Only keys that the model/training code actually reads belong here. Every
# extra key multiplies the number of combos and changes the combo key, so an
# unread key produces repeated trainings of the same configuration.
#
# Not searched yet because no code consumes them (add them back once they are
# implemented, per "HOW TO ADD MORE PARAMETERS" above):
#   warmup_epochs, scheduler_type, activation, use_residual, attention_heads,
#   and "attention" pooling.
PARAM_SPACE = dict(
    # Capacity/structure
    seq_len=[200, 224, 256, 288, 320],  # Expanded sequence lengths
    emb_dim=[96, 128, 160, 192, 224],   # More embedding dimensions
    conv_blocks=[1, 2, 3],              # More convolution blocks
    channels_start=[96, 128, 160, 192], # More starting channels
    channel_growth=["x1.2", "x1.5", "x2.0"],  # Different growth rates
    kernel_sizes=["3-5-7", "3-5-7-9", "5-7-9"],  # More kernel size combinations

    # Optimization/regularization
    optimizer=["adam", "adamw", "sgd"],  # More optimizers
    learning_rate=[5e-4, 8e-4, 1e-3, 1.2e-3, 1.5e-3],  # More learning rates
    batch_size=[32, 64, 128, 256],       # More batch sizes
    epochs=[6, 8, 10, 12],               # More epoch options
    dropout=[0.1, 0.15, 0.2, 0.25, 0.3], # More dropout rates
    weight_decay=[0, 1e-5, 1e-4, 2e-4, 5e-4],  # More weight decay options
    label_smoothing=[0.0, 0.01, 0.03, 0.05],   # More label smoothing options
    loss_fn=["bce_logits", "focal_loss"],       # More loss functions
    grad_clip=[0.5, 1.0, 1.5, 2.0],     # Gradient clipping values

    # Model switches
    pooling=["max", "avg"],              # key must be "pooling": that is what the model is built from
    use_batchnorm=[True, False],         # Batch normalization toggle
)

# Constants applied to every combo (change here if needed)
CONSTANTS = dict(
    vocab_size=30000,
    class_weighting="none",
    seed=42,
    # Note: use_batchnorm, pooling and grad_clip are searched in PARAM_SPACE.
    # A key placed here overrides the same key in PARAM_SPACE for every combo.
)

# ============================================================
# GO: run grid (or random sample) with resume
# Start TensorBoard in a terminal:  tensorboard --logdir tb_scratch_tune
# ============================================================
if __name__ == "__main__":
    # Session settings gathered in one place, then handed to the tuner.
    session_options = dict(
        constants=CONSTANTS,
        mode="grid",             # or "random"
        n_samples=None,          # only used for mode="random"
        max_trials=200,          # hard-coded; MAX_TRIALS_PER_RUN is not used here
        enable_tb=ENABLE_TENSORBOARD,
        save_best_submission=SAVE_BEST_SESSION_SUBMISSION,
    )
    run_param_space(PARAM_SPACE, **session_options)


Device: cpu
Total combos: 1728 | Completed in CSV: 27

=== Trial 1/200 | idx=27 ===
{'seq_len': 200, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76391 ACC=0.6798 (best 0.76391, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79005 ACC=0.6921 (best 0.79005, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80017 ACC=0.7291 (best 0.80017, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80735 ACC=0.7094 (best 0.80735, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.80282 ACC=0.7241 (best 0.80735, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80473 ACC=0.7143 (best 0.80735, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.80294 ACC=0.7020 (best 0.80735, patience 3/3)
Early stopping: no improvement.

=== Trial 2/200 | idx=28 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78012 ACC=0.7069 (best 0.78012, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76367 ACC=0.5739 (best 0.78012, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78784 ACC=0.7020 (best 0.78784, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78886 ACC=0.6847 (best 0.78886, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78481 ACC=0.7044 (best 0.78886, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.75070 ACC=0.6847 (best 0.78886, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78791 ACC=0.7143 (best 0.78886, patience 3/3)
Early stopping: no improvement.

=== Trial 3/200 | idx=29 ===
{'seq_len': 256, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.79507 ACC=0.6305 (best 0.79507, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78029 ACC=0.6897 (best 0.79507, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77803 ACC=0.6970 (best 0.79507, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79085 ACC=0.7094 (best 0.79507, patience 3/3)
Early stopping: no improvement.

=== Trial 4/200 | idx=30 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74779 ACC=0.5665 (best 0.74779, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78502 ACC=0.7143 (best 0.78502, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75303 ACC=0.7020 (best 0.78502, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.73672 ACC=0.5345 (best 0.78502, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77330 ACC=0.6847 (best 0.78502, patience 3/3)
Early stopping: no improvement.

=== Trial 5/200 | idx=31 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76791 ACC=0.6995 (best 0.76791, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77735 ACC=0.6182 (best 0.77735, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79801 ACC=0.7143 (best 0.79801, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79869 ACC=0.6872 (best 0.79869, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79602 ACC=0.6921 (best 0.79869, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79692 ACC=0.7143 (best 0.79869, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79786 ACC=0.7143 (best 0.79869, patience 3/3)
Early stopping: no improvement.

=== Trial 6/200 | idx=32 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.79381 ACC=0.6084 (best 0.79381, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78371 ACC=0.6897 (best 0.79381, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78910 ACC=0.6921 (best 0.79381, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79612 ACC=0.7143 (best 0.79612, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78510 ACC=0.6995 (best 0.79612, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77752 ACC=0.6921 (best 0.79612, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77813 ACC=0.6970 (best 0.79612, patience 3/3)
Early stopping: no improvement.

=== Trial 7/200 | idx=33 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75248 ACC=0.5148 (best 0.75248, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77954 ACC=0.5714 (best 0.77954, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80367 ACC=0.7167 (best 0.80367, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78782 ACC=0.7192 (best 0.80367, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79420 ACC=0.6946 (best 0.80367, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77269 ACC=0.6921 (best 0.80367, patience 3/3)
Early stopping: no improvement.

=== Trial 8/200 | idx=34 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73791 ACC=0.6675 (best 0.73791, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79913 ACC=0.6970 (best 0.79913, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80684 ACC=0.7217 (best 0.80684, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79073 ACC=0.7118 (best 0.80684, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78587 ACC=0.7044 (best 0.80684, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78206 ACC=0.7094 (best 0.80684, patience 3/3)
Early stopping: no improvement.

=== Trial 9/200 | idx=35 ===
{'seq_len': 256, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74245 ACC=0.6650 (best 0.74245, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77396 ACC=0.7069 (best 0.77396, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77488 ACC=0.7118 (best 0.77488, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77891 ACC=0.6995 (best 0.77891, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76796 ACC=0.6995 (best 0.77891, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77689 ACC=0.6970 (best 0.77891, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78583 ACC=0.6946 (best 0.78583, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77803 ACC=0.6798 (best 0.78583, patience 1/3)

=== Trial 10/200 | idx=36 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73121 ACC=0.6724 (best 0.73121, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79570 ACC=0.7044 (best 0.79570, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80007 ACC=0.7291 (best 0.80007, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78840 ACC=0.7118 (best 0.80007, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78956 ACC=0.7044 (best 0.80007, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78512 ACC=0.7044 (best 0.80007, patience 3/3)
Early stopping: no improvement.

=== Trial 11/200 | idx=37 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73956 ACC=0.6995 (best 0.73956, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78434 ACC=0.7118 (best 0.78434, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80211 ACC=0.7118 (best 0.80211, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79080 ACC=0.7020 (best 0.80211, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79010 ACC=0.6970 (best 0.80211, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79755 ACC=0.7217 (best 0.80211, patience 3/3)
Early stopping: no improvement.

=== Trial 12/200 | idx=38 ===
{'seq_len': 224, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.68495 ACC=0.4901 (best 0.68495, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77100 ACC=0.6946 (best 0.77100, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78396 ACC=0.6970 (best 0.78396, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77704 ACC=0.7020 (best 0.78396, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75864 ACC=0.6675 (best 0.78396, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.75951 ACC=0.6749 (best 0.78396, patience 3/3)
Early stopping: no improvement.

=== Trial 13/200 | idx=39 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76852 ACC=0.6207 (best 0.76852, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79150 ACC=0.6921 (best 0.79150, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79633 ACC=0.7365 (best 0.79633, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80587 ACC=0.7192 (best 0.80587, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79638 ACC=0.7069 (best 0.80587, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79536 ACC=0.7143 (best 0.80587, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78917 ACC=0.7069 (best 0.80587, patience 3/3)
Early stopping: no improvement.

=== Trial 14/200 | idx=40 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76978 ACC=0.6232 (best 0.76978, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78701 ACC=0.6946 (best 0.78701, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79558 ACC=0.7266 (best 0.79558, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80012 ACC=0.7118 (best 0.80012, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79022 ACC=0.7241 (best 0.80012, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78447 ACC=0.7192 (best 0.80012, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79311 ACC=0.7217 (best 0.80012, patience 3/3)
Early stopping: no improvement.

=== Trial 15/200 | idx=41 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76765 ACC=0.6232 (best 0.76765, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79189 ACC=0.7020 (best 0.79189, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79328 ACC=0.7167 (best 0.79328, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80199 ACC=0.7266 (best 0.80199, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78925 ACC=0.7020 (best 0.80199, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79602 ACC=0.7167 (best 0.80199, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79046 ACC=0.7094 (best 0.80199, patience 3/3)
Early stopping: no improvement.

=== Trial 16/200 | idx=42 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78391 ACC=0.6478 (best 0.78391, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79519 ACC=0.7143 (best 0.79519, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78947 ACC=0.7044 (best 0.79519, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79667 ACC=0.7217 (best 0.79667, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79354 ACC=0.7044 (best 0.79667, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78107 ACC=0.6995 (best 0.79667, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78376 ACC=0.7069 (best 0.79667, patience 3/3)
Early stopping: no improvement.

=== Trial 17/200 | idx=43 ===
{'seq_len': 256, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76891 ACC=0.5394 (best 0.76891, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78660 ACC=0.6576 (best 0.78660, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78510 ACC=0.6872 (best 0.78660, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79655 ACC=0.7094 (best 0.79655, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79274 ACC=0.7044 (best 0.79655, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78762 ACC=0.6921 (best 0.79655, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79092 ACC=0.6946 (best 0.79655, patience 3/3)
Early stopping: no improvement.

=== Trial 18/200 | idx=44 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.77811 ACC=0.5714 (best 0.77811, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78100 ACC=0.7020 (best 0.78100, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77859 ACC=0.7094 (best 0.78100, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77333 ACC=0.7020 (best 0.78100, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75985 ACC=0.6478 (best 0.78100, patience 3/3)
Early stopping: no improvement.

=== Trial 19/200 | idx=45 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75104 ACC=0.6823 (best 0.75104, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.74345 ACC=0.6650 (best 0.75104, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76954 ACC=0.6946 (best 0.76954, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76325 ACC=0.6749 (best 0.76954, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75694 ACC=0.6429 (best 0.76954, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76398 ACC=0.6798 (best 0.76954, patience 3/3)
Early stopping: no improvement.

=== Trial 20/200 | idx=46 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74206 ACC=0.6429 (best 0.74206, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78706 ACC=0.6823 (best 0.78706, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.74415 ACC=0.6700 (best 0.78706, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.75454 ACC=0.6355 (best 0.78706, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75197 ACC=0.6650 (best 0.78706, patience 3/3)
Early stopping: no improvement.

=== Trial 21/200 | idx=47 ===
{'seq_len': 200, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74971 ACC=0.6773 (best 0.74971, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79925 ACC=0.6970 (best 0.79925, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.81485 ACC=0.7340 (best 0.81485, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79595 ACC=0.7118 (best 0.81485, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79973 ACC=0.7094 (best 0.81485, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78507 ACC=0.6995 (best 0.81485, patience 3/3)
Early stopping: no improvement.

=== Trial 22/200 | idx=48 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73816 ACC=0.6626 (best 0.73816, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76760 ACC=0.7118 (best 0.76760, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78282 ACC=0.7389 (best 0.78282, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79522 ACC=0.7488 (best 0.79522, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78774 ACC=0.7340 (best 0.79522, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78971 ACC=0.7241 (best 0.79522, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79512 ACC=0.6946 (best 0.79522, patience 3/3)
Early stopping: no improvement.

=== Trial 23/200 | idx=49 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76757 ACC=0.6970 (best 0.76757, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.80891 ACC=0.7241 (best 0.80891, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76682 ACC=0.7192 (best 0.80891, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76590 ACC=0.5640 (best 0.80891, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78282 ACC=0.6823 (best 0.80891, patience 3/3)
Early stopping: no improvement.

=== Trial 24/200 | idx=50 ===
{'seq_len': 224, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78973 ACC=0.6872 (best 0.78973, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78874 ACC=0.6626 (best 0.78973, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78859 ACC=0.5936 (best 0.78973, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.75318 ACC=0.6897 (best 0.78973, patience 3/3)
Early stopping: no improvement.

=== Trial 25/200 | idx=51 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76595 ACC=0.6970 (best 0.76595, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77799 ACC=0.7241 (best 0.77799, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75813 ACC=0.6946 (best 0.77799, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76194 ACC=0.5714 (best 0.77799, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76323 ACC=0.6355 (best 0.77799, patience 3/3)
Early stopping: no improvement.

=== Trial 26/200 | idx=52 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74772 ACC=0.6847 (best 0.74772, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78961 ACC=0.6921 (best 0.78961, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75354 ACC=0.6798 (best 0.78961, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76796 ACC=0.5640 (best 0.78961, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76842 ACC=0.6700 (best 0.78961, patience 3/3)
Early stopping: no improvement.

=== Trial 27/200 | idx=53 ===
{'seq_len': 200, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78167 ACC=0.6872 (best 0.78167, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79150 ACC=0.6182 (best 0.79150, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78978 ACC=0.7118 (best 0.79150, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78578 ACC=0.7192 (best 0.79150, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79070 ACC=0.7094 (best 0.79150, patience 3/3)
Early stopping: no improvement.

=== Trial 28/200 | idx=54 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74932 ACC=0.6872 (best 0.74932, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78971 ACC=0.7192 (best 0.78971, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79862 ACC=0.7217 (best 0.79862, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79915 ACC=0.7217 (best 0.79915, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79563 ACC=0.7118 (best 0.79915, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79716 ACC=0.6970 (best 0.79915, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.80245 ACC=0.7266 (best 0.80245, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.80126 ACC=0.7315 (best 0.80245, patience 1/3)

=== Trial 29/200 | idx=55 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76374 ACC=0.6798 (best 0.76374, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78998 ACC=0.6946 (best 0.78998, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80228 ACC=0.7266 (best 0.80228, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80852 ACC=0.7020 (best 0.80852, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.80689 ACC=0.7266 (best 0.80852, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80774 ACC=0.7020 (best 0.80852, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.80624 ACC=0.7069 (best 0.80852, patience 3/3)
Early stopping: no improvement.

=== Trial 30/200 | idx=56 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71699 ACC=0.4951 (best 0.71699, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78002 ACC=0.7044 (best 0.78002, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78641 ACC=0.6773 (best 0.78641, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.75886 ACC=0.6527 (best 0.78641, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76311 ACC=0.6897 (best 0.78641, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76364 ACC=0.6552 (best 0.78641, patience 3/3)
Early stopping: no improvement.

=== Trial 31/200 | idx=57 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76311 ACC=0.5025 (best 0.76311, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77617 ACC=0.6084 (best 0.77617, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79704 ACC=0.7118 (best 0.79704, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79500 ACC=0.7365 (best 0.79704, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79898 ACC=0.7069 (best 0.79898, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78282 ACC=0.6823 (best 0.79898, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79379 ACC=0.6946 (best 0.79898, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76614 ACC=0.7118 (best 0.79898, patience 3/3)
Early stopping: no improvement.

=== Trial 32/200 | idx=58 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76381 ACC=0.6330 (best 0.76381, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79660 ACC=0.6552 (best 0.79660, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80066 ACC=0.7094 (best 0.80066, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79061 ACC=0.6847 (best 0.80066, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79556 ACC=0.6823 (best 0.80066, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76648 ACC=0.6749 (best 0.80066, patience 3/3)
Early stopping: no improvement.

=== Trial 33/200 | idx=59 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74677 ACC=0.6773 (best 0.74677, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76930 ACC=0.7094 (best 0.76930, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77408 ACC=0.7069 (best 0.77408, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77796 ACC=0.6872 (best 0.77796, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78330 ACC=0.6921 (best 0.78330, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76830 ACC=0.6798 (best 0.78330, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77667 ACC=0.6970 (best 0.78330, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76214 ACC=0.6921 (best 0.78330, patience 3/3)
Early stopping: no improvement.

=== Trial 34/200 | idx=60 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75296 ACC=0.6650 (best 0.75296, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76760 ACC=0.7118 (best 0.76760, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78757 ACC=0.7438 (best 0.78757, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79667 ACC=0.7562 (best 0.79667, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78818 ACC=0.7291 (best 0.79667, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78942 ACC=0.7069 (best 0.79667, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79051 ACC=0.6872 (best 0.79667, patience 3/3)
Early stopping: no improvement.

=== Trial 35/200 | idx=61 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76114 ACC=0.5591 (best 0.76114, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77813 ACC=0.7143 (best 0.77813, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75420 ACC=0.6872 (best 0.77813, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.75117 ACC=0.5961 (best 0.77813, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77556 ACC=0.7020 (best 0.77813, patience 3/3)
Early stopping: no improvement.

=== Trial 36/200 | idx=62 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74714 ACC=0.6626 (best 0.74714, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76641 ACC=0.6970 (best 0.76641, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76811 ACC=0.6872 (best 0.76811, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76845 ACC=0.6897 (best 0.76845, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75002 ACC=0.6773 (best 0.76845, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76981 ACC=0.7044 (best 0.76981, patience 0/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77240 ACC=0.7069 (best 0.77240, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.78226 ACC=0.7069 (best 0.78226, patience 0/3)

=== Trial 37/200 | idx=63 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76913 ACC=0.4975 (best 0.76913, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78837 ACC=0.6872 (best 0.78837, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79066 ACC=0.6601 (best 0.79066, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79291 ACC=0.7118 (best 0.79291, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.80636 ACC=0.7069 (best 0.80636, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80381 ACC=0.7118 (best 0.80636, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78905 ACC=0.6946 (best 0.80636, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.78138 ACC=0.6798 (best 0.80636, patience 3/3)
Early stopping: no improvement.

=== Trial 38/200 | idx=64 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78602 ACC=0.6995 (best 0.78602, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.81000 ACC=0.7217 (best 0.81000, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.81182 ACC=0.7094 (best 0.81182, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.81274 ACC=0.7143 (best 0.81274, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.81248 ACC=0.7315 (best 0.81274, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80112 ACC=0.7266 (best 0.81274, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.80847 ACC=0.7488 (best 0.81274, patience 3/3)
Early stopping: no improvement.

=== Trial 39/200 | idx=65 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71711 ACC=0.4926 (best 0.71711, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76313 ACC=0.6626 (best 0.76313, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77250 ACC=0.6059 (best 0.77250, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77478 ACC=0.6700 (best 0.77478, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77769 ACC=0.7143 (best 0.77769, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77364 ACC=0.7069 (best 0.77769, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77177 ACC=0.6626 (best 0.77769, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76704 ACC=0.6970 (best 0.77769, patience 3/3)
Early stopping: no improvement.

=== Trial 40/200 | idx=66 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76784 ACC=0.7069 (best 0.76784, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78886 ACC=0.6207 (best 0.78886, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75556 ACC=0.6527 (best 0.78886, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76990 ACC=0.5961 (best 0.78886, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77109 ACC=0.6897 (best 0.78886, patience 3/3)
Early stopping: no improvement.

=== Trial 41/200 | idx=67 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76434 ACC=0.6355 (best 0.76434, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79303 ACC=0.6478 (best 0.79303, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79854 ACC=0.6995 (best 0.79854, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79595 ACC=0.6946 (best 0.79854, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.80058 ACC=0.6823 (best 0.80058, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80231 ACC=0.6995 (best 0.80231, patience 0/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79626 ACC=0.7094 (best 0.80231, patience 1/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77833 ACC=0.7241 (best 0.80231, patience 2/3)

=== Trial 42/200 | idx=68 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73500 ACC=0.6798 (best 0.73500, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76447 ACC=0.7020 (best 0.76447, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76915 ACC=0.7069 (best 0.76915, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78607 ACC=0.7069 (best 0.78607, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78333 ACC=0.6970 (best 0.78607, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77510 ACC=0.6724 (best 0.78607, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78141 ACC=0.7069 (best 0.78607, patience 3/3)
Early stopping: no improvement.

=== Trial 43/200 | idx=69 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75328 ACC=0.6675 (best 0.75328, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78760 ACC=0.6872 (best 0.78760, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75133 ACC=0.6675 (best 0.78760, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77252 ACC=0.6232 (best 0.78760, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77158 ACC=0.6404 (best 0.78760, patience 3/3)
Early stopping: no improvement.

=== Trial 44/200 | idx=70 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75505 ACC=0.6724 (best 0.75505, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77269 ACC=0.6995 (best 0.77269, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79614 ACC=0.7315 (best 0.79614, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79966 ACC=0.7167 (best 0.79966, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79039 ACC=0.7217 (best 0.79966, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78592 ACC=0.7389 (best 0.79966, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78988 ACC=0.7044 (best 0.79966, patience 3/3)
Early stopping: no improvement.

=== Trial 45/200 | idx=71 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76767 ACC=0.6823 (best 0.76767, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.72677 ACC=0.6823 (best 0.76767, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.73718 ACC=0.6921 (best 0.76767, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76320 ACC=0.7020 (best 0.76767, patience 3/3)
Early stopping: no improvement.

=== Trial 46/200 | idx=72 ===
{'seq_len': 200, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.78340 ACC=0.7118 (best 0.78340, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77061 ACC=0.5961 (best 0.78340, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79342 ACC=0.7020 (best 0.79342, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79306 ACC=0.6798 (best 0.79342, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79706 ACC=0.7044 (best 0.79706, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76279 ACC=0.6700 (best 0.79706, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78840 ACC=0.7192 (best 0.79706, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.78706 ACC=0.7167 (best 0.79706, patience 3/3)
Early stopping: no improvement.

=== Trial 47/200 | idx=73 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.72876 ACC=0.6626 (best 0.72876, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76150 ACC=0.6897 (best 0.76150, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77335 ACC=0.6897 (best 0.77335, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77488 ACC=0.6675 (best 0.77488, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77869 ACC=0.7044 (best 0.77869, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77760 ACC=0.6946 (best 0.77869, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78354 ACC=0.7020 (best 0.78354, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77660 ACC=0.7044 (best 0.78354, patience 1/3)

=== Trial 48/200 | idx=74 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.72238 ACC=0.4926 (best 0.72238, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77534 ACC=0.6650 (best 0.77534, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79129 ACC=0.6108 (best 0.79129, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78966 ACC=0.7266 (best 0.79129, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79337 ACC=0.6897 (best 0.79337, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77699 ACC=0.7094 (best 0.79337, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77631 ACC=0.6847 (best 0.79337, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77206 ACC=0.7020 (best 0.79337, patience 3/3)
Early stopping: no improvement.

=== Trial 49/200 | idx=75 ===
{'seq_len': 200, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73012 ACC=0.6626 (best 0.73012, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78883 ACC=0.7167 (best 0.78883, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80449 ACC=0.7291 (best 0.80449, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79816 ACC=0.7291 (best 0.80449, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79296 ACC=0.7291 (best 0.80449, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79393 ACC=0.7020 (best 0.80449, patience 3/3)
Early stopping: no improvement.

=== Trial 50/200 | idx=76 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73362 ACC=0.6626 (best 0.73362, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77845 ACC=0.6897 (best 0.77845, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75728 ACC=0.7069 (best 0.77845, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76432 ACC=0.7020 (best 0.77845, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76833 ACC=0.6872 (best 0.77845, patience 3/3)
Early stopping: no improvement.

=== Trial 51/200 | idx=77 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76034 ACC=0.6847 (best 0.76034, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79112 ACC=0.7020 (best 0.79112, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79937 ACC=0.7192 (best 0.79937, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80425 ACC=0.7143 (best 0.80425, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79296 ACC=0.7020 (best 0.80425, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.80223 ACC=0.6995 (best 0.80425, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79854 ACC=0.7167 (best 0.80425, patience 3/3)
Early stopping: no improvement.

=== Trial 52/200 | idx=78 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71340 ACC=0.4926 (best 0.71340, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76881 ACC=0.6724 (best 0.76881, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77369 ACC=0.5271 (best 0.77369, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78041 ACC=0.6675 (best 0.78041, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79049 ACC=0.7167 (best 0.79049, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77590 ACC=0.7192 (best 0.79049, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78039 ACC=0.7118 (best 0.79049, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77439 ACC=0.6995 (best 0.79049, patience 3/3)
Early stopping: no improvement.

=== Trial 53/200 | idx=79 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73461 ACC=0.6724 (best 0.73461, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76653 ACC=0.7069 (best 0.76653, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77189 ACC=0.7094 (best 0.77189, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78039 ACC=0.6946 (best 0.78039, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77998 ACC=0.6921 (best 0.78039, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77471 ACC=0.7020 (best 0.78039, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78068 ACC=0.7094 (best 0.78068, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76893 ACC=0.6626 (best 0.78068, patience 1/3)

=== Trial 54/200 | idx=80 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.70740 ACC=0.4926 (best 0.70740, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77228 ACC=0.6724 (best 0.77228, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77845 ACC=0.5542 (best 0.77845, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78114 ACC=0.6527 (best 0.78114, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78068 ACC=0.6847 (best 0.78114, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78095 ACC=0.7143 (best 0.78114, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77927 ACC=0.6946 (best 0.78114, patience 3/3)
Early stopping: no improvement.

=== Trial 55/200 | idx=81 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75481 ACC=0.5172 (best 0.75481, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76638 ACC=0.7118 (best 0.76638, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75956 ACC=0.6921 (best 0.76638, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.74119 ACC=0.5764 (best 0.76638, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75405 ACC=0.6502 (best 0.76638, patience 3/3)
Early stopping: no improvement.

=== Trial 56/200 | idx=82 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73883 ACC=0.6650 (best 0.73883, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76318 ACC=0.6872 (best 0.76318, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77328 ACC=0.6970 (best 0.77328, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76675 ACC=0.6724 (best 0.77328, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77320 ACC=0.6970 (best 0.77328, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77070 ACC=0.6921 (best 0.77328, patience 3/3)
Early stopping: no improvement.

=== Trial 57/200 | idx=83 ===
{'seq_len': 256, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.77718 ACC=0.6773 (best 0.77718, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.80078 ACC=0.6921 (best 0.80078, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78731 ACC=0.6872 (best 0.80078, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79694 ACC=0.7266 (best 0.80078, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79364 ACC=0.7094 (best 0.80078, patience 3/3)
Early stopping: no improvement.

=== Trial 58/200 | idx=84 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76917 ACC=0.6305 (best 0.76917, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76180 ACC=0.6897 (best 0.76917, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76648 ACC=0.6232 (best 0.76917, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76726 ACC=0.6946 (best 0.76917, patience 3/3)
Early stopping: no improvement.

=== Trial 59/200 | idx=85 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.77012 ACC=0.5025 (best 0.77012, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78274 ACC=0.6626 (best 0.78274, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80005 ACC=0.7167 (best 0.80005, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80150 ACC=0.7044 (best 0.80150, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79968 ACC=0.7094 (best 0.80150, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77677 ACC=0.6921 (best 0.80150, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79133 ACC=0.6970 (best 0.80150, patience 3/3)
Early stopping: no improvement.

=== Trial 60/200 | idx=86 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75502 ACC=0.6921 (best 0.75502, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78301 ACC=0.7167 (best 0.78301, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.79066 ACC=0.7266 (best 0.79066, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78454 ACC=0.7167 (best 0.79066, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77799 ACC=0.7118 (best 0.79066, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78369 ACC=0.6798 (best 0.79066, patience 3/3)
Early stopping: no improvement.

=== Trial 61/200 | idx=87 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71359 ACC=0.6281 (best 0.71359, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.75398 ACC=0.6724 (best 0.75398, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77350 ACC=0.6995 (best 0.77350, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78002 ACC=0.6847 (best 0.78002, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77947 ACC=0.7167 (best 0.78002, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77905 ACC=0.7167 (best 0.78002, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78238 ACC=0.7044 (best 0.78238, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77454 ACC=0.6921 (best 0.78238, patience 1/3)

=== Trial 62/200 | idx=88 ===
{'seq_len': 224, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.77182 ACC=0.4951 (best 0.77182, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.79022 ACC=0.5985 (best 0.79022, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78930 ACC=0.6823 (best 0.79022, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78956 ACC=0.7118 (best 0.79022, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.80158 ACC=0.7044 (best 0.80158, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78903 ACC=0.6946 (best 0.80158, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.79342 ACC=0.6995 (best 0.80158, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77393 ACC=0.6946 (best 0.80158, patience 3/3)
Early stopping: no improvement.

=== Trial 63/200 | idx=89 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.70910 ACC=0.6552 (best 0.70910, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.75738 ACC=0.7020 (best 0.75738, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77073 ACC=0.7094 (best 0.77073, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77563 ACC=0.6995 (best 0.77563, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78044 ACC=0.6995 (best 0.78044, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77743 ACC=0.6872 (best 0.78044, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77716 ACC=0.6798 (best 0.78044, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77005 ACC=0.6921 (best 0.78044, patience 3/3)
Early stopping: no improvement.

=== Trial 64/200 | idx=90 ===
{'seq_len': 256, 'emb_dim': 160, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71063 ACC=0.6207 (best 0.71063, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.73828 ACC=0.6675 (best 0.73828, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76017 ACC=0.6897 (best 0.76017, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76313 ACC=0.7020 (best 0.76313, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76930 ACC=0.6921 (best 0.76930, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.75114 ACC=0.6700 (best 0.76930, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.76286 ACC=0.6601 (best 0.76930, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.74840 ACC=0.6872 (best 0.76930, patience 3/3)
Early stopping: no improvement.

=== Trial 65/200 | idx=91 ===
{'seq_len': 200, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75558 ACC=0.6158 (best 0.75558, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77468 ACC=0.6798 (best 0.77468, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.75073 ACC=0.7143 (best 0.77468, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.76718 ACC=0.7167 (best 0.77468, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76214 ACC=0.6897 (best 0.77468, patience 3/3)
Early stopping: no improvement.

=== Trial 66/200 | idx=92 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.72769 ACC=0.6552 (best 0.72769, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76053 ACC=0.6675 (best 0.76053, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76850 ACC=0.7069 (best 0.76850, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77010 ACC=0.6847 (best 0.77010, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77823 ACC=0.6872 (best 0.77823, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77510 ACC=0.6946 (best 0.77823, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77359 ACC=0.7020 (best 0.77823, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76738 ACC=0.7020 (best 0.77823, patience 3/3)
Early stopping: no improvement.

=== Trial 67/200 | idx=93 ===
{'seq_len': 224, 'emb_dim': 160, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76184 ACC=0.6872 (best 0.76184, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.71381 ACC=0.6552 (best 0.76184, patience 1/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.74400 ACC=0.6724 (best 0.76184, patience 2/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77723 ACC=0.6995 (best 0.77723, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.75636 ACC=0.6724 (best 0.77723, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.76148 ACC=0.6773 (best 0.77723, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77245 ACC=0.6823 (best 0.77723, patience 3/3)
Early stopping: no improvement.

=== Trial 68/200 | idx=94 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73189 ACC=0.6773 (best 0.73189, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76502 ACC=0.7044 (best 0.76502, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77532 ACC=0.7143 (best 0.77532, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78117 ACC=0.6946 (best 0.78117, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78532 ACC=0.6921 (best 0.78532, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77806 ACC=0.6970 (best 0.78532, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78124 ACC=0.7094 (best 0.78532, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.76687 ACC=0.6798 (best 0.78532, patience 3/3)
Early stopping: no improvement.

=== Trial 69/200 | idx=95 ===
{'seq_len': 256, 'emb_dim': 192, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74993 ACC=0.5690 (best 0.74993, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78590 ACC=0.6429 (best 0.78590, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78223 ACC=0.7020 (best 0.78590, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79167 ACC=0.6847 (best 0.79167, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79133 ACC=0.7020 (best 0.79167, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77932 ACC=0.7069 (best 0.79167, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78546 ACC=0.7020 (best 0.79167, patience 3/3)
Early stopping: no improvement.

=== Trial 70/200 | idx=96 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.0, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74272 ACC=0.6872 (best 0.74272, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76660 ACC=0.7044 (best 0.76660, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.77468 ACC=0.6995 (best 0.77468, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.77199 ACC=0.6872 (best 0.77468, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.77799 ACC=0.6995 (best 0.77799, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77726 ACC=0.6921 (best 0.77799, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78388 ACC=0.6970 (best 0.78388, patience 0/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.78109 ACC=0.7020 (best 0.78388, patience 1/3)

=== Trial 71/200 | idx=97 ===
{'seq_len': 200, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0001, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.71143 ACC=0.6601 (best 0.71143, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.75653 ACC=0.6946 (best 0.75653, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.76915 ACC=0.6970 (best 0.76915, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78284 ACC=0.7020 (best 0.78284, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78612 ACC=0.6921 (best 0.78612, patience 0/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78269 ACC=0.6970 (best 0.78612, patience 1/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78342 ACC=0.6970 (best 0.78612, patience 2/3)


                                                          

[SCRATCH] Epoch 8/8 AUC=0.77808 ACC=0.7020 (best 0.78612, patience 3/3)
Early stopping: no improvement.

=== Trial 72/200 | idx=98 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.25, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0008, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.70600 ACC=0.4926 (best 0.70600, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77367 ACC=0.6601 (best 0.77367, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78430 ACC=0.6281 (best 0.78430, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.78658 ACC=0.7044 (best 0.78658, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78240 ACC=0.6823 (best 0.78658, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78240 ACC=0.5788 (best 0.78658, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.77197 ACC=0.7167 (best 0.78658, patience 3/3)
Early stopping: no improvement.

=== Trial 73/200 | idx=99 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.68485 ACC=0.4926 (best 0.68485, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78546 ACC=0.6847 (best 0.78546, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80410 ACC=0.6576 (best 0.80410, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79704 ACC=0.7291 (best 0.80410, patience 1/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79663 ACC=0.7118 (best 0.80410, patience 2/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.79388 ACC=0.7094 (best 0.80410, patience 3/3)
Early stopping: no improvement.

=== Trial 74/200 | idx=100 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 1, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.74580 ACC=0.6650 (best 0.74580, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.76532 ACC=0.7094 (best 0.76532, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78175 ACC=0.7389 (best 0.78175, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79313 ACC=0.7365 (best 0.79313, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.78240 ACC=0.7217 (best 0.79313, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.78160 ACC=0.7094 (best 0.79313, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78672 ACC=0.6946 (best 0.79313, patience 3/3)
Early stopping: no improvement.

=== Trial 75/200 | idx=101 ===
{'seq_len': 224, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 160, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 64, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.76915 ACC=0.5714 (best 0.76915, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78704 ACC=0.7118 (best 0.78704, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.78097 ACC=0.7241 (best 0.78704, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.79517 ACC=0.6897 (best 0.79517, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.76155 ACC=0.6995 (best 0.79517, patience 1/3)


                                                          

[SCRATCH] Epoch 6/8 AUC=0.77299 ACC=0.6773 (best 0.79517, patience 2/3)


                                                          

[SCRATCH] Epoch 7/8 AUC=0.78568 ACC=0.7241 (best 0.79517, patience 3/3)
Early stopping: no improvement.

=== Trial 76/200 | idx=102 ===
{'seq_len': 256, 'emb_dim': 128, 'conv_blocks': 2, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.03, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.73949 ACC=0.5172 (best 0.73949, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.77184 ACC=0.7192 (best 0.77184, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.73694 ACC=0.6773 (best 0.77184, patience 1/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.75095 ACC=0.6232 (best 0.77184, patience 2/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.74726 ACC=0.6010 (best 0.77184, patience 3/3)
Early stopping: no improvement.

=== Trial 77/200 | idx=103 ===
{'seq_len': 224, 'emb_dim': 192, 'conv_blocks': 1, 'channels_start': 128, 'channel_growth': 'x1.5', 'kernel_sizes': '3-5-7', 'dropout': 0.2, 'weight_decay': 0.0002, 'label_smoothing': 0.0, 'learning_rate': 0.0012, 'batch_size': 128, 'epochs': 8, 'loss_fn': 'bce_logits'}


                                                          

[SCRATCH] Epoch 1/8 AUC=0.75546 ACC=0.6724 (best 0.75546, patience 0/3)


                                                          

[SCRATCH] Epoch 2/8 AUC=0.78024 ACC=0.6995 (best 0.78024, patience 0/3)


                                                          

[SCRATCH] Epoch 3/8 AUC=0.80279 ACC=0.7389 (best 0.80279, patience 0/3)


                                                          

[SCRATCH] Epoch 4/8 AUC=0.80425 ACC=0.7217 (best 0.80425, patience 0/3)


                                                          

[SCRATCH] Epoch 5/8 AUC=0.79510 ACC=0.7217 (best 0.80425, patience 1/3)


Epoch 6/8:   0%|          | 0/13 [00:00<?, ?it/s]