In [2]:
# Setup:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from torch.utils.data import Dataset
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from google.colab import drive
drive.mount('/content/drive')
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore")

Mounted at /content/drive


In [None]:
# Colab-only setup for the Kaggle CLI: install the package, then upload the
# kaggle.json file through the browser file picker.
# NOTE(review): the install is unpinned and files.upload() needs an interactive
# session, so this cell cannot be part of an unattended "Run All".
!pip install -q kaggle # Install the Kaggle library
from google.colab import files
files.upload() # A button will appear to browse and upload the kaggle.json file

{}

In [3]:
# Load the test and training CSVs from the mounted Google Drive.
# NOTE(review): both paths are absolute and tied to one Drive folder layout;
# they must be edited before the notebook can run anywhere else.
test_set = pd.read_csv("/content/drive/My Drive/cs 155/Project_1/data/test.csv")   ### Change the file path
training_set = pd.read_csv("/content/drive/My Drive/cs 155/Project_1/data/train.csv") ### Change the file path

In [4]:
# Fit the label binarizer once on the full training labels so that every fold
# (and every helper that receives `lb`) encodes "Popularity_Type" the same way.
lb = LabelBinarizer()
lb.fit(training_set["Popularity_Type"])

In [5]:
# Module-level placeholders for the normalization statistics; the CV cells
# further down rebind these names with the per-fold values from fit_normalizer.
means = None
stdevs = None

# Define preprocessing transform
def conversion_transform(df, lb=None, is_test = False):
    """Convert a raw track dataframe into the feature frame used by the model.

    Steps: drop the URL/text columns, split ``track_album_release_date`` into
    ``year`` / ``month`` / ``day`` columns, (labeled data only) move
    ``Popularity_Type`` to the last column and encode it with ``lb``, and
    finally replace every remaining NaN with 0.

    Release dates may be full (``YYYY-MM-DD``), year-month (``YYYY-MM``) or
    year-only (``YYYY``). Missing parts default to 1. Values that match none
    of these end up as year 0, month 1, day 1 after the final ``fillna``.

    Args:
        df: Raw dataframe. Must contain ``track_href``, ``uri``, ``type``,
            ``analysis_url`` and ``track_album_release_date``; also ``ID``
            when ``is_test`` is True and ``Popularity_Type`` otherwise.
        lb: Already-fitted label binarizer (any object with ``transform``).
            Required when ``is_test`` is False, unused otherwise.
        is_test: True for unlabeled test data.

    Returns:
        A new dataframe; ``df`` itself is not modified.

    Raises:
        ValueError: if ``is_test`` is False and ``lb`` is None.
    """
    if not is_test and lb is None:
        raise ValueError("conversion_transform needs a fitted `lb` when is_test is False")

    # drop text columns
    new_df = df.drop(columns=['track_href', 'uri', 'type', 'analysis_url'])

    if is_test:
        new_df = new_df.drop(columns=['ID'])

    # Handle dates
    date_column = new_df["track_album_release_date"].astype(str)

    parsed = pd.to_datetime(date_column, format='%Y-%m-%d', errors='coerce')
    new_df["year"] = parsed.dt.year
    new_df["month"] = parsed.dt.month
    new_df["day"] = parsed.dt.day

    # Rows that are not a full YYYY-MM-DD date: recover what we can.
    partial_mask = parsed.isna()
    if partial_mask.any():
        raw_partial = date_column[partial_mask]

        # Year-only values ("1999") are plain numbers.
        year = pd.to_numeric(raw_partial, errors='coerce')

        # Year-month values ("2012-05") are not numeric; without this step the
        # year would become NaN and then 0, a huge outlier after normalization.
        year_month = raw_partial.str.extract(r'^\s*(\d{4})-(0?[1-9]|1[0-2])\s*$')
        year = year.fillna(pd.to_numeric(year_month[0], errors='coerce'))
        month = pd.to_numeric(year_month[1], errors='coerce').fillna(1)

        # Assign by position (mask order == raw_partial order).
        new_df.loc[partial_mask, "year"] = year.to_numpy()
        new_df.loc[partial_mask, "month"] = month.to_numpy()
        new_df.loc[partial_mask, "day"] = 1

    new_df = new_df.drop(columns='track_album_release_date')

    if not is_test:
        # Move the label to the last column and binarize it.
        # `lb` is fitted once up front: don't fit per fold.
        label = new_df.pop('Popularity_Type')
        new_df["Popularity_Type"] = lb.transform(label).astype(int)

    # replace all the nan values with zero
    new_df = new_df.fillna(0)

    return new_df

def fit_normalizer(train_df_converted):
  """Compute per-feature normalization statistics from a converted training frame.

  Every column except the last one (the label) is treated as a feature.

  Returns:
      (means, stdevs, feature_columns): the column means, the column standard
      deviations with exact zeros replaced by 1, and the feature names as a list.
  """
  feature_frame = train_df_converted.iloc[:, :-1]
  feature_columns = train_df_converted.columns[:-1].tolist()

  col_means = feature_frame.mean(axis=0)
  col_stdevs = feature_frame.std(axis=0)
  # A constant column has std 0; use 1 so normalization never divides by zero.
  col_stdevs = col_stdevs.replace(0, 1)

  return col_means, col_stdevs, feature_columns

def preprocessing_with_stats(df, lb, means, stdevs, feature_columns, is_test = False):
    """Convert a raw dataframe and normalize it with previously fitted statistics.

    Args:
        df: Raw dataframe accepted by ``conversion_transform``.
        lb: Fitted label binarizer, forwarded to ``conversion_transform``.
        means, stdevs: Per-feature statistics as returned by ``fit_normalizer``.
        feature_columns: Names (and order) of the feature columns the
            statistics were fitted on.
        is_test: True for unlabeled test data.

    Returns:
        The normalized feature frame when ``is_test`` is True; otherwise the
        normalized features with the label appended as the last column.

    Raises:
        KeyError: if a name in ``feature_columns`` is missing after conversion.
    """
    conv = conversion_transform(df, lb=lb, is_test=is_test)

    # Select features by name in BOTH branches. Slicing by position would let
    # a column mismatch slip through: pandas aligns `X - means` on labels and
    # would silently fill the unmatched columns with NaN.
    X = (conv[feature_columns] - means) / stdevs
    if is_test:
        return X

    y = conv.iloc[:, -1]  # conversion_transform puts the label last
    return pd.concat([X, y], axis=1)

# Save IDs
# conversion_transform drops the "ID" column when is_test=True, so keep the
# test-set IDs aside here.
test_IDs = test_set['ID'].values

In [6]:
# Music Dataloader
class MusicDataset(Dataset):
    """Music dataset.

    Converts a whole dataframe to tensors up front. For labeled data the last
    column is the label and all other columns are features; for test data
    every column is a feature and there are no labels.

    Attributes:
        X: float32 feature tensor.
        y: int64 label tensor, or None for test data.
    """

    def __init__(self, dataset, is_test, transform=None):
        """
        Arguments:
            dataset: Pandas dataframe
            is_test: True when the dataframe has no label column
            transform: Optional callable applied to the dataframe first. It is
                called as ``transform(dataset, is_test=is_test)``.
        """

        data = dataset
        if transform:
            # Pass the flag by keyword: the notebook's transforms take `lb` as
            # their second positional parameter, so a positional `is_test`
            # would be bound to the wrong argument.
            data = transform(dataset, is_test=is_test)

        # If it's the test set, there are no labels
        if is_test:
            self.X = torch.tensor(data.values, dtype=torch.float32)
            self.y = None

        else:
            # last column is label, rest are features
            self.X = torch.tensor(
                data.iloc[:, :-1].values,
                dtype=torch.float32
            )
            self.y = torch.tensor(
                data.iloc[:, -1].values,
                dtype=torch.long
            )

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)``, or only ``features`` for test data."""
        if self.y is None:
            # Indexing None would raise TypeError; unlabeled data yields features only.
            return self.X[idx]
        return self.X[idx], self.y[idx]

In [9]:
# ---------- Model ----------
def build_model(num_features):
    """Build the MLP classifier: three hidden blocks and a single-logit output.

    Each hidden block is Linear -> GELU -> Dropout(0.25) with widths
    256, 128 and 32. The final Linear emits one raw logit per row
    (no sigmoid inside the model).
    """
    layers = []
    in_width = num_features
    for out_width in (256, 128, 32):
        layers += [nn.Linear(in_width, out_width), nn.GELU(), nn.Dropout(0.25)]
        in_width = out_width

    layers.append(nn.Linear(in_width, 1))   # logits
    return nn.Sequential(*layers)

# ---------- Metrics (convert logits -> probs via sigmoid) ----------
def get_probs_from_logits(model, X):
    """Return sigmoid probabilities of `model` on `X`, computed in eval mode.

    The model is switched to eval mode (dropout off) for the forward pass and
    then put back into whatever mode it was in before the call. No gradients
    are recorded.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        X: Feature tensor of shape (N, num_features).

    Returns:
        Tensor of shape (N,) with values in [0, 1].
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Drop only the trailing logit dimension: a bare squeeze() would
            # also collapse a one-row batch to a 0-d tensor.
            logits = model(X).squeeze(-1)
            probs = torch.sigmoid(logits)  # convert to probabilities
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    return probs

def get_accuracy(model, dataset, threshold=0.5):
    """Fraction of rows in `dataset` classified correctly at `threshold`.

    A row is predicted positive when its probability is >= `threshold`.
    Returns a Python float.
    """
    targets = dataset.y.float().squeeze()
    predicted = (get_probs_from_logits(model, dataset.X) >= threshold).float()
    hits = (predicted == targets).float()
    return hits.mean().item()

def compute_auc(model, dataset):
    """ROC AUC of the model's predicted probabilities against `dataset.y`."""
    scores = get_probs_from_logits(model, dataset.X)
    labels = dataset.y
    return roc_auc_score(
        labels.detach().cpu().numpy(),
        scores.detach().cpu().numpy(),
    )

def best_threshold_and_acc(model, dataset):
    """Scan thresholds 0.05..0.95 (step 0.01) and return the most accurate one.

    Returns:
        (best_threshold, best_accuracy). Ties go to the lowest threshold; if
        no threshold scores above 0.0 the result is (0.5, 0.0).
    """
    scores = get_probs_from_logits(model, dataset.X).detach().cpu().numpy()
    labels = dataset.y.detach().cpu().numpy()

    candidates = np.linspace(0.05, 0.95, 91)
    accuracies = [((scores >= cut).astype(int) == labels).mean() for cut in candidates]

    # argmax returns the first maximum, matching a strict ">" scan.
    top = int(np.argmax(accuracies))
    if accuracies[top] > 0.0:
        return candidates[top], accuracies[top]
    return 0.5, 0.0

# ---------- Training helper ----------
def train_one_fold(model, train_loader, loss_fn, optimizer, scheduler=None, device="cpu", epochs=70):
    """Train `model` in place for `epochs` passes over `train_loader`.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        train_loader: Iterable of (features, labels) batches.
        loss_fn: Loss taking (logits, float targets), e.g. BCEWithLogitsLoss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Optional LR scheduler, stepped once per epoch.
        device: Device the model and each batch are moved to.
        epochs: Number of passes over the loader.
    """
    model.to(device)
    for epoch in range(epochs):
        for data, target in train_loader:
            data = data.to(device)
            target = target.float().to(device)

            optimizer.zero_grad()
            # Drop only the trailing logit dimension. A bare squeeze() turns a
            # final batch of size 1 into a 0-d tensor, and BCEWithLogitsLoss
            # then rejects it because the target still has shape (1,).
            logits = model(data).squeeze(-1)
            loss = loss_fn(logits, target)  # logits + BCEWithLogitsLoss
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

# ---------- CV ----------
# 5-fold stratified cross-validation. For every fold: fit the normalization
# statistics on the training split only, train a fresh model for 70 epochs,
# then report accuracy at 0.5, the best threshold/accuracy, and ROC AUC on the
# held-out split.
# NOTE(review): the fold split is seeded (random_state=42) but no torch seed is
# set in this cell, so metrics presumably vary from run to run.
device = "cpu"
y_strat = training_set["Popularity_Type"].values  # raw labels, used for stratification
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric accumulators
fold_accs = []
fold_aucs = []
fold_tuned_accs = []
fold_best_ts = []

for fold, (train_idx, val_idx) in enumerate(skf.split(training_set, y_strat), start=1):
    train_df = training_set.iloc[train_idx].reset_index(drop=True)
    val_df   = training_set.iloc[val_idx].reset_index(drop=True)

    # Fold-specific stats (no leakage): means/stdevs come from the training split only
    train_conv = conversion_transform(train_df, lb=lb, is_test=False)
    means, stdevs, feature_cols = fit_normalizer(train_conv)

    # Both splits are normalized with the training-split statistics
    train_processed = preprocessing_with_stats(train_df, lb=lb, means=means, stdevs=stdevs,
                                               feature_columns=feature_cols, is_test=False)
    val_processed   = preprocessing_with_stats(val_df, lb=lb, means=means, stdevs=stdevs,
                                               feature_columns=feature_cols, is_test=False)

    # Frames are already preprocessed, so no transform is passed
    train_ds = MusicDataset(train_processed, is_test=False, transform=None)
    val_ds   = MusicDataset(val_processed,   is_test=False, transform=None)

    # Move dataset tensors to device for faster eval
    train_ds.X = train_ds.X.to(device)
    train_ds.y = train_ds.y.to(device)
    val_ds.X = val_ds.X.to(device)
    val_ds.y = val_ds.y.to(device)

    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

    # Fresh model per fold so no weights carry over between folds
    model = build_model(num_features=len(feature_cols))

    # ---- Class imbalance handling via pos_weight ----
    # pos_weight = (#negatives / #positives) in this training split; the 1e-8
    # keeps the division finite if a split had no positives.
    y_train = train_ds.y.float()
    pos = y_train.sum()
    neg = len(y_train) - pos
    pos_weight = (neg / (pos + 1e-8)).detach().cpu()  # scalar tensor on CPU for loss init

    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))

    # ---- Optimizer + weight decay ----
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

    # ---- Scheduler helps often ----
    # Halves the learning rate every 20 epochs (train_one_fold steps it once per epoch)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Train
    train_one_fold(model, train_loader, loss_fn, optimizer, scheduler=scheduler, device=device, epochs=70)

    # Evaluate (after epoch 70)
    # NOTE(review): best_t is tuned on the same validation split it is scored
    # on, so tuned_acc is an optimistic estimate.
    val_acc = get_accuracy(model, val_ds, threshold=0.5)
    val_auc = compute_auc(model, val_ds)
    best_t, tuned_acc = best_threshold_and_acc(model, val_ds)

    fold_accs.append(val_acc)
    fold_aucs.append(val_auc)
    fold_tuned_accs.append(tuned_acc)
    fold_best_ts.append(best_t)

    print(f"Fold {fold}: acc@0.5={val_acc:.4f} | best_t={best_t:.2f} | tuned_acc={tuned_acc:.4f} | AUC={val_auc:.4f}")

# Summary across folds
print("\nCV mean acc:", float(np.mean(fold_accs)))
print("CV std acc:", float(np.std(fold_accs)))
print("CV mean tuned acc:", float(np.mean(fold_tuned_accs)))
print("CV mean AUC:", float(np.mean(fold_aucs)))
print("CV std AUC:", float(np.std(fold_aucs)))
print("Mean best threshold:", float(np.mean(fold_best_ts)))


Fold 1: acc@0.5=0.7206 | best_t=0.31 | tuned_acc=0.7516 | AUC=0.8175
Fold 2: acc@0.5=0.6740 | best_t=0.35 | tuned_acc=0.7115 | AUC=0.7685
Fold 3: acc@0.5=0.6831 | best_t=0.31 | tuned_acc=0.7426 | AUC=0.7835
Fold 4: acc@0.5=0.6934 | best_t=0.34 | tuned_acc=0.7413 | AUC=0.7968
Fold 5: acc@0.5=0.7073 | best_t=0.30 | tuned_acc=0.7448 | AUC=0.8024

CV mean acc: 0.6956551671028137
CV std acc: 0.01666245391822917
CV mean tuned acc: 0.7383557098713712
CV mean AUC: 0.793736081667791
CV std AUC: 0.016703616589952095
Mean best threshold: 0.32199999999999995


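The mean best threshold across folds is about 0.32, so next try a single fixed threshold of 0.32 for every fold instead of the default 0.5.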

In [10]:
# ---------- Model ----------
def build_model(num_features):
    """Build the MLP classifier: three hidden blocks and a single-logit output.

    Each hidden block is Linear -> GELU -> Dropout(0.25) with widths
    256, 128 and 32. The final Linear emits one raw logit per row
    (no sigmoid inside the model).
    """
    layers = []
    in_width = num_features
    for out_width in (256, 128, 32):
        layers += [nn.Linear(in_width, out_width), nn.GELU(), nn.Dropout(0.25)]
        in_width = out_width

    layers.append(nn.Linear(in_width, 1))   # logits
    return nn.Sequential(*layers)

# ---------- Helpers ----------
def probs_from_logits(model, X):
    """Return sigmoid probabilities of `model` on `X`, computed in eval mode.

    The model is switched to eval mode (dropout off) for the forward pass and
    then put back into whatever mode it was in before the call. No gradients
    are recorded.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        X: Feature tensor of shape (N, num_features).

    Returns:
        Tensor of shape (N,) with values in [0, 1].
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Drop only the trailing logit dimension: a bare squeeze() would
            # also collapse a one-row batch to a 0-d tensor.
            logits = model(X).squeeze(-1)
            probs = torch.sigmoid(logits)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    return probs

def get_accuracy(model, dataset, threshold=0.5):
    """Fraction of rows in `dataset` classified correctly at `threshold`.

    A row is predicted positive when its probability is >= `threshold`.
    Returns a Python float.
    """
    targets = dataset.y.float().squeeze()
    predicted = (probs_from_logits(model, dataset.X) >= threshold).float()
    hits = (predicted == targets).float()
    return hits.mean().item()

def compute_auc(model, dataset):
    """ROC AUC of the model's predicted probabilities against `dataset.y`."""
    scores = probs_from_logits(model, dataset.X)
    labels = dataset.y
    return roc_auc_score(
        labels.detach().cpu().numpy(),
        scores.detach().cpu().numpy(),
    )

def best_threshold_and_acc(model, dataset):
    """Scan thresholds 0.05..0.95 (step 0.01) and return the most accurate one.

    Returns:
        (best_threshold, best_accuracy). Ties go to the lowest threshold; if
        no threshold scores above 0.0 the result is (0.5, 0.0).
    """
    scores = probs_from_logits(model, dataset.X).detach().cpu().numpy()
    labels = dataset.y.detach().cpu().numpy()

    candidates = np.linspace(0.05, 0.95, 91)
    accuracies = [((scores >= cut).astype(int) == labels).mean() for cut in candidates]

    # argmax returns the first maximum, matching a strict ">" scan.
    top = int(np.argmax(accuracies))
    if accuracies[top] > 0.0:
        return candidates[top], accuracies[top]
    return 0.5, 0.0

def train_one_fold(model, train_loader, loss_fn, optimizer, scheduler=None, device="cpu", epochs=70):
    """Train `model` in place for `epochs` passes over `train_loader`.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        train_loader: Iterable of (features, labels) batches.
        loss_fn: Loss taking (logits, float targets), e.g. BCEWithLogitsLoss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Optional LR scheduler, stepped once per epoch.
        device: Device the model and each batch are moved to.
        epochs: Number of passes over the loader.
    """
    model.to(device)
    for epoch in range(epochs):
        for data, target in train_loader:
            data = data.to(device)
            target = target.float().to(device)

            optimizer.zero_grad()
            # Drop only the trailing logit dimension. A bare squeeze() turns a
            # final batch of size 1 into a 0-d tensor, and BCEWithLogitsLoss
            # then rejects it because the target still has shape (1,).
            logits = model(data).squeeze(-1)
            loss = loss_fn(logits, target)  # logits + BCEWithLogitsLoss
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

# ---------- CV ----------
# Same 5-fold stratified CV as the previous cell, additionally scoring every
# fold at one fixed GLOBAL_THRESHOLD so the folds can be compared at a common
# operating point.
# NOTE(review): no torch seed is set in this cell, so metrics presumably vary
# from run to run even though the fold split itself is seeded.
device = "cpu"
y_strat = training_set["Popularity_Type"].values  # raw labels, used for stratification

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# want one consistent threshold across folds, so use the mean we found:
# NOTE(review): 0.322 is the mean per-fold best threshold printed by the
# previous cell, i.e. it was derived from these same validation folds.
GLOBAL_THRESHOLD = 0.322

# Per-fold metric accumulators
fold_acc_05 = []
fold_acc_global = []
fold_acc_best = []
fold_auc = []
fold_best_t = []

for fold, (train_idx, val_idx) in enumerate(skf.split(training_set, y_strat), start=1):
    train_df = training_set.iloc[train_idx].reset_index(drop=True)
    val_df   = training_set.iloc[val_idx].reset_index(drop=True)

    # Fold-specific stats (no leakage): means/stdevs come from the training split only
    train_conv = conversion_transform(train_df, lb=lb, is_test=False)
    means, stdevs, feature_cols = fit_normalizer(train_conv)

    # Both splits are normalized with the training-split statistics
    train_processed = preprocessing_with_stats(train_df, lb=lb, means=means, stdevs=stdevs,
                                               feature_columns=feature_cols, is_test=False)
    val_processed   = preprocessing_with_stats(val_df, lb=lb, means=means, stdevs=stdevs,
                                               feature_columns=feature_cols, is_test=False)

    # Frames are already preprocessed, so no transform is passed
    train_ds = MusicDataset(train_processed, is_test=False, transform=None)
    val_ds   = MusicDataset(val_processed,   is_test=False, transform=None)

    # Move tensors to device
    train_ds.X = train_ds.X.to(device); train_ds.y = train_ds.y.to(device)
    val_ds.X   = val_ds.X.to(device);   val_ds.y   = val_ds.y.to(device)

    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

    # Fresh model per fold so no weights carry over between folds
    model = build_model(num_features=len(feature_cols))

    # Class imbalance handling
    # pos_weight = (#negatives / #positives) in this training split; the 1e-8
    # keeps the division finite if a split had no positives.
    y_train = train_ds.y.float()
    pos = y_train.sum()
    neg = len(y_train) - pos
    pos_weight = (neg / (pos + 1e-8)).detach().cpu()

    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    # Halves the learning rate every 20 epochs (train_one_fold steps it once per epoch)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    train_one_fold(model, train_loader, loss_fn, optimizer, scheduler=scheduler, device=device, epochs=70)

    # Metrics
    auc = compute_auc(model, val_ds)
    acc05 = get_accuracy(model, val_ds, threshold=0.5)

    # best_t is tuned on this validation split (optimistic); acc_global uses the fixed threshold
    best_t, acc_best = best_threshold_and_acc(model, val_ds)
    acc_global = get_accuracy(model, val_ds, threshold=GLOBAL_THRESHOLD)

    fold_auc.append(auc)
    fold_acc_05.append(acc05)
    fold_best_t.append(best_t)
    fold_acc_best.append(acc_best)
    fold_acc_global.append(acc_global)

    print(
        f"Fold {fold}: "
        f"acc@0.5={acc05:.4f} | "
        f"acc@global({GLOBAL_THRESHOLD:.3f})={acc_global:.4f} | "
        f"best_t={best_t:.2f} | acc@best_t={acc_best:.4f} | "
        f"AUC={auc:.4f}"
    )

# Summary across folds
print("\nCV mean AUC:", float(np.mean(fold_auc)))
print("CV std AUC:", float(np.std(fold_auc)))

print("CV mean acc@0.5:", float(np.mean(fold_acc_05)))
print("CV mean acc@global:", float(np.mean(fold_acc_global)))

print("Mean best threshold:", float(np.mean(fold_best_t)))
print("CV mean acc@best_t:", float(np.mean(fold_acc_best)))

Fold 1: acc@0.5=0.7361 | acc@global(0.322)=0.7671 | best_t=0.33 | acc@best_t=0.7684 | AUC=0.8278
Fold 2: acc@0.5=0.6818 | acc@global(0.322)=0.7206 | best_t=0.36 | acc@best_t=0.7296 | AUC=0.7762
Fold 3: acc@0.5=0.6895 | acc@global(0.322)=0.7348 | best_t=0.29 | acc@best_t=0.7490 | AUC=0.7855
Fold 4: acc@0.5=0.7025 | acc@global(0.322)=0.7245 | best_t=0.22 | acc@best_t=0.7309 | AUC=0.7909
Fold 5: acc@0.5=0.7008 | acc@global(0.322)=0.7474 | best_t=0.33 | acc@best_t=0.7487 | AUC=0.8022

CV mean AUC: 0.7965376415608115
CV std AUC: 0.017742677766221654
CV mean acc@0.5: 0.7021218061447143
CV mean acc@global: 0.7388738393783569
Mean best threshold: 0.30599999999999994
CV mean acc@best_t: 0.74534248503576


The best thresholds vary by fold (0.22 to 0.36), suggesting calibration instability. A simple fix is to choose the threshold using only the training folds (not the validation fold), which avoids peeking at the validation data and should produce a more stable threshold.

In [11]:
def build_model(num_features):
    """Build the MLP classifier: three hidden blocks and a single-logit output.

    Each hidden block is Linear -> GELU -> Dropout(0.25) with widths
    256, 128 and 32. The final Linear emits one raw logit per row
    (no sigmoid inside the model).
    """
    layers = []
    in_width = num_features
    for out_width in (256, 128, 32):
        layers += [nn.Linear(in_width, out_width), nn.GELU(), nn.Dropout(0.25)]
        in_width = out_width

    layers.append(nn.Linear(in_width, 1))   # logits
    return nn.Sequential(*layers)

# ---------- Helpers ----------
def probs_from_logits(model, X):
    """Return sigmoid probabilities of `model` on `X`, computed in eval mode.

    The model is switched to eval mode (dropout off) for the forward pass and
    then put back into whatever mode it was in before the call. No gradients
    are recorded.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        X: Feature tensor of shape (N, num_features).

    Returns:
        Tensor of shape (N,) with values in [0, 1].
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Drop only the trailing logit dimension: a bare squeeze() would
            # also collapse a one-row batch to a 0-d tensor.
            logits = model(X).squeeze(-1)
            probs = torch.sigmoid(logits)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    return probs

def get_accuracy_from_probs(probs_np, y_np, threshold):
    """Accuracy of thresholded probabilities against 0/1 labels.

    A probability >= `threshold` counts as a positive prediction.
    """
    predicted = (probs_np >= threshold).astype(int)
    matches = predicted == y_np
    return matches.mean()

def best_threshold_from_oof_probs(oof_probs, oof_y, grid=None):
    """Pick the threshold in `grid` that maximizes accuracy on OOF predictions.

    Args:
        oof_probs: Out-of-fold probabilities.
        oof_y: Matching 0/1 labels.
        grid: Candidate thresholds; defaults to 0.05..0.95 in steps of 0.01.

    Returns:
        (best_threshold, best_accuracy). Ties go to the earliest candidate; if
        no candidate scores above 0.0 the result is (0.5, 0.0).
    """
    candidates = np.linspace(0.05, 0.95, 91) if grid is None else grid

    top_t, top_acc = 0.5, 0.0
    for cut in candidates:
        score = get_accuracy_from_probs(oof_probs, oof_y, cut)
        if score > top_acc:
            top_t, top_acc = cut, score
    return top_t, top_acc

def compute_auc(model, dataset):
    """ROC AUC of the model's predicted probabilities against `dataset.y`."""
    scores = probs_from_logits(model, dataset.X)
    labels = dataset.y
    return roc_auc_score(
        labels.detach().cpu().numpy(),
        scores.detach().cpu().numpy(),
    )

def train_epochs(model, train_loader, loss_fn, optimizer, scheduler=None, device="cpu", epochs=70):
    """Train `model` in place for `epochs` passes over `train_loader`.

    Args:
        model: Module producing one logit per row, shape (N, 1).
        train_loader: Iterable of (features, labels) batches.
        loss_fn: Loss taking (logits, float targets), e.g. BCEWithLogitsLoss.
        optimizer: Optimizer over the model's parameters.
        scheduler: Optional LR scheduler, stepped once per epoch.
        device: Device the model and each batch are moved to.
        epochs: Number of passes over the loader.
    """
    model.to(device)
    for _ in range(epochs):
        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.float().to(device)

            optimizer.zero_grad()
            # Drop only the trailing logit dimension. A bare squeeze() turns a
            # final batch of size 1 into a 0-d tensor, and BCEWithLogitsLoss
            # then rejects it because the target still has shape (1,).
            logits = model(Xb).squeeze(-1)
            loss = loss_fn(logits, yb)
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

def make_fold_datasets(train_df, val_df, lb, means, stdevs, feature_cols, device):
    """Preprocess one train/validation split and wrap both parts as datasets.

    Both frames are normalized with the same `means` / `stdevs`, and the
    resulting tensors are moved onto `device`.

    Returns:
        (train_ds, val_ds) as MusicDataset instances.
    """
    def _as_dataset(frame):
        processed = preprocessing_with_stats(
            frame, lb=lb, means=means, stdevs=stdevs,
            feature_columns=feature_cols, is_test=False
        )
        fold_ds = MusicDataset(processed, is_test=False, transform=None)
        # Move tensors onto device
        fold_ds.X = fold_ds.X.to(device)
        fold_ds.y = fold_ds.y.to(device)
        return fold_ds

    train_ds = _as_dataset(train_df)
    val_ds = _as_dataset(val_df)
    return train_ds, val_ds

def make_loss_and_optim(model, y_train_tensor, device, lr=1e-3, weight_decay=1e-4):
    """Create the loss, optimizer and LR scheduler for one training run.

    The loss is BCEWithLogitsLoss with pos_weight = (#negatives / #positives)
    computed from `y_train_tensor`. The optimizer is Adam, and the scheduler
    is a StepLR with step_size=20 and gamma=0.5.

    Returns:
        (loss_fn, optimizer, scheduler)
    """
    labels = y_train_tensor.float()
    n_pos = labels.sum()
    n_neg = len(labels) - n_pos
    # 1e-8 keeps the ratio finite when there are no positives
    ratio = n_neg / (n_pos + 1e-8)
    pos_weight = ratio.detach().cpu().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    return (
        nn.BCEWithLogitsLoss(pos_weight=pos_weight),
        optimizer,
        torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5),
    )

# ---------- NO-PEEK threshold selection on outer TRAIN only ----------
def select_threshold_no_peek(train_df_outer, y_outer_raw, lb, device,
                             inner_splits=4, epochs=70, batch_size=64, seed=123):
    """
    Choose a decision threshold from the outer-training data alone.

    An inner StratifiedKFold over `train_df_outer` trains one model per inner
    split and collects its probabilities on the inner holdout rows, giving an
    out-of-fold (OOF) probability for every outer-training row. The returned
    threshold is the one that maximizes accuracy on those OOF probabilities.

    Returns:
        (best_threshold, oof_accuracy_at_that_threshold)
    """
    splitter = StratifiedKFold(n_splits=inner_splits, shuffle=True, random_state=seed)

    oof_probs = np.zeros(len(train_df_outer), dtype=np.float64)

    for fit_idx, hold_idx in splitter.split(train_df_outer, y_outer_raw):
        fit_df = train_df_outer.iloc[fit_idx].reset_index(drop=True)
        hold_df = train_df_outer.iloc[hold_idx].reset_index(drop=True)

        # Normalizer statistics come from the inner training rows only
        fit_conv = conversion_transform(fit_df, lb=lb, is_test=False)
        means, stdevs, feature_cols = fit_normalizer(fit_conv)

        fit_ds, hold_ds = make_fold_datasets(
            fit_df, hold_df, lb, means, stdevs, feature_cols, device
        )

        fit_loader = DataLoader(fit_ds, batch_size=batch_size, shuffle=True)

        model = build_model(num_features=len(feature_cols)).to(device)
        loss_fn, optimizer, scheduler = make_loss_and_optim(model, fit_ds.y, device)

        train_epochs(model, fit_loader, loss_fn, optimizer,
                     scheduler=scheduler, device=device, epochs=epochs)

        # hold_idx are positions in the outer-training frame, so the holdout
        # probabilities land in their original rows of oof_probs.
        hold_probs = probs_from_logits(model, hold_ds.X)
        oof_probs[hold_idx] = hold_probs.detach().cpu().numpy().reshape(-1)

    # Encode the raw outer labels as 0/1 with the shared binarizer
    oof_y = lb.transform(train_df_outer["Popularity_Type"]).astype(int).reshape(-1)

    return best_threshold_from_oof_probs(oof_probs, oof_y)

# ---------- Outer CV using no-peek threshold ----------
# Nested cross-validation: for each of the 5 outer folds, an inner 4-fold CV on
# the outer-training rows picks the threshold, then a fresh model trained on
# all outer-training rows is scored on the untouched outer-validation rows.
# NOTE(review): every outer fold trains 4 inner models plus 1 final model at 70
# epochs each, so this cell is presumably the slowest in the notebook; no torch
# seed is set here, so metrics likely vary from run to run.
device = "cpu"

y_strat = training_set["Popularity_Type"].values  # raw labels, used for stratification
outer_skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-outer-fold metric accumulators
outer_acc_at_05 = []
outer_acc_at_nop = []
outer_auc = []
outer_thresholds = []

for fold, (train_idx, val_idx) in enumerate(outer_skf.split(training_set, y_strat), start=1):
    train_df_outer = training_set.iloc[train_idx].reset_index(drop=True)
    val_df_outer   = training_set.iloc[val_idx].reset_index(drop=True)

    # 1) Choose threshold using ONLY outer training data (no-peek)
    # The inner split seed differs per outer fold (100 + fold).
    best_t_nop, oof_acc = select_threshold_no_peek(
        train_df_outer,
        y_outer_raw=train_df_outer["Popularity_Type"].values,
        lb=lb,
        device=device,
        inner_splits=4,
        epochs=70,
        batch_size=64,
        seed=100 + fold
    )
    outer_thresholds.append(best_t_nop)

    # 2) Train final model on FULL outer training fold
    # Normalizer statistics are refit on the whole outer-training split
    train_conv = conversion_transform(train_df_outer, lb=lb, is_test=False)
    means, stdevs, feature_cols = fit_normalizer(train_conv)

    train_ds, val_ds = make_fold_datasets(
        train_df_outer, val_df_outer, lb, means, stdevs, feature_cols, device
    )

    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

    model = build_model(num_features=len(feature_cols)).to(device)
    loss_fn, optimizer, scheduler = make_loss_and_optim(model, train_ds.y, device)

    train_epochs(model, train_loader, loss_fn, optimizer, scheduler=scheduler, device=device, epochs=70)

    # 3) Evaluate on outer val using threshold chosen without peeking
    probs_val = probs_from_logits(model, val_ds.X).detach().cpu().numpy().reshape(-1)
    y_val = val_ds.y.detach().cpu().numpy().reshape(-1)

    acc05 = get_accuracy_from_probs(probs_val, y_val, 0.5)
    acc_nop = get_accuracy_from_probs(probs_val, y_val, best_t_nop)
    auc = roc_auc_score(y_val, probs_val)

    outer_acc_at_05.append(acc05)
    outer_acc_at_nop.append(acc_nop)
    outer_auc.append(auc)

    print(
        f"Fold {fold}: "
        f"no-peek_t={best_t_nop:.2f} (OOF acc~{oof_acc:.4f}) | "
        f"val acc@0.5={acc05:.4f} | val acc@no-peek={acc_nop:.4f} | "
        f"val AUC={auc:.4f}"
    )

# Summary across outer folds
print("\nNo-peek threshold results")
print("Mean no-peek threshold:", float(np.mean(outer_thresholds)))
print("CV mean acc@0.5:", float(np.mean(outer_acc_at_05)))
print("CV mean acc@no-peek:", float(np.mean(outer_acc_at_nop)))
print("CV mean AUC:", float(np.mean(outer_auc)))
print("CV std AUC:", float(np.std(outer_auc)))

Fold 1: no-peek_t=0.32 (OOF acc~0.7299) | val acc@0.5=0.7296 | val acc@no-peek=0.7633 | val AUC=0.8260
Fold 2: no-peek_t=0.30 (OOF acc~0.7379) | val acc@0.5=0.6688 | val acc@no-peek=0.7050 | val AUC=0.7622
Fold 3: no-peek_t=0.29 (OOF acc~0.7273) | val acc@0.5=0.6882 | val acc@no-peek=0.7413 | val AUC=0.7862
Fold 4: no-peek_t=0.32 (OOF acc~0.7496) | val acc@0.5=0.7050 | val acc@no-peek=0.7296 | val AUC=0.7937
Fold 5: no-peek_t=0.33 (OOF acc~0.7377) | val acc@0.5=0.7047 | val acc@no-peek=0.7474 | val AUC=0.8023

No-peek threshold results
Mean no-peek threshold: 0.312
CV mean acc@0.5: 0.6992767563292199
CV mean acc@no-peek: 0.7373214513134346
CV mean AUC: 0.7940838395923586
CV std AUC: 0.020826642123863846
