In [1]:
# ─── Cell 1: Imports + wandb Initialization ───────────────────────────────────

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from hmmlearn.hmm import GaussianHMM

import wandb

# We’ll define our train() function below, which calls wandb.init() internally.
# No global wandb.init() here—each run gets its own init inside train().


In [11]:
# ─── Cell 2 (UPDATED): Define the `train()` function with the fix for classification report logging ────────────────────────────────────

def train():
    """
    This function will be called by wandb.agent() for each combination of hyperparameters.
    Inside, it pulls everything from wandb.config, loads the appropriate CSV, builds sequences,
    trains the HMM, sweeps over alpha, and logs metrics to wandb.
    """
    # 1) Initialize a new wandb run (with config = wandb.config auto‐populated by the agent).
    run = wandb.init(project="hmm-sequence-classification", job_type="training")
    config = run.config

    # 2) Load / Aggregate data by window_size_seconds.
    feat_file = f"./features_csvs/features_segments_with_audio_{config.window_size_seconds}s.csv"
    if not os.path.exists(feat_file):
        raise FileNotFoundError(f"Expected {feat_file} to exist.")

    df = pd.read_csv(feat_file)
    drop_cols    = ["video_id", "segment", "participant", "window_i", "window_s", "label"]
    feature_cols = [c for c in df.columns if c not in drop_cols]

    # 3) Build sequences of length T = config.T
    T = config.T
    seqs, lengths, seq_labels = [], [], []

    for _, grp in df.groupby(["video_id", "segment", "participant"]):
        grp = grp.sort_values("window_i")
        feats = grp[feature_cols].values
        labs  = grp["label"].values
        n_chunks = len(feats) // T

        for i in range(n_chunks):
            seqs.append(feats[i*T : (i+1)*T])
            lengths.append(T)
            seq_labels.append(int(labs[i*T : (i+1)*T].sum() > 0))

    n_seqs = len(seqs)
    y_seqs = np.array(seq_labels)

    # 4) Stratified train/val split (by‐sequence)
    seq_ids = np.arange(n_seqs)
    train_ids, val_ids = train_test_split(
        seq_ids,
        test_size=0.2,
        stratify=y_seqs,
        random_state=config.random_state
    )

    def stack_by_ids(ids_list):
        X_list, L_list, Y_list = [], [], []
        for idx in ids_list:
            X_list.append(seqs[idx])
            L_list.append(lengths[idx])
            Y_list.append(y_seqs[idx])
        return np.vstack(X_list), np.array(L_list), np.array(Y_list)

    X_tr, len_tr, y_tr = stack_by_ids(train_ids)
    X_val, len_val, y_val = stack_by_ids(val_ids)

    # 5) Optional feature scaling
    if config.scaler_type == "standard":
        scaler = StandardScaler()
    elif config.scaler_type == "minmax":
        scaler = MinMaxScaler()
    else:
        scaler = None

    if scaler is not None:
        X_tr = scaler.fit_transform(X_tr)
        X_val = scaler.transform(X_val)

    wandb.log({"preprocessing/scaler_type": config.scaler_type})

    # 6) Train the GaussianHMM
    # When constructing the HMM:
    model = GaussianHMM(
        n_components=config.n_components,
        covariance_type=config.covariance_type,      # always diagonal
        n_iter=config.n_iter,
        random_state=config.random_state,
        verbose=False
    )
    model.fit(X_tr, lengths=len_tr)

    # Log the final training log‐likelihood
    train_loglik = model.score(X_tr, lengths=len_tr)
    wandb.log({
        "model/n_components": config.n_components,
        "model/covariance_type": config.covariance_type,
        "train/log_likelihood": train_loglik
    })

    # 7) Predict on validation, compute prop1, sweep alpha
    states_val = []
    idx = 0
    for L in len_val:
        st = model.predict(X_val[idx : idx + L])
        states_val.append(st)
        idx += L

    prop1 = np.array([s.sum() / len(s) for s in states_val])

    alphas = np.linspace(0, 1, config.alpha_steps)
    best_f1, best_alpha = -1, None

    for alpha in alphas:
        y_hat = (prop1 >= alpha).astype(int)
        f1 = f1_score(y_val, y_hat, average="macro")
        wandb.log({"val/alpha": alpha, "val/f1_macro": f1})
        if f1 > best_f1:
            best_f1, best_alpha = f1, alpha

    wandb.log({
        "val/best_alpha": best_alpha,
        "val/best_f1_macro": best_f1
    })
    print(f"✅ Mejor α = {best_alpha:.2f}, F1-macro = {best_f1:.3f}")

    # 8) Classification report logging (per‐class and overall metrics)
    y_pred_best = (prop1 >= best_alpha).astype(int)
    report_dict = classification_report(
        y_val, y_pred_best,
        output_dict=True, zero_division=0
    )

    for label, metrics in report_dict.items():
        if isinstance(metrics, dict):
            # For labels "0" and "1", or macro/weighted averages:
            for metric_name, metric_value in metrics.items():
                wandb.log({f"{label}/{metric_name}": metric_value})
        else:
            # For entries like 'accuracy' that are floats:
            wandb.log({label: metrics})

    run.finish()


In [12]:
# ─── Cell 3: Define your sweep configuration in‐notebook ────────────────────────

# This is equivalent to having a sweep.yaml, but as a Python dict:
sweep_config = {
    "method": "bayes", # or "grid" / "random"
    "metric": {
        "name": "val/best_f1_macro",
        "goal": "maximize"
    },
    "parameters": {
        "window_size_seconds": {
            "values": [5, 10, 20]
        },
        "T": {
            "values": [5, 10, 20]
        },
        "n_components": {
            "values": [2, 3, 4]
        },
        "covariance_type": {
            "values": ["diag", "full"]
        },
        "random_state": {
            "value": 42
        },
        "alpha_steps": {
            "value": 51
        },
        "scaler_type": {
            "values": ["none", "standard", "minmax"]
        },
        "n_iter": {
            "value": 100
        }
    }
}

# Create the sweep in‐notebook (returns a sweep_id string)
sweep_id = wandb.sweep(sweep_config, project="hmm-sequence-classification")
print("Sweep ID:", sweep_id)


Create sweep with ID: 0jn8ip4f
Sweep URL: https://wandb.ai/knezevicoluka-tu-delft/hmm-sequence-classification/sweeps/0jn8ip4f
Sweep ID: 0jn8ip4f


In [13]:
# ─── Cell 4: Launch agents directly from the notebook ─────────────────────────

# You can spin up multiple agents (in parallel or sequentially) by running this cell multiple times.
# Each agent will pull one new config from the sweep and run train() under that config.

wandb.agent(sweep_id, function=train, count=10)

# - `count=10` means “run 10 different trials” (or until the sweep ends).
# - Omit `count` if you want to keep going until you manually stop it or exhausting the search space.


[34m[1mwandb[0m: Agent Starting Run: 31931sek with config:
[34m[1mwandb[0m: 	T: 10
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 3
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 5


✅ Mejor α = 0.70, F1-macro = 0.500


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.63596
0/precision,0.63227
0/recall,0.63971
0/support,680
1/f1-score,0.3648
1/precision,0.36856
1/recall,0.36111
1/support,396
accuracy,0.53717
macro avg/f1-score,0.50038


[34m[1mwandb[0m: Agent Starting Run: 3zwofrn9 with config:
[34m[1mwandb[0m: 	T: 10
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 2
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: none
[34m[1mwandb[0m: 	window_size_seconds: 20


Model is not converging.  Current: 164913.1369780108 is not greater than 164943.42064255945. Delta is -30.283664548653178


✅ Mejor α = 0.00, F1-macro = 0.408


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0
0/precision,0
0/recall,0
0/support,69
1/f1-score,0.81501
1/precision,0.68778
1/recall,1
1/support,152
accuracy,0.68778
macro avg/f1-score,0.40751


[34m[1mwandb[0m: Agent Starting Run: 69dv2b32 with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 3
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


✅ Mejor α = 0.46, F1-macro = 0.517


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.32061
0/precision,0.33871
0/recall,0.30435
0/support,69
1/f1-score,0.71383
1/precision,0.69811
1/recall,0.73026
1/support,152
accuracy,0.59729
macro avg/f1-score,0.51722


[34m[1mwandb[0m: Agent Starting Run: jhe0hsh6 with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 4
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 20


[34m[1mwandb[0m: [32m[41mERROR[0m Run jhe0hsh6 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/75/lrrp6ghd2csd748_l4sy8y0c0000gn/T/ipykernel_29366/276456944.py", line 42, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     train_ids, val_ids = train_test_split(
[34m[1mwandb[0m: [32m[41mERROR[0m                          ^^^^^^^^^^^^^^^^^
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/_param_validation.py", line 214, in wrapper
[34m[1mwandb[0m: [32m[41mERROR[0m     return func(*args, **kwargs)
[34m[1mwandb[0m: [32m[41mERROR[0m            ^^^^

✅ Mejor α = 0.12, F1-macro = 0.552


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.35484
0/precision,0.4
0/recall,0.31884
0/support,69
1/f1-score,0.74843
1/precision,0.71687
1/recall,0.78289
1/support,152
accuracy,0.63801
macro avg/f1-score,0.55163


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cswlpo5z with config:
[34m[1mwandb[0m: 	T: 5
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: diag
[34m[1mwandb[0m: 	n_components: 4
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: none
[34m[1mwandb[0m: 	window_size_seconds: 5


✅ Mejor α = 0.82, F1-macro = 0.312


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.23051
0/precision,0.77244
0/recall,0.13547
0/support,1779
1/f1-score,0.39352
1/precision,0.2534
1/recall,0.88027
1/support,593
accuracy,0.32167
macro avg/f1-score,0.31201


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 75tjua30 with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: diag
[34m[1mwandb[0m: 	n_components: 3
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: minmax
[34m[1mwandb[0m: 	window_size_seconds: 5


✅ Mejor α = 0.92, F1-macro = 0.498


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.42932
0/precision,0.48521
0/recall,0.38498
0/support,213
1/f1-score,0.56574
1/precision,0.52015
1/recall,0.62009
1/support,229
accuracy,0.50679
macro avg/f1-score,0.49753


[34m[1mwandb[0m: Agent Starting Run: ovhi2i8a with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 2
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: minmax
[34m[1mwandb[0m: 	window_size_seconds: 10


✅ Mejor α = 0.16, F1-macro = 0.541


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.32479
0/precision,0.39583
0/recall,0.27536
0/support,69
1/f1-score,0.75692
1/precision,0.71098
1/recall,0.80921
1/support,152
accuracy,0.64253
macro avg/f1-score,0.54085


[34m[1mwandb[0m: Agent Starting Run: l5y6qg5n with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 3
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: minmax
[34m[1mwandb[0m: 	window_size_seconds: 10


✅ Mejor α = 0.42, F1-macro = 0.554


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.4
0/precision,0.38158
0/recall,0.42029
0/support,69
1/f1-score,0.70707
1/precision,0.72414
1/recall,0.69079
1/support,152
accuracy,0.60633
macro avg/f1-score,0.55354


[34m[1mwandb[0m: Agent Starting Run: qo5sftnm with config:
[34m[1mwandb[0m: 	T: 20
[34m[1mwandb[0m: 	alpha_steps: 51
[34m[1mwandb[0m: 	covariance_type: full
[34m[1mwandb[0m: 	n_components: 2
[34m[1mwandb[0m: 	n_iter: 100
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: minmax
[34m[1mwandb[0m: 	window_size_seconds: 10


✅ Mejor α = 0.16, F1-macro = 0.541


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
macro avg/f1-score,▁

0,1
0/f1-score,0.32479
0/precision,0.39583
0/recall,0.27536
0/support,69
1/f1-score,0.75692
1/precision,0.71098
1/recall,0.80921
1/support,152
accuracy,0.64253
macro avg/f1-score,0.54085
