In [3]:
import os
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import wandb


In [4]:
def train():
    run = wandb.init(project="rf-window-classification", job_type="training")
    config = run.config

    # 1) Cargar CSV
    feat_file = f"./features_csvs/features_full_tree.csv"
    if not os.path.exists(feat_file):
        raise FileNotFoundError(f"Archivo no encontrado: {feat_file}")

    df = pd.read_csv(feat_file)
    print(df.columns.tolist())

    # 2) Eliminar columnas de metadata si están presentes
    candidate_cols = ["video_id", "segment", "participant", "start_s", "end_s", "modality", "run_count"]
    drop_cols = [c for c in candidate_cols if c in df.columns]
    df = df.drop(columns=drop_cols)

    # 3) Separar X e y
    df["label"] = df["label"].astype(int)
    feature_cols = df.select_dtypes(include=[np.number]).columns.drop("label")
    X = df[feature_cols].values
    y = df["label"].values

    # 4) Escalado opcional
    if config.scaler_type == "standard":
        scaler = StandardScaler()
    elif config.scaler_type == "minmax":
        scaler = MinMaxScaler()
    else:
        scaler = None

    if scaler is not None:
        X = scaler.fit_transform(X)
    wandb.log({"preprocessing/scaler_type": config.scaler_type})

    # 5) Split en entrenamiento y validación
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=config.random_state
    )

    # 6) Entrenar modelo Random Forest
    clf = RandomForestClassifier(
        n_estimators=config.n_estimators,
        max_depth=config.max_depth,
        class_weight=config.class_weight,
        random_state=config.random_state
    )
    clf.fit(X_train, y_train)

    # 7) Evaluación
    y_pred = clf.predict(X_val)
    f1 = f1_score(y_val, y_pred, average="macro")
    wandb.log({"val/f1_macro": f1})
    print(f"✅ F1-macro = {f1:.3f}")

    # 8) Classification report
    report = classification_report(y_val, y_pred, output_dict=True, zero_division=0)
    for label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, value in metrics.items():
                wandb.log({f"{label}/{metric_name}": value})
        else:
            wandb.log({label: metrics})

    # 9) Feature Importances
    feat_importances = pd.Series(clf.feature_importances_, index=feature_cols)
    feat_importances_sorted = feat_importances.sort_values(ascending=False)

    # Log full importances to wandb
    wandb.log({"feature_importance": feat_importances_sorted.to_dict()})
    wandb.log({"feature_importance/duration_s": feat_importances.get("duration_s", 0)})

    run.finish()


In [5]:
sweep_config = {
    "method": "bayes",
    "metric": {
        "name": "val/f1_macro",
        "goal": "maximize"
    },
    "parameters": {
        "window_size_seconds": {
            "values": [10]
        },
        "n_estimators": {
            "values": [50, 100, 200]
        },
        "max_depth": {
            "values": [5, 10, 20]
        },
        "random_state": {
            "value": 42
        },
        "class_weight": {
            "values": ["balanced", "balanced_subsample"]
        },
        "scaler_type": {
            "values": ["standard"]
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="rf-sequence-classification")
print("Sweep ID:", sweep_id)


Create sweep with ID: jncqpahd
Sweep URL: https://wandb.ai/knezevicoluka-tu-delft/rf-sequence-classification/sweeps/jncqpahd
Sweep ID: jncqpahd


In [6]:
# ─── Cell 4: Launch agents directly from the notebook ─────────────────────────

# You can spin up multiple agents (in parallel or sequentially) by running this cell multiple times.
# Each agent will pull one new config from the sweep and run train() under that config.

wandb.agent(sweep_id, function=train, count=10)

# - `count=10` means “run 10 different trials” (or until the sweep ends).
# - Omit `count` if you want to keep going until you manually stop it or exhausting the search space.


[34m[1mwandb[0m: Agent Starting Run: ptynqiz9 with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 20
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10
[34m[1mwandb[0m: Currently logged in as: [33mknezevicoluka[0m ([33mknezevicoluka-tu-delft[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.624


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.79132
0/precision,0.8164
0/recall,0.76773
0/support,31085
1/f1-score,0.45621
1/precision,0.4224
1/recall,0.49591
1/support,10647
accuracy,0.69838
feature_importance/duration_s,0.11003


[34m[1mwandb[0m: Agent Starting Run: fk126hc6 with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0mss5x4x with config:
[34m[1mwandb[0m: 	class_weight: balanced
[34m[1mwandb[0m: 	max_depth: 20
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.624


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.79197
0/precision,0.8158
0/recall,0.7695
0/support,31085
1/f1-score,0.45503
1/precision,0.42269
1/recall,0.49272
1/support,10647
accuracy,0.69889
feature_importance/duration_s,0.10959


[34m[1mwandb[0m: Agent Starting Run: 8zekr7qs with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8by6wm03 with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a2d55s8v with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Agent Starting Run: 6omcbrm7 with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Agent Starting Run: d52ix83b with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Agent Starting Run: kos01heg with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446


[34m[1mwandb[0m: Agent Starting Run: yz4rafqm with config:
[34m[1mwandb[0m: 	class_weight: balanced_subsample
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	scaler_type: standard
[34m[1mwandb[0m: 	window_size_seconds: 10


['accelX_filtered_mean', 'accelX_filtered_var', 'accelX_filtered_energy', 'accelY_filtered_mean', 'accelY_filtered_var', 'accelY_filtered_energy', 'accelZ_filtered_mean', 'accelZ_filtered_var', 'accelZ_filtered_energy', 'accelX_filtered_deriv_mean', 'accelX_filtered_deriv_std', 'accelY_filtered_deriv_mean', 'accelY_filtered_deriv_std', 'accelZ_filtered_deriv_mean', 'accelZ_filtered_deriv_std', 'SMA', 'corr_xy', 'corr_xz', 'corr_yz', 'modality', 'video_id', 'segment', 'participant', 'start_s', 'end_s', 'duration_s', 'run_count', 'level', 'label']
✅ F1-macro = 0.639


0,1
0/f1-score,▁
0/precision,▁
0/recall,▁
0/support,▁
1/f1-score,▁
1/precision,▁
1/recall,▁
1/support,▁
accuracy,▁
feature_importance/duration_s,▁

0,1
0/f1-score,0.81301
0/precision,0.81705
0/recall,0.80901
0/support,31085
1/f1-score,0.46444
1/precision,0.45796
1/recall,0.47112
1/support,10647
accuracy,0.7228
feature_importance/duration_s,0.28446
