In [None]:
# =============================================================================
#  EMG Gesture Classification – Stimulus-1  (Ninapro DB-1)
#  SOTA pipeline (determinista) / Israel Huentecura ✨  • Mayo 2025
# =============================================================================
#  • Filtra únicamente los 12 primeros gestos (restimulus ∈ 1…12, Stimulus 1)
#  • Segmenta en sub-ventanas (win_len=20) y agrupa T_SUBWIN=5 → una “secuencia”
#  • Extrae 10 rasgos manuales × canal  (100 features/ventana)
#  • Modelo híbrido:  CNN-2D por ventana  +  Dense(64) sobre rasgos  →
#                     Concatenate → 2 bloques Transformer Encoder →
#                     GlobalAvgPool → Dense → Softmax(12)
#  • Buenas prácticas: determinismo, escalar sólo TRAIN, tf.data, EarlyStop,
#                      ReduceLROnPlateau, checkpoints en ./models
# =============================================================================
import os, random, datetime as dt, shutil, json
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import (Input, Dense, Conv2D, GlobalAveragePooling2D,
                                     BatchNormalization, Activation, Dropout,
                                     TimeDistributed, Concatenate,
                                     LayerNormalization, MultiHeadAttention,
                                     GlobalAveragePooling1D)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from scipy import signal
from tqdm import tqdm
import datetime

# ---------- 0. CONFIGURACIÓN GLOBAL ------------------------------------------
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"        # kernels cuDNN deterministas

DATA_ROOT  = "data/ninapro/db1_processed"
RUNS_DIR   = "runs"
MODELS_DIR = "models"
for d in (RUNS_DIR, MODELS_DIR):
    os.makedirs(d, exist_ok=True)

T_SUBWIN   = 5          # nº sub-ventanas por “secuencia” (gesto)
WIN_LEN    = 20         # muestras por sub-ventana (200 ms si fs=100 Hz)
HANDCRAFT_PER_CH = 10   # número de rasgos tiempo-dominio por canal
BATCH_SIZE = 64
EPOCHS     = 100
TEST_PCT   = 0.2
SUBJECTS   = list(range(1, 28))   # 27 sujetos de DB-1
FS         = 100                  # Hz

# ---------- 1. PREPROCESADO ---------------------------------------------------
def butter_lowpass_1hz(fs):
    return signal.butter(1, 1/(0.5*fs), btype="low")
B_LP, A_LP = butter_lowpass_1hz(FS)

def mu_law(x, u=256):
    return np.sign(x) * np.log1p(u*np.abs(x)) / np.log1p(u)

def compute_handcrafted_features(segment, thr=0.01):
    """10 rasgos básicos por canal (std, RMS, min, max, ZC, MAV-diff, max 1/4,
       MAV, WL, Count(Larger than thr)."""
    f = []
    for ch in range(segment.shape[1]):
        s = segment[:, ch]
        f.extend([
            np.std(s), np.sqrt(np.mean(s**2)), np.min(s), np.max(s),
            np.sum(np.diff(np.sign(s)) != 0), np.mean(np.abs(np.diff(s))),
            np.max(s[:max(1, len(s)//4)]), np.mean(np.abs(s)),
            np.sum(np.abs(np.diff(s))), np.sum(np.abs(np.diff(s)) > thr)
        ])
    return np.asarray(f, dtype=np.float32)

def extract_features(X_seq):
    n_seq, T, win, n_ch, _ = X_seq.shape   # X_seq siempre 5-D
    feats = np.zeros((n_seq, T, n_ch*HANDCRAFT_PER_CH), np.float32)
    for i in tqdm(range(n_seq), desc="Extracting features"):
        for t in range(T):
            seg = X_seq[i, t, :, :, 0]     # (win, ch)
            feats[i, t] = compute_handcrafted_features(seg)
    return feats

def load_subject(path):
    emgs        = np.loadtxt(os.path.join(path, "emg.txt"))
    labels      = np.loadtxt(os.path.join(path, "restimulus.txt"))
    repetitions = np.loadtxt(os.path.join(path, "rerepetition.txt"))

    # pre-filtrado por gesto (solo Stimulus 1 → restimulus 1-12)
    mask = (labels >= 1) & (labels <= 12)
    emgs, labels, repetitions = emgs[mask], labels[mask], repetitions[mask]

    # filtrado y companding
    emgs = signal.filtfilt(B_LP, A_LP, emgs, axis=0)
    emgs = mu_law(emgs)

    data_tr, lbl_tr, data_val, lbl_val = [], [], [], []
    for i in range(0, len(labels) - WIN_LEN, 1):      # step=1
        lab = int(labels[i])
        # requiere que las WIN_LEN muestras pertenezcan al mismo gesto
        if lab != int(labels[i + WIN_LEN - 1]): 
            continue
        win = emgs[i : i+WIN_LEN]                     # (20,10)
        rep = int(repetitions[i])
        (data_val if rep in (2,5,7) else data_tr).append(win)
        (lbl_val  if rep in (2,5,7) else lbl_tr ).append(lab)
    return (np.asarray(data_tr), np.asarray(lbl_tr)), \
           (np.asarray(data_val), np.asarray(lbl_val))

def load_all_subjects(sids):
    tr_d, tr_l, va_d, va_l = [], [], [], []
    for sid in sids:
        path = os.path.join(DATA_ROOT, f"s{sid}")
        (dt, lb), (dv, lv) = load_subject(path)
        tr_d.append(dt); tr_l.append(lb)
        va_d.append(dv); va_l.append(lv)
    tr_d, tr_l = np.concatenate(tr_d), np.concatenate(tr_l)
    va_d, va_l = np.concatenate(va_d), np.concatenate(va_l)
    return (tr_d, tr_l), (va_d, va_l)

# ---------- 2. CARGA + SEGMENTACIÓN A SECUENCIAS -----------------------------
(train_data, train_lbl), (val_data, val_lbl) = load_all_subjects(SUBJECTS)

# Añadir eje canal-extra para Conv2D más adelante → (N, win, ch, 1)
train_data = train_data[..., np.newaxis]
val_data   = val_data  [..., np.newaxis]

def to_sequences(segs, lbls, T=T_SUBWIN):
    n_seq = segs.shape[0] // T
    seqs = segs[:n_seq*T].reshape(n_seq, T, *segs.shape[1:]).astype(np.float32)
    # etiqueta de la secuencia = modo de las T etiquetas
    seq_labels = np.array([np.bincount(lbls[i*T : (i+1)*T]).argmax()
                           for i in range(n_seq)])
    return seqs, seq_labels

train_X, train_y_lbl = to_sequences(train_data, train_lbl)
val_X,   val_y_lbl   = to_sequences(val_data,   val_lbl)

# ---------- 3. CARACTERÍSTICAS MANUALES --------------------------------------
train_feats = extract_features(train_X)
val_feats   = extract_features(val_X)

# ---------- 4. SPLIT TRAIN/TEST  + ESCALADO ROBUSTO --------------------------
X_tr_raw, X_te_raw, F_tr, F_te, y_tr_lbl, y_te_lbl = train_test_split(
    train_X, train_feats, train_y_lbl,
    test_size=TEST_PCT, stratify=train_y_lbl, random_state=SEED
)
feat_dim = F_tr.shape[-1]                          # 100 (=10×10)
scaler   = StandardScaler().fit(F_tr.reshape(-1, feat_dim))
def scale(F): return scaler.transform(F.reshape(-1, feat_dim)).reshape(F.shape)
F_tr, F_te, val_feats = scale(F_tr), scale(F_te), scale(val_feats)

# ---------- 5. ETIQUETAS ONE-HOT --------------------------------------------
le = LabelEncoder().fit(np.concatenate([y_tr_lbl, y_te_lbl, val_y_lbl]))
NUM_CLASSES = len(le.classes_)                    # = 12
y_tr = to_categorical(le.transform(y_tr_lbl), NUM_CLASSES)
y_val= to_categorical(le.transform(val_y_lbl), NUM_CLASSES)
y_te = to_categorical(le.transform(y_te_lbl), NUM_CLASSES)

# ---------- 6. tf.data Pipelines ---------------------------------------------
def make_ds(inputs, labels, shuffle=False):
    ds = tf.data.Dataset.from_tensor_slices((inputs, labels))
    if shuffle:
        ds = ds.shuffle(len(labels), seed=SEED, reshuffle_each_iteration=True)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

def dictify(ds):
    """Convierte ((raw, feats), y) → ({raw_input:raw, feat_input:feats}, y)."""
    return ds.map(lambda x, y: ({"raw_input": x[0], "feat_input": x[1]}, y),
                  num_parallel_calls=tf.data.AUTOTUNE)

train_ds = dictify(make_ds((X_tr_raw, F_tr), y_tr, shuffle=True))
val_ds   = dictify(make_ds((val_X, val_feats), y_val))
test_ds  = dictify(make_ds((X_te_raw, F_te), y_te))



Extracting features: 100%|██████████| 165258/165258 [13:14<00:00, 208.03it/s]
Extracting features: 100%|██████████| 71353/71353 [04:59<00:00, 238.37it/s]


In [2]:
# =============================================================================
#  EMG Gesture Recognition – Stimulus-1  (Ninapro DB-1)
#  Multi-model benchmark  •  Israel Huentecura ✨  •  May 2025
# =============================================================================
#  - Pre-procesado y split idénticos a tu pipeline
#  - Compara 4 arquitecturas (ver tabla arriba)
# =============================================================================
import os, random, datetime as dt, json, shutil, math
import numpy as np, pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import (EarlyStopping, ReduceLROnPlateau,
                                        ModelCheckpoint, TensorBoard)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy import signal
from tqdm import tqdm
import matplotlib.pyplot as plt
import datetime

# ---------- 0. GLOBAL CFG -----------------------------------------------------
SEED          = 42
np.random.seed(SEED); random.seed(SEED); tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"
DATA_ROOT     = "data/ninapro/db1_processed"
SPLIT_DIR     = "splits"; RUNS_DIR = "runs"; MODELS_DIR = "models"
for d in (SPLIT_DIR, RUNS_DIR, MODELS_DIR): os.makedirs(d, exist_ok=True)

# ---------- 1. PRE-PROCESADO --------------------------------------------------
WIN_LEN   = 20          # 200 ms
T_SUBWIN  = 5
HANDCRAFT_PER_CH = 10
FS        = 100
TEST_PCT  = 0.20

def butter_lowpass_1hz(fs):  return signal.butter(1, 1/(0.5*fs), "low")
B_LP, A_LP = butter_lowpass_1hz(FS)
def mu_law(x, u=256):  return np.sign(x)*np.log1p(u*np.abs(x))/np.log1p(u)

def compute_handcrafted(seg, thr=0.01):
    f = []
    for s in seg.T:
        f += [
            np.std(s), math.sqrt(np.mean(s**2)), s.min(), s.max(),
            np.sum(np.diff(np.sign(s))!=0), np.mean(np.abs(np.diff(s))),
            s[:max(1,len(s)//4)].max(), np.mean(np.abs(s)),
            np.sum(np.abs(np.diff(s))), np.sum(np.abs(np.diff(s))>thr)
        ]
    return np.asarray(f, np.float32)

def extract_features(X5d):
    n, T, _, _, _ = X5d.shape
    feats = np.zeros((n, T, 10*HANDCRAFT_PER_CH), np.float32)
    for i in tqdm(range(n), desc="Features"):
        for t in range(T):
            feats[i, t] = compute_handcrafted(X5d[i,t,:,:,0])
    return feats

def load_subject(path):
    emg   = np.loadtxt(os.path.join(path,"emg.txt"))
    label = np.loadtxt(os.path.join(path,"restimulus.txt"))
    rep   = np.loadtxt(os.path.join(path,"rerepetition.txt"))
    mask  = (label>=1)&(label<=12)           # Stimulus-1
    emg, label, rep = emg[mask], label[mask], rep[mask]

    emg = mu_law(signal.filtfilt(B_LP,A_LP,emg,axis=0))
    d_tr,l_tr,d_v,l_v=[],[],[],[]
    for i in range(0,len(label)-WIN_LEN):
        if label[i]!=label[i+WIN_LEN-1]: continue
        w  = emg[i:i+WIN_LEN]
        r  = int(rep[i])
        (d_v if r in (2,5,7) else d_tr).append(w)
        (l_v if r in (2,5,7) else l_tr).append(int(label[i]))
    return (np.asarray(d_tr),np.asarray(l_tr)),(np.asarray(d_v),np.asarray(l_v))

SUBJECTS = list(range(1,28))
tr_d,tr_l,va_d,va_l=[],[],[],[]
for s in SUBJECTS:
    (dt,lb),(dv,lv) = load_subject(os.path.join(DATA_ROOT,f"s{s}"))
    tr_d.append(dt); tr_l.append(lb); va_d.append(dv); va_l.append(lv)
train_data, train_lbl = np.concatenate(tr_d), np.concatenate(tr_l)
val_data,   val_lbl   = np.concatenate(va_d), np.concatenate(va_l)

train_data = train_data[...,None]; val_data = val_data[...,None]

def to_sequences(segs,lbls,T):
    n = segs.shape[0]//T
    seq = segs[:n*T].reshape(n,T,*segs.shape[1:])
    lbl = np.array([np.bincount(lbls[i*T:(i+1)*T]).argmax() for i in range(n)])
    return seq.astype(np.float32), lbl
train_X, train_y_lbl = to_sequences(train_data,train_lbl,T_SUBWIN)
val_X,   val_y_lbl   = to_sequences(val_data,  val_lbl,  T_SUBWIN)

train_feats, val_feats = extract_features(train_X), extract_features(val_X)

# ---------- 2. SPLIT TRAIN/TEST CON ÍNDICES FIJOS ----------------------------
SPLIT_FILE = os.path.join(SPLIT_DIR,"stim1_seed42.npz")
if os.path.exists(SPLIT_FILE):
    idx = np.load(SPLIT_FILE); tr_idx, te_idx = idx["tr"], idx["te"]
else:
    idx_all = np.arange(train_X.shape[0])
    tr_idx, te_idx = train_test_split(idx_all,test_size=TEST_PCT,
                                      stratify=train_y_lbl,random_state=SEED)
    np.savez(SPLIT_FILE,tr=tr_idx,te=te_idx)
X_tr_raw,X_te_raw = train_X[tr_idx],train_X[te_idx]
F_tr,F_te         = train_feats[tr_idx],train_feats[te_idx]
y_tr_lbl,y_te_lbl = train_y_lbl[tr_idx],train_y_lbl[te_idx]

# ---------- 3. ESCALADO FEATURES Y ONE-HOT -----------------------------------
feat_dim = F_tr.shape[-1]
scaler = StandardScaler().fit(F_tr.reshape(-1,feat_dim))
def scale(F): return scaler.transform(F.reshape(-1,feat_dim)).reshape(F.shape)
F_tr,F_te,val_feats = scale(F_tr),scale(F_te),scale(val_feats)

le = LabelEncoder().fit(np.concatenate([y_tr_lbl,y_te_lbl,val_y_lbl]))
NUM_CLASSES = len(le.classes_)          # 12
y_tr = to_categorical(le.transform(y_tr_lbl),NUM_CLASSES)
y_val= to_categorical(le.transform(val_y_lbl),NUM_CLASSES)
y_te = to_categorical(le.transform(y_te_lbl),NUM_CLASSES)

# ---------- 4. tf.data --------------------------------------------------------
BATCH_SIZE = 128
def make_ds(x,y,shuffle=False):
    ds=tf.data.Dataset.from_tensor_slices((x,y))
    if shuffle: ds=ds.shuffle(len(y),seed=SEED,reshuffle_each_iteration=True)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
def dictify(ds):
    return ds.map(lambda x,y: ({"raw":x[0],"feat":x[1]},y),
                  num_parallel_calls=tf.data.AUTOTUNE)

train_ds_h = dictify(make_ds((X_tr_raw,F_tr),y_tr,True))
val_ds_h   = dictify(make_ds((val_X,val_feats),y_val))
test_ds_h  = dictify(make_ds((X_te_raw,F_te),y_te))

train_ds_raw = make_ds(X_tr_raw,y_tr,True)
val_ds_raw   = make_ds(val_X,y_val)
test_ds_raw  = make_ds(X_te_raw,y_te)

RAW_SHAPE = X_tr_raw.shape[2:]   # (20,10,1)



Features: 100%|██████████| 165258/165258 [13:17<00:00, 207.24it/s]
Features: 100%|██████████| 71353/71353 [05:49<00:00, 204.04it/s]


In [3]:
# ╔══════════════════════════════════════════════════════════════════════════╗
# ║  CELDA 1 · GUARDADO COMPLETO                                             ║
# ║  (ejecútala una vez, justo después de generar los datasets y arrays)     ║
# ╚══════════════════════════════════════════════════════════════════════════╝
import os, json, numpy as np, tensorflow as tf

SAVE_ROOT = "saved_datasets"
os.makedirs(SAVE_ROOT, exist_ok=True)

# ---------- utilidades ----------
def save_tf_dataset(ds, name):
    path = os.path.join(SAVE_ROOT, name)
    if os.path.exists(path):
        print(f"⚠️  {name} ya existe → omitido")
    else:
        ds.save(path)
        print(f"✅ Dataset {name} guardado en {path}")

def save_np(name, array):
    path = os.path.join(SAVE_ROOT, f"{name}.npy")
    np.save(path, array)
    print(f"✅ Array {name}.npy guardado")

# ---------- datasets ----------
save_tf_dataset(train_ds_h,   "train_ds_h")
save_tf_dataset(val_ds_h,     "val_ds_h")
save_tf_dataset(test_ds_h,    "test_ds_h")
save_tf_dataset(train_ds_raw, "train_ds_raw")
save_tf_dataset(val_ds_raw,   "val_ds_raw")
save_tf_dataset(test_ds_raw,  "test_ds_raw")

# ---------- arrays numpy ----------
to_save = {
    "train_X_raw":   X_tr_raw,
    "train_X_feats": F_tr,
    "train_y_lbl":   y_tr_lbl,
    "val_X_raw":     val_X,
    "val_X_feats":   val_feats,
    "val_y_lbl":     val_y_lbl,
    "test_X_raw":    X_te_raw,
    "test_X_feats":  F_te,
    "test_y_lbl":    y_te_lbl,
    "y_tr":          y_tr,
    "y_val":         y_val,
    "y_te":          y_te,
}
for k, v in to_save.items():
    save_np(k, v)

# ---------- metadatos ----------
meta = {
    "RAW_SHAPE":  list(RAW_SHAPE),
    "feat_dim":   feat_dim,
    "num_classes": NUM_CLASSES,
}
with open(os.path.join(SAVE_ROOT, "meta.json"), "w") as fp:
    json.dump(meta, fp)
print("✅ meta.json guardado")


⚠️  train_ds_h ya existe → omitido
⚠️  val_ds_h ya existe → omitido
⚠️  test_ds_h ya existe → omitido
⚠️  train_ds_raw ya existe → omitido
⚠️  val_ds_raw ya existe → omitido
⚠️  test_ds_raw ya existe → omitido
✅ Array train_X_raw.npy guardado
✅ Array train_X_feats.npy guardado
✅ Array train_y_lbl.npy guardado
✅ Array val_X_raw.npy guardado
✅ Array val_X_feats.npy guardado
✅ Array val_y_lbl.npy guardado
✅ Array test_X_raw.npy guardado
✅ Array test_X_feats.npy guardado
✅ Array test_y_lbl.npy guardado
✅ Array y_tr.npy guardado
✅ Array y_val.npy guardado
✅ Array y_te.npy guardado
✅ meta.json guardado


In [1]:
# ╔══════════════════════════════════════════════════════════════════════════╗
# ║  CELDA 2 · CARGA COMPLETA                                                ║
# ║  (ejecútala al inicio de cualquier notebook/script que ya parte          ║
# ║   del dataset procesado)                                                 ║
# ╚══════════════════════════════════════════════════════════════════════════╝
import os, json, numpy as np, tensorflow as tf

SAVE_ROOT = "saved_datasets"

def load_np(name, mmap_mode=None):
    path = os.path.join(SAVE_ROOT, f"{name}.npy")
    return np.load(path, allow_pickle=True, mmap_mode=mmap_mode)

# ---------- metadatos ----------
with open(os.path.join(SAVE_ROOT, "meta.json")) as fp:
    meta = json.load(fp)
RAW_SHAPE   = tuple(meta["RAW_SHAPE"])
feat_dim    = meta["feat_dim"]
NUM_CLASSES = meta["num_classes"]

# ---------- datasets ----------
train_ds_h   = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "train_ds_h"))
val_ds_h     = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "val_ds_h"))
test_ds_h    = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "test_ds_h"))
train_ds_raw = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "train_ds_raw"))
val_ds_raw   = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "val_ds_raw"))
test_ds_raw  = tf.data.Dataset.load(os.path.join(SAVE_ROOT, "test_ds_raw"))

# ---------- arrays numpy ----------
X_tr_raw   = load_np("train_X_raw")
F_tr       = load_np("train_X_feats")
y_tr_lbl   = load_np("train_y_lbl")

val_X      = load_np("val_X_raw")
val_feats  = load_np("val_X_feats")
y_val_lbl  = load_np("val_y_lbl")

X_te_raw   = load_np("test_X_raw")
F_te       = load_np("test_X_feats")
y_te_lbl   = load_np("test_y_lbl")

y_tr = load_np("y_tr")
y_val = load_np("y_val")
y_te = load_np("y_te")

print("✅ Todos los datasets, arrays y metadatos cargados correctamente")


✅ Todos los datasets, arrays y metadatos cargados correctamente


In [2]:
import os, random, datetime as dt, json, shutil, math
import numpy as np, pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import (EarlyStopping, ReduceLROnPlateau,
                                        ModelCheckpoint, TensorBoard)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy import signal
from tqdm import tqdm
import matplotlib.pyplot as plt
import datetime
# =============================================================================
# Ninapro‑DB1 · 10‑Fold CV + Hold‑out Test
# Israel Huentecura ✨ · Junio 2025
# =============================================================================
# ‑‑ IMPORTS ------------------------------------------------------------------
import os, random, json, datetime as dt, shutil
from pathlib import Path
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.model_selection import StratifiedKFold, train_test_split
import datetime

# ------------------------------------------------------------------------- #
# 0️⃣  CFG GLOBAL
# ------------------------------------------------------------------------- #
SEED          = 42
np.random.seed(SEED); random.seed(SEED); tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"

DATA_ROOT     = "data/ninapro/db1_processed"
RUNS_DIR      = Path("runs")
MODELS_DIR    = Path("models")
BACKUP_DIR    = RUNS_DIR / "_backup"
for d in (RUNS_DIR, MODELS_DIR, BACKUP_DIR): d.mkdir(exist_ok=True, parents=True)

N_FOLDS     = 10
EPOCHS      = 100
BATCH_SIZE  = 128
TEST_PCT    = 0.20                    # proporción fija para el hold‑out



SEED          = 42
np.random.seed(SEED); random.seed(SEED); tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"
DATA_ROOT     = "data/ninapro/db1_processed"
SPLIT_DIR     = "splits"; RUNS_DIR = "runs"; MODELS_DIR = "models"
for d in (SPLIT_DIR, RUNS_DIR, MODELS_DIR): os.makedirs(d, exist_ok=True)

WIN_LEN   = 20          # 200 ms
T_SUBWIN  = 5
HANDCRAFT_PER_CH = 10
FS        = 100
TEST_PCT  = 0.20

print("feat_dim =", feat_dim, "   NUM_CLASSES =", NUM_CLASSES)
# ---------- 5. MODELOS --------------------------------------------------------
#
# 5-A  Modelo propuesto   ------------------------------------------------------
def build_sota(raw_shape,feat_dim,T,n_cls,heads=4,dim=128,dp=0.3):
    seg_in = layers.Input(raw_shape)
    x=layers.Conv2D(64,(5,1),padding="same",activation="relu")(seg_in)
    x=layers.BatchNormalization()(x)
    x=layers.Conv2D(64,(3,1),padding="same",activation="relu")(x)
    x=layers.BatchNormalization()(x)
    seg_vec = layers.GlobalAveragePooling2D()(x)
    seg_cnn=models.Model(seg_in,seg_vec)

    in_raw  = layers.Input((T,*raw_shape),name="raw")
    in_feat = layers.Input((T,feat_dim),  name="feat")
    r = layers.TimeDistributed(seg_cnn)(in_raw)
    f = layers.TimeDistributed(layers.Dense(64,activation="relu"))(in_feat)
    concat = layers.Concatenate()([r,f])
    proj = layers.Dense(dim)(concat)

    def trans_block(z):
        a = layers.LayerNormalization(epsilon=1e-6)(z)
        a = layers.MultiHeadAttention(num_heads=heads,key_dim=dim,dropout=dp)(a,a)
        z = z+a
        b = layers.LayerNormalization(epsilon=1e-6)(z)
        b = layers.Dense(dim*4,activation="relu")(b)
        b = layers.Dense(dim)(b); b = layers.Dropout(dp)(b)
        return z+b
    z = trans_block(proj); z = trans_block(z)
    z = layers.GlobalAveragePooling1D()(z)
    z = layers.Dropout(dp)(z)
    z = layers.Dense(128,activation="relu")(z)
    z = layers.Dropout(dp)(z)
    out = layers.Dense(n_cls,activation="softmax")(z)
    m = models.Model([in_raw,in_feat],out,name="SOTA_Trans")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",metrics=["accuracy"])
    return m
#
# 5-B  Modelos del usuario  ----------------------------------------------------
class Attention(layers.Layer):
    def build(self,inp_shape):
        u=inp_shape[-1]
        self.W=self.add_weight("W",(u,u),initializer="glorot_uniform")
        self.b=self.add_weight("b",(u,),initializer="zeros")
        self.u=self.add_weight("u",(u,1),initializer="glorot_uniform")
    def call(self,x):
        v=tf.tanh(tf.tensordot(x,self.W,1)+self.b)
        vu=tf.tensordot(v,self.u,1)
        al=tf.nn.softmax(tf.squeeze(vu,-1),1)
        return tf.reduce_sum(x*tf.expand_dims(al,-1),1)

def mobile_cnn(raw_shape,alpha=.75,dp=.2):
    inp=layers.Input(raw_shape)
    x=layers.SeparableConv2D(int(32*alpha),3,padding="same",activation="relu")(inp)
    x=layers.BatchNormalization()(x)
    x=layers.SeparableConv2D(int(64*alpha),3,padding="same",activation="relu")(x)
    x=layers.BatchNormalization()(x)
    x=layers.GlobalAveragePooling2D()(x)
    x=layers.Dropout(dp)(x)
    return models.Model(inp,x)

def build_hybrid_v2(raw_shape,feat_dim,T,n_cls,gru=128,dp=.3):
    cnn=mobile_cnn(raw_shape)
    in_r  = layers.Input((T,*raw_shape),name="raw")
    in_f  = layers.Input((T,feat_dim),  name="feat")
    r=layers.TimeDistributed(cnn)(in_r)
    f=layers.TimeDistributed(layers.Dense(96,activation="relu"))(in_f)
    mrg=layers.Concatenate()([r,f])
    x=layers.Bidirectional(layers.GRU(gru,return_sequences=True,dropout=dp,recurrent_dropout=dp*0.5))(mrg)
    x=Attention()(x); x=layers.Dropout(dp)(x)
    out=layers.Dense(n_cls,activation="softmax",
                     kernel_regularizer=regularizers.l2(1e-4))(x)
    m=models.Model([in_r,in_f],out,name="Hybrid_A2")
    m.compile(optimizer=tf.keras.optimizers.Adam(2e-3),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=.1),
              metrics=["accuracy"])
    return m

def build_emghandnet(raw_shape,T,n_cls,filters=(64,128),lstm=128,dp=.3):
    inp=layers.Input((T,*raw_shape))
    x=inp
    for f in filters:
        x=layers.TimeDistributed(layers.Conv2D(f,(3,3),padding="same",activation="relu"))(x)
        x=layers.TimeDistributed(layers.BatchNormalization())(x)
        x=layers.TimeDistributed(layers.MaxPool2D((2,2)))(x)
    x=layers.TimeDistributed(layers.GlobalAveragePooling2D())(x)
    x=layers.TimeDistributed(layers.Dropout(dp))(x)
    x=layers.Bidirectional(layers.LSTM(lstm,return_sequences=False,dropout=dp))(x)
    out=layers.Dense(n_cls,activation="softmax")(x)
    m=models.Model(inp,out,name="EMGHand")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",metrics=["accuracy"])
    return m

def build_dualstream(raw_shape, feat_dim, T, n_cls,
                     rf=64, ff=64, lstm=128, dp=.3):
    # ⬇⬇  añadimos name="raw" y name="feat"
    in_r = layers.Input((T, *raw_shape),     name="raw")
    in_f = layers.Input((T, feat_dim),       name="feat")

    r = layers.TimeDistributed(
            layers.Reshape((raw_shape[0], raw_shape[1])))(in_r)
    r = layers.TimeDistributed(
            layers.Conv1D(rf, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)

    f = layers.TimeDistributed(
            layers.Dense(ff, activation="relu"))(in_f)

    x = layers.Concatenate()([r, f])
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(
            layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)

    out = layers.Dense(n_cls, activation="softmax")(x)

    m = models.Model([in_r, in_f], out, name="DualStr")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
    return m

# ==============================================================================
#  MODELO 1: DualStream - Versión Original (Fiel al Paper) - CORREGIDO
# ==============================================================================

def build_dualstream_original(raw_shape, feat_dim, T, n_cls, lstm_units=200, conv_filters=256, dp=0.3):
    """
    Implementación fiel del modelo Dual Stream LSTM Feature Fusion de Zhang et al. (2024).
    Referencia: https://doi.org/10.3390/s24113631
    
    Características principales:
    - Flujo Raw: Conv1D -> LSTM -> Conv1D
    - Flujo Features: Conv1D -> Conv1D
    - Fusión y Bloque Temporal: Concatenate -> Bi-LSTM -> Bi-LSTM
    """
    # --- Input Streams ---
    in_raw = layers.Input(shape=(T, *raw_shape), name="raw")
    in_feat = layers.Input(shape=(T, feat_dim), name="feat")

    # --- Flujo de Datos Crudos (Raw Data Stream) ---
    r = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(in_raw)
    r = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(r)
    r = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)
    
    # --- Flujo de Características (Feature Stream) ---
    # ¡CORRECCIÓN AQUÍ! Añadimos una dimensión para que Conv1D funcione.
    # La forma pasa de (T, feat_dim) a (T, feat_dim, 1).
    f = layers.Reshape((T, feat_dim, 1))(in_feat)
    
    f = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(f)
    f = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(f)
    # GlobalAveragePooling1D reduce la dimensión de "steps" (la de 100), dejando (T, filters)
    f = layers.TimeDistributed(layers.GlobalAveragePooling1D())(f)

    # --- Fusión y Bloque Temporal ---
    x = layers.Concatenate()([r, f])
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=False, dropout=dp))(x)

    # --- Clasificador ---
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(dp)(x)
    out = layers.Dense(n_cls, activation="softmax", name="output")(x)
    
    model = models.Model(inputs=[in_raw, in_feat], outputs=out, name="DualStream_Original")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# ==============================================================================
#  MODELO 2: DualStream - Versión Adaptada y Ligera (Tu Implementación)
# ==============================================================================

def build_dualstream_adaptado(raw_shape, feat_dim, T, n_cls, rf=64, ff=64, lstm=128, dp=0.3):
    """
    Versión adaptada y ligera del concepto DualStream, propuesta en esta tesis.
    (Tu implementación original de 'build_dualstream').
    """
    in_r = layers.Input((T, *raw_shape), name="raw")
    in_f = layers.Input((T, feat_dim), name="feat")
    r = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(in_r)
    r = layers.TimeDistributed(layers.Conv1D(rf, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)
    f = layers.TimeDistributed(layers.Dense(ff, activation="relu"))(in_f)
    x = layers.Concatenate()([r, f])
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)
    out = layers.Dense(n_cls, activation="softmax")(x)
    model = models.Model([in_r, in_f], out, name="DualStream_Adaptado")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model


# ==============================================================================
#  MODELO 3: EMGHandNet - Versión Original (Fiel al Paper)
# ==============================================================================

def build_emghandnet_original(raw_shape, T, n_cls, filters=(64, 64, 64, 64), lstm_units=200, dp=0.3):
    """
    Implementación fiel del modelo EMGHandNet de Karnam et al. (2022).
    Referencia: https://www.sciencedirect.com/science/article/abs/pii/S0208521622000080
    
    Características principales:
    - Utiliza exclusivamente convoluciones 1D (Conv1D) para los canales de EMG.
    - Dos capas Bi-LSTM apiladas para el procesamiento temporal.
    """
    inp = layers.Input(shape=(T, *raw_shape), name="raw")
    x = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(inp)

    for i, f in enumerate(filters):
        x = layers.TimeDistributed(layers.Conv1D(f, 3, padding="same", activation="relu"), name=f'td_conv1d_{i}')(x)
        x = layers.TimeDistributed(layers.BatchNormalization(), name=f'td_bn_{i}')(x)
        x = layers.TimeDistributed(layers.MaxPool1D(2), name=f'td_pool_{i}')(x)
        
    x = layers.TimeDistributed(layers.Flatten(), name='td_flatten')(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=False, dropout=dp))(x)
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(dp)(x)
    out = layers.Dense(n_cls, activation="softmax", name="output")(x)
    
    model = models.Model(inputs=inp, outputs=out, name="EMGHandNet_Original")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# ==============================================================================
#  MODELO 4: EMGHandNet - Versión Adaptada con Conv2D (Tu Implementación)
# ==============================================================================

def build_emghandnet_adaptado(raw_shape, T, n_cls, filters=(64,128), lstm=128, dp=.3):
    """
    Versión adaptada de EMGHandNet que utiliza Conv2D para tratar las ventanas como imágenes.
    (Tu implementación original de 'build_emghandnet').
    """
    inp = layers.Input((T, *raw_shape), name="raw")
    x = inp
    for f in filters:
        x = layers.TimeDistributed(layers.Conv2D(f, (3, 3), padding="same", activation="relu"))(x)
        x = layers.TimeDistributed(layers.BatchNormalization())(x)
        x = layers.TimeDistributed(layers.MaxPool2D((2, 2)))(x)
    x = layers.TimeDistributed(layers.GlobalAveragePooling2D())(x)
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)
    out = layers.Dense(n_cls, activation="softmax")(x)
    model = models.Model(inp, out, name="EMGHandNet_Adaptado")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy", metrics=["accuracy"])
    return model


# ==============================================================================
#  MODELO 5: HyT-Net - Modelo Híbrido CNN-Transformer (Propuesto por ti)
# ==============================================================================

def build_hyt_net_propuesto(raw_shape, feat_dim, T, n_cls, heads=4, dim=128, dp=0.3):
    """
    Arquitectura Híbrida CNN-Transformer (HyT-Net) propuesta en esta tesis.
    (Tu implementación original de 'build_sota').
    """
    # --- Rama CNN para ventanas 2D ---
    seg_in = layers.Input(raw_shape, name="seg_input")
    x_cnn = layers.Conv2D(64, (5,1), padding="same", activation="relu")(seg_in)
    x_cnn = layers.BatchNormalization()(x_cnn)
    x_cnn = layers.Conv2D(64, (3,1), padding="same", activation="relu")(x_cnn)
    x_cnn = layers.BatchNormalization()(x_cnn)
    seg_vec = layers.GlobalAveragePooling2D()(x_cnn)
    seg_cnn_model = models.Model(seg_in, seg_vec, name="segment_cnn")

    # --- Entradas y Procesamiento de Secuencias ---
    in_raw = layers.Input((T, *raw_shape), name="raw")
    in_feat = layers.Input((T, feat_dim), name="feat")
    raw_seq = layers.TimeDistributed(seg_cnn_model)(in_raw)
    feat_seq = layers.TimeDistributed(layers.Dense(64, activation="relu"))(in_feat)
    
    # --- Fusión y Proyección ---
    fusion = layers.Concatenate(axis=-1)([raw_seq, feat_seq])
    projection = layers.Dense(dim, activation="linear")(fusion)

    # --- Bloques Transformer ---
    def transformer_encoder_block(seq_input, key_dim, num_heads, dropout_rate):
        x = layers.LayerNormalization(epsilon=1e-6)(seq_input)
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, dropout=dropout_rate)(x, x)
        x = layers.Dropout(dropout_rate)(x)
        res = x + seq_input
        x = layers.LayerNormalization(epsilon=1e-6)(res)
        x = layers.Dense(key_dim * 4, activation="relu")(x)
        x = layers.Dropout(dropout_rate)(x)
        x = layers.Dense(key_dim)(x)
        return x + res

    z = transformer_encoder_block(projection, dim, heads, dp)
    z = transformer_encoder_block(z, dim, heads, dp)

    # --- Clasificador ---
    z = layers.GlobalAveragePooling1D()(z)
    z = layers.Dropout(dp)(z)
    z = layers.Dense(128, activation="relu")(z)
    z = layers.Dropout(dp)(z)
    out = layers.Dense(n_cls, activation="softmax", name="output")(z)
    
    model = models.Model(inputs=[in_raw, in_feat], outputs=out, name="HyT-Net_Propuesto")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model



# ---------- 6. CALLBACKS  & EPOCHS -------------------------------------------
def cb(model_name):
    ts= datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    ck=os.path.join(MODELS_DIR,f"{model_name}_{ts}.keras")
    return [
        EarlyStopping(monitor="val_loss",patience=15,restore_best_weights=True),
        ReduceLROnPlateau(monitor="val_loss",factor=.5,patience=5,min_lr=1e-6),
        ModelCheckpoint(ck,monitor="val_accuracy",save_best_only=True,verbose=0),
        TensorBoard(log_dir=os.path.join(RUNS_DIR,f"{model_name}_{ts}"))
    ]

EPOCHS = 100

# ---------- 7. TRAIN ----------------------------------------------------------
# models_to_train=[
#     ("DualStr"   , build_dualstream(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES),True,  EPOCHS,  train_ds_h, val_ds_h),
#     ("SOTA_Trans", build_sota   (RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS,   train_ds_h, val_ds_h),
#     ("Hybrid_A2" , build_hybrid_v2(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS, train_ds_h, val_ds_h),
#     ("EMGHand"   , build_emghandnet(RAW_SHAPE,T_SUBWIN,NUM_CLASSES),        False, EPOCHS,  train_ds_raw,val_ds_raw),
# ]

models_to_train=[
    ("DualStr-Lite", build_dualstream_adaptado(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    ("DualStr-Original", build_dualstream_original(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    ("EMGHandNet-2D", build_emghandnet(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False, EPOCHS, train_ds_raw, val_ds_raw),
    ("EMGHandNet-Original", build_emghandnet_original(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False, EPOCHS, train_ds_raw, val_ds_raw),
    ("HyT-Net", build_hyt_net_propuesto(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    ("CRNN-Attn", build_hybrid_v2(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS, train_ds_h, val_ds_h),
]

# ---------- EXTRA: REPORTE DE PARÁMETROS -------------------------------------
def print_model_params(models):
    """Imprime nº de parámetros y tamaño aprox. en memoria para cada modelo."""
    print("\n🧮  Parámetros por modelo")
    for name, model, *_ in models:
        params = model.count_params()
        size_mb = params * 4 / (1024 ** 2)          # 4 bytes por parámetro fp32
        print(f"{name:10s}: {params:,}  (~{size_mb:.2f} MB)")

# Llama a la función para ver el reporte
print_model_params(models_to_train)

# =============================================================================
# cv_pipeline.py · 10‑Fold CV + Resumable Training for Ninapro‑DB1
# Israel Huentecura ✨ · Mayo 2025
# =============================================================================
import os, json, datetime as dt, numpy as np, pandas as pd, tensorflow as tf
from pathlib import Path
from sklearn.model_selection import StratifiedKFold

# ------------------------------------------------------------------------- #
# 0️⃣  RUTAS Y CONSTANTES GLOBALES
# ------------------------------------------------------------------------- #
SEED        = 42
N_FOLDS     = 10
EPOCHS      = 100
BATCH_SIZE  = 128

RUNS_DIR    = Path("runs")
MODELS_DIR  = Path("models")
BACKUP_DIR  = RUNS_DIR / "_backup"          # aquí se guardan estados por batch
STATS_FILE  = RUNS_DIR / "cv_progress.json" # bitácora para saltar folds/‑modelos

for d in (RUNS_DIR, MODELS_DIR, BACKUP_DIR):
    d.mkdir(parents=True, exist_ok=True)


# ------------------------------------------------------------------------- #
# 1️⃣  CARGA / SPLIT DE LOS ARRAYS EN MEMORIA
# ------------------------------------------------------------------------- #
def assemble_arrays_holdout():
    """
    Construye tres juegos:
      • (X_raw, F, y)           → train+val  (para la CV)
      • (X_test_raw, F_test, y_test) → test  (hold‑out)
    A partir de los arrays pre‑procesados que ya tengas en memoria
    (X_tr_raw, F_tr, y_tr, val_X, val_feats, y_val, X_te_raw, F_te, y_te).
    """
    g = globals()
    # --- train+val (se usará dentro de los folds) -------------------------
    tr_val_partsX = []; tr_val_partsF = []; tr_val_partsY = []

    def _add_tv(x, f, y):
        if x in g and y in g:                          # sólo si existen
            tr_val_partsX.append(g[x]); tr_val_partsY.append(g[y])
            if f in g: tr_val_partsF.append(g[f])

    _add_tv('X_tr_raw', 'F_tr',  'y_tr')
    _add_tv('val_X',    'val_feats', 'y_val')

    if not tr_val_partsX:
        raise RuntimeError("❌ No se encontraron arrays de train+val en memoria")

    global X_raw_all, F_all, y_all
    X_raw_all = np.concatenate(tr_val_partsX, 0)
    y_all     = np.concatenate(tr_val_partsY,  0)
    F_all     = (np.concatenate(tr_val_partsF, 0)
                 if tr_val_partsF else None)

    # --- test -------------------------------------------------------------
    global X_raw_test, F_test, y_test
    if 'X_te_raw' not in g or 'y_te' not in g:
        # → Si no tenías test pre‑calculado, házlo con train_test_split
        #   (estratificado y sin reemplazo)
        idx_trval = np.arange(len(y_all))
        X_raw_all, X_raw_test, y_all, y_test, idx_trval, idx_test = \
            train_test_split(X_raw_all, y_all,
                             np.arange(len(y_all)),
                             test_size=TEST_PCT, random_state=SEED,
                             stratify=np.argmax(y_all, 1))
        if F_all is not None:
            F_all, F_test = F_all[idx_trval], F_all[idx_test]
        else:
            F_test = None
    else:
        X_raw_test = g['X_te_raw']; y_test = g['y_te']
        F_test     = g.get('F_te', None)

    print(f"✅  Train+Val: {X_raw_all.shape}  Test: {X_raw_test.shape}")

# ------------------------------------------------------------------------- #
# 2️⃣  tf.data WRAPPER (sin cambios salvo docstring)
# ------------------------------------------------------------------------- #
def make_ds(x_raw, feats, y, uses_feat, shuffle):
    """
    • uses_feat=True  → se crea dict{'raw':…, 'feat':…}
    • uses_feat=False → sólo la rama raw
    """
    if uses_feat and feats is None:
        raise ValueError("El modelo requiere features pero feats=None")
    if uses_feat:
        ds = tf.data.Dataset.from_tensor_slices(({"raw": x_raw, "feat": feats},
                                                 y))
    else:
        ds = tf.data.Dataset.from_tensor_slices((x_raw, y))
    if shuffle:
        ds = ds.shuffle(len(y), seed=SEED, reshuffle_each_iteration=True)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# ------------------------------------------------------------------------- #
# 3️⃣  CALLBACKS
# ------------------------------------------------------------------------- #
def make_callbacks(tag):
    ck_path   = MODELS_DIR / f"{tag}_BEST.keras"
    backup_to = BACKUP_DIR / tag
    log_dir   = RUNS_DIR  / f"{tag}_{dt.datetime.now():%Y%m%d-%H%M%S}"

    return [
        tf.keras.callbacks.BackupAndRestore(
            backup_to, delete_checkpoint=False),
        tf.keras.callbacks.ModelCheckpoint(
            ck_path, monitor="val_accuracy", save_best_only=True,
            save_weights_only=False, verbose=0),
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", patience=5, factor=0.5, min_lr=1e-6),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir),
    ]

# ------------------------------------------------------------------------- #
# 4️⃣  BITÁCORA DE CURSOS TERMINADOS
# ------------------------------------------------------------------------- #
STATS_FILE  = RUNS_DIR / "cv_progress.json"
def load_stats():
    return json.loads(STATS_FILE.read_text()) if STATS_FILE.exists() else {}

def mark_done(tag, val_acc):
    stats = load_stats()
    stats[tag] = {"val_acc": float(val_acc)}
    STATS_FILE.write_text(json.dumps(stats, indent=2))

# ------------------------------------------------------------------------- #
# 5️⃣  K‑FOLD  · GUARDA EL MEJOR CKPT POR ARQUITECTURA  🆕
# ------------------------------------------------------------------------- #
def run_kfold(models_spec):
    """
    models_spec = [
        ("Nombre", build_fn, uses_feat_bool),
        ...
    ]
    Devuelve:
      • df_folds    → métrica de cada (modelo, fold)
      • summary_cv  → media ± std por modelo
      • best_ckpts  → {'Nombre': {'ckpt': Path, 'val_acc':…, 'uses_feat':…}}
    """
    skf        = StratifiedKFold(n_splits=N_FOLDS,
                                 shuffle=True, random_state=SEED)
    rows       = []
    done       = load_stats()
    best_ckpts = {name: {"val_acc": -np.inf, "ckpt": None,
                         "uses_feat": uses_feat}
                  for name, _, uses_feat in models_spec}

    for fold, (tr, va) in enumerate(skf.split(X_raw_all,
                                              np.argmax(y_all, 1)), 1):
        X_tr, X_va   = X_raw_all[tr], X_raw_all[va]
        y_tr, y_va   = y_all[tr],     y_all[va]
        F_tr = F_all[tr] if F_all is not None else None
        F_va = F_all[va] if F_all is not None else None

        for name, build_fn, uses_feat in models_spec:
            tag = f"{name}_fold{fold}"
            if tag in done:
                print(f"⏭️  {tag} ya entrenado, se salta")
                continue

            print(f"\n📚  Entrenando {tag}")
            # --- construcción del modelo ----------------------------------
            if uses_feat:
                model = build_fn(RAW_SHAPE, feat_dim,
                                 T_SUBWIN, NUM_CLASSES)
            else:
                model = build_fn(RAW_SHAPE, T_SUBWIN, NUM_CLASSES)

            train_ds = make_ds(X_tr, F_tr, y_tr, uses_feat, shuffle=True)
            val_ds   = make_ds(X_va, F_va, y_va, uses_feat, shuffle=False)

            history  = model.fit(
                train_ds, validation_data=val_ds,
                epochs=EPOCHS, callbacks=make_callbacks(tag), verbose=1).history
            best_va = max(history["val_accuracy"])
            rows.append([name, fold, best_va])

            # --- ¿Mejor ckpt de la arquitectura? -------------------------
            ck_file = MODELS_DIR / f"{tag}_BEST.keras"
            if best_va > best_ckpts[name]["val_acc"]:
                best_ckpts[name]["val_acc"] = best_va
                best_ckpts[name]["ckpt"]    = ck_file

            mark_done(tag, best_va)
            tf.keras.backend.clear_session()

    df_folds = pd.DataFrame(rows, columns=["Model", "Fold", "Val_Acc"])
    summary  = df_folds.groupby("Model").Val_Acc.agg(['mean','std']).round(4)

    print("\n📊  Resumen 10‑fold (Val_Acc)")
    print(summary)
    return df_folds, summary, best_ckpts

# ------------------------------------------------------------------------- #
# 6️⃣  EVALUACIÓN FINAL EN HOLD‑OUT TEST  🆕
# ------------------------------------------------------------------------- #
def evaluate_on_test(best_ckpts):
    """
    Carga el mejor checkpoint de cada arquitectura y lo evalúa sobre el test.
    Devuelve df_test con columnas: Model · Test_Acc · Test_Loss
    """
    test_results = []
    for name, meta in best_ckpts.items():
        ck = meta["ckpt"]
        if ck is None or not ck.exists():
            print(f"⚠️  {name}: no se encontró ckpt; se omite")
            continue
        print(f"🧪  Testing {name}  (ckpt={ck.name})")
        model = tf.keras.models.load_model(ck, compile=True)
        test_ds = make_ds(X_raw_test, F_test, y_test,
                          meta["uses_feat"], shuffle=False)
        loss, acc = model.evaluate(test_ds, verbose=0)
        test_results.append([name, acc, loss])
        tf.keras.backend.clear_session()

    df_test = (pd.DataFrame(test_results,
                            columns=["Model", "Test_Acc", "Test_Loss"])
               .sort_values("Test_Acc", ascending=False)
               .reset_index(drop=True))
    print("\n🎯  Rendimiento en HOLD‑OUT Test")
    print(df_test)
    return df_test

# ------------------------------------------------------------------------- #
# 7️⃣  EJECUCIÓN PRINCIPAL (ejemplo)
# ------------------------------------------------------------------------- #
# 1. Ensambla arrays → genera train+val y test
assemble_arrays_holdout()

# 2. Catálogo de modelos (tus constructores sin tocar)
models_spec = [
    ("DualStream-Lite",      build_dualstream_adaptado,  True),
    ("DualStream-Original",      build_dualstream_original,  True),
    ("CRNN-Attn",             build_hybrid_v2,            True),
    ("EMGHandNet-2D",   build_emghandnet_adaptado,  False),
    ("EMGHandNet-Original",   build_emghandnet_original,  False),
    ("HyT-Net",  build_hyt_net_propuesto,    True),
]

# 3. 10‑fold
df_folds, df_summary, best_ckpts = run_kfold(models_spec)

# 4. Evaluación en test externo
df_test = evaluate_on_test(best_ckpts)

# 5. Guarda DataFrames de resultados
df_folds.to_csv(RUNS_DIR / "folds_val_acc.csv",    index=False)
df_summary.to_csv(RUNS_DIR / "cv_summary.csv")
df_test.to_csv(RUNS_DIR / "test_summary.csv",      index=False)

feat_dim = 100    NUM_CLASSES = 12

🧮  Parámetros por modelo
DualStr-Lite: 274,700  (~1.05 MB)
DualStr-Original: 3,039,148  (~11.59 MB)
EMGHandNet-2D: 341,516  (~1.30 MB)
EMGHandNet-Original: 1,637,132  (~6.25 MB)
HyT-Net   : 846,348  (~3.23 MB)
CRNN-Attn : 291,021  (~1.11 MB)
✅  Train+Val: (203559, 5, 20, 10, 1)  Test: (33052, 5, 20, 10, 1)
⏭️  DualStream-Lite_fold1 ya entrenado, se salta
⏭️  DualStream-Original_fold1 ya entrenado, se salta
⏭️  CRNN-Attn_fold1 ya entrenado, se salta
⏭️  EMGHandNet-2D_fold1 ya entrenado, se salta
⏭️  EMGHandNet-Original_fold1 ya entrenado, se salta
⏭️  HyT-Net_fold1 ya entrenado, se salta
⏭️  DualStream-Lite_fold2 ya entrenado, se salta
⏭️  DualStream-Original_fold2 ya entrenado, se salta
⏭️  CRNN-Attn_fold2 ya entrenado, se salta
⏭️  EMGHandNet-2D_fold2 ya entrenado, se salta
⏭️  EMGHandNet-Original_fold2 ya entrenado, se salta
⏭️  HyT-Net_fold2 ya entrenado, se salta
⏭️  DualStream-Lite_fold3 ya entrenado, se salta
⏭️  DualStream-Original_fold3 ya en

In [1]:
# =============================================================================
# Ninapro‑DB1 · 10‑Fold CV + Hold‑out Test
# Israel Huentecura ✨ · Junio 2025
# =============================================================================
# ‑‑ IMPORTS ------------------------------------------------------------------
import os, random, json, datetime as dt, shutil
from pathlib import Path
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.model_selection import StratifiedKFold, train_test_split
import datetime

# ------------------------------------------------------------------------- #
# 0️⃣  CFG GLOBAL
# ------------------------------------------------------------------------- #
SEED          = 42
np.random.seed(SEED); random.seed(SEED); tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"

DATA_ROOT     = "data/ninapro/db1_processed"
RUNS_DIR      = Path("runs")
MODELS_DIR    = Path("models")
BACKUP_DIR    = RUNS_DIR / "_backup"
for d in (RUNS_DIR, MODELS_DIR, BACKUP_DIR): d.mkdir(exist_ok=True, parents=True)

N_FOLDS     = 10
EPOCHS      = 100
BATCH_SIZE  = 128
TEST_PCT    = 0.20                    # proporción fija para el hold‑out

# ---------- 0. GLOBAL CFG -----------------------------------------------------
SEED          = 42
np.random.seed(SEED); random.seed(SEED); tf.random.set_seed(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"
DATA_ROOT     = "data/ninapro/db1_processed"
SPLIT_DIR     = "splits"; RUNS_DIR = "runs"; MODELS_DIR = "models"
for d in (SPLIT_DIR, RUNS_DIR, MODELS_DIR): os.makedirs(d, exist_ok=True)

WIN_LEN   = 20          # 200 ms
T_SUBWIN  = 5
HANDCRAFT_PER_CH = 10
FS        = 100
TEST_PCT  = 0.20

print("feat_dim =", feat_dim, "   NUM_CLASSES =", NUM_CLASSES)
# ---------- 5. MODELOS --------------------------------------------------------
#
# 5-A  Modelo propuesto   ------------------------------------------------------
def build_sota(raw_shape,feat_dim,T,n_cls,heads=4,dim=128,dp=0.3):
    seg_in = layers.Input(raw_shape)
    x=layers.Conv2D(64,(5,1),padding="same",activation="relu")(seg_in)
    x=layers.BatchNormalization()(x)
    x=layers.Conv2D(64,(3,1),padding="same",activation="relu")(x)
    x=layers.BatchNormalization()(x)
    seg_vec = layers.GlobalAveragePooling2D()(x)
    seg_cnn=models.Model(seg_in,seg_vec)

    in_raw  = layers.Input((T,*raw_shape),name="raw")
    in_feat = layers.Input((T,feat_dim),  name="feat")
    r = layers.TimeDistributed(seg_cnn)(in_raw)
    f = layers.TimeDistributed(layers.Dense(64,activation="relu"))(in_feat)
    concat = layers.Concatenate()([r,f])
    proj = layers.Dense(dim)(concat)

    def trans_block(z):
        a = layers.LayerNormalization(epsilon=1e-6)(z)
        a = layers.MultiHeadAttention(num_heads=heads,key_dim=dim,dropout=dp)(a,a)
        z = z+a
        b = layers.LayerNormalization(epsilon=1e-6)(z)
        b = layers.Dense(dim*4,activation="relu")(b)
        b = layers.Dense(dim)(b); b = layers.Dropout(dp)(b)
        return z+b
    z = trans_block(proj); z = trans_block(z)
    z = layers.GlobalAveragePooling1D()(z)
    z = layers.Dropout(dp)(z)
    z = layers.Dense(128,activation="relu")(z)
    z = layers.Dropout(dp)(z)
    out = layers.Dense(n_cls,activation="softmax")(z)
    m = models.Model([in_raw,in_feat],out,name="SOTA_Trans")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",metrics=["accuracy"])
    return m
#
# 5-B  Modelos del usuario  ----------------------------------------------------
class Attention(layers.Layer):
    def build(self,inp_shape):
        u=inp_shape[-1]
        self.W=self.add_weight("W",(u,u),initializer="glorot_uniform")
        self.b=self.add_weight("b",(u,),initializer="zeros")
        self.u=self.add_weight("u",(u,1),initializer="glorot_uniform")
    def call(self,x):
        v=tf.tanh(tf.tensordot(x,self.W,1)+self.b)
        vu=tf.tensordot(v,self.u,1)
        al=tf.nn.softmax(tf.squeeze(vu,-1),1)
        return tf.reduce_sum(x*tf.expand_dims(al,-1),1)

def mobile_cnn(raw_shape,alpha=.75,dp=.2):
    inp=layers.Input(raw_shape)
    x=layers.SeparableConv2D(int(32*alpha),3,padding="same",activation="relu")(inp)
    x=layers.BatchNormalization()(x)
    x=layers.SeparableConv2D(int(64*alpha),3,padding="same",activation="relu")(x)
    x=layers.BatchNormalization()(x)
    x=layers.GlobalAveragePooling2D()(x)
    x=layers.Dropout(dp)(x)
    return models.Model(inp,x)

def build_hybrid_v2(raw_shape,feat_dim,T,n_cls,gru=128,dp=.3):
    cnn=mobile_cnn(raw_shape)
    in_r  = layers.Input((T,*raw_shape),name="raw")
    in_f  = layers.Input((T,feat_dim),  name="feat")
    r=layers.TimeDistributed(cnn)(in_r)
    f=layers.TimeDistributed(layers.Dense(96,activation="relu"))(in_f)
    mrg=layers.Concatenate()([r,f])
    x=layers.Bidirectional(layers.GRU(gru,return_sequences=True,dropout=dp,recurrent_dropout=dp*0.5))(mrg)
    x=Attention()(x); x=layers.Dropout(dp)(x)
    out=layers.Dense(n_cls,activation="softmax",
                     kernel_regularizer=regularizers.l2(1e-4))(x)
    m=models.Model([in_r,in_f],out,name="Hybrid_A2")
    m.compile(optimizer=tf.keras.optimizers.Adam(2e-3),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=.1),
              metrics=["accuracy"])
    return m

def build_emghandnet(raw_shape,T,n_cls,filters=(64,128),lstm=128,dp=.3):
    inp=layers.Input((T,*raw_shape))
    x=inp
    for f in filters:
        x=layers.TimeDistributed(layers.Conv2D(f,(3,3),padding="same",activation="relu"))(x)
        x=layers.TimeDistributed(layers.BatchNormalization())(x)
        x=layers.TimeDistributed(layers.MaxPool2D((2,2)))(x)
    x=layers.TimeDistributed(layers.GlobalAveragePooling2D())(x)
    x=layers.TimeDistributed(layers.Dropout(dp))(x)
    x=layers.Bidirectional(layers.LSTM(lstm,return_sequences=False,dropout=dp))(x)
    out=layers.Dense(n_cls,activation="softmax")(x)
    m=models.Model(inp,out,name="EMGHand")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",metrics=["accuracy"])
    return m

def build_dualstream(raw_shape, feat_dim, T, n_cls,
                     rf=64, ff=64, lstm=128, dp=.3):
    # ⬇⬇  añadimos name="raw" y name="feat"
    in_r = layers.Input((T, *raw_shape),     name="raw")
    in_f = layers.Input((T, feat_dim),       name="feat")

    r = layers.TimeDistributed(
            layers.Reshape((raw_shape[0], raw_shape[1])))(in_r)
    r = layers.TimeDistributed(
            layers.Conv1D(rf, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)

    f = layers.TimeDistributed(
            layers.Dense(ff, activation="relu"))(in_f)

    x = layers.Concatenate()([r, f])
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(
            layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)

    out = layers.Dense(n_cls, activation="softmax")(x)

    m = models.Model([in_r, in_f], out, name="DualStr")
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
    return m

# ==============================================================================
#  MODELO 1: DualStream - Versión Original (Fiel al Paper) - CORREGIDO
# ==============================================================================

def build_dualstream_original(raw_shape, feat_dim, T, n_cls, lstm_units=200, conv_filters=256, dp=0.3):
    """
    Implementación fiel del modelo Dual Stream LSTM Feature Fusion de Zhang et al. (2024).
    Referencia: https://doi.org/10.3390/s24113631
    
    Características principales:
    - Flujo Raw: Conv1D -> LSTM -> Conv1D
    - Flujo Features: Conv1D -> Conv1D
    - Fusión y Bloque Temporal: Concatenate -> Bi-LSTM -> Bi-LSTM
    """
    # --- Input Streams ---
    in_raw = layers.Input(shape=(T, *raw_shape), name="raw")
    in_feat = layers.Input(shape=(T, feat_dim), name="feat")

    # --- Flujo de Datos Crudos (Raw Data Stream) ---
    r = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(in_raw)
    r = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(r)
    r = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)
    
    # --- Flujo de Características (Feature Stream) ---
    # ¡CORRECCIÓN AQUÍ! Añadimos una dimensión para que Conv1D funcione.
    # La forma pasa de (T, feat_dim) a (T, feat_dim, 1).
    f = layers.Reshape((T, feat_dim, 1))(in_feat)
    
    f = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(f)
    f = layers.TimeDistributed(layers.Conv1D(conv_filters, 3, padding="same", activation="relu"))(f)
    # GlobalAveragePooling1D reduce la dimensión de "steps" (la de 100), dejando (T, filters)
    f = layers.TimeDistributed(layers.GlobalAveragePooling1D())(f)

    # --- Fusión y Bloque Temporal ---
    x = layers.Concatenate()([r, f])
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=False, dropout=dp))(x)

    # --- Clasificador ---
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(dp)(x)
    out = layers.Dense(n_cls, activation="softmax", name="output")(x)
    
    model = models.Model(inputs=[in_raw, in_feat], outputs=out, name="DualStream_Original")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# ==============================================================================
#  MODELO 2: DualStream - Versión Adaptada y Ligera (Tu Implementación)
# ==============================================================================

def build_dualstream_adaptado(raw_shape, feat_dim, T, n_cls, rf=64, ff=64, lstm=128, dp=0.3):
    """
    Versión adaptada y ligera del concepto DualStream, propuesta en esta tesis.
    (Tu implementación original de 'build_dualstream').
    """
    in_r = layers.Input((T, *raw_shape), name="raw")
    in_f = layers.Input((T, feat_dim), name="feat")
    r = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(in_r)
    r = layers.TimeDistributed(layers.Conv1D(rf, 3, padding="same", activation="relu"))(r)
    r = layers.TimeDistributed(layers.GlobalAveragePooling1D())(r)
    f = layers.TimeDistributed(layers.Dense(ff, activation="relu"))(in_f)
    x = layers.Concatenate()([r, f])
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)
    out = layers.Dense(n_cls, activation="softmax")(x)
    model = models.Model([in_r, in_f], out, name="DualStream_Adaptado")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model


# ==============================================================================
#  MODELO 3: EMGHandNet - Versión Original (Fiel al Paper)
# ==============================================================================

def build_emghandnet_original(raw_shape, T, n_cls, filters=(64, 64, 64, 64), lstm_units=200, dp=0.3):
    """
    Implementación fiel del modelo EMGHandNet de Karnam et al. (2022).
    Referencia: https://www.sciencedirect.com/science/article/abs/pii/S0208521622000080
    
    Características principales:
    - Utiliza exclusivamente convoluciones 1D (Conv1D) para los canales de EMG.
    - Dos capas Bi-LSTM apiladas para el procesamiento temporal.
    """
    inp = layers.Input(shape=(T, *raw_shape), name="raw")
    x = layers.TimeDistributed(layers.Reshape((raw_shape[0], raw_shape[1])))(inp)

    for i, f in enumerate(filters):
        x = layers.TimeDistributed(layers.Conv1D(f, 3, padding="same", activation="relu"), name=f'td_conv1d_{i}')(x)
        x = layers.TimeDistributed(layers.BatchNormalization(), name=f'td_bn_{i}')(x)
        x = layers.TimeDistributed(layers.MaxPool1D(2), name=f'td_pool_{i}')(x)
        
    x = layers.TimeDistributed(layers.Flatten(), name='td_flatten')(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True, dropout=dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=False, dropout=dp))(x)
    x = layers.Dense(512, activation="relu")(x)
    x = layers.Dropout(dp)(x)
    out = layers.Dense(n_cls, activation="softmax", name="output")(x)
    
    model = models.Model(inputs=inp, outputs=out, name="EMGHandNet_Original")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# ==============================================================================
#  MODELO 4: EMGHandNet - Versión Adaptada con Conv2D (Tu Implementación)
# ==============================================================================

def build_emghandnet_adaptado(raw_shape, T, n_cls, filters=(64,128), lstm=128, dp=.3):
    """
    Versión adaptada de EMGHandNet que utiliza Conv2D para tratar las ventanas como imágenes.
    (Tu implementación original de 'build_emghandnet').
    """
    inp = layers.Input((T, *raw_shape), name="raw")
    x = inp
    for f in filters:
        x = layers.TimeDistributed(layers.Conv2D(f, (3, 3), padding="same", activation="relu"))(x)
        x = layers.TimeDistributed(layers.BatchNormalization())(x)
        x = layers.TimeDistributed(layers.MaxPool2D((2, 2)))(x)
    x = layers.TimeDistributed(layers.GlobalAveragePooling2D())(x)
    x = layers.TimeDistributed(layers.Dropout(dp))(x)
    x = layers.Bidirectional(layers.LSTM(lstm, return_sequences=False, dropout=dp))(x)
    out = layers.Dense(n_cls, activation="softmax")(x)
    model = models.Model(inp, out, name="EMGHandNet_Adaptado")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy", metrics=["accuracy"])
    return model


# ==============================================================================
#  MODELO 5: HyT-Net - Modelo Híbrido CNN-Transformer (Propuesto por ti)
# ==============================================================================

def build_hyt_net_propuesto(raw_shape, feat_dim, T, n_cls, heads=4, dim=128, dp=0.3):
    """
    Arquitectura Híbrida CNN-Transformer (HyT-Net) propuesta en esta tesis.
    (Tu implementación original de 'build_sota').
    """
    # --- Rama CNN para ventanas 2D ---
    seg_in = layers.Input(raw_shape, name="seg_input")
    x_cnn = layers.Conv2D(64, (5,1), padding="same", activation="relu")(seg_in)
    x_cnn = layers.BatchNormalization()(x_cnn)
    x_cnn = layers.Conv2D(64, (3,1), padding="same", activation="relu")(x_cnn)
    x_cnn = layers.BatchNormalization()(x_cnn)
    seg_vec = layers.GlobalAveragePooling2D()(x_cnn)
    seg_cnn_model = models.Model(seg_in, seg_vec, name="segment_cnn")

    # --- Entradas y Procesamiento de Secuencias ---
    in_raw = layers.Input((T, *raw_shape), name="raw")
    in_feat = layers.Input((T, feat_dim), name="feat")
    raw_seq = layers.TimeDistributed(seg_cnn_model)(in_raw)
    feat_seq = layers.TimeDistributed(layers.Dense(64, activation="relu"))(in_feat)
    
    # --- Fusión y Proyección ---
    fusion = layers.Concatenate(axis=-1)([raw_seq, feat_seq])
    projection = layers.Dense(dim, activation="linear")(fusion)

    # --- Bloques Transformer ---
    def transformer_encoder_block(seq_input, key_dim, num_heads, dropout_rate):
        x = layers.LayerNormalization(epsilon=1e-6)(seq_input)
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, dropout=dropout_rate)(x, x)
        x = layers.Dropout(dropout_rate)(x)
        res = x + seq_input
        x = layers.LayerNormalization(epsilon=1e-6)(res)
        x = layers.Dense(key_dim * 4, activation="relu")(x)
        x = layers.Dropout(dropout_rate)(x)
        x = layers.Dense(key_dim)(x)
        return x + res

    z = transformer_encoder_block(projection, dim, heads, dp)
    z = transformer_encoder_block(z, dim, heads, dp)

    # --- Clasificador ---
    z = layers.GlobalAveragePooling1D()(z)
    z = layers.Dropout(dp)(z)
    z = layers.Dense(128, activation="relu")(z)
    z = layers.Dropout(dp)(z)
    out = layers.Dense(n_cls, activation="softmax", name="output")(z)
    
    model = models.Model(inputs=[in_raw, in_feat], outputs=out, name="HyT-Net_Propuesto")
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model



# ---------- 6. CALLBACKS  & EPOCHS -------------------------------------------
def cb(model_name):
    ts= datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    ck=os.path.join(MODELS_DIR,f"{model_name}_{ts}.keras")
    return [
        EarlyStopping(monitor="val_loss",patience=15,restore_best_weights=True),
        ReduceLROnPlateau(monitor="val_loss",factor=.5,patience=5,min_lr=1e-6),
        ModelCheckpoint(ck,monitor="val_accuracy",save_best_only=True,verbose=0),
        TensorBoard(log_dir=os.path.join(RUNS_DIR,f"{model_name}_{ts}"))
    ]

EPOCHS = 100

# ---------- 7. TRAIN ----------------------------------------------------------
# models_to_train=[
#     ("DualStr"   , build_dualstream(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES),True,  EPOCHS,  train_ds_h, val_ds_h),
#     ("SOTA_Trans", build_sota   (RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS,   train_ds_h, val_ds_h),
#     ("Hybrid_A2" , build_hybrid_v2(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS, train_ds_h, val_ds_h),
#     ("EMGHand"   , build_emghandnet(RAW_SHAPE,T_SUBWIN,NUM_CLASSES),        False, EPOCHS,  train_ds_raw,val_ds_raw),
# ]

models_to_train=[
    ("DualStr_Adaptado", build_dualstream_adaptado(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    ("DualStr_Original", build_dualstream_original(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    ("EMGHandNet_Adaptado", build_emghandnet(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False, EPOCHS, train_ds_raw, val_ds_raw),
    ("EMGHandNet_Original", build_emghandnet_original(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False, EPOCHS, train_ds_raw, val_ds_raw),
    ("HyT-Net_Propuesto", build_hyt_net_propuesto(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True, EPOCHS, train_ds_h, val_ds_h),
    # Modelos antiguos (opcional):
    ("DualStr_Adaptado_v1"   , build_dualstream(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES),True,  EPOCHS,  train_ds_h, val_ds_h),
    ("SOTA_Trans", build_sota   (RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS,   train_ds_h, val_ds_h),
    ("Hybrid_A2" , build_hybrid_v2(RAW_SHAPE,feat_dim,T_SUBWIN,NUM_CLASSES), True,  EPOCHS, train_ds_h, val_ds_h),
    ("EMGHand_Adaptado_v1"   , build_emghandnet(RAW_SHAPE,T_SUBWIN,NUM_CLASSES),        False, EPOCHS,  train_ds_raw,val_ds_raw),
]

# ---------- EXTRA: REPORTE DE PARÁMETROS -------------------------------------
def print_model_params(models_spec): # Changed parameter name for clarity
    """
    Imprime nº de parámetros y tamaño aprox. en memoria para cada modelo.
    Además, genera y guarda la imagen de la arquitectura del modelo.
    """
    print("\n🧮   Parámetros por modelo y diagramas de arquitectura")
    for name, build_fn, uses_feat in models_spec: # Iterate through the models_spec
        # Instantiate the model to get its parameters and plot it
        # We need to pass the correct arguments to the build_fn
        # This assumes RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES are globally accessible or passed
        try:
            if uses_feat:
                model = build_fn(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES)
            else:
                model = build_fn(RAW_SHAPE, T_SUBWIN, NUM_CLASSES)
            
            params = model.count_params()
            size_mb = params * 4 / (1024 ** 2)          # 4 bytes por parámetro fp32
            print(f"{name:25s}: {params:10,}   (~{size_mb:.2f} MB)")

            # Generate and save the model plot
            plot_path = PLOTS_DIR / f"{name}_architecture.png"
            plot_model(model, to_file=plot_path, show_shapes=True, show_layer_names=True, dpi=96)
            print(f"      Diagrama guardado en: {plot_path}")
            tf.keras.backend.clear_session() # Clear session after plotting
        except Exception as e:
            print(f"❌ Error al procesar el modelo {name}: {e}")
            print("   Asegúrate de que RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES estén definidos correctamente.")


# Llama a la función para ver el reporte
print_model_params(models_to_train)

# =============================================================================
# cv_pipeline.py · 10‑Fold CV + Resumable Training for Ninapro‑DB1
# Israel Huentecura ✨ · Mayo 2025
# =============================================================================
import os, json, datetime as dt, numpy as np, pandas as pd, tensorflow as tf
from pathlib import Path
from sklearn.model_selection import StratifiedKFold

# ------------------------------------------------------------------------- #
# 0️⃣  RUTAS Y CONSTANTES GLOBALES
# ------------------------------------------------------------------------- #
SEED        = 42
N_FOLDS     = 10
EPOCHS      = 100
BATCH_SIZE  = 128

RUNS_DIR    = Path("runs")
MODELS_DIR  = Path("models")
BACKUP_DIR  = RUNS_DIR / "_backup"          # aquí se guardan estados por batch
STATS_FILE  = RUNS_DIR / "cv_progress.json" # bitácora para saltar folds/‑modelos

for d in (RUNS_DIR, MODELS_DIR, BACKUP_DIR):
    d.mkdir(parents=True, exist_ok=True)



# ------------------------------------------------------------------------- #
# 1️⃣  CARGA / SPLIT DE LOS ARRAYS EN MEMORIA
# ------------------------------------------------------------------------- #
def assemble_arrays_holdout():
    """
    Construye tres juegos:
      • (X_raw, F, y)           → train+val  (para la CV)
      • (X_test_raw, F_test, y_test) → test  (hold‑out)
    A partir de los arrays pre‑procesados que ya tengas en memoria
    (X_tr_raw, F_tr, y_tr, val_X, val_feats, y_val, X_te_raw, F_te, y_te).
    """
    g = globals()
    # --- train+val (se usará dentro de los folds) -------------------------
    tr_val_partsX = []; tr_val_partsF = []; tr_val_partsY = []

    def _add_tv(x, f, y):
        if x in g and y in g:                          # sólo si existen
            tr_val_partsX.append(g[x]); tr_val_partsY.append(g[y])
            if f in g: tr_val_partsF.append(g[f])

    _add_tv('X_tr_raw', 'F_tr',  'y_tr')
    _add_tv('val_X',    'val_feats', 'y_val')

    if not tr_val_partsX:
        raise RuntimeError("❌ No se encontraron arrays de train+val en memoria")

    global X_raw_all, F_all, y_all
    X_raw_all = np.concatenate(tr_val_partsX, 0)
    y_all     = np.concatenate(tr_val_partsY,  0)
    F_all     = (np.concatenate(tr_val_partsF, 0)
                 if tr_val_partsF else None)

    # --- test -------------------------------------------------------------
    global X_raw_test, F_test, y_test
    if 'X_te_raw' not in g or 'y_te' not in g:
        # → Si no tenías test pre‑calculado, házlo con train_test_split
        #   (estratificado y sin reemplazo)
        idx_trval = np.arange(len(y_all))
        X_raw_all, X_raw_test, y_all, y_test, idx_trval, idx_test = \
            train_test_split(X_raw_all, y_all,
                             np.arange(len(y_all)),
                             test_size=TEST_PCT, random_state=SEED,
                             stratify=np.argmax(y_all, 1))
        if F_all is not None:
            F_all, F_test = F_all[idx_trval], F_all[idx_test]
        else:
            F_test = None
    else:
        X_raw_test = g['X_te_raw']; y_test = g['y_te']
        F_test     = g.get('F_te', None)

    print(f"✅  Train+Val: {X_raw_all.shape}  Test: {X_raw_test.shape}")

# ------------------------------------------------------------------------- #
# 2️⃣  tf.data WRAPPER (sin cambios salvo docstring)
# ------------------------------------------------------------------------- #
def make_ds(x_raw, feats, y, uses_feat, shuffle):
    """
    • uses_feat=True  → se crea dict{'raw':…, 'feat':…}
    • uses_feat=False → sólo la rama raw
    """
    if uses_feat and feats is None:
        raise ValueError("El modelo requiere features pero feats=None")
    if uses_feat:
        ds = tf.data.Dataset.from_tensor_slices(({"raw": x_raw, "feat": feats},
                                                 y))
    else:
        ds = tf.data.Dataset.from_tensor_slices((x_raw, y))
    if shuffle:
        ds = ds.shuffle(len(y), seed=SEED, reshuffle_each_iteration=True)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# ------------------------------------------------------------------------- #
# 3️⃣  CALLBACKS
# ------------------------------------------------------------------------- #
def make_callbacks(tag):
    ck_path   = MODELS_DIR / f"{tag}_BEST.keras"
    backup_to = BACKUP_DIR / tag
    log_dir   = RUNS_DIR  / f"{tag}_{dt.datetime.now():%Y%m%d-%H%M%S}"

    return [
        tf.keras.callbacks.BackupAndRestore(
            backup_to, delete_checkpoint=False),
        tf.keras.callbacks.ModelCheckpoint(
            ck_path, monitor="val_accuracy", save_best_only=True,
            save_weights_only=False, verbose=0),
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", patience=5, factor=0.5, min_lr=1e-6),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir),
    ]

# ------------------------------------------------------------------------- #
# 4️⃣  BITÁCORA DE CURSOS TERMINADOS
# ------------------------------------------------------------------------- #
STATS_FILE  = RUNS_DIR / "cv_progress.json"
def load_stats():
    return json.loads(STATS_FILE.read_text()) if STATS_FILE.exists() else {}

def mark_done(tag, val_acc):
    stats = load_stats()
    stats[tag] = {"val_acc": float(val_acc)}
    STATS_FILE.write_text(json.dumps(stats, indent=2))

# ------------------------------------------------------------------------- #
# 5️⃣  K‑FOLD  · GUARDA EL MEJOR CKPT POR ARQUITECTURA  🆕
# ------------------------------------------------------------------------- #
def run_kfold(models_spec):
    """
    models_spec = [
        ("Nombre", build_fn, uses_feat_bool),
        ...
    ]
    Devuelve:
      • df_folds    → métrica de cada (modelo, fold)
      • summary_cv  → media ± std por modelo
      • best_ckpts  → {'Nombre': {'ckpt': Path, 'val_acc':…, 'uses_feat':…}}
    """
    skf        = StratifiedKFold(n_splits=N_FOLDS,
                                 shuffle=True, random_state=SEED)
    rows       = []
    done       = load_stats()
    best_ckpts = {name: {"val_acc": -np.inf, "ckpt": None,
                         "uses_feat": uses_feat}
                  for name, _, uses_feat in models_spec}

    for fold, (tr, va) in enumerate(skf.split(X_raw_all,
                                              np.argmax(y_all, 1)), 1):
        X_tr, X_va   = X_raw_all[tr], X_raw_all[va]
        y_tr, y_va   = y_all[tr],     y_all[va]
        F_tr = F_all[tr] if F_all is not None else None
        F_va = F_all[va] if F_all is not None else None

        for name, build_fn, uses_feat in models_spec:
            tag = f"{name}_fold{fold}"
            if tag in done:
                print(f"⏭️  {tag} ya entrenado, se salta")
                continue

            print(f"\n📚  Entrenando {tag}")
            # --- construcción del modelo ----------------------------------
            if uses_feat:
                model = build_fn(RAW_SHAPE, feat_dim,
                                 T_SUBWIN, NUM_CLASSES)
            else:
                model = build_fn(RAW_SHAPE, T_SUBWIN, NUM_CLASSES)

            train_ds = make_ds(X_tr, F_tr, y_tr, uses_feat, shuffle=True)
            val_ds   = make_ds(X_va, F_va, y_va, uses_feat, shuffle=False)

            history  = model.fit(
                train_ds, validation_data=val_ds,
                epochs=EPOCHS, callbacks=make_callbacks(tag), verbose=1).history
            best_va = max(history["val_accuracy"])
            rows.append([name, fold, best_va])

            # --- ¿Mejor ckpt de la arquitectura? -------------------------
            ck_file = MODELS_DIR / f"{tag}_BEST.keras"
            if best_va > best_ckpts[name]["val_acc"]:
                best_ckpts[name]["val_acc"] = best_va
                best_ckpts[name]["ckpt"]    = ck_file

            mark_done(tag, best_va)
            tf.keras.backend.clear_session()

    df_folds = pd.DataFrame(rows, columns=["Model", "Fold", "Val_Acc"])
    summary  = df_folds.groupby("Model").Val_Acc.agg(['mean','std']).round(4)

    print("\n📊  Resumen 10‑fold (Val_Acc)")
    print(summary)
    return df_folds, summary, best_ckpts

# ------------------------------------------------------------------------- #
# 6️⃣  EVALUACIÓN FINAL EN HOLD‑OUT TEST  🆕
# ------------------------------------------------------------------------- #
def evaluate_on_test(best_ckpts):
    """
    Carga el mejor checkpoint de cada arquitectura y lo evalúa sobre el test.
    Devuelve df_test con columnas: Model · Test_Acc · Test_Loss
    """
    test_results = []
    for name, meta in best_ckpts.items():
        ck = meta["ckpt"]
        if ck is None or not ck.exists():
            print(f"⚠️  {name}: no se encontró ckpt; se omite")
            continue
        print(f"🧪  Testing {name}  (ckpt={ck.name})")
        model = tf.keras.models.load_model(ck, compile=True)
        test_ds = make_ds(X_raw_test, F_test, y_test,
                          meta["uses_feat"], shuffle=False)
        loss, acc = model.evaluate(test_ds, verbose=0)
        test_results.append([name, acc, loss])
        tf.keras.backend.clear_session()

    df_test = (pd.DataFrame(test_results,
                            columns=["Model", "Test_Acc", "Test_Loss"])
               .sort_values("Test_Acc", ascending=False)
               .reset_index(drop=True))
    print("\n🎯  Rendimiento en HOLD‑OUT Test")
    print(df_test)
    return df_test

# ------------------------------------------------------------------------- #
# 7️⃣  EJECUCIÓN PRINCIPAL (ejemplo)
# ------------------------------------------------------------------------- #
if __name__ == "__main__":
    # 1. Ensambla arrays → genera train+val y test
    assemble_arrays_holdout()

    # 2. Catálogo de modelos (tus constructores sin tocar)
    models_spec = [
        ("DualStr_Adaptado",      build_dualstream_adaptado,  True),
        ("DualStr_Original",      build_dualstream_original,  True),
        ("DualStr_Adaptado_v1",   build_dualstream,           True),
        ("HyT_Net_v1",            build_sota,                 True),
        ("Hybrid_A2",             build_hybrid_v2,            True),
        ("EMGHandNet_Adaptado",   build_emghandnet_adaptado,  False),
        ("EMGHandNet_Original",   build_emghandnet_original,  False),
        ("EMGHand_Adaptado_v1",   build_emghandnet,           False),
        ("HyT_Net_Propuesto_v2",  build_hyt_net_propuesto,    True),
    ]

    # 3. 10‑fold CV
    df_folds, df_summary, best_ckpts = run_kfold(models_spec)

    # 4. Evaluación en test externo
    df_test = evaluate_on_test(best_ckpts)

    # 5. Guarda DataFrames de resultados
    df_folds.to_csv(RUNS_DIR / "folds_val_acc.csv",    index=False)
    df_summary.to_csv(RUNS_DIR / "cv_summary.csv")
    df_test.to_csv(RUNS_DIR / "test_summary.csv",      index=False)


NameError: name 'feat_dim' is not defined

In [8]:
"""
Extension of existing EMG training pipeline to support **10-fold cross-validation with resumability**.

🔄 **What’s new (v3)**
-------------------------------------------------
* **Automatic checkpoint collection** already handled by `ModelCheckpoint`.
* Added **incremental persistence & resume**:
  * Metrics for every *(model, fold)* are appended to `cv_progress.csv` **immediately after training**.
  * If you rerun the script (e.g. after un corte de luz) it **skips** the folds that are already in the CSV and resumes where it left off.
* Helper `collect_best_checkpoints()` remains to gather the *.keras* files once all folds are done.

"""
def assemble_full_arrays():
    """Concatenate the existing pre-processed splits into *X_raw_all*, *F_all*, *y_all*.

    Looks for the following globals and concatenates them if present:
        • X_tr_raw , F_tr , y_tr  (train)
        • val_X    , val_feats , y_val  (validation)
        • X_te_raw , F_te , y_te  (test)

    Prints shapes once done. Call exactly once **before** `run_kfold()` if you
    haven’t already defined *X_raw_all* / *F_all* / *y_all*.
    """
    g = globals()
    parts_raw, parts_feat, parts_y = [], [], []

    def _add(raw_name, feat_name, y_name):
        if raw_name in g and y_name in g:
            parts_raw.append(g[raw_name])
            parts_y.append(g[y_name])
            if feat_name in g:
                parts_feat.append(g[feat_name])

    _add('X_tr_raw', 'F_tr', 'y_tr')
    _add('val_X',    'val_feats', 'y_val')
    _add('X_te_raw', 'F_te', 'y_te')

    if not parts_raw:
        raise RuntimeError("No pre-split arrays found. Define X_raw_all, F_all, y_all manually or ensure X_tr_raw / val_X / X_te_raw are in scope.")

    global X_raw_all, F_all, y_all
    X_raw_all = np.concatenate(parts_raw, axis=0)
    y_all     = np.concatenate(parts_y,   axis=0)
    F_all     = np.concatenate(parts_feat, axis=0) if parts_feat else None

    print(f"✅  Assembled arrays → X_raw_all {X_raw_all.shape} | "
          f"F_all {'None' if F_all is None else F_all.shape} | "
          f"y_all {y_all.shape}")
    
assemble_full_arrays()               # run once per session

models_spec = [
    ("DualStr_Adaptado",      build_dualstream_adaptado,  True),
    ("DualStr_Original",      build_dualstream_original,  True),
    ("DualStr_Adaptado_v1",   build_dualstream,           True),
    ("SOTA_Trans",            build_sota,                 True),
    ("Hybrid_A2",             build_hybrid_v2,            True),
    ("EMGHandNet_Adaptado",   build_emghandnet_adaptado,  False),
    ("EMGHandNet_Original",   build_emghandnet_original,  False),
    ("EMGHand_Adaptado_v1",   build_emghandnet,           False),
    ("HyT_Net_Propuesto",     build_hyt_net_propuesto,    True),
]

run_kfold(models_spec, X_raw_all, F_all, y_all,
          n_splits=10, batch=128, epochs=EPOCHS,
          progress_file="cv_progress.csv")

collect_best_checkpoints()            # optional, after CV finishes


import os
import glob
import shutil
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold

# -----------------------------------------------------------------------------
# 🧰  DATA HELPERS
# -----------------------------------------------------------------------------

def assemble_full_arrays():
    """Concatenate the existing pre-processed splits into *X_raw_all*, *F_all*, *y_all*."""
    g = globals()
    parts_raw, parts_feat, parts_y = [], [], []

    def _add(raw_name, feat_name, y_name):
        if raw_name in g and y_name in g:
            parts_raw.append(g[raw_name])
            parts_y.append(g[y_name])
            if feat_name in g:
                parts_feat.append(g[feat_name])

    _add('X_tr_raw', 'F_tr', 'y_tr')
    _add('val_X',    'val_feats', 'y_val')
    _add('X_te_raw', 'F_te', 'y_te')

    if not parts_raw:
        raise RuntimeError("No pre-split arrays found. Define the raw/feat/label arrays or ensure X_tr_raw / val_X / X_te_raw are in scope.")

    global X_raw_all, F_all, y_all
    X_raw_all = np.concatenate(parts_raw, axis=0)
    y_all     = np.concatenate(parts_y,   axis=0)
    F_all     = np.concatenate(parts_feat, axis=0) if parts_feat else None

    print(f"✅  Assembled arrays → X_raw_all {X_raw_all.shape} | "
          f"F_all {'None' if F_all is None else F_all.shape} | "
          f"y_all {y_all.shape}")

# -----------------------------------------------------------------------------
# 🏗️  DATASET CONSTRUCTION
# -----------------------------------------------------------------------------

def make_tf_dataset(x_raw, f, y, uses_feat, batch=64, shuffle=True):
    if uses_feat and f is None:
        raise ValueError("Feature array is None but the model expects features (uses_feat=True).")

    if uses_feat:
        ds = tf.data.Dataset.from_tensor_slices(({"raw": x_raw, "feat": f}, y))
    else:
        ds = tf.data.Dataset.from_tensor_slices((x_raw, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(y), seed=SEED)
    return ds.batch(batch).prefetch(tf.data.AUTOTUNE)

# -----------------------------------------------------------------------------
# 🔁  K-FOLD CROSS-VALIDATION WITH RESUME
# -----------------------------------------------------------------------------

def run_kfold(models_spec,
              X_raw_all,
              F_all,
              y_all,
              *,
              n_splits=10,
              batch=64,
              epochs=EPOCHS,
              progress_file="cv_progress.csv"):
    """Train each model with Stratified K-fold CV and **resume** if interrupted.

    Parameters
    ----------
    models_spec : list of (name, builder_fn, uses_feat)
    progress_file : str
        CSV file that stores completed (model, fold) pairs with their metrics.
    """
    # Load existing progress ---------------------------------------------------
    if os.path.exists(progress_file):
        df_prog = pd.read_csv(progress_file)
        print(f"🔄  Resuming from {progress_file} (completed rows = {len(df_prog)})")
    else:
        df_prog = pd.DataFrame(columns=['Model', 'Fold', 'Val_Acc', 'Val_Loss', 'Best_Epoch'])

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    new_rows = []

    for fold, (tr_idx, val_idx) in enumerate(skf.split(X_raw_all, np.argmax(y_all, axis=1)), start=1):
        print(f"\n📂  Fold {fold}/{n_splits}: train={len(tr_idx)}  val={len(val_idx)}")

        X_tr_raw, X_val_raw = X_raw_all[tr_idx], X_raw_all[val_idx]
        y_tr, y_val         = y_all[tr_idx],   y_all[val_idx]
        F_tr = F_all[tr_idx] if F_all is not None else None
        F_val = F_all[val_idx] if F_all is not None else None

        for name, build_fn, uses_feat in models_spec:
            if ((df_prog['Model'] == name) & (df_prog['Fold'] == fold)).any():
                print(f"⏩  Skipping {name} (fold {fold}) — already completed.")
                continue

            print(f"\n─── Training {name} (fold {fold}) ───")

            model = (build_fn(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES)
                     if uses_feat else build_fn(RAW_SHAPE, T_SUBWIN, NUM_CLASSES))

            train_ds = make_tf_dataset(X_tr_raw, F_tr, y_tr, uses_feat, batch, shuffle=True)
            val_ds   = make_tf_dataset(X_val_raw, F_val, y_val, uses_feat, batch, shuffle=False)

            cbs = cb(f"{name}_fold{fold}")

            hist = model.fit(train_ds,
                             validation_data=val_ds,
                             epochs=epochs,
                             callbacks=cbs,
                             verbose=1).history

            best_val_acc  = max(hist['val_accuracy'])
            best_val_loss = min(hist['val_loss'])
            best_epoch    = int(np.argmin(hist['val_loss']) + 1)

            row = {"Model": name, "Fold": fold, "Val_Acc": best_val_acc,
                   "Val_Loss": best_val_loss, "Best_Epoch": best_epoch}
            df_prog = pd.concat([df_prog, pd.DataFrame([row])], ignore_index=True)
            df_prog.to_csv(progress_file, index=False)
            print(f"💾  Progress saved → {progress_file}")

            new_rows.append(row)
            tf.keras.backend.clear_session()

    # summary -----------------------------------------------------------------
    if not df_prog.empty:
        summary = df_prog.groupby('Model').agg(Mean_Val_Acc=('Val_Acc', 'mean'),
                                               Std_Val_Acc=('Val_Acc', 'std'))
        print("\n📊  Cross-validation summary (Val_Acc)")
        print(summary.round(4))
        return df_prog, summary
    else:
        print("⚠️  Nothing trained.")
        return df_prog, None

# -----------------------------------------------------------------------------
# 💾  COLLECT & SAVE BEST MODELS FOR FUTURE FINE-TUNING
# -----------------------------------------------------------------------------

def collect_best_checkpoints(src_dir=MODELS_DIR, dest_dir="fine_tune_models"):
    """Copy all *.keras checkpoints saved during CV to *dest_dir* for later fine-tuning."""
    os.makedirs(dest_dir, exist_ok=True)
    ckpts = sorted(glob.glob(os.path.join(src_dir, "*.keras")))
    for ck in ckpts:
        tgt = os.path.join(dest_dir, os.path.basename(ck))
        shutil.copy2(ck, tgt)
        print(f"📦  {os.path.basename(ck)} → {dest_dir}")
    print(f"✅  Copied {len(ckpts)} models ready for fine-tuning.")

✅  Assembled arrays → X_raw_all (236611, 5, 20, 10, 1) | F_all (236611, 5, 100) | y_all (236611, 12)


TypeError: run_kfold() got an unexpected keyword argument 'progress_file'

In [None]:
# 💾  COLLECT & SAVE BEST MODELS FOR FUTURE FINE-TUNING
import glob, shutil, os

def collect_best_checkpoints(src_dir=MODELS_DIR, dest_dir="fine_tune_models"):
    """Copy all *.keras checkpoints saved during CV to *dest_dir* for later fine-tuning."""
    os.makedirs(dest_dir, exist_ok=True)
    ckpts = sorted(glob.glob(os.path.join(src_dir, "*.keras")))
    for ck in ckpts:
        tgt = os.path.join(dest_dir, os.path.basename(ck))
        shutil.copy2(ck, tgt)
        print(f"📦  {os.path.basename(ck)} → {dest_dir}")
    print(f"✅  Copied {len(ckpts)} models ready for fine-tuning.")
collect_best_checkpoints()


In [None]:
################ VAAAAAAAAAAL CRUZADAAAAAAAAA #########################

from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import backend as K

# ---------- 6. CALLBACKS  & BUILDERS -----------------------------------------
def make_callbacks(model_name, fold):
    ts  = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tag = f"{model_name}_F{fold}_{ts}"
    ck  = os.path.join(MODELS_DIR, f"{tag}.keras")
    return [
        EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True),
        ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        ModelCheckpoint(ck, monitor="val_accuracy", save_best_only=True, verbose=0),
        TensorBoard(log_dir=os.path.join(RUNS_DIR, tag))
    ]

model_builders = {
    "DualStr_Adaptado": (lambda: build_dualstream_adaptado(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),
    "DualStr_Original": (lambda: build_dualstream_original(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),
    "EMGHandNet_Adaptado": (lambda: build_emghandnet(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False),
    "EMGHandNet_Original": (lambda: build_emghandnet_original(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False),
    "HyT-Net_Propuesto": (lambda: build_hyt_net_propuesto(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),
    # Modelos antiguos (opcional):
    "DualStr_Adaptado_v1": (lambda: build_dualstream(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),
    "SOTA_trans": (lambda: build_sota(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),
    "Hybrid_A2": (lambda: build_hybrid_v2(RAW_SHAPE, feat_dim, T_SUBWIN, NUM_CLASSES), True),   
    "EMGHand_Adaptado_v1": (lambda: build_emghandnet(RAW_SHAPE, T_SUBWIN, NUM_CLASSES), False),
}

EPOCHS = 60
KFOLDS = 10  # número de folds para CV

# ---------- 7. CROSS-VALIDATION TRAINING -------------------------------------
results = []   # acumula métricas por modelo
skf = StratifiedKFold(n_splits=KFOLDS, shuffle=True, random_state=SEED)

for model_name, (builder, uses_feat) in model_builders.items():
    print(f"\n################ {model_name} – {KFOLDS}-fold CV ################")
    fold_val_acc = []; fold_test_acc = []
    # indices para CV: sobre TRAIN (X_tr_raw) y sus etiquetas originales
    for fold, (idx_train, idx_val) in enumerate(skf.split(X_tr_raw, y_tr_lbl)):
        print(f"\n── Fold {fold+1}/{KFOLDS}")
        # datos fold
        Xtr_r, Xtr_f = X_tr_raw[idx_train], F_tr[idx_train]
        Xvl_r, Xvl_f = X_tr_raw[idx_val],   F_tr[idx_val]
        ytr,  yvl    = y_tr[idx_train],     y_tr[idx_val]

        # datasets tf.data
        if uses_feat:
            ds_tr = dictify(make_ds((Xtr_r, Xtr_f), ytr, True))
            ds_vl = dictify(make_ds((Xvl_r, Xvl_f), yvl))
        else:
            ds_tr = make_ds(Xtr_r, ytr, True)
            ds_vl = make_ds(Xvl_r, yvl)

        # construir y entrenar
        K.clear_session()
        model = builder()
        model.fit(ds_tr, validation_data=ds_vl,
                  epochs=EPOCHS, callbacks=make_callbacks(model_name, fold),
                  verbose=1)

        # evaluar fold-val
        val_acc = model.evaluate(ds_vl, verbose=0)[1]
        fold_val_acc.append(val_acc)

        # evaluar en TEST global
        test_acc = (model.evaluate({"raw":X_te_raw,"feat":F_te}, y_te, 0)[1]
                    if uses_feat else model.evaluate(X_te_raw, y_te, 0)[1])
        fold_test_acc.append(test_acc)

    # Estadísticas del modelo
    results.append([
        model_name,
        np.mean(fold_val_acc), np.std(fold_val_acc),
        np.mean(fold_test_acc)
    ])

# ---------- 8. RESUMEN -------------------------------------------------------
df = pd.DataFrame(results, columns=[
    "Model", "Val_Acc_Mean", "Val_Acc_SD", "Test_Acc_Mean"
]).sort_values("Val_Acc_Mean", ascending=False)

print("\n📊  Cross-validation summary")
print(df.to_string(index=False, float_format="%.4f"))

best = df.iloc[0]
print(f"\n🏆  Mejor modelo según CV: {best.Model} "
      f"(Val={best.Val_Acc_Mean:.3f}±{best.Val_Acc_SD:.3f}, "
      f"Test≈{best.Test_Acc_Mean:.3f})")


################ DualStr_Adaptado – 10-fold CV ################

── Fold 1/10
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60

── Fold 2/10
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
E

In [None]:
📊  Cross-validation summary
              Model  Val_Acc_Mean  Val_Acc_SD  Test_Acc_Mean
  HyT-Net_Propuesto        0.9976      0.0006         0.9976
EMGHandNet_Original        0.9974      0.0004         0.9973
EMGHandNet_Adaptado        0.9968      0.0004         0.9967
   DualStr_Original        0.9963      0.0005         0.9966
   DualStr_Adaptado        0.9930      0.0009         0.9926

🏆  Mejor modelo según CV: HyT-Net_Propuesto (Val=0.998±0.001, Test≈0.998)

In [None]:
df.to_csv('df_12_05.csv')
