In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pre-procesamiento de DAiSEE (Engagement) con EfficientNet-B0 + TensorFlow 2.x
Autor: (tu nombre)
"""

from utils.extraction import *
import os, cv2, glob
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel("ERROR")

2025-05-12 20:10:50.390060: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747098650.415022  410401 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747098650.422066  410401 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747098650.440267  410401 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747098650.441034  410401 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747098650.441037  410401 computation_placer.cc:177] computation placer alr

In [None]:
# ---------- 1.  Load EfficientNet once -------------------------------------
# Input-preprocessing function that matches the EfficientNet family.
preprocess = tf.keras.applications.efficientnet.preprocess_input

# Headless backbone: no classification top, global-average-pooled output,
# ImageNet weights, input of TARGET_SIZE with 3 colour channels.
base_model = tf.keras.applications.EfficientNetB0(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=TARGET_SIZE + (3,),
)

# ---------- 2.  Utilidades -------------------------------------------------
def sample_frame_indices(num_total, num_needed):
    """Return up to ``num_needed`` evenly spaced frame indices in ``[0, num_total)``.

    If the clip has no more frames than requested, every index is returned
    (no subsampling). Otherwise index ``k`` is the fractional stride
    ``num_total / num_needed`` multiplied by ``k`` and truncated to an int.
    """
    if num_total > num_needed:
        stride = num_total / num_needed
        picked = []
        for k in range(num_needed):
            # int() truncates towards zero, so the first index is always 0.
            picked.append(int(stride * k))
        return picked
    # Short clip: keep all frames.
    return list(range(num_total))

def load_and_preprocess_frames(video_path, num_frames=NUM_FRAMES):
    """Decode a video and return ``num_frames`` model-ready RGB frames.

    Frames are sampled at evenly spaced positions (see ``sample_frame_indices``),
    converted from BGR to RGB, resized to ``TARGET_SIZE`` and passed through
    ``preprocess``. If fewer than ``num_frames`` frames could be decoded, the
    last decoded frame is repeated to pad the clip to the requested length.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames in the returned clip.

    Returns:
        float32 array of shape ``(num_frames, H, W, 3)`` with ``(H, W) = TARGET_SIZE``.

    Raises:
        IOError: If the video cannot be opened or no frame can be decoded.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Imposible abrir {video_path}")

    frames = []
    try:
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A set gives O(1) membership checks inside the decode loop.
        wanted = set(sample_frame_indices(total, num_frames))
        # Nothing after the last sampled index is needed, so stop decoding there.
        last_wanted = max(wanted, default=-1)

        for i in range(last_wanted + 1):
            ret, frame = cap.read()
            if not ret:
                break
            if i in wanted:
                # OpenCV decodes as BGR; the network expects RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Plain bilinear resize straight to TARGET_SIZE; the aspect
                # ratio is NOT preserved (no letterboxing or cropping).
                frame = tf.image.resize(frame, TARGET_SIZE, method="bilinear").numpy()
                frames.append(frame)
    finally:
        # Always free the decoder handle, even if conversion/resize raised.
        cap.release()

    if not frames:
        # The reported frame count was 0 or the first read failed; fail with a
        # clear message instead of an IndexError from the padding step below.
        raise IOError(f"No se pudo leer ningún frame de {video_path}")

    if len(frames) < num_frames:
        # Short clip: pad by repeating the last decoded frame.
        pad = np.repeat(frames[-1][None, ...], num_frames - len(frames), axis=0)
        frames = np.concatenate([np.asarray(frames), pad], axis=0)
    else:
        frames = np.asarray(frames[:num_frames])

    frames = preprocess(frames.astype("float32"))
    return frames  # shape: (num_frames, H, W, 3)

def extract_features(frames):
    """Run ``base_model`` on a stack of preprocessed frames.

    Args:
        frames: Array of preprocessed frames, as produced by
            ``load_and_preprocess_frames``.

    Returns:
        The model output cast to ``FEATURE_DTYPE``; one feature vector per
        frame, i.e. shape ``(num_frames, feat_dim)``.
    """
    # The whole clip goes through a single predict() call, with progress
    # output silenced.
    return base_model.predict(frames, verbose=0).astype(FEATURE_DTYPE)

def ensure_dir(path):
    """Create the parent directory of ``path`` (and any missing ancestors).

    Does nothing if the directory already exists or if ``path`` has no
    directory component (a bare filename in the current directory).

    Args:
        path: Path of a file whose containing directory must exist.
    """
    parent = os.path.dirname(path)
    # A bare filename yields an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)

# ---------- 3.  Main extraction loop ---------------------------------------
# For every split/class folder under ROOT_DIR, extract per-frame features from
# each .avi/.mp4 video and store them as <OUTPUT_FEATURES>/<cls>/<split>/<name>.npy.
# Videos whose feature file already exists are skipped, so the cell is resumable.
for split in SPLITS:
    print(f"\n=== Procesando split: {split} ===")
    for cls in CLASSES:
        pattern = os.path.join(ROOT_DIR, cls, split, "*.[aA][vV][iI]")
        pattern_mp4 = os.path.join(ROOT_DIR, cls, split, "*.[mM][pP]4")
        # Sorted so the processing order is the same on every run.
        video_files = sorted(glob.glob(pattern) + glob.glob(pattern_mp4))

        for vid in video_files:
            rel = os.path.relpath(vid, ROOT_DIR)
            feat_path = os.path.join(OUTPUT_FEATURES, os.path.splitext(rel)[0] + ".npy")
            if os.path.exists(feat_path):   # skip videos that were already processed
                continue

            # Write to a temporary file first and rename it afterwards. If the
            # run is interrupted mid-write, no truncated .npy is left behind
            # for the "already processed" check above to skip on later runs.
            tmp_path = feat_path + ".tmp"
            try:
                frames = load_and_preprocess_frames(vid)
                feats = extract_features(frames)  # (NUM_FRAMES, 1280)
                ensure_dir(feat_path)
                # Pass a file object: np.save would append ".npy" to a path
                # that does not already end with it.
                with open(tmp_path, "wb") as fh:
                    np.save(fh, feats)
                os.replace(tmp_path, feat_path)
                print(f"✓ {rel} -> {feat_path}")
            except Exception as e:
                # Best effort: report the failure and continue with the next
                # video, removing any partially written temporary file.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
                print(f"✗ {rel} -> error: {e}")

✓ 2/test/2100511059.avi -> ../../DAiSEE/features/2/test/2100511059.npy
✓ 2/test/3100771071.avi -> ../../DAiSEE/features/2/test/3100771071.npy
✓ 2/test/4100251011.avi -> ../../DAiSEE/features/2/test/4100251011.npy
✓ 2/test/3100752057.avi -> ../../DAiSEE/features/2/test/3100752057.npy
✓ 2/test/5100422058.avi -> ../../DAiSEE/features/2/test/5100422058.npy
✓ 2/test/3422270153.avi -> ../../DAiSEE/features/2/test/3422270153.npy
✓ 2/test/2100561079.avi -> ../../DAiSEE/features/2/test/2100561079.npy
✓ 2/test/4100322041.avi -> ../../DAiSEE/features/2/test/4100322041.npy
✓ 2/test/2100521013.avi -> ../../DAiSEE/features/2/test/2100521013.npy
✓ 2/test/240846023.avi -> ../../DAiSEE/features/2/test/240846023.npy
✓ 2/test/1100171002.avi -> ../../DAiSEE/features/2/test/1100171002.npy
✓ 2/test/3100822059.avi -> ../../DAiSEE/features/2/test/3100822059.npy
✓ 2/test/2100571057.avi -> ../../DAiSEE/features/2/test/2100571057.npy
✓ 2/test/4000231010.avi -> ../../DAiSEE/features/2/test/4000231010.npy
✓ 2/test

2025-05-12 19:44:08.502225: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [8]:
def make_dataset(split, shuffle=True, balance=True):
    """Build a batched ``tf.data`` pipeline over the saved feature files.

    Files are read from ``<OUTPUT_FEATURES>/<cls>/<split>/*.npy`` for every
    ``cls`` in ``CLASSES``. The integer label is parsed from the ``<cls>``
    directory name, so class folder names must be numeric strings.

    Args:
        split: Name of the split sub-directory (e.g. ``"train"``).
        shuffle: If true, file order is shuffled and a 1000-element shuffle
            buffer is applied; if false, no shuffling is done at all.
        balance: If true, every class stream is repeated indefinitely and
            examples are drawn from the classes with equal probability, so the
            result is an INFINITE dataset. If false, the class datasets are
            concatenated once each, giving a finite dataset over all classes.

    Returns:
        A ``tf.data.Dataset`` yielding ``(x, y)`` batches with ``x`` of shape
        ``(BATCH_SIZE, NUM_FRAMES, 1280)`` (float32) and ``y`` of shape
        ``(BATCH_SIZE,)`` (int32). Incomplete final batches are dropped.
    """
    def _read_features(path):
        # numpy_function requires the returned dtype to match Tout (float32),
        # so cast in case the features were stored with another dtype.
        return np.load(path).astype(np.float32, copy=False)

    def _load(path):
        # x: (NUM_FRAMES, 1280) features; y: integer class label.
        x = tf.numpy_function(_read_features, [path], tf.float32)
        # numpy_function loses static shape information; restore it.
        x.set_shape((NUM_FRAMES, 1280))
        # .../<cls>/<split>/<file>.npy -> the third component from the end is the class.
        y = tf.strings.split(path, os.sep)[-3]
        y = tf.strings.to_number(y, out_type=tf.int32)
        return x, y

    datasets = []
    for cls in CLASSES:
        pattern = os.path.join(OUTPUT_FEATURES, cls, split, "*.npy")
        files_ds = tf.data.Dataset.list_files(pattern, shuffle=shuffle)
        ds = files_ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
        if balance:
            # Repeat so minority classes never run dry while sampling.
            # This must NOT happen in the unbalanced branch: concatenating
            # onto an infinite dataset would never reach the other classes.
            ds = ds.repeat()
        datasets.append(ds)

    if balance:
        # Balanced sampling per example (not per batch).
        mixed = tf.data.Dataset.sample_from_datasets(
            datasets,
            weights=[1/len(CLASSES)]*len(CLASSES),
            stop_on_empty_dataset=False,
        )
    else:
        mixed = datasets[0]
        for extra in datasets[1:]:
            mixed = mixed.concatenate(extra)

    if shuffle:
        # Extra example-level mixing; skipped when the caller asked for a
        # deterministic order.
        mixed = mixed.shuffle(1000)

    return (
        mixed
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.AUTOTUNE)
    )

In [9]:
# Usage example for a training loop: build both pipelines, then peek at a few
# training batches to check shapes and label mix.
train_ds = make_dataset(split="train", balance=True)
val_ds = make_dataset(split="validation", balance=False)

# Only the first five batches are inspected.
for xb, yb in train_ds.take(5):
    print(
        "Batch X:", xb.shape,
        "Batch y:", yb.numpy(),
    )

Batch X: (32, 32, 1280) Batch y: [0 2 0 2 2 1 0 2 3 1 2 2 0 2 1 2 2 2 0 2 2 0 1 1 0 0 0 0 2 0 3 3]
Batch X: (32, 32, 1280) Batch y: [0 2 3 2 0 0 1 0 3 0 3 2 3 3 3 1 2 0 1 2 1 2 2 1 2 0 2 3 2 3 2 3]
Batch X: (32, 32, 1280) Batch y: [0 2 0 2 1 0 2 1 3 1 2 1 3 1 3 3 0 1 3 3 3 3 0 1 0 1 0 1 3 1 0 1]
Batch X: (32, 32, 1280) Batch y: [0 1 0 2 0 2 1 3 3 2 0 3 1 2 3 1 3 2 1 0 1 0 1 3 0 1 3 0 0 0 3 0]
Batch X: (32, 32, 1280) Batch y: [3 1 1 1 2 1 1 2 0 2 0 1 1 2 1 3 3 0 3 3 0 3 3 2 1 1 3 3 1 2 1 0]


2025-05-12 20:12:49.365229: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
