In [None]:
import os
# # Point to the correct libdevice location (adjust path if needed)
os.environ["XLA_FLAGS"] = "--xla_gpu_cuda_data_dir=/usr/local/cuda/nvvm/libdevice"
# # export TF_XLA_FLAGS="--xla_gpu_cuda_data_dir=/usr/local/cuda/nvvm/libdevice"
# os.environ["TF_XLA_FLAGS"] = "--xla_gpu_cuda_data_dir=/usr/local/cuda/nvvm/libdevice"

In [7]:
# Place this at the top of your notebook
import tensorflow as tf

# Use standard precision instead of mixed
tf.keras.mixed_precision.set_global_policy('float32')

print("Using standard float32 precision")

print("TensorFlow version:", tf.__version__)

# Listar dispositivos físicos tipo GPU
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    print(f"✅ {len(gpus)} GPU(s) detectada(s):")
    for gpu in gpus:
        print(f"  - {gpu.name}")
else:
    print("❌ No se detectó ninguna GPU.")


Using standard float32 precision
TensorFlow version: 2.19.0
✅ 1 GPU(s) detectada(s):
  - /physical_device:GPU:0


In [3]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications.efficientnet import preprocess_input

# ---------------------------
# Parámetros de preprocessing
# ---------------------------
ROOT_DIR   = "../../DAiSEE/DataSet/Aug"
CLASSES    = [0, 1, 2, 3]
SPLITS     = ['train', 'validation', 'test']
IMG_SIZE   = 100       # tamaño de entrada de EfficientNet
MAX_FRAMES = 15     # número máximo de frames por vídeo
BATCH_SIZE = 4


def load_video(path, max_frames=MAX_FRAMES, img_size=IMG_SIZE):
    """
    Lee un vídeo (avi/mp4), extrae hasta max_frames, 
    redimensiona y normaliza.
    """
    try:
        cap = cv2.VideoCapture(path)
        if not cap.isOpened():
            print(f"Warning: Could not open video file {path}")
            # Return empty frame array if video can't be opened
            return np.zeros((max_frames, img_size, img_size, 3), dtype=np.float32)
            
        frames = []
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # BGR -> RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # resize a (img_size, img_size)
            frame = cv2.resize(frame, (img_size, img_size))
            frames.append(frame)
        cap.release()
        
        # si faltan frames, rellenar con ceros
        if len(frames) < max_frames:
            pad_count = max_frames - len(frames)
            pad_frame = np.zeros((img_size, img_size, 3), dtype=np.uint8)
            frames.extend([pad_frame] * pad_count)
        
        # array y preprocesado EfficientNet
        frames = np.array(frames, dtype=np.float32)
        frames = preprocess_input(frames)
        return frames  # shape = (MAX_FRAMES, IMG_SIZE, IMG_SIZE, 3)
    except Exception as e:
        print(f"Error processing video {path}: {e}")
        return np.zeros((max_frames, img_size, img_size, 3), dtype=np.float32)


def make_dataset(root_dir, split):
    """
    Crea un tf.data.Dataset para el split indicado.
    Cada elemento es (video_frames, label).
    """
    video_paths = []
    labels = []
    total_videos = 0
    
    for cls in CLASSES:
        folder = os.path.join(root_dir, str(cls), split)
        print(f"Procesando carpeta: {folder}")
        
        if not os.path.isdir(folder):
            print(f"⚠️ Carpeta no existe: {folder}")
            continue
            
        cls_videos = 0
        for fname in os.listdir(folder):
            if fname.lower().endswith(('.mp4', '.avi')):
                video_path = os.path.join(folder, fname)
                if os.path.getsize(video_path) > 0:  # Check file is not empty
                    video_paths.append(video_path)
                    labels.append(cls)
                    cls_videos += 1
                else:
                    print(f"⚠️ Archivo vacío ignorado: {video_path}")
        
        total_videos += cls_videos
        print(f"  → {cls_videos} videos encontrados para clase {cls}")
    
    print(f"Total de videos para {split}: {total_videos}")
    
    if not video_paths:
        print(f"⚠️ No se encontraron videos para {split}! Creando dataset vacío.")
        # Create an empty dataset with the correct signature
        dummy_frames = np.zeros((1, MAX_FRAMES, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
        dummy_labels = np.array([0], dtype=np.int32)
        return tf.data.Dataset.from_tensor_slices((dummy_frames, dummy_labels)).batch(BATCH_SIZE)

    def generator():
        for i, (path, lbl) in enumerate(zip(video_paths, labels)):
            try:
                frames = load_video(path)
                yield frames, lbl
            except Exception as e:
                print(f"Error en video {i+1}/{len(video_paths)} ({path}): {e}")
                # Yield zeros on error
                yield np.zeros((MAX_FRAMES, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32), lbl

    output_signature = (
        tf.TensorSpec(shape=(MAX_FRAMES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    
    ds = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    
    if split == 'train' and total_videos > 1:
        ds = ds.shuffle(buffer_size=min(100, total_videos))
        
    ds = ds.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)
    return ds

In [4]:
# Crear y validar datasets
train_ds = make_dataset(ROOT_DIR, 'train')
val_ds   = make_dataset(ROOT_DIR, 'validation')
test_ds  = make_dataset(ROOT_DIR, 'test')

Procesando carpeta: ../../DAiSEE/DataSet/Aug/0/train
  → 989 videos encontrados para clase 0
Procesando carpeta: ../../DAiSEE/DataSet/Aug/1/train
  → 1908 videos encontrados para clase 1
Procesando carpeta: ../../DAiSEE/DataSet/Aug/2/train
  → 1050 videos encontrados para clase 2
Procesando carpeta: ../../DAiSEE/DataSet/Aug/3/train
  → 1050 videos encontrados para clase 3
Total de videos para train: 4997
Procesando carpeta: ../../DAiSEE/DataSet/Aug/0/validation
  → 10 videos encontrados para clase 0
Procesando carpeta: ../../DAiSEE/DataSet/Aug/1/validation
  → 68 videos encontrados para clase 1
Procesando carpeta: ../../DAiSEE/DataSet/Aug/2/validation
  → 225 videos encontrados para clase 2
Procesando carpeta: ../../DAiSEE/DataSet/Aug/3/validation
  → 225 videos encontrados para clase 3
Total de videos para validation: 528
Procesando carpeta: ../../DAiSEE/DataSet/Aug/0/test
  → 8 videos encontrados para clase 0
Procesando carpeta: ../../DAiSEE/DataSet/Aug/1/test
  → 69 videos encontrad

I0000 00:00:1747024562.295160   46536 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4080 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5


In [5]:
# Improved validate_dataset function with better error handling
def validate_dataset(ds, name):
    print(f"Validando dataset '{name}'...")
    
    # First check if dataset is empty
    try:
        # Check if iterator works at all
        iterator = iter(ds)
        
        try:
            # Try to get the first batch with timeout
            batch = next(iterator)
            print(f"  ✅ Dataset '{name}' es válido - forma del batch: {batch[0].shape}")
            print(f"     Etiquetas: {batch[1].numpy()}")
            return True
        except StopIteration:
            print(f"  ⚠️ Dataset '{name}' está vacío (no contiene datos)")
            return False
        except tf.errors.OutOfRangeError:
            print(f"  ⚠️ Dataset '{name}' está vacío (fuera de rango)")
            return False
        except Exception as e:
            print(f"  ❌ Error al obtener primer batch: {e}")
            print(f"  Tipo de error: {type(e).__name__}")
            return False
    except Exception as e:
        print(f"  ❌ Error al crear iterador: {e}")
        print(f"  Tipo de error: {type(e).__name__}")
        return False

# Execute validation with detailed feedback
print("VALIDANDO DATASETS")
print("-----------------")
valid_train = validate_dataset(train_ds, "Training")
valid_val = validate_dataset(val_ds, "Validation") 
valid_test = validate_dataset(test_ds, "Test")
print("-----------------")

# Check dataset access permissions and files
def check_dataset_path(root_dir):
    print(f"\nComprobando directorio de datos: {root_dir}")
    
    if not os.path.exists(root_dir):
        print(f"❌ ERROR: Directorio no existe: {root_dir}")
        return False
    
    print(f"✅ Directorio existe: {root_dir}")
    
    # Check for class directories
    for cls in CLASSES:
        cls_dir = os.path.join(root_dir, str(cls))
        if not os.path.exists(cls_dir):
            print(f"❌ ERROR: Directorio de clase {cls} no existe: {cls_dir}")
            continue
            
        print(f"✅ Directorio de clase {cls} existe")
        
        # Check split directories
        for split in SPLITS:
            split_dir = os.path.join(cls_dir, split)
            if not os.path.exists(split_dir):
                print(f"❌ ERROR: Directorio de split {split} no existe: {split_dir}")
                continue
                
            # Count video files
            video_files = [f for f in os.listdir(split_dir) 
                          if f.lower().endswith(('.mp4', '.avi'))]
            print(f"   - {split}: {len(video_files)} archivos de vídeo")
            
            # Check file sizes
            if video_files:
                sample_file = os.path.join(split_dir, video_files[0])
                size_mb = os.path.getsize(sample_file) / (1024 * 1024)
                print(f"     Ejemplo: {video_files[0]} ({size_mb:.2f} MB)")
    
    return True

# Check dataset structure
check_dataset_path(ROOT_DIR)

VALIDANDO DATASETS
-----------------
Validando dataset 'Training'...
  ✅ Dataset 'Training' es válido - forma del batch: (4, 15, 100, 100, 3)
     Etiquetas: [0 0 0 0]
Validando dataset 'Validation'...
  ✅ Dataset 'Validation' es válido - forma del batch: (4, 15, 100, 100, 3)
     Etiquetas: [0 0 0 0]
Validando dataset 'Test'...
  ✅ Dataset 'Test' es válido - forma del batch: (4, 15, 100, 100, 3)
     Etiquetas: [0 0 0 0]
-----------------

Comprobando directorio de datos: ../../DAiSEE/DataSet/Aug
✅ Directorio existe: ../../DAiSEE/DataSet/Aug
✅ Directorio de clase 0 existe
   - train: 989 archivos de vídeo
     Ejemplo: aug0008-3100822066.avi (10.35 MB)
   - validation: 10 archivos de vídeo
     Ejemplo: 2056010134.avi (0.86 MB)
   - test: 8 archivos de vídeo
     Ejemplo: 4100302064.avi (1.07 MB)
✅ Directorio de clase 1 existe
   - train: 1908 archivos de vídeo
     Ejemplo: aug0004-3100822011.avi (0.39 MB)
   - validation: 68 archivos de vídeo
     Ejemplo: 4000181015.avi (1.26 MB)
 

True

In [6]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, Model
import tensorflow as tf
# Disable XLA completely
# tf.config.optimizer.set_jit(False)
print("\nCreando modelo con Transformer...")

# Capa de extracción de características 
# base_cnn = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg')
base_cnn = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(IMG_SIZE, IMG_SIZE, 3), alpha=0.75)
base_cnn.trainable = False

# Arquitectura con Transformer en lugar de LSTM/GRU
video_input = layers.Input(shape=(MAX_FRAMES, IMG_SIZE, IMG_SIZE, 3))
x = layers.TimeDistributed(base_cnn)(video_input)

# Reducir dimensionalidad antes del transformer
x = layers.TimeDistributed(layers.Dense(128, activation='relu'))(x)

# Añadir codificación posicional (importante para Transformer)
pos_encoding = tf.range(start=0, limit=MAX_FRAMES, delta=1)
pos_encoding = tf.expand_dims(pos_encoding, -1)
pos_encoding = tf.cast(pos_encoding, tf.float32)
pos_encoding = tf.tile(pos_encoding, [1, 128])  # Mismo tamaño que la dimensión de características
# Añadir posición a características
x = x + tf.expand_dims(pos_encoding, 0)

# Capa de Transformer (Multi-Head Attention)
attn_output = layers.MultiHeadAttention(
    num_heads=4, key_dim=16
)(x, x)
x = layers.LayerNormalization(epsilon=1e-6)(x + attn_output)

# Feed-forward network
ffn = layers.Dense(512, activation='relu')(x)
ffn = layers.Dense(128)(ffn)
x = layers.LayerNormalization(epsilon=1e-6)(x + ffn)

# Pooling de la secuencia completa
x = layers.GlobalAveragePooling1D()(x)

# Capa final de clasificación
output = layers.Dense(len(CLASSES), activation='softmax')(x)

model = Model(video_input, output)
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])
model.summary()
print("✅ Modelo creado correctamente")
print("\nEntrenando modelo...")

try:
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=5,
        verbose=1,
    )
    print("✅ Entrenamiento completado")
except Exception as e:
    print(f"❌ Error durante entrenamiento: {e}")
    print(f"Tipo de error: {type(e).__name__}")


Creando modelo con Transformer...


  base_cnn = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(IMG_SIZE, IMG_SIZE, 3), alpha=0.75)


✅ Modelo creado correctamente

Entrenando modelo...
Epoch 1/5


I0000 00:00:1747024629.048080   46653 service.cc:152] XLA service 0x7f0a74052e30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747024629.048152   46653 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti, Compute Capability 7.5
2025-05-11 23:37:11.143179: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1747024644.509027   46653 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1747024660.299202   46653 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


   1249/Unknown [1m207s[0m 94ms/step - accuracy: 0.9533 - loss: 0.1310

2025-05-11 23:39:38.311438: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-05-11 23:39:38.311520: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 13447698645864955378
2025-05-11 23:39:38.311549: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_4]]


KeyboardInterrupt: 