# Notebook 04: Entrenamiento y Comparación de Modelos

Este notebook entrena y compara múltiples arquitecturas de CNN para reconocimiento de señas ASL:

**Modelos a entrenar:**
1. SimpleCNN (Baseline)
2. ImprovedCNN (Con BatchNormalization)
3. DeepCNN (Arquitectura más profunda con conexiones residuales)
4. EfficientNetB0 (Transfer Learning)
5. MobileNetV2 (Transfer Learning - Ligero)
6. ResNet50 (Transfer Learning - Residual)

## 1. Configuración del Entorno

In [None]:
# Detectar si estamos en Colab
import sys

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Ejecutando en Google Colab")
    print("Verificando GPU disponible...")
    !nvidia-smi
else:
    print("Ejecutando en entorno local")

### 1.1. Instalación de Dependencias (Solo en Colab)

In [None]:
if IN_COLAB:
    print("Instalando dependencias...")
    !pip install -q kaggle tensorflow opencv-python scikit-learn seaborn plotly
    print("Dependencias instaladas correctamente")

### 1.2. Configuración de Rutas y Estructura

In [None]:
import os
from pathlib import Path

if IN_COLAB:
    # On Colab, keep the project under /content
    BASE_DIR = Path('/content/asl_project')
    BASE_DIR.mkdir(exist_ok=True)
elif 'BASE_DIR' not in globals():
    # Locally, the project root is the parent of the starting working
    # directory. Resolve it only once per kernel session: this cell chdir()s
    # into BASE_DIR, so recomputing Path.cwd().parent on a re-run would climb
    # one directory higher every time the cell is executed.
    BASE_DIR = Path.cwd().parent

os.chdir(BASE_DIR)

print(f"Directorio base: {BASE_DIR}")

# Project directory layout (created below if missing)
DIRS = {
    'data_raw': BASE_DIR / 'data' / 'raw',
    'data_processed': BASE_DIR / 'data' / 'processed',
    'models': BASE_DIR / 'models',
    'results': BASE_DIR / 'results',
    'results_figures': BASE_DIR / 'results' / 'figures',
    'results_reports': BASE_DIR / 'results' / 'reports',
    'src': BASE_DIR / 'src',
    'src_models': BASE_DIR / 'src' / 'models',
    'src_utils': BASE_DIR / 'src' / 'utils',
}

for dir_name, dir_path in DIRS.items():
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"✓ {dir_name}: {dir_path}")

# Make src importable; skip the insert when the entry is already present so
# re-running the cell does not pile up duplicates in sys.path
src_path = str(DIRS['src'])
if src_path not in sys.path:
    sys.path.insert(0, src_path)

### 1.3. Configuración de Kaggle (si es necesario descargar datos)

In [None]:
if IN_COLAB:
    from google.colab import files
    import json

    # The training CSV is used as the marker that the raw dataset is present
    train_csv_path = DIRS['data_raw'] / 'sign_mnist_train.csv'

    if train_csv_path.exists():
        print("Datos encontrados")
    else:
        print("Datos no encontrados. Subir kaggle.json para descargar...")
        print("O ejecutar notebook 01_descarga_datos.ipynb primero")

        # Optional: upload kaggle.json
        # uploaded = files.upload()
        # if 'kaggle.json' in uploaded:
        #     !mkdir -p ~/.kaggle
        #     !mv kaggle.json ~/.kaggle/
        #     !chmod 600 ~/.kaggle/kaggle.json

## 2. Importar Librerías

In [None]:
# Configuración de warnings
import warnings
warnings.filterwarnings('ignore')

# Core
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Métricas y utilidades
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, 
    confusion_matrix,
    accuracy_score,
    precision_recall_fscore_support
)

# Utilidades
import json
import pickle
from datetime import datetime
from tqdm.auto import tqdm
import time

# Configuración de visualización
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Configuración de TensorFlow
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Verificar GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"\n✓ GPU disponible: {len(gpus)} dispositivo(s)")
    for gpu in gpus:
        print(f"  - {gpu}")
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("\n⚠ No se detectó GPU, usando CPU")

# Semillas para reproducibilidad
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("\n✓ Librerías importadas correctamente")

## 3. Cargar Datos Preprocesados

Cargamos los datos que fueron preprocesados en el notebook 03.

In [None]:
# Files expected from the preprocessing notebook
processed_files = [
    'X_train.npy',
    'X_val.npy', 
    'X_test.npy',
    'y_train.npy',
    'y_val.npy',
    'y_test.npy'
]

all_files_exist = all(
    (DIRS['data_processed'] / f).exists() 
    for f in processed_files
)

if not all_files_exist:
    print("⚠ Archivos preprocesados no encontrados.")
    print("Por favor ejecuta el notebook 03_preprocesamiento_avanzado.ipynb primero.")
    print("\nCargando datos raw como alternativa...")
    
    # Fallback: load the raw CSVs
    train_df = pd.read_csv(DIRS['data_raw'] / 'sign_mnist_train.csv')
    test_df = pd.read_csv(DIRS['data_raw'] / 'sign_mnist_test.csv')
    
    # Split features and labels
    X_train_raw = train_df.drop('label', axis=1).values
    y_train_raw = train_df['label'].values
    X_test = test_df.drop('label', axis=1).values
    y_test = test_df['label'].values
    
    # Reshape to 28x28 single-channel images and scale pixels to [0, 1]
    X_train_raw = X_train_raw.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    
    # Stratified train/validation split
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_raw, y_train_raw, 
        test_size=0.15, 
        random_state=SEED,
        stratify=y_train_raw
    )
    
    print(f"✓ Datos cargados desde raw")
else:
    # Load the preprocessed arrays
    print("Cargando datos preprocesados...")
    X_train = np.load(DIRS['data_processed'] / 'X_train.npy')
    X_val = np.load(DIRS['data_processed'] / 'X_val.npy')
    X_test = np.load(DIRS['data_processed'] / 'X_test.npy')
    y_train = np.load(DIRS['data_processed'] / 'y_train.npy')
    y_val = np.load(DIRS['data_processed'] / 'y_val.npy')
    y_test = np.load(DIRS['data_processed'] / 'y_test.npy')
    print("✓ Datos preprocesados cargados")

# Data summary
print(f"\nForma de los datos:")
print(f"  X_train: {X_train.shape}")
print(f"  X_val: {X_val.shape}")
print(f"  X_test: {X_test.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  y_val: {y_val.shape}")
print(f"  y_test: {y_test.shape}")

# Distinct label ids seen in training (sorted); kept so that predicted class
# indices can be mapped back to the original ids
CLASS_LABELS = np.unique(y_train)
NUM_CLASSES = len(CLASS_LABELS)
print(f"\nNúmero de clases: {NUM_CLASSES}")

# to_categorical requires every label to lie in [0, NUM_CLASSES). If the label
# ids have gaps, the largest id is >= NUM_CLASSES and one-hot encoding fails,
# so the labels are remapped to contiguous indices 0..NUM_CLASSES-1 first.
# For labels that are already contiguous the remap is the identity.
for split_name, split_labels in (('y_val', y_val), ('y_test', y_test)):
    if not np.isin(split_labels, CLASS_LABELS).all():
        raise ValueError(
            f"{split_name} contiene etiquetas que no aparecen en y_train"
        )

if not np.array_equal(CLASS_LABELS, np.arange(NUM_CLASSES)):
    print("  Etiquetas no contiguas: se reindexan a 0..N-1 (ver CLASS_LABELS)")

y_train = np.searchsorted(CLASS_LABELS, y_train)
y_val = np.searchsorted(CLASS_LABELS, y_val)
y_test = np.searchsorted(CLASS_LABELS, y_test)

# One-hot encode the (contiguous) labels
y_train_cat = to_categorical(y_train, NUM_CLASSES)
y_val_cat = to_categorical(y_val, NUM_CLASSES)
y_test_cat = to_categorical(y_test, NUM_CLASSES)

print("\n✓ Datos listos para entrenamiento")

## 4. Definir Arquitecturas de Modelos

Definimos todas las arquitecturas que vamos a entrenar y comparar.

In [None]:
def create_simple_cnn(input_shape=(28, 28, 1), num_classes=24):
    """
    Build the baseline CNN: two convolutional blocks and a dense head.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras Sequential model named 'SimpleCNN'.
    """
    model = models.Sequential([
        # Explicit input layer; passing input_shape= to the first Conv2D is
        # deprecated in recent Keras versions
        layers.Input(shape=input_shape),

        # Block 1
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Block 2
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Dense head
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ], name='SimpleCNN')
    
    return model


def create_improved_cnn(input_shape=(28, 28, 1), num_classes=24):
    """
    Build a CNN with three convolutional blocks and BatchNormalization.

    Each convolution is followed by batch normalization and then ReLU.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras Sequential model named 'ImprovedCNN'.
    """
    model = models.Sequential([
        # Explicit input layer; passing input_shape= to the first Conv2D is
        # deprecated in recent Keras versions
        layers.Input(shape=input_shape),

        # Block 1
        layers.Conv2D(32, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(32, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Block 2
        layers.Conv2D(64, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(64, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Block 3
        layers.Conv2D(128, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Dense head
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ], name='ImprovedCNN')
    
    return model


def create_deep_cnn(input_shape=(28, 28, 1), num_classes=24):
    """
    Build a deeper CNN with two residual (skip-connection) stages.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras functional model named 'DeepCNN'.
    """
    def conv_bn(tensor, filters, activate=True):
        # 3x3 same-padded convolution + batch norm, optionally followed by ReLU
        tensor = layers.Conv2D(filters, (3, 3), padding='same')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        if activate:
            tensor = layers.Activation('relu')(tensor)
        return tensor

    def residual_stage(tensor, filters):
        # Two conv/bn units whose output is added to the stage input before
        # the final ReLU, then 2x2 max pooling and dropout
        shortcut = tensor
        tensor = conv_bn(tensor, filters)
        tensor = conv_bn(tensor, filters, activate=False)
        tensor = layers.Add()([tensor, shortcut])
        tensor = layers.Activation('relu')(tensor)
        tensor = layers.MaxPooling2D((2, 2))(tensor)
        return layers.Dropout(0.25)(tensor)

    inputs = layers.Input(shape=input_shape)

    # Stem, then residual stage 1 at 32 filters
    features = conv_bn(inputs, 32)
    features = residual_stage(features, 32)

    # Widen to 64 filters so the skip connection shapes match, then stage 2
    features = conv_bn(features, 64)
    features = residual_stage(features, 64)

    # Classification head
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dense(256, activation='relu')(features)
    features = layers.BatchNormalization()(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(num_classes, activation='softmax')(features)

    return models.Model(inputs=inputs, outputs=outputs, name='DeepCNN')


def create_transfer_learning_model(base_model_name='EfficientNetB0', 
                                   input_shape=(28, 28, 1), 
                                   num_classes=24,
                                   trainable=False,
                                   target_size=32):
    """
    Build a classifier on top of an ImageNet-pretrained backbone.

    The input goes through a learned 1x1 convolution that produces 3 channels,
    is resized to target_size x target_size, and is then fed to the backbone
    (global-average pooled) followed by a small dense head.

    Args:
        base_model_name: 'EfficientNetB0', 'MobileNetV2' or 'ResNet50'.
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.
        trainable: Whether the backbone weights are trainable.
        target_size: Side length (pixels) the images are resized to before
            entering the backbone. Defaults to 32, the value previously
            hard-coded for every backbone.

    Returns:
        An uncompiled Keras functional model named 'TL_<base_model_name>'.

    Raises:
        ValueError: If base_model_name is not one of the supported names.
    """
    # NOTE(review): no backbone-specific preprocess_input is applied here;
    # confirm the pixel scaling of the data suits each backbone.
    base_model_builders = {
        'EfficientNetB0': tf.keras.applications.EfficientNetB0,
        'MobileNetV2': tf.keras.applications.MobileNetV2,
        'ResNet50': tf.keras.applications.ResNet50,
    }
    # Validate the name up front, before any layer is created
    if base_model_name not in base_model_builders:
        raise ValueError(f"Modelo base no reconocido: {base_model_name}")

    inputs = layers.Input(shape=input_shape)
    
    # Map the input channels to the 3 channels the pretrained backbones expect
    x = layers.Conv2D(3, (1, 1), padding='same')(inputs)
    
    # Resize to the backbone input resolution
    x = layers.Resizing(target_size, target_size)(x)
    
    # Load the pretrained backbone without its classification top
    base_model = base_model_builders[base_model_name](
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
        pooling='avg'
    )
    base_model.trainable = trainable
    
    # training=False: the backbone is always called in inference mode,
    # independently of the `trainable` flag
    x = base_model(x, training=False)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs, name=f'TL_{base_model_name}')
    return model


print("✓ Arquitecturas definidas")

## 5. Configuración de Entrenamiento

Definimos callbacks, data augmentation y parámetros de entrenamiento.

In [None]:
# Training hyperparameters
BATCH_SIZE = 128
EPOCHS = 50
LEARNING_RATE = 0.001

# Echo the configuration, one indented line per setting
print("Configuración de entrenamiento:")
for _label, _value in (
    ("Batch size", BATCH_SIZE),
    ("Épocas", EPOCHS),
    ("Learning rate", LEARNING_RATE),
):
    print(f"  {_label}: {_value}")

In [None]:
def create_callbacks(model_name):
    """
    Build the list of Keras callbacks used while training one model.

    Args:
        model_name: Model name; the checkpoint is written to
            DIRS['models'] / '<model_name>.keras'.

    Returns:
        List with, in order: ModelCheckpoint, EarlyStopping, ReduceLROnPlateau.
    """
    checkpoint_file = str(DIRS['models'] / f"{model_name}.keras")

    # Keep on disk only the epoch with the highest validation accuracy
    checkpoint = callbacks.ModelCheckpoint(
        checkpoint_file,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1,
    )

    # Stop after 10 epochs without val_loss improvement and restore the best weights
    early_stopping = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1,
    )

    # Halve the learning rate after 5 stagnant epochs, never below 1e-7
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
    )

    return [checkpoint, early_stopping, reduce_lr]


# Data augmentation

# Pixel value range used when clipping after the brightness change.
# Assumes images are scaled to [0, 1], as the raw-CSV loading path does —
# TODO confirm for the preprocessed .npy files.
PIXEL_MIN, PIXEL_MAX = 0.0, 1.0


def _random_brightness(image):
    """Scale one image by a random factor in [0.5, 1.5] and clip to the pixel range.

    Replaces ImageDataGenerator's brightness_range, which routes the image
    through an 8-bit PIL conversion and therefore does not preserve float
    images scaled to [0, 1].
    """
    factor = np.random.uniform(0.5, 1.5)
    return np.clip(image * factor, PIXEL_MIN, PIXEL_MAX)


train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.15,
    shear_range=0.1,
    fill_mode='nearest',
    # Brightness jitter (important for webcam input), done in float space
    preprocessing_function=_random_brightness
)

# Validation generator (no augmentation)
val_datagen = ImageDataGenerator()

print("\n✓ Callbacks y data augmentation configurados")

## 6. Función de Entrenamiento

In [None]:
def train_model(model, model_name, X_train, y_train, X_val, y_val, 
                epochs=EPOCHS, batch_size=BATCH_SIZE, use_augmentation=True,
                learning_rate=None):
    """
    Compile and train a model, then save its training history as JSON.

    Args:
        model: Keras model to train (it is compiled here).
        model_name: Name used for the checkpoint and the history file.
        X_train, y_train: Training images and one-hot labels.
        X_val, y_val: Validation images and one-hot labels.
        epochs: Maximum number of epochs.
        batch_size: Batch size.
        use_augmentation: Feed training data through train_datagen when True.
        learning_rate: Adam learning rate; None means the module-level
            LEARNING_RATE.

    Returns:
        The Keras History object returned by model.fit.
    """
    if learning_rate is None:
        learning_rate = LEARNING_RATE

    print(f"\n{'='*60}")
    print(f"Entrenando: {model_name}")
    print(f"{'='*60}")
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    print(f"\nParámetros totales: {model.count_params():,}")
    
    callbacks_list = create_callbacks(model_name)
    
    # Arguments shared by both training modes
    fit_kwargs = dict(
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks_list,
        verbose=1
    )

    start_time = time.time()
    
    if use_augmentation:
        train_generator = train_datagen.flow(
            X_train, y_train, 
            batch_size=batch_size
        )
        # steps_per_epoch is left to Keras (taken from the generator length):
        # len(X_train) // batch_size is 0 for datasets smaller than one batch
        # and otherwise drops the final partial batch
        history = model.fit(train_generator, **fit_kwargs)
    else:
        history = model.fit(
            X_train, y_train,
            batch_size=batch_size,
            **fit_kwargs
        )
    
    training_time = time.time() - start_time
    
    print(f"\n✓ Entrenamiento completado en {training_time/60:.2f} minutos")
    
    # Store the history as real JSON (metric -> list of floats) rather than
    # the str() of the dict; float() also converts NumPy scalars, which the
    # json module cannot serialize
    history_dict = {
        metric: [float(value) for value in values]
        for metric, values in history.history.items()
    }
    history_path = DIRS['results_reports'] / f'{model_name}_history.json'
    with open(history_path, 'w', encoding='utf-8') as f:
        json.dump({
            'history': history_dict,
            'training_time': training_time,
            'epochs_trained': len(history.history['loss'])
        }, f)
    
    return history


print("✓ Función de entrenamiento definida")

## 7. Entrenar Modelos

Entrenamos todos los modelos definidos.

### 7.1. SimpleCNN (Baseline)

In [None]:
# Build and train SimpleCNN. The model is sized from the loaded data rather
# than from the factory defaults, so it always matches X_train / y_train_cat.
model_simple = create_simple_cnn(
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES
)
model_simple.summary()

history_simple = train_model(
    model_simple, 
    'SimpleCNN',
    X_train, y_train_cat,
    X_val, y_val_cat
)

### 7.2. ImprovedCNN

In [None]:
# Build and train ImprovedCNN. The model is sized from the loaded data rather
# than from the factory defaults, so it always matches X_train / y_train_cat.
model_improved = create_improved_cnn(
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES
)
model_improved.summary()

history_improved = train_model(
    model_improved,
    'ImprovedCNN',
    X_train, y_train_cat,
    X_val, y_val_cat
)

### 7.3. DeepCNN

In [None]:
# Build and train DeepCNN. The model is sized from the loaded data rather
# than from the factory defaults, so it always matches X_train / y_train_cat.
model_deep = create_deep_cnn(
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES
)
model_deep.summary()

history_deep = train_model(
    model_deep,
    'DeepCNN',
    X_train, y_train_cat,
    X_val, y_val_cat
)

### 7.4. Transfer Learning - EfficientNetB0

In [None]:
# Build and train the EfficientNetB0 transfer-learning model (frozen backbone).
# Input shape and class count come from the loaded data, not the defaults.
model_efficient = create_transfer_learning_model(
    base_model_name='EfficientNetB0',
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES,
    trainable=False
)
model_efficient.summary()

history_efficient = train_model(
    model_efficient,
    'TL_EfficientNetB0',
    X_train, y_train_cat,
    X_val, y_val_cat,
    use_augmentation=True
)

### 7.5. Transfer Learning - MobileNetV2

In [None]:
# Build and train the MobileNetV2 transfer-learning model (frozen backbone).
# Input shape and class count come from the loaded data, not the defaults.
model_mobile = create_transfer_learning_model(
    base_model_name='MobileNetV2',
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES,
    trainable=False
)
model_mobile.summary()

history_mobile = train_model(
    model_mobile,
    'TL_MobileNetV2',
    X_train, y_train_cat,
    X_val, y_val_cat,
    use_augmentation=True
)

### 7.6. Transfer Learning - ResNet50

In [None]:
# Build and train the ResNet50 transfer-learning model (frozen backbone).
# Input shape and class count come from the loaded data, not the defaults.
model_resnet = create_transfer_learning_model(
    base_model_name='ResNet50',
    input_shape=X_train.shape[1:],
    num_classes=NUM_CLASSES,
    trainable=False
)
model_resnet.summary()

history_resnet = train_model(
    model_resnet,
    'TL_ResNet50',
    X_train, y_train_cat,
    X_val, y_val_cat,
    use_augmentation=True
)

## 8. Evaluación en Conjunto de Test

In [None]:
# Names of the trained models (each saved as <name>.keras)
model_names = [
    'SimpleCNN',
    'ImprovedCNN', 
    'DeepCNN',
    'TL_EfficientNetB0',
    'TL_MobileNetV2',
    'TL_ResNet50'
]

# Ground-truth class indices derived from the one-hot targets, so they are
# guaranteed to live in the same index space as the argmax of the predictions
y_true_classes = np.argmax(y_test_cat, axis=1)

# Evaluate every model whose checkpoint exists
results = []

for model_name in model_names:
    model_path = DIRS['models'] / f"{model_name}.keras"
    
    if not model_path.exists():
        print(f"⚠ Modelo no encontrado: {model_name}")
        continue
    
    print(f"\nEvaluando {model_name}...")
    
    model = keras.models.load_model(str(model_path))
    
    test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=0)
    
    # Predicted class = index of the highest softmax output
    y_pred = model.predict(X_test, verbose=0)
    y_pred_classes = np.argmax(y_pred, axis=1)
    
    # zero_division=0: a class that is never predicted scores 0 instead of
    # triggering an undefined-metric warning
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true_classes, y_pred_classes, average='weighted', zero_division=0
    )
    
    results.append({
        'Modelo': model_name,
        'Test Loss': test_loss,
        'Test Accuracy': test_acc,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1
    })
    
    print(f"  Test Accuracy: {test_acc:.4f}")
    print(f"  F1-Score: {f1:.4f}")

# Explicit columns keep the frame well-formed (and sortable) even when no
# model was found and `results` is empty
result_columns = [
    'Modelo', 'Test Loss', 'Test Accuracy', 'Precision', 'Recall', 'F1-Score'
]
results_df = pd.DataFrame(results, columns=result_columns)
results_df = results_df.sort_values('Test Accuracy', ascending=False)

if results_df.empty:
    print("\n⚠ No se evaluó ningún modelo")

print("\n" + "="*60)
print("RESULTADOS FINALES")
print("="*60)
print(results_df.to_string(index=False))

# Save the comparison table
results_df.to_csv(DIRS['results_reports'] / 'model_comparison.csv', index=False)
print(f"\n✓ Resultados guardados en {DIRS['results_reports'] / 'model_comparison.csv'}")

## 9. Visualización de Resultados

In [None]:
# Grouped bar chart comparing the models on the test set
fig = go.Figure()

# One bar series per metric; the trace name equals the results_df column
metric_colors = (
    ('Test Accuracy', 'lightblue'),
    ('F1-Score', 'lightcoral'),
)
for metric, color in metric_colors:
    fig.add_trace(go.Bar(
        name=metric,
        x=results_df['Modelo'],
        y=results_df[metric],
        marker_color=color
    ))

layout_options = dict(
    title='Comparación de Modelos - Test Set',
    xaxis_title='Modelo',
    yaxis_title='Score',
    barmode='group',
    template='plotly_white',
    height=500
)
fig.update_layout(**layout_options)

fig.show()

# Save the chart as a standalone HTML file
comparison_html = DIRS['results_figures'] / 'model_comparison.html'
fig.write_html(str(comparison_html))