# Notebook 05: Fine-Tuning y Optimización del Mejor Modelo

Este notebook realiza fine-tuning del mejor modelo identificado en el notebook anterior.

**Objetivos:**
1. Cargar el mejor modelo entrenado
2. Realizar fine-tuning descongelando capas del modelo base
3. Experimentar con diferentes configuraciones de fine-tuning
4. Optimizar hiperparámetros (learning rate, batch size, etc.)
5. Evaluar mejoras en rendimiento
6. Generar modelo final optimizado

## 1. Configuración del Entorno

In [None]:
# Detect whether the notebook is running inside Google Colab
import sys

# The check looks for the `google.colab` package among the already-loaded
# modules (presumably only the Colab runtime preloads it — TODO confirm).
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Ejecutando en Google Colab")
    print("Verificando GPU disponible...")
    # IPython shell escape: show the NVIDIA driver / GPU status
    !nvidia-smi
else:
    print("Ejecutando en entorno local")

### 1.1. Instalación de Dependencias (Solo en Colab)

In [None]:
# Install the third-party packages used by this notebook; skipped outside Colab
if IN_COLAB:
    print("Instalando dependencias...")
    # IPython shell escape; -q keeps pip's output short
    !pip install -q tensorflow opencv-python scikit-learn seaborn plotly optuna
    print("Dependencias instaladas correctamente")

### 1.2. Configuración de Rutas

In [None]:
import os
from pathlib import Path

# Resolve the project root and make it the working directory.
if IN_COLAB:
    BASE_DIR = Path('/content/asl_project')
    BASE_DIR.mkdir(parents=True, exist_ok=True)
else:
    # This cell changes the working directory, so a naive `cwd().parent`
    # would climb one directory higher every time the cell is re-run.
    # If an earlier run already moved us into BASE_DIR, keep that value.
    _previous_base = globals().get('BASE_DIR')
    if _previous_base is not None and Path.cwd() == Path(_previous_base):
        BASE_DIR = Path(_previous_base)
    else:
        BASE_DIR = Path.cwd().parent

os.chdir(BASE_DIR)

print(f"Directorio base: {BASE_DIR}")

# Directory layout used by the rest of the notebook (inputs and outputs)
DIRS = {
    'data_processed': BASE_DIR / 'data' / 'processed',
    'models': BASE_DIR / 'models',
    'results': BASE_DIR / 'results',
    'results_figures': BASE_DIR / 'results' / 'figures',
    'results_reports': BASE_DIR / 'results' / 'reports',
}

# Create any directory that does not exist yet and report where each one lives
for dir_name, dir_path in DIRS.items():
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"✓ {dir_name}: {dir_path}")

## 2. Importar Librerías

In [None]:
# Configuración de warnings
import warnings
warnings.filterwarnings('ignore')

# Core
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Métricas
from sklearn.metrics import (
    classification_report, 
    confusion_matrix,
    accuracy_score,
    precision_recall_fscore_support
)

# Utilidades
import json
import time
from datetime import datetime
from tqdm.auto import tqdm

# Configuración de visualización
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Configuración de TensorFlow
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Verificar GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"\n✓ GPU disponible: {len(gpus)} dispositivo(s)")
    for gpu in gpus:
        print(f"  - {gpu}")
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("\n⚠ No se detectó GPU, usando CPU")

# Semillas
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("\n✓ Librerías importadas correctamente")

## 3. Cargar Datos

In [None]:
# Load the preprocessed train / validation / test arrays from disk
print("Cargando datos...")

_processed_dir = DIRS['data_processed']
_split_names = ('train', 'val', 'test')

X_train, X_val, X_test = (
    np.load(_processed_dir / f'X_{split}.npy') for split in _split_names
)
y_train, y_val, y_test = (
    np.load(_processed_dir / f'y_{split}.npy') for split in _split_names
)

print(f"\nForma de los datos:")
print(f"  X_train: {X_train.shape}")
print(f"  X_val: {X_val.shape}")
print(f"  X_test: {X_test.shape}")

# The class count is taken from the distinct labels seen in the training set
NUM_CLASSES = len(np.unique(y_train))
print(f"\nNúmero de clases: {NUM_CLASSES}")

# One-hot encode the labels of every split
y_train_cat, y_val_cat, y_test_cat = (
    to_categorical(split_labels, NUM_CLASSES)
    for split_labels in (y_train, y_val, y_test)
)

print("\n✓ Datos cargados correctamente")

## 4. Identificar y Cargar Mejor Modelo

Identificamos el mejor modelo del entrenamiento anterior.

In [None]:
# Pick the best model from the comparison table written by the training notebook
comparison_path = DIRS['results_reports'] / 'model_comparison.csv'

if not comparison_path.exists():
    print("⚠ No se encontraron resultados previos.")
    print("Ejecuta el notebook 04_entrenamiento_modelos.ipynb primero.")

    # Manual fallback; edit this name as needed
    best_model_name = 'TL_EfficientNetB0'
    print(f"\nUsando modelo: {best_model_name}")
else:
    results_df = pd.read_csv(comparison_path).sort_values(
        'Test Accuracy', ascending=False
    )

    print("Resultados de modelos entrenados:")
    print(results_df.to_string(index=False))

    # After sorting, the first row is the model with the highest test accuracy
    top_row = results_df.iloc[0]
    best_model_name = top_row['Modelo']
    best_accuracy = top_row['Test Accuracy']

    print(f"\nMejor modelo identificado: {best_model_name}")
    print(f"Accuracy en test: {best_accuracy:.4f}")

In [None]:
# Load the best model from disk
model_path = DIRS['models'] / f"{best_model_name}.keras"

# Fail right here when the file is missing: every later cell needs
# `base_model`, and continuing would either hit a NameError further down or
# silently reuse a stale model left over from an earlier run of this kernel.
if not model_path.exists():
    raise FileNotFoundError(
        f"Error: Modelo no encontrado en {model_path}. "
        "Verifica que el modelo haya sido entrenado."
    )

print(f"Cargando modelo desde: {model_path}")
base_model = keras.models.load_model(str(model_path))

print("\n✓ Modelo cargado correctamente")
print(f"\nResumen del modelo:")
base_model.summary()

## 5. Estrategia de Fine-Tuning

Vamos a realizar fine-tuning descongelando gradualmente capas del modelo.

### 5.1. Explorar Estructura del Modelo

In [None]:
# List every top-level layer with its index and trainable flag
model_layers = base_model.layers

print("Capas del modelo:")
print(f"Total de capas: {len(model_layers)}\n")

for position, current_layer in enumerate(model_layers):
    print(f"{position}: {current_layer.name} - Trainable: {current_layer.trainable}")

### 5.2. Función de Fine-Tuning

In [None]:
def fine_tune_model(base_model,
                    unfreeze_from_layer=None,
                    learning_rate=1e-5,
                    epochs=20,
                    model_name='fine_tuned',
                    batch_size=128):
    """
    Fine-tune a copy of an already trained model.

    The function reads the notebook globals ``X_train``, ``y_train_cat``,
    ``X_val``, ``y_val_cat`` and ``DIRS``. The checkpoint with the best
    validation accuracy is written to ``DIRS['models'] / f"{model_name}.keras"``.

    Args:
        base_model: Model to fine-tune. It is cloned, so it is not modified.
        unfreeze_from_layer: Index of the first top-level layer to make
            trainable; layers before it are frozen. ``None`` unfreezes all.
        learning_rate: Initial Adam learning rate (should be small).
        epochs: Maximum number of epochs.
        model_name: File stem used for the saved checkpoint.
        batch_size: Number of samples per augmented training batch.

    Returns:
        model: The fine-tuned model.
        history: The Keras training history.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    print(f"\n{'='*60}")
    print(f"Fine-Tuning: {model_name}")
    print(f"{'='*60}")

    # Work on a clone so the original model keeps its weights and flags
    model = keras.models.clone_model(base_model)
    model.set_weights(base_model.get_weights())

    # Unfreeze layers.
    # NOTE(review): BatchNormalization layers are unfrozen like any other
    # layer; the Keras fine-tuning guide suggests keeping them in inference
    # mode — confirm whether that matters for the models used here.
    unfreeze_all = unfreeze_from_layer is None
    for i, layer in enumerate(model.layers):
        layer.trainable = unfreeze_all or i >= unfreeze_from_layer

    if unfreeze_all:
        print("Todas las capas descongeladas")
    else:
        print(f"Descongeladas capas desde {unfreeze_from_layer}")

    # Count parameters. np.prod of an empty shape is the float 1.0, so each
    # term is cast to int to keep the totals (and their printout) integral.
    trainable_params = sum(
        int(np.prod(w.shape)) for w in model.trainable_weights
    )
    non_trainable_params = sum(
        int(np.prod(w.shape)) for w in model.non_trainable_weights
    )

    print(f"\nParámetros entrenables: {trainable_params:,}")
    print(f"Parámetros no entrenables: {non_trainable_params:,}")

    # Recompile so the new trainable flags and the low learning rate apply
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    print(f"\nLearning rate: {learning_rate}")
    print(f"Épocas: {epochs}")

    # Callbacks: checkpoint on best val_accuracy, stop early and lower the
    # learning rate when val_loss stops improving
    save_path = DIRS['models'] / f"{model_name}.keras"

    callbacks_list = [
        callbacks.ModelCheckpoint(
            str(save_path),
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        ),
        callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        ),
        callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=3,
            min_lr=1e-8,
            verbose=1
        ),
    ]

    # Data augmentation applied to the training images only
    train_datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.15,
        brightness_range=[0.5, 1.5],
        shear_range=0.1,
        fill_mode='nearest'
    )

    train_generator = train_datagen.flow(
        X_train, y_train_cat,
        batch_size=batch_size
    )

    # At least one step per epoch, even when the dataset is smaller than a batch
    steps_per_epoch = max(1, len(X_train) // batch_size)

    # Train
    start_time = time.time()

    history = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=(X_val, y_val_cat),
        callbacks=callbacks_list,
        verbose=1
    )

    training_time = time.time() - start_time
    print(f"\n✓ Fine-tuning completado en {training_time/60:.2f} minutos")

    return model, history


print("✓ Función de fine-tuning definida")

## 6. Experimentos de Fine-Tuning

Realizamos varios experimentos con diferentes configuraciones.

### 6.1. Experimento 1: Fine-Tuning Solo Últimas Capas

In [None]:
# Experiment 1: unfreeze only the last 20 top-level layers
total_layers = len(base_model.layers)
unfreeze_from = total_layers - 20 if total_layers > 20 else 0

ft_last20_config = {
    'unfreeze_from_layer': unfreeze_from,
    'learning_rate': 1e-5,
    'epochs': 20,
    'model_name': f'{best_model_name}_ft_last20',
}

model_ft1, history_ft1 = fine_tune_model(base_model, **ft_last20_config)

### 6.2. Experimento 2: Fine-Tuning Todas las Capas

In [None]:
# Experiment 2: unfreeze every layer and use a very low learning rate
ft_all_config = {
    'unfreeze_from_layer': None,
    'learning_rate': 5e-6,
    'epochs': 20,
    'model_name': f'{best_model_name}_ft_all',
}

model_ft2, history_ft2 = fine_tune_model(base_model, **ft_all_config)

### 6.3. Experimento 3: Fine-Tuning con Learning Rate Schedule

In [None]:
# Fine-tuning with a stepped learning rate
def lr_schedule(epoch, lr):
    """
    Return the learning rate for a given epoch.

    The rate depends only on the epoch index; the current rate `lr` is
    accepted but not used: 1e-5 for epochs 0-4, 5e-6 for epochs 5-9 and
    1e-6 from epoch 10 onwards.
    """
    stages = ((5, 1e-5), (10, 5e-6))
    for first_epoch_after, rate in stages:
        if epoch < first_epoch_after:
            return rate
    return 1e-6

# Build the model: clone the baseline and copy its weights so the original
# model is left untouched
model_ft3 = keras.models.clone_model(base_model)
model_ft3.set_weights(base_model.get_weights())

# Unfreeze the last 30 top-level layers and freeze everything before them
# (total_layers comes from the Experiment 1 cell, which must have run first)
unfreeze_from = max(0, total_layers - 30)
for i, layer in enumerate(model_ft3.layers):
    layer.trainable = i >= unfreeze_from

# Compile; the initial rate equals the value lr_schedule returns for epoch 0
model_ft3.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks: checkpoint on best val_accuracy, apply lr_schedule each epoch,
# and stop early when val_loss has not improved for 7 epochs
save_path = DIRS['models'] / f"{best_model_name}_ft_schedule.keras"

callbacks_list = [
    callbacks.ModelCheckpoint(
        str(save_path),
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    ),
    callbacks.LearningRateScheduler(lr_schedule, verbose=1),
    callbacks.EarlyStopping(
        monitor='val_loss',
        patience=7,
        restore_best_weights=True,
        verbose=1
    )
]

# Data augmentation (same settings as inside fine_tune_model)
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.15,
    brightness_range=[0.5, 1.5],
    shear_range=0.1,
    fill_mode='nearest'
)

train_generator = train_datagen.flow(
    X_train, y_train_cat,
    batch_size=128
)

# Train; steps_per_epoch uses floor division, so a trailing partial batch
# is not counted
print("\nExperimento 3: Fine-tuning con learning rate schedule")
history_ft3 = model_ft3.fit(
    train_generator,
    steps_per_epoch=len(X_train) // 128,
    epochs=20,
    validation_data=(X_val, y_val_cat),
    callbacks=callbacks_list,
    verbose=1
)

## 7. Evaluación de Modelos Fine-Tuned

In [None]:
# Evaluate the original model to get the baseline every candidate is compared to
print("Evaluando modelo original (baseline)...")
baseline_loss, baseline_acc = base_model.evaluate(X_test, y_test_cat, verbose=0)

# (checkpoint file stem, human-readable description) of each fine-tuned model
ft_models = [
    (f"{best_model_name}_ft_last20", "FT últimas 20 capas"),
    (f"{best_model_name}_ft_all", "FT todas las capas"),
    (f"{best_model_name}_ft_schedule", "FT con LR schedule")
]

# The baseline is always the first row of the results
baseline_row = {
    'Modelo': 'Baseline (sin FT)',
    'Descripción': 'Modelo original',
    'Test Loss': baseline_loss,
    'Test Accuracy': baseline_acc,
    'Mejora': 0.0
}
results = [baseline_row]

# Evaluate every checkpoint that exists on disk; warn about missing ones
for checkpoint_name, label in ft_models:
    checkpoint_path = DIRS['models'] / f"{checkpoint_name}.keras"

    if checkpoint_path.exists():
        print(f"\nEvaluando {label}...")

        candidate = keras.models.load_model(str(checkpoint_path))
        candidate_loss, candidate_acc = candidate.evaluate(
            X_test, y_test_cat, verbose=0
        )

        # Difference to the baseline accuracy, scaled by 100
        gain = (candidate_acc - baseline_acc) * 100

        results.append({
            'Modelo': checkpoint_name,
            'Descripción': label,
            'Test Loss': candidate_loss,
            'Test Accuracy': candidate_acc,
            'Mejora': gain
        })

        print(f"  Test Accuracy: {candidate_acc:.4f}")
        print(f"  Mejora: {gain:+.2f}%")
    else:
        print(f"⚠ Modelo no encontrado: {checkpoint_name}")

# Collect everything in a table sorted from best to worst test accuracy
results_df = pd.DataFrame(results).sort_values('Test Accuracy', ascending=False)

separator = "=" * 80
print("\n" + separator)
print("RESULTADOS DE FINE-TUNING")
print(separator)
print(results_df.to_string(index=False))

# Persist the comparison table
results_df.to_csv(DIRS['results_reports'] / 'fine_tuning_comparison.csv', index=False)
print(f"\n✓ Resultados guardados")

## 8. Visualización de Resultados

In [None]:
# Bar chart comparing the test accuracy of the baseline and fine-tuned models
fig = go.Figure()

accuracies = results_df['Test Accuracy']

fig.add_trace(go.Bar(
    name='Test Accuracy',
    x=results_df['Descripción'],
    y=accuracies,
    marker_color='lightblue',
    text=[f"{acc:.4f}" for acc in accuracies],
    textposition='outside'
))

# Zoom the y-axis in at 0.85 only when every bar clears that value; otherwise
# start a little below the lowest accuracy so that no bar is cut off.
lowest_acc = accuracies.min()
y_axis_floor = 0.85 if lowest_acc > 0.85 else max(0.0, lowest_acc - 0.05)

fig.update_layout(
    title='Comparación: Baseline vs Fine-Tuning',
    xaxis_title='Configuración',
    yaxis_title='Test Accuracy',
    yaxis_range=[y_axis_floor, accuracies.max() * 1.05],
    template='plotly_white',
    height=500
)

fig.show()

# Save the interactive chart
fig.write_html(str(DIRS['results_figures'] / 'fine_tuning_comparison.html'))

In [None]:
# Bar chart of the accuracy change of each configuration versus the baseline
fig = go.Figure()

# results_df is sorted by accuracy, so the baseline is not necessarily the
# first row: pick each colour from the row's own model name and value.
bar_colors = [
    'gray' if row_model == 'Baseline (sin FT)'
    else ('green' if row_gain > 0 else 'red')
    for row_model, row_gain in zip(results_df['Modelo'], results_df['Mejora'])
]

fig.add_trace(go.Bar(
    x=results_df['Descripción'],
    y=results_df['Mejora'],
    marker_color=bar_colors,
    text=[f"{imp:+.2f}%" for imp in results_df['Mejora']],
    textposition='outside'
))

fig.update_layout(
    title='Mejora Porcentual con Fine-Tuning',
    xaxis_title='Configuración',
    yaxis_title='Mejora (%)',
    template='plotly_white',
    height=500
)

# Reference line at zero (no change relative to the baseline)
fig.add_hline(y=0, line_dash="dash", line_color="black", opacity=0.5)

fig.show()

# Save the interactive chart
fig.write_html(str(DIRS['results_figures'] / 'fine_tuning_improvement.html'))

## 9. Análisis Detallado del Mejor Modelo

In [None]:
# The table is sorted by test accuracy, so the first row is the best model
best_ft_model = results_df.iloc[0]

print(f"\n{'='*60}")
print("MEJOR MODELO")
print(f"{'='*60}")
print(f"Modelo: {best_ft_model['Descripción']}")
print(f"Test Accuracy: {best_ft_model['Test Accuracy']:.4f}")
print(f"Mejora: {best_ft_model['Mejora']:+.2f}%")

# Reuse the in-memory baseline when it won; otherwise load the checkpoint
if best_ft_model['Modelo'] == 'Baseline (sin FT)':
    final_model = base_model
else:
    model_path = DIRS['models'] / f"{best_ft_model['Modelo']}.keras"
    final_model = keras.models.load_model(str(model_path))

# Predicted class = index of the highest score per sample
y_pred = final_model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nReporte de Clasificación:\n")

# Label index -> letter (this mapping has 24 entries and skips J and Z)
label_to_letter = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
    9: 'K', 10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R',
    17: 'S', 18: 'T', 19: 'U', 20: 'V', 21: 'W', 22: 'X', 23: 'Y'
}

# Fall back to the numeric index for any class beyond the mapping instead of
# failing with a KeyError.
class_indices = list(range(NUM_CLASSES))
target_names = [label_to_letter.get(i, str(i)) for i in class_indices]

# Passing `labels` keeps the report aligned with target_names even when a
# class is missing from both y_test and the predictions.
report = classification_report(
    y_test,
    y_pred_classes,
    labels=class_indices,
    target_names=target_names
)
print(report)

# Save the report; the text contains non-ASCII characters, so the encoding is
# fixed rather than left to the platform default.
with open(DIRS['results_reports'] / 'final_model_report.txt', 'w', encoding='utf-8') as f:
    f.write(f"Mejor Modelo: {best_ft_model['Descripción']}\n")
    f.write(f"Test Accuracy: {best_ft_model['Test Accuracy']:.4f}\n\n")
    f.write(report)

## 10. Matriz de Confusión

In [None]:
# Confusion matrix over all NUM_CLASSES classes, so its shape always matches
# target_names even if some class never occurs in y_test or the predictions
cm = confusion_matrix(y_test, y_pred_classes, labels=np.arange(NUM_CLASSES))

# Row-normalize (each row sums to 1). Rows of classes without test samples
# stay at zero instead of becoming NaN through a division by zero.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0
)

# Heatmap: rows are true classes, columns are predicted classes
fig = px.imshow(
    cm_normalized,
    labels=dict(x="Predicción", y="Real", color="Proporción"),
    x=target_names,
    y=target_names,
    color_continuous_scale='Blues',
    title='Matriz de Confusión (Normalizada)'
)

fig.update_layout(
    width=800,
    height=800,
    xaxis_title='Predicción',
    yaxis_title='Real'
)

fig.show()

# Save the interactive chart
fig.write_html(str(DIRS['results_figures'] / 'confusion_matrix_final.html'))

## 11. Guardar Modelo Final

Guardamos el mejor modelo con un nombre descriptivo.

In [None]:
# Save the selected model under a fixed, descriptive name
final_model_name = 'asl_model_final_optimized'
final_model_path = DIRS['models'] / f"{final_model_name}.keras"

final_model.save(str(final_model_path))

print(f"\n✓ Modelo final guardado en: {final_model_path}")
print(f"Tamaño del modelo: {final_model_path.stat().st_size / (1024*1024):.2f} MB")

# Metadata stored next to the model. The input shape is read from the data
# the model was evaluated on rather than hard-coded, so it stays correct
# whichever architecture was selected.
metadata = {
    'model_name': final_model_name,
    'base_architecture': best_model_name,
    'fine_tuning_config': best_ft_model['Descripción'],
    'test_accuracy': float(best_ft_model['Test Accuracy']),
    'improvement': float(best_ft_model['Mejora']),
    'num_classes': NUM_CLASSES,
    'input_shape': [int(dim) for dim in X_test.shape[1:]],
    'training_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'target_names': target_names
}

metadata_path = DIRS['models'] / f"{final_model_name}_metadata.json"
with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"\n✓ Metadata guardada en: {metadata_path}")