# 🧠 Multi-Layer Perceptron (MLP) - Clasificación de Delitos
## Red Neuronal Feedforward para Clasificación Multi-clase

---

### Objetivos:
1. Construir y entrenar una red MLP para clasificar tipos de delitos
2. Optimizar hiperparámetros (capas, neuronas, regularización)
3. Evaluar rendimiento con métricas detalladas
4. Visualizar matriz de confusión y curvas de aprendizaje
5. Interpretar resultados y features importantes

**Autor**: Adonnay Bazaldua  
**Fecha**: Noviembre 2025

## 1. Importación de Librerías

In [None]:
# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical

# Procesamiento de datos
import numpy as np
import pandas as pd
import pickle

# M√©tricas y evaluaci√≥n
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score, roc_curve

# Visualizaci√≥n
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Utils
import os
import warnings
# Suppress every Python warning from here on so the cell output stays compact.
warnings.filterwarnings('ignore')

# Seed NumPy first and TensorFlow second with the same fixed value (42).
for apply_seed in (np.random.seed, tf.random.set_seed):
    apply_seed(42)

# Report the TensorFlow build and the GPU devices TensorFlow can list.
print(f"‚úÖ TensorFlow version: {tf.__version__}")
print(f"‚úÖ GPU disponible: {tf.config.list_physical_devices('GPU')}")

## 2. Carga de Datos Preprocesados

In [None]:
print("üìÇ Cargando datos preprocesados...\n")


def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE(review): unpickling runs arbitrary code; only load trusted artifacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Feature matrices first, then label vectors, for the train/val/test splits.
X_train, X_val, X_test, y_train, y_val, y_test = (
    np.load(npy_path)
    for npy_path in (
        'processed_data/X_train.npy',
        'processed_data/X_val.npy',
        'processed_data/X_test.npy',
        'processed_data/y_train.npy',
        'processed_data/y_val.npy',
        'processed_data/y_test.npy',
    )
)

# Pickled artifacts written by the preprocessing step: a metadata mapping,
# the fitted target encoder and the list of feature names.
metadata = _read_pickle('processed_data/metadata.pkl')
target_encoder = _read_pickle('processed_data/target_encoder.pkl')
feature_names = _read_pickle('processed_data/feature_names.pkl')

# Problem dimensions are taken from the metadata rather than from the arrays.
num_classes, num_features = metadata['num_classes'], metadata['num_features']

# Summary of what was loaded.
print(f"‚úÖ Datos cargados:")
print(f"   Train: {X_train.shape}")
print(f"   Val:   {X_val.shape}")
print(f"   Test:  {X_test.shape}")
print(f"\n   Features: {num_features}")
print(f"   Clases: {num_classes}")
print(f"\n   Clases disponibles (primeras 10):")
first_ten_classes = target_encoder.classes_[:10]
for position, class_label in enumerate(first_ten_classes, start=1):
    print(f"   {position:2d}. {class_label}")

## 3. Codificación One-Hot de Etiquetas

Para clasificación multi-clase con softmax, necesitamos one-hot encoding.

In [None]:
print("üè∑Ô∏è  Codificando etiquetas con One-Hot...\n")

# One-hot encode the integer labels of each split; num_classes is passed
# explicitly so all three matrices share the same width even if a split
# happens to miss some class.
y_train_cat, y_val_cat, y_test_cat = (
    to_categorical(split_labels, num_classes=num_classes)
    for split_labels in (y_train, y_val, y_test)
)

# Show the shape of every label array before and after encoding.
print(f"‚úÖ One-Hot Encoding aplicado:")
print(f"   y_train: {y_train.shape} ‚Üí {y_train_cat.shape}")
print(f"   y_val:   {y_val.shape} ‚Üí {y_val_cat.shape}")
print(f"   y_test:  {y_test.shape} ‚Üí {y_test_cat.shape}")

## 4. Construcción de la Arquitectura MLP

### Arquitectura propuesta:
```
Input(num_features) 
  → Dense(256) → ReLU → BatchNorm → Dropout(0.3)
  → Dense(128) → ReLU → BatchNorm → Dropout(0.3)
  → Dense(64)  → ReLU → BatchNorm → Dropout(0.2)
  → Dense(num_classes) → Softmax
```

In [None]:
def create_mlp_model(input_dim, num_classes, learning_rate=0.001,
                     hidden_units=(256, 128, 64),
                     dropout_rates=(0.3, 0.3, 0.2),
                     l2_strength=0.001):
    """
    Build and compile an MLP for multi-class classification.

    Each hidden block is Dense(ReLU, L2 kernel regularizer) followed by
    BatchNormalization and Dropout; the output layer is a softmax over
    ``num_classes`` units. With the default arguments the network is
    256 -> 128 -> 64 with dropout 0.3 / 0.3 / 0.2 and an L2 factor of 0.001.

    Args:
        input_dim: Number of input features.
        num_classes: Number of classes to predict.
        learning_rate: Learning rate passed to the Adam optimizer.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: Dropout rate applied after each hidden block; must
            have the same length as ``hidden_units``.
        l2_strength: L2 factor applied to the kernel of every hidden Dense
            layer.

    Returns:
        The compiled Keras Sequential model, tracking accuracy and top-3
        categorical accuracy.

    Raises:
        ValueError: If ``hidden_units`` and ``dropout_rates`` differ in length.
    """
    # Materialize first so generators are accepted and len() is well-defined.
    hidden_units = tuple(hidden_units)
    dropout_rates = tuple(dropout_rates)
    if len(hidden_units) != len(dropout_rates):
        raise ValueError(
            "hidden_units and dropout_rates must have the same length, got "
            f"{len(hidden_units)} and {len(dropout_rates)}"
        )

    # Input layer
    stack = [layers.Input(shape=(input_dim,))]

    # Hidden blocks: Dense(ReLU + L2) -> BatchNorm -> Dropout
    for units, rate in zip(hidden_units, dropout_rates):
        stack.extend([
            layers.Dense(units, activation='relu',
                         kernel_regularizer=keras.regularizers.l2(l2_strength)),
            layers.BatchNormalization(),
            layers.Dropout(rate),
        ])

    # Output layer: one softmax unit per class
    stack.append(layers.Dense(num_classes, activation='softmax'))

    model = models.Sequential(stack, name='MLP_Crime_Classifier')

    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy', keras.metrics.TopKCategoricalAccuracy(k=3, name='top_3_accuracy')]
    )

    return model

# Instantiate the classifier with the default learning rate.
print("üèóÔ∏è  Construyendo modelo MLP...\n")
mlp_model = create_mlp_model(num_features, num_classes)

# Print the layer-by-layer summary.
mlp_model.summary()

# Keep the parameter count around; later cells report and store it.
total_params = mlp_model.count_params()
print(f"\nüìä Total de par√°metros: {total_params:,}")

## 5. Configuración de Callbacks

In [None]:
# The checkpoint callback writes into 'models/'; create it if missing.
os.makedirs('models', exist_ok=True)

# Both loss-driven callbacks watch the validation loss and log verbosely.
watch_val_loss = {'monitor': 'val_loss', 'verbose': 1}

# Stop after 10 epochs without val_loss improvement, restoring the best weights.
early_stopping = callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
    **watch_val_loss,
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-7.
reduce_lr = callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    **watch_val_loss,
)

# Keep on disk only the model with the best validation accuracy so far.
# Note this callback tracks val_accuracy while the two above track val_loss.
model_checkpoint = callbacks.ModelCheckpoint(
    filepath='models/mlp_best.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# TensorBoard logs under logs/mlp, with histograms every epoch.
tensorboard_callback = callbacks.TensorBoard(log_dir='logs/mlp', histogram_freq=1)

callbacks_list = [early_stopping, reduce_lr, model_checkpoint, tensorboard_callback]

for summary_line in (
    "‚úÖ Callbacks configurados:",
    "   - EarlyStopping (patience=10)",
    "   - ReduceLROnPlateau (patience=5, factor=0.5)",
    "   - ModelCheckpoint (mejor val_accuracy)",
    "   - TensorBoard (logs/mlp)",
):
    print(summary_line)

## 6. Entrenamiento del Modelo

In [None]:
print("üöÄ Iniciando entrenamiento...\n")

# Mini-batch size and the maximum number of epochs; the EarlyStopping
# callback may end training before EPOCHS is reached.
BATCH_SIZE, EPOCHS = 128, 100

# Fit on the training split, validating on the validation split each epoch.
# The returned History object feeds the plots and the saved results.
history = mlp_model.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks_list,
    verbose=1,
)

print("\n‚úÖ Entrenamiento completado!")

## 7. Visualización de Curvas de Aprendizaje

In [None]:
def _plot_train_val(ax, train_series, val_series, train_label, val_label, y_label, title):
    """Draw one metric's training and validation curves on ``ax``."""
    ax.plot(train_series, label=train_label, linewidth=2)
    ax.plot(val_series, label=val_label, linewidth=2)
    ax.set_xlabel('√âpoca')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


# 2x2 grid: loss, accuracy, top-3 accuracy and learning rate (or a fallback).
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
epoch_logs = history.history

# Loss
_plot_train_val(axes[0, 0], epoch_logs['loss'], epoch_logs['val_loss'],
                'Train Loss', 'Val Loss',
                'Loss (Categorical Crossentropy)', 'Curva de P√©rdida')

# Accuracy
_plot_train_val(axes[0, 1], epoch_logs['accuracy'], epoch_logs['val_accuracy'],
                'Train Accuracy', 'Val Accuracy',
                'Accuracy', 'Curva de Precisi√≥n')

# Top-3 accuracy
_plot_train_val(axes[1, 0], epoch_logs['top_3_accuracy'], epoch_logs['val_top_3_accuracy'],
                'Train Top-3', 'Val Top-3',
                'Top-3 Accuracy', 'Top-3 Categorical Accuracy')

# Learning-rate schedule. ReduceLROnPlateau records the rate under 'lr' in
# older tf.keras releases and under 'learning_rate' in Keras 3, so accept
# either key instead of silently falling through to the fallback panel.
lr_key = next((key for key in ('lr', 'learning_rate') if key in epoch_logs), None)
if lr_key is not None:
    axes[1, 1].plot(epoch_logs[lr_key], linewidth=2, color='red')
    axes[1, 1].set_xlabel('√âpoca')
    axes[1, 1].set_ylabel('Learning Rate')
    axes[1, 1].set_title('Learning Rate Schedule')
    axes[1, 1].set_yscale('log')
    axes[1, 1].grid(True, alpha=0.3)
else:
    # No learning rate was logged: show the train/validation accuracy gap.
    epochs_range = range(1, len(epoch_logs['accuracy']) + 1)
    axes[1, 1].plot(epochs_range, epoch_logs['accuracy'], 'b-', label='Train', linewidth=2)
    axes[1, 1].plot(epochs_range, epoch_logs['val_accuracy'], 'r-', label='Val', linewidth=2)
    axes[1, 1].fill_between(epochs_range, epoch_logs['accuracy'],
                            epoch_logs['val_accuracy'], alpha=0.2)
    axes[1, 1].set_xlabel('√âpoca')
    axes[1, 1].set_ylabel('Accuracy')
    axes[1, 1].set_title('Overfitting Analysis')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('models/mlp_training_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Curvas de aprendizaje guardadas en 'models/mlp_training_curves.png'")

## 8. Evaluación en Conjunto de Prueba

In [None]:
print("üìä Evaluando modelo en conjunto de prueba...\n")

# evaluate() returns the loss followed by the compiled metrics, in the order
# given to compile(): accuracy, then top-3 accuracy.
test_metrics = mlp_model.evaluate(X_test, y_test_cat, verbose=0)
test_loss, test_accuracy, test_top3_accuracy = test_metrics

print(f"\nüéØ Resultados en Test Set:")
print(f"   Loss: {test_loss:.4f}")
print(f"   Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"   Top-3 Accuracy: {test_top3_accuracy:.4f} ({test_top3_accuracy*100:.2f}%)")

# Class probabilities per test sample, and the most probable class of each.
y_pred_proba = mlp_model.predict(X_test, verbose=0)
y_pred = y_pred_proba.argmax(axis=1)

print(f"\nüìã Reporte de Clasificaci√≥n (Top 10 clases m√°s frecuentes):\n")

# The ten most frequent classes in the test labels; argsort is ascending, so
# the last ten positions hold them, ordered from least to most frequent.
class_ids, class_counts = np.unique(y_test, return_counts=True)
ascending_by_count = np.argsort(class_counts)
top_10_indices = class_ids[ascending_by_count[-10:]]

# Keep only the samples whose TRUE label is one of those classes.
# Predictions may still point outside the top ten.
in_top10 = np.isin(y_test, top_10_indices)
y_test_top10, y_pred_top10 = y_test[in_top10], y_pred[in_top10]

# Readable names for the selected classes, in the same order as the indices.
top10_class_names = [target_encoder.classes_[class_id] for class_id in top_10_indices]
report_text = classification_report(
    y_test_top10,
    y_pred_top10,
    labels=top_10_indices,
    target_names=top10_class_names,
    digits=3,
)
print(report_text)

## 9. Matriz de Confusión (Top 10 Clases)

In [None]:
# Confusion matrix restricted to the ten most frequent classes.
cm = confusion_matrix(y_test_top10, y_pred_top10, labels=top_10_indices)

# Divide every row by its total so each cell is a fraction of the true class.
cm_normalized = cm.astype('float') / cm.sum(axis=1, keepdims=True)

# Tick labels are cut to 30 characters to keep the axes readable.
short_names = [class_name[:30] for class_name in top10_class_names]

fig, axes = plt.subplots(1, 2, figsize=(20, 8))

# Left panel: raw counts. Right panel: row-normalized values.
panels = (
    (cm, 'd', 'Blues', 'N√∫mero de muestras',
     'Matriz de Confusi√≥n (Valores Absolutos)\nTop 10 Clases'),
    (cm_normalized, '.2f', 'RdYlGn', 'Proporci√≥n',
     'Matriz de Confusi√≥n Normalizada (Recall)\nTop 10 Clases'),
)
for panel_ax, (matrix, cell_fmt, palette, bar_label, panel_title) in zip(axes, panels):
    sns.heatmap(matrix, annot=True, fmt=cell_fmt, cmap=palette,
                xticklabels=short_names,
                yticklabels=short_names,
                ax=panel_ax, cbar_kws={'label': bar_label})
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel('Predicci√≥n')
    panel_ax.set_ylabel('Real')
    panel_ax.tick_params(axis='x', rotation=45)
    panel_ax.tick_params(axis='y', rotation=0)

plt.tight_layout()
plt.savefig('models/mlp_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Matriz de confusi√≥n guardada en 'models/mlp_confusion_matrix.png'")

## 10. Análisis de Importancia de Features

In [None]:
print("üîç Analizando importancia de features...\n")

from matplotlib.patches import Patch

# Substrings used to tag a feature name as geographic or temporal.
GEO_TOKENS = ('latitud', 'longitud', 'densidad')
TIME_TOKENS = ('a√±o', 'mes', 'dia', 'hora', 'semana', 'trimestre', 'sin', 'cos')


def _bar_color(feature_name):
    """Return the bar color for a feature; geographic tokens are checked first."""
    if any(token in feature_name for token in GEO_TOKENS):
        return 'steelblue'
    if any(token in feature_name for token in TIME_TOKENS):
        return 'coral'
    return 'lightgreen'


# Kernel of the model's first layer, one row per input feature.
# NOTE(review): assumes layers[0] is the first Dense layer, giving a
# (num_features, 256) matrix; confirm for the installed Keras version.
first_layer_weights = mlp_model.layers[0].get_weights()[0]

# Importance heuristic: mean absolute weight leaving each input feature.
feature_importance = np.mean(np.abs(first_layer_weights), axis=1)

# Rank the features from most to least important.
importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': feature_importance}
).sort_values('Importance', ascending=False)

top_20_features = importance_df.head(20)
bar_positions = range(len(top_20_features))

# Horizontal bar chart of the top 20, most important at the top.
fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.barh(bar_positions, top_20_features['Importance'].values)

# Color every bar by the feature family inferred from its name.
colors = [_bar_color(feature_name) for feature_name in top_20_features['Feature']]
for rectangle, shade in zip(bars, colors):
    rectangle.set_color(shade)

ax.set_yticks(bar_positions)
ax.set_yticklabels(top_20_features['Feature'].values, fontsize=9)
ax.set_xlabel('Importancia (Magnitud Promedio de Pesos)', fontsize=10)
ax.set_title('Top 20 Features M√°s Importantes\n(MLP - Primera Capa)',
             fontsize=12, fontweight='bold')
ax.invert_yaxis()
ax.grid(True, alpha=0.3, axis='x')

# Legend mapping the three colors to their feature families.
legend_elements = [
    Patch(facecolor=face, label=family)
    for face, family in (
        ('steelblue', 'Geogr√°ficas'),
        ('coral', 'Temporales'),
        ('lightgreen', 'Categ√≥ricas'),
    )
]
ax.legend(handles=legend_elements, loc='lower right')

plt.tight_layout()
plt.savefig('models/mlp_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ An√°lisis de importancia guardado en 'models/mlp_feature_importance.png'")
print("\nüìä Top 10 Features m√°s importantes:")
leading_ten = top_20_features.head(10)
for feature_name, score in zip(leading_ten['Feature'], leading_ten['Importance']):
    print(f"   {feature_name:30s}: {score:.6f}")

## 11. Guardar Modelo Final y Resultados

In [None]:
print("üíæ Guardando modelo y resultados...\n")


def _write_pickle(payload, path):
    """Serialize ``payload`` with pickle into the file at ``path``."""
    with open(path, 'wb') as sink:
        pickle.dump(payload, sink)


# Full model as it stands after training.
mlp_model.save('models/mlp_classifier_final.keras')

# Per-epoch metric history (a plain dict, not the History object).
_write_pickle(history.history, 'models/mlp_history.pkl')

# Test-set predictions: class indices and the full probability matrix.
np.save('models/mlp_predictions_test.npy', y_pred)
np.save('models/mlp_probabilities_test.npy', y_pred_proba)

# Headline numbers plus the feature ranking, bundled in one dictionary.
results = dict(
    test_loss=test_loss,
    test_accuracy=test_accuracy,
    test_top3_accuracy=test_top3_accuracy,
    num_parameters=total_params,
    num_epochs_trained=len(history.history['loss']),
    best_val_accuracy=max(history.history['val_accuracy']),
    feature_importance=importance_df.to_dict(),
)
_write_pickle(results, 'models/mlp_results.pkl')

# List the artifacts; mlp_best.keras was written by the ModelCheckpoint
# callback during training, not by this cell.
for report_line in (
    "‚úÖ Archivos guardados:",
    "   - models/mlp_classifier_final.keras",
    "   - models/mlp_best.keras",
    "   - models/mlp_history.pkl",
    "   - models/mlp_predictions_test.npy",
    "   - models/mlp_probabilities_test.npy",
    "   - models/mlp_results.pkl",
):
    print(report_line)

## 12. Resumen Final

In [None]:
# Final report: architecture, data sizes, test performance and top features.
print("="*80)
print(" "*25 + "RESUMEN DEL MODELO MLP")
print("="*80)

# Architecture (static description of the default network) and parameter count.
print(f"\nüèóÔ∏è  ARQUITECTURA:")
print(f"   Tipo: Multi-Layer Perceptron (Feedforward)")
print(f"   Capas ocultas: 3 (256 ‚Üí 128 ‚Üí 64 neuronas)")
print(f"   Funci√≥n activaci√≥n: ReLU")
print(f"   Regularizaci√≥n: L2 + Dropout + BatchNormalization")
print(f"   Par√°metros totales: {total_params:,}")

# Dataset dimensions and split sizes.
print(f"\nüìä DATOS:")
print(f"   Features de entrada: {num_features}")
print(f"   Clases de salida: {num_classes}")
print(f"   Muestras entrenamiento: {len(X_train):,}")
print(f"   Muestras validaci√≥n: {len(X_val):,}")
print(f"   Muestras prueba: {len(X_test):,}")

# Test-set metrics plus the best validation accuracy seen during training.
print(f"\nüéØ RENDIMIENTO:")
print(f"   Test Accuracy: {test_accuracy*100:.2f}%")
print(f"   Test Top-3 Accuracy: {test_top3_accuracy*100:.2f}%")
print(f"   Test Loss: {test_loss:.4f}")
print(f"   Mejor Val Accuracy: {max(history.history['val_accuracy'])*100:.2f}%")
print(f"   √âpocas entrenadas: {len(history.history['loss'])}")

# importance_df is sorted by importance but keeps its original index labels,
# so the label yielded by iterrows() is the feature's original position, not
# its rank. Number the rows with enumerate to print 1, 2, 3.
print(f"\nüîù TOP 3 FEATURES M√ÅS IMPORTANTES:")
for rank, (_, row) in enumerate(importance_df.head(3).iterrows(), start=1):
    print(f"   {rank}. {row['Feature']}: {row['Importance']:.6f}")

print(f"\n‚úÖ MODELO MLP COMPLETADO Y GUARDADO")
print("\n" + "="*80)

print("\nüìù Pr√≥ximo paso: Implementar LSTM para series temporales")
print("   ‚Üí Notebook: 03_LSTM_TimeSeries.ipynb")