# 🔐 Autoencoder - Detección de Anomalías y Reducción Dimensional
## Red Neuronal para Comprimir y Detectar Patrones Atípicos

---

### Objetivos:
1. Construir Autoencoder para reducción dimensional
2. Detectar anomalías mediante reconstruction error
3. Visualizar espacio latente con t-SNE
4. Identificar delitos atípicos

**Autor**: Adonnay Bazaldua  
**Fecha**: Noviembre 2025

In [None]:
import os
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.manifold import TSNE
from tensorflow import keras
from tensorflow.keras import layers, models
# Suppress every warning category so the cell outputs stay uncluttered.
warnings.filterwarnings('ignore')

# One shared seed value for both the NumPy and the TensorFlow generators.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Report which TensorFlow build this session is running on.
print(f"‚úÖ TensorFlow version: {tf.__version__}")

In [None]:
# Load the preprocessed train and test feature arrays (train first, then test).
X_train, X_test = (
    np.load(npy_path)
    for npy_path in ('processed_data/X_train.npy', 'processed_data/X_test.npy')
)

# NOTE: pickle.load can execute arbitrary code; only use it on files produced
# by this project's own preprocessing step.
with open('processed_data/metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

# The size of axis 1 of the training array is used as the model's input width.
input_dim = X_train.shape[1]
print(f"‚úÖ Datos cargados: {X_train.shape}, features: {input_dim}")

## Construcción del Autoencoder

### Arquitectura:
```
Encoder:  Input(features) → Dense(128) → Dense(64) → Dense(32) [latent]
Decoder:  Dense(32) → Dense(64) → Dense(128) → Dense(features)
```

In [None]:
# Width of the bottleneck layer; the encoder output and the decoder input
# must agree on it, so it is named once here.
latent_dim = 32

# Encoder: input_dim -> 128 -> 64 -> latent_dim, all ReLU.
# Layers are created in the same order as before so Keras auto-generated
# layer names are unchanged.
encoder_input = layers.Input(shape=(input_dim,))
encoded = encoder_input
for units in (128, 64):
    encoded = layers.Dense(units, activation='relu')(encoded)
latent = layers.Dense(latent_dim, activation='relu', name='latent_space')(encoded)

encoder = models.Model(inputs=encoder_input, outputs=latent, name='encoder')

# Decoder: latent_dim -> 64 -> 128 -> input_dim, with a linear output layer.
decoder_input = layers.Input(shape=(latent_dim,))
decoded = decoder_input
for units in (64, 128):
    decoded = layers.Dense(units, activation='relu')(decoded)
decoder_output = layers.Dense(input_dim, activation='linear')(decoded)

decoder = models.Model(inputs=decoder_input, outputs=decoder_output, name='decoder')

# Full autoencoder: chain the two sub-models behind a fresh input tensor.
autoencoder_input = layers.Input(shape=(input_dim,))
encoded_out = encoder(autoencoder_input)
decoded_out = decoder(encoded_out)

autoencoder = models.Model(
    inputs=autoencoder_input,
    outputs=decoded_out,
    name='autoencoder',
)

# Optimise mean squared error; mean absolute error is tracked as a metric.
autoencoder.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Print a summary of each half, then the total parameter count.
for heading, submodel in (("\nüìä Encoder:", encoder), ("\nüìä Decoder:", decoder)):
    print(heading)
    submodel.summary()
total_params = autoencoder.count_params()
print(f"\nüìä Autoencoder completo: {total_params:,} par√°metros")

In [None]:
# Train the autoencoder to reproduce its own input (the target equals the input).
print("üöÄ Entrenando Autoencoder...\n")

# Stop after 10 epochs without improvement of the callback's default monitored
# quantity, and roll the model back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

history = autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=50,
    batch_size=256,
    validation_split=0.2,  # 20% of X_train is held out for validation
    callbacks=[early_stopping],
    verbose=1,
)

print("\n‚úÖ Entrenamiento completado")

## Detección de Anomalías

Delitos con alto reconstruction error son "anómalos"

In [None]:
# Reconstruct the test set with the trained autoencoder.
X_test_reconstructed = autoencoder.predict(X_test, verbose=0)

# Per-sample reconstruction error: squared difference averaged over axis 1.
reconstruction_error = np.mean(np.square(X_test - X_test_reconstructed), axis=1)

# Threshold: mean + 2 * standard deviation of the errors.
# NOTE(review): these statistics come from the test set itself, so the flagged
# fraction is tied to the test distribution; consider deriving the threshold
# from training/validation errors instead. Confirm the intended methodology.
threshold = reconstruction_error.mean() + 2 * reconstruction_error.std()

# Boolean mask of the samples whose error exceeds the threshold.
anomalies = reconstruction_error > threshold
num_anomalies = anomalies.sum()

print(f"\nüîç Detecci√≥n de Anomal√≠as:")
print(f"   Threshold: {threshold:.6f}")
print(f"   Anomal√≠as detectadas: {num_anomalies} ({num_anomalies/len(X_test)*100:.2f}%)")

# Plot the error distribution (left) and the per-sample errors (right).
plt.figure(figsize=(12, 5))

# Left panel: histogram of the errors with the threshold as a vertical line.
plt.subplot(1, 2, 1)
plt.hist(reconstruction_error, bins=50, edgecolor='black', alpha=0.7)
plt.axvline(threshold, color='red', linestyle='--', linewidth=2, label=f'Threshold: {threshold:.4f}')
plt.xlabel('Reconstruction Error (MSE)')
plt.ylabel('Frecuencia')
plt.title('Distribuci√≥n de Reconstruction Error')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: error per sample index, coloured by the anomaly mask.
plt.subplot(1, 2, 2)
plt.scatter(range(len(reconstruction_error)), reconstruction_error, 
            c=anomalies, cmap='coolwarm', s=10, alpha=0.6)
plt.axhline(threshold, color='red', linestyle='--', linewidth=2)
plt.xlabel('√çndice de Muestra')
plt.ylabel('Reconstruction Error')
plt.title('Anomal√≠as Detectadas (Rojo)')
plt.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists so
# the figure is not lost to a FileNotFoundError.
os.makedirs('models', exist_ok=True)
plt.savefig('models/autoencoder_anomalies.png', dpi=300)
plt.show()

## Visualización del Espacio Latente con t-SNE

In [None]:
# Make sure the output directory exists before the slow t-SNE fit, so the
# final savefig cannot fail on a missing folder.
os.makedirs('models', exist_ok=True)

# Number of leading test rows to embed (a subset keeps t-SNE fast). Both the
# features and the labels are sliced with this value so they stay aligned.
tsne_samples = 5000

# Load the labels up front so a missing file fails before the expensive fit.
# NOTE(review): assumes y_test holds one numeric class label per X_test row,
# in the same order; confirm against the preprocessing notebook.
y_test = np.load('processed_data/y_test.npy')

# Encode the sampled rows into the latent space.
latent_representation = encoder.predict(X_test[:tsne_samples], verbose=0)

# Project the latent vectors down to 2 dimensions with t-SNE.
print("üîÑ Aplicando t-SNE (esto puede tardar)...")
tsne = TSNE(n_components=2, random_state=42, perplexity=30)
latent_2d = tsne.fit_transform(latent_representation)

# Scatter the 2-D embedding, coloured by the label array.
plt.figure(figsize=(12, 10))
scatter = plt.scatter(latent_2d[:, 0], latent_2d[:, 1], 
                      c=y_test[:tsne_samples], cmap='tab20', s=10, alpha=0.6)
plt.colorbar(scatter, label='Clase de Delito')
plt.xlabel('t-SNE Dimensi√≥n 1')
plt.ylabel('t-SNE Dimensi√≥n 2')
plt.title('Visualizaci√≥n del Espacio Latente (32D ‚Üí 2D)', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('models/autoencoder_tsne.png', dpi=300)
plt.show()

print("‚úÖ Visualizaci√≥n completada")

In [None]:
# Persist the three Keras models and a small summary of the anomaly results.
# Create the output directory first so the saves do not fail when it is missing.
os.makedirs('models', exist_ok=True)

autoencoder.save('models/autoencoder_final.keras')
encoder.save('models/encoder.keras')
decoder.save('models/decoder.keras')

# Save the summary. Every numeric value is cast to a builtin type;
# 'threshold' was previously stored as a NumPy scalar, unlike its siblings.
with open('models/autoencoder_results.pkl', 'wb') as f:
    pickle.dump({
        'threshold': float(threshold),
        'num_anomalies': int(num_anomalies),
        'anomaly_percentage': float(num_anomalies/len(X_test)*100),
        'mean_reconstruction_error': float(reconstruction_error.mean()),
        'num_parameters': autoencoder.count_params()
    }, f)

print("\n‚úÖ Autoencoder guardado")
print("\nüìù Pr√≥ximo paso: An√°lisis comparativo de todos los modelos")
print("   ‚Üí Notebook: 07_Comparative_Analysis.ipynb")