In [None]:
# Importar librerías
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Point, box
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings("ignore")

# Machine Learning
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GroupKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import (accuracy_score, classification_report, confusion_matrix,
                             mean_squared_error, r2_score, mean_absolute_error)
from sklearn.cluster import KMeans, DBSCAN

# Optional dependency: XGBoost.
# XGBOOST_AVAILABLE lets later cells skip the XGBoost comparison instead of
# failing when the package is not installed.
try:
    import xgboost as xgb
except ImportError:
    XGBOOST_AVAILABLE = False
else:
    XGBOOST_AVAILABLE = True
print("XGBoost disponible" if XGBOOST_AVAILABLE else "XGBoost no disponible")

# Optional dependency: SHAP (model interpretability), same pattern as above.
try:
    import shap
except ImportError:
    SHAP_AVAILABLE = False
else:
    SHAP_AVAILABLE = True
print("SHAP disponible" if SHAP_AVAILABLE else "SHAP no disponible")

# Global configuration. The style sheet is applied first because it resets
# rcParams; the figure-size override must come after it.
plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams.update({"figure.figsize": (12, 8)})
np.random.seed(42)

print("Librerías cargadas")

## 1. Cargar y Preparar Datos

In [None]:
import os

DATA_PATH = '../data/raw/isla_de_pascua'

# Isla de Pascua (Easter Island) lies at roughly 109.4°W, which falls in UTM
# zone 12 (114°W-108°W). The previous value, EPSG:32719, is UTM zone 19S
# (central meridian 69°W): about 40° of longitude away, which inflates
# projected lengths by roughly 20 %. With it, the grid cell size, building
# areas, street lengths and distances computed below were not true metres.
# EPSG:32712 is WGS 84 / UTM zone 12S.
CRS_UTM = 'EPSG:32712'

# Figures and models are written under ../outputs by later cells; create the
# directory up front so the first plt.savefig does not fail on a fresh checkout.
os.makedirs('../outputs', exist_ok=True)

# Load every layer and reproject it to the metric CRS
boundary = gpd.read_file(os.path.join(DATA_PATH, 'isla_de_pascua_boundary.geojson')).to_crs(CRS_UTM)
buildings = gpd.read_file(os.path.join(DATA_PATH, 'isla_de_pascua_buildings.geojson')).to_crs(CRS_UTM)
amenities = gpd.read_file(os.path.join(DATA_PATH, 'isla_de_pascua_amenities.geojson')).to_crs(CRS_UTM)
streets = gpd.read_file(os.path.join(DATA_PATH, 'isla_de_pascua_streets.geojson')).to_crs(CRS_UTM)

print("Datos cargados:")
print(f"   - Edificios: {len(buildings)}")
print(f"   - Amenidades: {len(amenities)}")
print(f"   - Calles: {len(streets)}")

## 2. Feature Engineering Espacial

In [None]:
# Build the analysis dataset on a regular square grid covering the boundary's
# bounding box, then keep only the cells that intersect the boundary.
minx, miny, maxx, maxy = boundary.total_bounds
cell_size = 200  # cell side length in CRS units (metres for a UTM CRS)


def _axis_origins(start, stop, step):
    """Yield start, start + step, ... (by repeated addition) while below stop."""
    pos = start
    while pos < stop:
        yield pos
        pos += step


# Column-major order: for every x origin, walk all y origins
grid_cells = [
    box(x0, y0, x0 + cell_size, y0 + cell_size)
    for x0 in _axis_origins(minx, maxx, cell_size)
    for y0 in _axis_origins(miny, maxy, cell_size)
]

all_cells = gpd.GeoDataFrame(geometry=grid_cells, crs=CRS_UTM)
island_shape = boundary.unary_union
grid = all_cells[all_cells.intersects(island_shape)].reset_index(drop=True)
grid['cell_id'] = range(len(grid))

print(f"Grilla creada: {len(grid)} celdas")

In [None]:
# Feature 1: building count and footprint area per grid cell.
# A building belongs to the cell that contains its centroid.
buildings['area_m2'] = buildings.geometry.area
buildings['centroid'] = buildings.geometry.centroid

building_counts = []
building_area_sums = []
building_area_means = []

for cell_geom in grid.geometry:
    areas_in_cell = buildings.loc[buildings['centroid'].within(cell_geom), 'area_m2']
    n_in_cell = len(areas_in_cell)
    building_counts.append(n_in_cell)
    # Cells without buildings keep 0.0 (the mean of an empty selection would be NaN)
    building_area_sums.append(areas_in_cell.sum() if n_in_cell > 0 else 0.0)
    building_area_means.append(areas_in_cell.mean() if n_in_cell > 0 else 0.0)

grid['n_buildings'] = building_counts
grid['total_building_area'] = building_area_sums
grid['avg_building_area'] = building_area_means

print("Features de edificios calculados")

In [None]:
# Feature 2: amenities per cell. Only Point geometries are counted;
# amenities stored with any other geometry type are left out.
is_point = amenities.geometry.geom_type == 'Point'
amenities_point = amenities[is_point].copy()

grid['n_amenities'] = [
    int(amenities_point.geometry.within(cell_geom).sum())
    for cell_geom in grid.geometry
]

# Centrality proxy: distance from each cell centroid to the centroid of the
# first feature in the boundary layer.
island_center = boundary.geometry.centroid.iloc[0]
grid['centroid'] = grid.geometry.centroid
grid['dist_to_center'] = grid['centroid'].distance(island_center)

print("Features de amenidades y centralidad calculados")

In [None]:
# Feature 3: street density, measured as the total street length falling
# inside each cell (streets are clipped to the cell before measuring).
street_lengths = []

for cell_geom in grid.geometry:
    crossing = streets.geometry[streets.geometry.intersects(cell_geom)]
    if len(crossing) == 0:
        street_lengths.append(0.0)
        continue
    clipped = crossing.intersection(cell_geom)
    street_lengths.append(clipped.length.sum())

grid['street_length'] = street_lengths

print("Features de calles calculados")

In [None]:
# Feature 4: spatial lag. For each cell, the mean building count of the
# cells that touch it; 0.0 when a cell has no touching neighbours.
neighbor_means = []

for cell_geom in grid.geometry:
    touching_counts = grid.loc[grid.geometry.touches(cell_geom), 'n_buildings']
    neighbor_means.append(touching_counts.mean() if len(touching_counts) > 0 else 0.0)

grid['neighbor_buildings'] = neighbor_means

print("Features de vecindario calculados")

In [None]:
# Feature 5: cell-centroid coordinates, used to capture spatial trends
cell_centroids = grid.geometry.centroid
grid['x_coord'] = cell_centroids.x
grid['y_coord'] = cell_centroids.y

# Min-max normalise each axis independently to the [0, 1] range
for raw_col, norm_col in (('x_coord', 'x_norm'), ('y_coord', 'y_norm')):
    values = grid[raw_col]
    lowest, highest = values.min(), values.max()
    grid[norm_col] = (values - lowest) / (highest - lowest)

print("Coordenadas normalizadas")

In [None]:
# Dataset summary: descriptive statistics for every engineered feature
feature_cols = [
    'n_buildings', 'total_building_area', 'avg_building_area',
    'n_amenities', 'dist_to_center', 'street_length',
    'neighbor_buildings', 'x_norm', 'y_norm',
]

feature_summary = grid[feature_cols].describe().round(2)

print("RESUMEN DE FEATURES")
print("=" * 60)
print(feature_summary)

In [None]:
# Correlation matrix of the engineered features, drawn as an annotated heatmap
corr = grid[feature_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
heatmap_options = dict(annot=True, fmt='.2f', cmap='RdBu_r', center=0)
sns.heatmap(corr, ax=ax, **heatmap_options)
ax.set_title('Matriz de Correlación de Features Espaciales', fontsize=14, fontweight='bold')

fig.tight_layout()
fig.savefig('../outputs/13_feature_correlation.png', dpi=150, bbox_inches='tight')
plt.show()

## 3. Modelo de Clasificación: Zonas de Alta/Baja Densidad

In [None]:
# Target variable: bucket the building count into four density classes.
# Intervals are right-closed: (-1, 0], (0, 5], (5, 20], (20, inf).
density_bins = [-1, 0, 5, 20, float('inf')]
density_labels = ['Sin edificios', 'Baja', 'Media', 'Alta']

grid['density_class'] = pd.cut(grid['n_buildings'], bins=density_bins, labels=density_labels)

print("Distribución de clases:")
print(grid['density_class'].value_counts())

In [None]:
# Classification dataset: only cells that contain at least one building
has_buildings = grid['n_buildings'] > 0
grid_with_buildings = grid[has_buildings].copy()

# Binary target: 1 = high density (more than 10 buildings), 0 = everything else.
# Note this threshold is independent of the 'density_class' bins.
grid_with_buildings['is_high_density'] = grid_with_buildings['n_buildings'].gt(10).astype(int)

n_high = grid_with_buildings['is_high_density'].sum()
n_rest = (grid_with_buildings['is_high_density'] == 0).sum()

print(f"\nCeldas con edificios: {len(grid_with_buildings)}")
print(f"Alta densidad: {n_high}")
print(f"Baja/media densidad: {n_rest}")

In [None]:
# Predictor columns shared by the classification and regression models
X_cols = [
    'n_amenities', 'dist_to_center', 'street_length',
    'neighbor_buildings', 'x_norm', 'y_norm',
]

# Feature matrix (missing values replaced by 0) and binary target
X = grid_with_buildings[X_cols].fillna(0)
y = grid_with_buildings['is_high_density']

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise using statistics learned from the training split only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Train: {len(X_train)} | Test: {len(X_test)}")

In [None]:
# Candidate classifiers, keyed by display name
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
}

results = {}

for name, model in models.items():
    # Fit on the training split and score on the held-out test split
    model.fit(X_train_scaled, y_train)
    test_accuracy = accuracy_score(y_test, model.predict(X_test_scaled))

    # 5-fold cross-validation on the training split
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
    cv_mean, cv_std = cv_scores.mean(), cv_scores.std()

    results[name] = dict(
        accuracy=test_accuracy,
        cv_mean=cv_mean,
        cv_std=cv_std,
        model=model,
    )

    print(f"\n{name}:")
    print(f"   Accuracy: {test_accuracy:.4f}")
    print(f"   CV Score: {cv_mean:.4f} (+/- {cv_std*2:.4f})")

In [None]:
# Feature importance of the fitted Random Forest classifier, sorted ascending
# so the most important feature ends up at the top of the horizontal bar chart.
rf_model = results['Random Forest']['model']
importance = (
    pd.DataFrame({'feature': X_cols, 'importance': rf_model.feature_importances_})
    .sort_values('importance', ascending=True)
)

fig, ax = plt.subplots(figsize=(10, 6))
importance.plot.barh(x='feature', y='importance', ax=ax, color='steelblue', legend=False)
ax.set(xlabel='Importancia', ylabel='Feature')
ax.set_title('Importancia de Features - Random Forest Classifier', fontsize=14, fontweight='bold')

fig.tight_layout()
fig.savefig('../outputs/14_feature_importance_clf.png', dpi=150, bbox_inches='tight')
plt.show()

## 4.1. Comparación con XGBoost

Comparamos Random Forest con XGBoost para regresión.

In [None]:
# Comparación con XGBoost
if XGBOOST_AVAILABLE:
    xgb_model = xgb.XGBRegressor(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        verbosity=0
    )
    
    xgb_model.fit(X_train_r_scaled, y_train_r)
    y_pred_xgb = xgb_model.predict(X_test_r_scaled)
    
    xgb_rmse = np.sqrt(mean_squared_error(y_test_r, y_pred_xgb))
    xgb_r2 = r2_score(y_test_r, y_pred_xgb)
    
    print("XGBoost Regressor:")
    print(f"   RMSE: {xgb_rmse:.4f}")
    print(f"   R²: {xgb_r2:.4f}")
    
    # Agregar a resultados
    reg_results["XGBoost"] = {
        "rmse": xgb_rmse,
        "r2": xgb_r2,
        "model": xgb_model,
        "predictions": y_pred_xgb
    }
else:
    print("XGBoost no disponible - instalar con: pip install xgboost")

## 4. Modelo de Regresión: Predicción de Densidad

In [None]:
# Regresión para predecir número de edificios
X_reg = grid[X_cols].fillna(0)
y_reg = grid['n_buildings']

X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_reg, y_reg, test_size=0.3, random_state=42
)

scaler_r = StandardScaler()
X_train_r_scaled = scaler_r.fit_transform(X_train_r)
X_test_r_scaled = scaler_r.transform(X_test_r)

print(f"Train: {len(X_train_r)} | Test: {len(X_test_r)}")

In [None]:
# Modelos de regresión
reg_models = {
    'Linear Regression': LinearRegression(),
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42)
}

reg_results = {}

for name, model in reg_models.items():
    model.fit(X_train_r_scaled, y_train_r)
    y_pred_r = model.predict(X_test_r_scaled)
    
    mse = mean_squared_error(y_test_r, y_pred_r)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test_r, y_pred_r)
    r2 = r2_score(y_test_r, y_pred_r)
    
    reg_results[name] = {
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'model': model,
        'predictions': y_pred_r
    }
    
    print(f"\n{name}:")
    print(f"   RMSE: {rmse:.4f}")
    print(f"   MAE: {mae:.4f}")
    print(f"   R²: {r2:.4f}")

In [None]:
# Predicted vs actual values for the regressor with the highest test R²
best_model_name = max(reg_results, key=lambda model_name: reg_results[model_name]['r2'])
best_result = reg_results[best_model_name]
best_preds = best_result['predictions']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: scatter of actual vs predicted, with the y = x reference line
upper = y_test_r.max()
ax1.scatter(y_test_r, best_preds, alpha=0.5, s=20)
ax1.plot([0, upper], [0, upper], 'r--', linewidth=2, label='Línea perfecta')
ax1.set(xlabel='Valor Real', ylabel='Predicción')
ax1.set_title(f'{best_model_name}\nR² = {best_result["r2"]:.4f}')
ax1.legend()

# Right panel: residuals (actual - predicted) against the prediction
residuals = y_test_r.values - best_preds
ax2.scatter(best_preds, residuals, alpha=0.5, s=20)
ax2.axhline(y=0, color='r', linestyle='--', linewidth=2)
ax2.set(xlabel='Predicción', ylabel='Residuales')
ax2.set_title('Análisis de Residuales')

fig.tight_layout()
fig.savefig('../outputs/15_regression_results.png', dpi=150, bbox_inches='tight')
plt.show()

## 6.1. Validación Espacial con GroupKFold

Usando zonas geográficas como grupos para evitar data leakage espacial.

In [None]:
# Geographic zones for GroupKFold: five equal-width bands of x_norm + y_norm,
# so all cells of a band land in the same fold.
diagonal_position = grid["x_norm"] + grid["y_norm"]
grid["zone_id"] = pd.cut(diagonal_position, bins=5, labels=False)

X_gkf = grid[X_cols].fillna(0)
y_gkf = grid["n_buildings"]
groups_gkf = grid["zone_id"]

gkf = GroupKFold(n_splits=5)
rf_gkf = RandomForestRegressor(n_estimators=100, random_state=42)

separator = "-" * 40
gkf_scores = []

print("GroupKFold Spatial Cross-Validation:")
print(separator)

fold_splits = gkf.split(X_gkf, y_gkf, groups_gkf)
for fold_number, (train_idx, test_idx) in enumerate(fold_splits, start=1):
    # The scaler is re-fitted inside each fold on that fold's training rows
    scaler_gkf = StandardScaler().fit(X_gkf.iloc[train_idx])
    X_tr_sc = scaler_gkf.transform(X_gkf.iloc[train_idx])
    X_te_sc = scaler_gkf.transform(X_gkf.iloc[test_idx])

    rf_gkf.fit(X_tr_sc, y_gkf.iloc[train_idx])
    r2_fold = r2_score(y_gkf.iloc[test_idx], rf_gkf.predict(X_te_sc))

    gkf_scores.append(r2_fold)
    print(f"Fold {fold_number}: R² = {r2_fold:.4f}")

print(separator)
print(f"Mean R²: {np.mean(gkf_scores):.4f} (+/- {np.std(gkf_scores)*2:.4f})")

## 5. Clustering Espacial

In [None]:
# K-Means input: normalised position plus three activity features, standardised
cluster_features = ['x_norm', 'y_norm', 'n_buildings', 'n_amenities', 'street_length']
X_cluster = grid[cluster_features].fillna(0)
X_cluster_scaled = StandardScaler().fit_transform(X_cluster)

# Elbow method: record the inertia for K = 2..9
K_range = range(2, 10)
inertias = [
    KMeans(n_clusters=k, random_state=42, n_init=10).fit(X_cluster_scaled).inertia_
    for k in K_range
]

# Elbow plot
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(K_range, inertias, 'bo-')
ax.set(
    xlabel='Número de Clusters (K)',
    ylabel='Inercia',
    title='Método del Codo - K-Means',
)
fig.tight_layout()
plt.show()

In [None]:
# Final K-Means fit with the chosen number of clusters; the label of every
# cell is stored in grid['cluster'].
k_optimal = 4
kmeans = KMeans(n_clusters=k_optimal, random_state=42, n_init=10).fit(X_cluster_scaled)
grid['cluster'] = kmeans.labels_

cluster_sizes = grid['cluster'].value_counts().sort_index()
print("Distribución de clusters:")
print(cluster_sizes)

## 7.1. Interpretabilidad con SHAP Values

Análisis de la importancia de features usando SHAP (SHapley Additive exPlanations).

In [None]:
# SHAP interpretability for the fitted Random Forest regressor
if not SHAP_AVAILABLE:
    print("SHAP no disponible - instalar con: pip install shap")
else:
    print("Calculando SHAP values...")

    # Explain the Random Forest already trained in the regression section
    rf_for_shap = reg_results["Random Forest Regressor"]["model"]
    explainer = shap.TreeExplainer(rf_for_shap)

    # Use a sample of at most 100 rows to keep the computation fast.
    # SHAP values are computed on the scaled inputs (what the model saw),
    # while the plots receive the unscaled sample for the feature values.
    X_sample = X_reg.sample(min(100, len(X_reg)), random_state=42)
    X_sample_scaled = scaler_r.transform(X_sample)
    shap_values = explainer.shap_values(X_sample_scaled)

    # (extra summary_plot kwargs, title, output path): the default summary
    # plot first, then the mean-importance bar plot
    shap_plots = [
        ({}, "SHAP Summary Plot - Random Forest", "../outputs/18_shap_summary.png"),
        ({"plot_type": "bar"}, "SHAP Feature Importance", "../outputs/19_shap_importance.png"),
    ]

    for extra_kwargs, plot_title, output_path in shap_plots:
        fig, ax = plt.subplots(figsize=(10, 6))
        shap.summary_plot(shap_values, X_sample, feature_names=X_cols, show=False, **extra_kwargs)
        plt.title(plot_title, fontsize=14, fontweight="bold")
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.show()

    print("\nSHAP analysis completado!")

In [None]:
# Map of the K-Means clusters over the island boundary
fig, ax = plt.subplots(figsize=(14, 10))

# Boundary outline
boundary.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=2)

# One layer per cluster, each with its own colour.
# NOTE(review): depending on the matplotlib version, polygon layers may not
# yield legend entries from `label=` - confirm the legend actually renders.
colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00']
cell_style = dict(edgecolor='gray', linewidth=0.5, alpha=0.7)

for cluster_id in range(k_optimal):
    in_cluster = grid['cluster'] == cluster_id
    grid[in_cluster].plot(
        ax=ax,
        color=colors[cluster_id],
        label=f'Cluster {cluster_id}',
        **cell_style,
    )

ax.legend(loc='lower right', title='Cluster')
ax.set_title('Clustering Espacial - K-Means\nIsla de Pascua', fontsize=14, fontweight='bold')
ax.set_axis_off()

fig.tight_layout()
fig.savefig('../outputs/16_spatial_clustering.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Cluster profile: per-cluster mean of the main descriptive features,
# with the columns relabelled for display.
profile_labels = {
    'n_buildings': 'Edificios (prom)',
    'n_amenities': 'Amenidades (prom)',
    'street_length': 'Long. Calles (m)',
    'dist_to_center': 'Dist. Centro (m)',
}

cluster_profile = grid.groupby('cluster')[list(profile_labels)].mean()
cluster_profile.columns = list(profile_labels.values())

print("PERFIL DE CLUSTERS")
print("=" * 60)
print(cluster_profile.round(2))

## 6. Validación Espacial

In [None]:
# Spatial cross-validation: each K-Means cluster is held out once as the test set
from sklearn.model_selection import LeaveOneGroupOut

X_spatial = grid[X_cols].fillna(0)
y_spatial = grid['n_buildings']
groups = grid['cluster']

logo = LeaveOneGroupOut()
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)

spatial_scores = []

for fold, (train_idx, test_idx) in enumerate(logo.split(X_spatial, y_spatial, groups)):
    # Scale inside the fold using the training rows only
    scaler_sp = StandardScaler().fit(X_spatial.iloc[train_idx])
    X_tr_scaled = scaler_sp.transform(X_spatial.iloc[train_idx])
    X_te_scaled = scaler_sp.transform(X_spatial.iloc[test_idx])

    rf_reg.fit(X_tr_scaled, y_spatial.iloc[train_idx])
    r2_sp = r2_score(y_spatial.iloc[test_idx], rf_reg.predict(X_te_scaled))

    spatial_scores.append(r2_sp)
    print(f"Fold {fold} (Cluster {fold} como test): R² = {r2_sp:.4f}")

print(f"\nSpatial CV Score: {np.mean(spatial_scores):.4f} (+/- {np.std(spatial_scores)*2:.4f})")

## 7. Mapa de Predicciones

In [None]:
# Predict for the whole grid with the Random Forest regressor. The grid
# includes the rows the model was trained on, so this error map is not a
# held-out estimate.
best_reg_model = reg_results['Random Forest Regressor']['model']
X_all_scaled = scaler_r.transform(X_reg)
grid['predicted_buildings'] = best_reg_model.predict(X_all_scaled)
grid['prediction_error'] = grid['n_buildings'] - grid['predicted_buildings']

# One panel per map: (column, colormap, colorbar label, panel title)
panels = [
    ('n_buildings', 'YlOrRd', 'N° Edificios', 'Valores Reales'),
    ('predicted_buildings', 'YlOrRd', 'N° Predicho', 'Predicciones'),
    ('prediction_error', 'RdBu_r', 'Error', 'Error (Real - Predicho)'),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for panel_ax, (column, cmap, legend_label, panel_title) in zip(axes, panels):
    boundary.plot(ax=panel_ax, facecolor='none', edgecolor='black', linewidth=2)
    grid.plot(column=column, ax=panel_ax, cmap=cmap, legend=True,
              legend_kwds={'label': legend_label})
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_axis_off()

plt.suptitle('Comparación: Valores Reales vs Predicciones\nRandom Forest Regressor', 
             fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('../outputs/17_prediction_maps.png', dpi=150, bbox_inches='tight')
plt.show()

## 8. Guardar Modelos y Resultados

In [None]:
import joblib

# Persist the fitted regressor, its scaler and the clustering model
os.makedirs('../outputs/models', exist_ok=True)

joblib.dump(best_reg_model, '../outputs/models/rf_regressor.joblib')
joblib.dump(scaler_r, '../outputs/models/scaler.joblib')
joblib.dump(kmeans, '../outputs/models/kmeans_clustering.joblib')

# Export the grid with predictions. The secondary 'centroid' geometry column
# is dropped so that only the cell polygon remains as geometry.
grid_export = grid.drop(columns=['centroid'], errors='ignore')

# 'density_class' comes from pd.cut and is therefore a pandas Categorical.
# NOTE(review): some geopandas file writers may reject categorical columns -
# confirm against the installed version. Casting to plain strings keeps the
# same values in the GeoJSON and avoids depending on that behaviour.
for column in grid_export.select_dtypes(include='category').columns:
    grid_export[column] = grid_export[column].astype(str)

grid_export.to_file('../outputs/grid_with_predictions.geojson', driver='GeoJSON')

print("Modelos y resultados guardados")

## 9. Resumen Final

In [None]:
# Final report. It now also covers the GroupKFold validation and the SHAP
# figures, which the notebook produces but the summary used to omit.
print("\n" + "="*60)
print("RESUMEN DE MACHINE LEARNING ESPACIAL")
print("="*60)

print("\nFEATURES CREADOS:")
for col in X_cols:
    print(f"   • {col}")

print("\nCLASIFICACIÓN (Alta Densidad vs Resto):")
for name, res in results.items():
    print(f"   • {name}: Accuracy = {res['accuracy']:.4f}")

print("\nREGRESIÓN (Predicción N° Edificios):")
for name, res in reg_results.items():
    print(f"   • {name}: R² = {res['r2']:.4f}, RMSE = {res['rmse']:.2f}")

print("\nCLUSTERING:")
print(f"   • K-Means con K={k_optimal} clusters")
for cluster_id in range(k_optimal):
    n = (grid['cluster'] == cluster_id).sum()
    print(f"   • Cluster {cluster_id}: {n} celdas")

print("\nVALIDACIÓN ESPACIAL:")
print(f"   • Leave-One-Cluster-Out: R² = {np.mean(spatial_scores):.4f} (+/- {np.std(spatial_scores)*2:.4f})")
print(f"   • GroupKFold (zonas): R² = {np.mean(gkf_scores):.4f} (+/- {np.std(gkf_scores)*2:.4f})")

# Files written by this notebook, in the order they are produced
generated_files = [
    "outputs/13_feature_correlation.png",
    "outputs/14_feature_importance_clf.png",
    "outputs/15_regression_results.png",
    "outputs/16_spatial_clustering.png",
    "outputs/17_prediction_maps.png",
]
if SHAP_AVAILABLE:
    # The SHAP figures only exist when the optional dependency was importable
    generated_files += [
        "outputs/18_shap_summary.png",
        "outputs/19_shap_importance.png",
    ]
generated_files += [
    "outputs/models/rf_regressor.joblib",
    "outputs/models/scaler.joblib",
    "outputs/models/kmeans_clustering.joblib",
    "outputs/grid_with_predictions.geojson",
]

print("\nARCHIVOS GENERADOS:")
for path in generated_files:
    print(f"   • {path}")

print("\nFase 3 completada!")