In [None]:
# 1. Setup y carga librerías
import os
from pathlib import Path
from urllib.parse import quote_plus

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from sqlalchemy import create_engine
# Pull connection settings from a local .env file (if any) into the process environment.
load_dotenv()
sns.set_palette('viridis')

POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_HOST = os.getenv('POSTGRES_HOST', 'localhost')
POSTGRES_PORT = os.getenv('POSTGRES_PORT', '5432')
POSTGRES_DB = os.getenv('POSTGRES_DB')

# Fail here, with a readable message, instead of building a URL that contains the
# literal text "None" and only breaks later inside the first query.
_missing_vars = [
    name
    for name, value in (('POSTGRES_USER', POSTGRES_USER), ('POSTGRES_DB', POSTGRES_DB))
    if not value
]
if _missing_vars:
    raise RuntimeError('Faltan variables de entorno: ' + ', '.join(_missing_vars))

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot corrupt the connection URL.
_credentials = quote_plus(POSTGRES_USER)
if POSTGRES_PASSWORD:
    _credentials += ':' + quote_plus(POSTGRES_PASSWORD)

# NOTE: create_engine() does not open a connection by itself; the message below
# only confirms that the engine object was configured.
engine = create_engine(
    f'postgresql://{_credentials}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}'
)
print('Conexión lista')

In [None]:
# 2. Carga de datasets clave
def load_gdf(schema, table):
    """Read every row of ``schema.table`` into a GeoDataFrame.

    The query is run through the notebook-level ``engine`` and the column
    named ``geometry`` is used as the geometry column.

    Args:
        schema: Name of the database schema that holds the table.
        table: Name of the table to read.

    Returns:
        The loaded GeoDataFrame, or ``None`` when anything goes wrong while
        reading (the error is printed rather than raised).
    """
    loaded = None
    try:
        query = f'SELECT * FROM {schema}.{table}'
        loaded = gpd.read_postgis(query, engine, geom_col='geometry')
    except Exception as exc:
        # Best effort: report the problem and let the caller deal with None.
        print(f'Error cargando {schema}.{table}:', exc)
    return loaded
# Geometry layers from the processed schema; a layer is None if its table failed to load.
manz_attr = load_gdf('processed_data', 'manzanas_atributos')
uso = load_gdf('processed_data', 'manzanas_uso_suelo')
net_nodes = load_gdf('processed_data', 'network_nodes_metrics')

# Tabular metrics (no geometry), read into a plain DataFrame.
metrics_query = 'SELECT * FROM processed_data.metrics_manzanas'
metrics = pd.read_sql(metrics_query, engine)

# Row counts: blocks layer (None when it could not be loaded) and metrics table.
manz_rows = None if manz_attr is None else len(manz_attr)
print('Shapes:', manz_rows, len(metrics))

In [None]:
# 3. Unify the key attributes per block (manzana)
# `merged` holds the tabular metrics; `merged_geo` adds the block geometry when it is available.
merged = metrics.copy()

# Join-key column: prefer 'manzent', then 'MANZENT', otherwise fall back to the first column.
key = next(
    (name for name in ('manzent', 'MANZENT') if name in merged.columns),
    merged.columns[0],
)

merged_geo = None
if manz_attr is not None:
    # The geometry table may spell the key with different casing than the metrics
    # table; try an exact match first, then a case-insensitive one.
    geo_key = key if key in manz_attr.columns else next(
        (col for col in manz_attr.columns if str(col).lower() == str(key).lower()),
        None,
    )
    if geo_key is None:
        # Without a shared key there is nothing to join on; keep the tabular data usable.
        print(f'Columna clave {key!r} no encontrada en manzanas_atributos; se omite la unión espacial.')
    else:
        # .copy() yields an independent frame, so the assignment below does not
        # write into a slice of manz_attr (avoids SettingWithCopyWarning).
        geo_subset = manz_attr[[geo_key, 'geometry']].rename(columns={geo_key: key}).copy()
        # Compare both sides as strings so differing dtypes cannot break the join.
        geo_subset[key] = geo_subset[key].astype(str)
        merged[key] = merged[key].astype(str)
        # Left join: every block geometry is kept, with NaN metrics where none match.
        merged_geo = geo_subset.merge(merged, on=key, how='left')
print('Columnas merged:', merged.columns.tolist()[:15])

In [None]:
# 4. Descriptive statistics
# Pick numeric columns by dtype. Filtering on "not object" also lets through
# bool, datetime, category and string dtypes, for which mean/std/min/max are not
# produced and the column selection at the end would fail.
desc_cols = merged.select_dtypes(include='number').columns.tolist()
summary_fields = ['mean', 'std', 'min', 'max']
if desc_cols:
    stats = merged[desc_cols].describe().T
else:
    # describe() cannot summarise a frame with no columns; keep an empty table
    # with the expected fields so the display below still works.
    stats = pd.DataFrame(columns=summary_fields)
stats[summary_fields].head(10)

In [None]:
# 5. Basic correlations between the selected numeric variables
candidate_vars = [
    'edificios_count',
    'amenidades_count',
    'area_m2',
    'ndvi_mean',
    'road_length_m',
    'road_density_m_per_km2',
]
# Only variables that actually exist in the metrics table are used.
sel = [col for col in candidate_vars if col in merged.columns]
if len(sel) < 2:
    # A correlation matrix needs at least two variables; an empty selection would
    # otherwise reach the heatmap call and fail with an unhelpful error.
    print('Se requieren al menos dos variables disponibles para la matriz de correlación.')
else:
    corr = merged[sel].corr()
    corr_fig, corr_ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(corr, annot=True, cmap='magma', fmt='.2f', ax=corr_ax)
    corr_ax.set_title('Matriz de correlación variables clave')
    corr_fig.tight_layout()
    plt.show()

In [None]:
# 6. Thematic maps: NDVI and road density
if merged_geo is None:
    print('No hay geometría de manzanas para mapear.')
else:
    # (column, colormap, title) for each panel, left to right.
    map_panels = [
        ('ndvi_mean', 'YlGn', 'NDVI medio por manzana'),
        ('road_density_m_per_km2', 'OrRd', 'Densidad vial (m/km²)'),
    ]
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for panel_ax, (map_col, map_cmap, map_title) in zip(axes, map_panels):
        # A panel whose column is absent is simply left empty.
        if map_col in merged_geo.columns:
            merged_geo.plot(
                column=map_col,
                ax=panel_ax,
                cmap=map_cmap,
                legend=True,
                missing_kwds={'color': 'lightgrey'},  # blocks without a value are drawn in grey
            )
            panel_ax.set_title(map_title)
    # Hide the axis frame and ticks on both panels, drawn or not.
    for panel_ax in axes:
        panel_ax.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# 7. Distribution of NDVI and building counts (histograms)
hist_candidates = ('ndvi_mean', 'edificios_count')
# Keep only the variables that are present in the metrics table.
plot_vars = [name for name in hist_candidates if name in merged.columns]
for var_name in plot_vars:
    # One small figure per variable: 30-bin histogram with a KDE curve on top.
    hist_fig, hist_ax = plt.subplots(figsize=(5, 3))
    sns.histplot(merged[var_name], kde=True, bins=30, color='steelblue', ax=hist_ax)
    hist_ax.set_title(f'Distribución {var_name}')
    hist_fig.tight_layout()
    plt.show()

In [None]:
# 8. Spatial-weights preparation (placeholder)
# Moran's I needs pysal. Build a clean GeoDataFrame and keep only valid geometries first.
try:
    # Optional dependency: imported inside the try so that a missing install is
    # reported by the handler below instead of stopping the notebook.
    import pysal
    from pysal.lib import weights
    from pysal.explore import esda
    if merged_geo is not None:
        gdf_valid = merged_geo[merged_geo.geometry.notnull()].copy()
        # Renumber rows 0..n-1 after filtering so index labels and row positions
        # agree, whichever of the two the weights builder uses as ids.
        gdf_valid = gdf_valid[gdf_valid.geometry.is_valid].reset_index(drop=True)
        # Queen contiguity (can be slow with many features)
        w = weights.Queen.from_dataframe(gdf_valid)
        w.transform = 'r'  # row-standardise the weights
        if 'ndvi_mean' in gdf_valid.columns:
            # Blocks without an NDVI value get the mean so every observation has a value.
            ndvi_filled = gdf_valid['ndvi_mean'].fillna(gdf_valid['ndvi_mean'].mean())
            mi = esda.Moran(ndvi_filled, w)
            # NOTE(review): the original print statement was cut off mid-expression
            # (a syntax error that prevented the whole cell from running). Reporting the
            # statistic and its permutation-based pseudo p-value; confirm the intended text.
            print(f"Moran's I (ndvi_mean): {mi.I:.4f} | p-valor simulado: {mi.p_sim:.4f}")
        else:
            print('NDVI no disponible para Moran provisional.')
    else:
        print('No se pudo crear pesos: geometría ausente.')
except Exception as e:
    # Best effort: this cell is a preliminary check, so any failure is only reported.
    print('Placeholder Moran - instale pysal/esda si falta. Error:', e)

### Nota Moran/LISA
Para un análisis completo: separar en celdas dedicadas, crear el scatterplot de Moran y el mapa LISA (con `splot`). La celda anterior es solo una verificación preliminar.

In [None]:
# 9. Save a subset for later analyses
out_path = Path('../data/processed/esda_subset.geojson')
if merged_geo is not None:
    try:
        # Attribute columns to export. Both spellings of the block id are listed
        # because the merge step accepts either 'manzent' or 'MANZENT' as the key.
        wanted_cols = [
            'manzent',
            'MANZENT',
            'ndvi_mean',
            'edificios_count',
            'amenidades_count',
            'road_density_m_per_km2',
        ]
        cols_keep = [c for c in merged_geo.columns if c in wanted_cols] + ['geometry']
        subset = merged_geo[cols_keep]
        # Create the target folder first; writing fails if it does not exist yet.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        subset.to_file(out_path, driver='GeoJSON')
        print('Guardado subset ESDA en', out_path)
    except Exception as e:
        # Best effort: exporting is optional, so a failure is reported, not raised.
        print('Error guardando subset ESDA:', e)
else:
    print('No hay geometría de manzanas; no se guarda subset ESDA.')
## Checklist Exploratorio
- [x] Datasets cargados
- [x] Estadísticas descriptivas calculadas
- [x] Mapas temáticos básicos generados
- [x] Correlaciones iniciales
- [x] Placeholder Moran
## Próximos pasos ESDA
1. Refinar selección de variables socioeconómicas.
2. Calcular Moran Local (LISA) y clasificar clusters.
3. Incorporar visualizaciones avanzadas (scatterplots espaciales).
4. Derivar indicadores per cápita si se agregan datos poblacionales.