In [16]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np


## Carga de datos

In [4]:
# Read the municipal DANE indicator table from the transformed-data folder.
df = pd.read_csv(filepath_or_buffer="../../data/transformed/dane/dane.csv")

# Cell output: the columns available in the table.
df.columns

Index(['DPTO_CNMBR', 'MPIO_CNMBR', 'geometry', 'AFBMO_TOT', 'BLE_TOT',
       'BASS_TOT', 'SAS_TOT', 'HC_TOT', 'MIPE_TOT', 'MIP_TOT', 'SAFAM_TOT',
       'TRIML_TOT'],
      dtype='str')

## Modelo

In [None]:
# Indicator columns that enter the vulnerability PCA.
vars_vuln = [
    "AFBMO_TOT", "BLE_TOT", "BASS_TOT",
    "SAS_TOT", "HC_TOT", "MIPE_TOT",
    "MIP_TOT", "SAFAM_TOT", "TRIML_TOT",
]

# Feature matrix: an independent copy holding only the selected columns.
# (Standardization itself happens in the next cell.)
X = df.loc[:, vars_vuln].copy()


In [None]:
# Standardize every indicator with StandardScaler's default settings
# (center on the mean, scale to unit variance) so that no variable
# dominates the PCA merely because of its scale.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Fit a PCA with the default number of components and project the
# standardized data onto the principal components.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Share of variance captured by each component, plus its running total.
var_ratio = pca.explained_variance_ratio_
explained = pd.DataFrame(
    {
        "PC": [f"PC{k}" for k in range(1, var_ratio.size + 1)],
        "var_exp": var_ratio,
        "var_acum": var_ratio.cumsum(),
    }
)

explained


Unnamed: 0,PC,var_exp,var_acum
0,PC1,0.41424,0.41424
1,PC2,0.174368,0.588608
2,PC3,0.109474,0.698082
3,PC4,0.096868,0.79495
4,PC5,0.061754,0.856704
5,PC6,0.056004,0.912708
6,PC7,0.037439,0.950147
7,PC8,0.029469,0.979616
8,PC9,0.020384,1.0


In [None]:
# Weights ("loadings") of each indicator on each principal component.
# pca.components_ has one row per component, so it is transposed to get
# one row per input variable and one column per component.
# Column labels are sized from the fitted PCA itself rather than from
# len(vars_vuln): PCA() keeps min(n_samples, n_features) components, so the
# two counts are not guaranteed to match. This also mirrors how the
# explained-variance table builds its PC labels.

loadings = pd.DataFrame(
    pca.components_.T,
    index=vars_vuln,
    columns=[f"PC{i+1}" for i in range(pca.components_.shape[0])],
)

loadings


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9
AFBMO_TOT,0.433474,-0.102187,-0.093831,-0.105899,0.115332,-0.468882,-0.48408,-0.346966,-0.440191
BLE_TOT,0.332872,-0.488037,-0.015694,0.245221,-0.073133,-0.190149,-0.240608,0.602029,0.358888
BASS_TOT,0.117343,0.072904,0.940777,0.227264,0.198671,-0.007249,-0.034363,-0.057594,-0.013622
SAS_TOT,0.214575,0.606089,-0.02423,0.272639,-0.460315,-0.028389,-0.02824,0.40391,-0.367501
HC_TOT,0.356059,0.380159,0.075439,-0.418906,-0.239005,-0.232505,0.037461,-0.181552,0.634019
MIPE_TOT,0.311477,0.359956,-0.232817,0.070914,0.79909,0.073999,0.133722,0.218645,0.066879
MIP_TOT,0.344528,-0.137344,0.120619,-0.552999,-0.055819,0.659473,-0.162431,0.186577,-0.207038
SAFAM_TOT,0.365745,-0.038274,-0.174389,0.56203,-0.141393,0.471234,-0.076036,-0.48206,0.195604
TRIML_TOT,0.406472,-0.290346,0.021121,-0.021797,-0.105894,-0.1632,0.808916,-0.052494,-0.233687


* AFBMO_TOT   → déficit de vivienda
* BLE_TOT     → bajo logro educativo
* BASS_TOT    → barreras de acceso a salud
* SAS_TOT     → saneamiento básico
* HC_TOT      → hacinamiento crítico
* MIPE_TOT    → material inadecuado paredes
* MIP_TOT     → material inadecuado pisos
* SAFAM_TOT   → dependencia económica
* TRIML_TOT   → trabajo infantil


Para el análisis municipal de vulnerabilidad, con base en los indicadores del DANE, se aplicó un PCA sobre las variables estandarizadas.

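Se encontró que el primer componente (PC1), que explica cerca del 41 % de la varianza total, presenta cargas positivas en todas las variables, con magnitudes relativamente altas en la mayoría de ellas. Por ello puede interpretarse como un gradiente multidimensional de la vulnerabilidad por municipio.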

De esta manera, el índice normalizado será el PC1 reescalado a valores entre 0 y 1.

In [20]:
# Raw vulnerability index: each municipality's score on the first principal
# component (rows of X_pca follow the row order of df).
# The sign of a principal component is arbitrary, and scikit-learn's sign
# convention has changed between releases, so PC1 is explicitly oriented to
# make its loadings sum positive. Higher scores then always go with higher
# indicator values. With the all-positive PC1 loadings shown in this notebook
# the factor is +1 and the scores are unchanged.
pc1_sign = -1.0 if pca.components_[0].sum() < 0 else 1.0
df["vuln_index_raw"] = pc1_sign * X_pca[:, 0]

# Min-max normalization of the raw index to the [0, 1] range:
# vuln_index = (x - min) / (max - min)
raw_min = df["vuln_index_raw"].min()
raw_max = df["vuln_index_raw"].max()
df["vuln_index"] = (df["vuln_index_raw"] - raw_min) / (raw_max - raw_min)

# Persist the table, now including both index columns, without the row index.
df.to_csv("../../data/processed/dane/dane.csv", index=False)