In [None]:
from criminologia_cdmx.etl import *
from criminologia_cdmx.patrones_espacio_temporales import *
from criminologia_cdmx.covariables import *
from criminologia_cdmx.modelos import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import contextily as ctx

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Ejemplo de modelos

## Preparación de variable dependiente

En este ejemplo vamos a trabajar con 'ROBO A CASA HABITACION SIN VIOLENCIA' agregado en colonias.

In [None]:
# Parameters of the example: one crime category and the date window to count.
delito = 'ROBO A CASA HABITACION SIN VIOLENCIA'
fecha_inicio, fecha_fin = "01-01-2019", "01-01-2022"

# Read the carpetas CSV and pass it straight through agrega_ids_espaciales.
carpetas = agrega_ids_espaciales(
    get_carpetas_desde_archivo("datos/descargas/carpetas_fiscalia.csv")
)

# Y holds one row per colonia_cve with the value for the selected crime; it is
# the variable handed to the models as the response.
Y = variable_independiente(carpetas, 'delito', delito, fecha_inicio, fecha_fin)
Y

Unnamed: 0,colonia_cve,ROBO A CASA HABITACION SIN VIOLENCIA
0,32,0.0
1,619,12.0
2,1792,5.0
3,1774,0.0
4,583,20.0
...,...,...
1819,1731,42.0
1820,1801,0.0
1821,1799,7.0
1822,1800,15.0


## Preparación de covariables

Vamos a preparar dos conjuntos de covariables: uno con variables de la población y las viviendas, y otro en el que vamos a incluir variables de uso de suelo.

In [None]:
# Census covariates: fetch the census dictionary and variables, aggregate the
# variables with agrega_en_unidades, and feed the result to censo_a_tasas.
diccionario = get_diccionario_censo()
censo = get_variables_censo()
agregado = censo_a_tasas(
    agrega_en_unidades(censo, diccionario),
    diccionario,
)
agregado

Unnamed: 0_level_0,POBTOT,POBFEM,POBMAS,P_0A2,P_0A2_F,P_0A2_M,P_3YMAS,P_3YMAS_F,P_3YMAS_M,P_5YMAS,...,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,OCUPVIVPAR,PROM_OCUP_C
colonia_cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5642.0,0.503545,0.496455,0.035271,0.015775,0.014711,0.959412,0.482630,0.476781,0.927331,...,0.527206,0.322794,0.142647,0.103676,0.016176,0.036765,0.435294,0.007353,5559.0,1.630683
1,7470.0,0.499063,0.500937,0.048059,0.020616,0.023025,0.949398,0.474297,0.475100,0.911111,...,0.505724,0.157790,0.076157,0.070184,0.013440,0.056247,0.453459,0.000000,7470.0,1.588686
2,7625.0,0.509377,0.490623,0.036197,0.014557,0.014820,0.959607,0.489180,0.470426,0.928656,...,0.623609,0.175617,0.134494,0.094340,0.007741,0.043058,0.330914,0.000000,7625.0,1.387878
3,1617.0,0.526283,0.473717,0.047001,0.024737,0.017316,0.951144,0.498454,0.452690,0.904143,...,0.546948,0.138498,0.068075,0.089202,0.000000,0.044601,0.389671,0.000000,1617.0,1.493075
4,4765.0,0.521511,0.478489,0.042183,0.017419,0.018888,0.955929,0.500735,0.455194,0.925289,...,0.580834,0.177030,0.091441,0.075347,0.010241,0.058522,0.383321,0.002195,4765.0,1.403121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,6674.0,0.511687,0.488313,0.033263,0.012736,0.014384,0.962242,0.494156,0.468085,0.933174,...,0.689066,0.410023,0.150342,0.112187,0.009681,0.029613,0.266515,0.000000,6672.0,1.402860
1821,27259.0,0.514582,0.485418,0.034117,0.015738,0.015848,0.963755,0.496460,0.467295,0.935544,...,0.701059,0.298336,0.162330,0.114826,0.015431,0.030257,0.308775,0.000908,27257.0,1.442475
1822,1508.0,0.527851,0.472149,0.039788,0.019231,0.019231,0.960212,0.507294,0.452918,0.929045,...,1.058091,0.522822,0.340249,0.207469,0.033195,0.024896,0.414938,0.000000,1508.0,1.457005
1823,9496.0,0.510425,0.489575,0.040649,0.017376,0.021061,0.957772,0.491259,0.466512,0.923336,...,0.651829,0.239312,0.157338,0.099163,0.022477,0.042750,0.321287,0.005729,9496.0,1.554682


Seleccionamos variables de vivienda:

* Total de viviendas particulares habitadas (`VIVPAR_HAB`)
* Viviendas particulares habitadas con tres o más cuartos (`VPH_3YMASC`)

In [None]:
# Housing covariates: keep all rows and only the two dwelling columns,
# selected by label.
vars_viv = agregado.loc[:, ['VIVPAR_HAB', 'VPH_3YMASC']]
vars_viv

Unnamed: 0_level_0,VIVPAR_HAB,VPH_3YMASC
colonia_cve,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1360.0,0.653676
1,2009.0,0.549029
2,2067.0,0.758587
3,426.0,0.676056
4,1367.0,0.628383
...,...,...
1820,1756.0,0.801822
1821,6610.0,0.821936
1822,241.0,1.298755
1823,2269.0,0.759806


Construimos un índice para representar la Concentración de Desventajas

In [None]:
# Census variables that feed the "Concentración de Desventajas" index.
vars_indice = [
    'P5_HLI', 'POB_AFRO', 'PCON_DISC', 'P3A5_NOA', 'P6A11_NOA',
    'P12A14NOA', 'P15YM_AN', 'PSINDER', 'PDESOCUP',
]
indice = IndicePCA(agregado, vars_indice)
indice.calcula_indice()
# varianza_explicada[0] is a fraction (about 0.31 in the stored output), while
# the message announces a percentage; the `.1%` format spec multiplies by 100
# and appends '%' so the printed number matches the wording.
print(f'El porcentaje de la varianza explicada por el índice es {indice.varianza_explicada[0]:.1%}')

El porcentaje de la varianza explicada por el índice es 0.3139919934039488


Construimos un DataFrame con los datos de vivienda y el índice

In [None]:
# Covariates for the first model: the housing columns plus the PCA index.
# The index frame is re-indexed by colonia_cve so the join aligns on the same
# key as vars_viv, and its 'Índice' column gets a descriptive name.
var_m1 = vars_viv.join(indice.indice.set_index('colonia_cve')).rename(
    columns={'Índice': 'Concentración de Desventajas'}
)
var_m1

Unnamed: 0_level_0,VIVPAR_HAB,VPH_3YMASC,Concentración de Desventajas
colonia_cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1360.0,0.653676,3.549108
1,2009.0,0.549029,3.638010
2,2067.0,0.758587,1.045746
3,426.0,0.676056,2.574354
4,1367.0,0.628383,2.640318
...,...,...,...
1820,1756.0,0.801822,0.232755
1821,6610.0,0.821936,1.529263
1822,241.0,1.298755,1.942715
1823,2269.0,0.759806,1.923533


### Uso de suelo

Ahora preparamos otro conjunto al que le vamos a pegar las variables de uso de suelo

In [None]:
# Land-use covariates: fetch the land-use data and aggregate it with
# unidades='colonias' in a single expression.
usos = agrega_uso_suelo(get_uso_de_suelo(), unidades='colonias')
usos

Unnamed: 0_level_0,Industria,Comercio,Servicios,Intensidad,Entropía
colonia_cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,8,87,44,139,-4.072303
2,15,112,55,182,-3.803105
3,2,22,7,31,-4.161488
4,9,55,31,95,-3.661970
5,31,144,66,241,-3.514375
...,...,...,...,...,...
1820,45,255,213,513,-3.651524
1821,135,1074,733,1942,-3.852911
1822,1,18,12,31,-4.484460
1823,28,166,107,301,-3.644880


In [None]:
# Covariates for the second model: everything in var_m1 plus the land-use
# columns. This is a left join on the colonia_cve index, so colonias that have
# no land-use row are kept and receive NaN in the land-use columns.
var_m2 = var_m1.join(usos, how='left')
var_m2

Unnamed: 0_level_0,VIVPAR_HAB,VPH_3YMASC,Concentración de Desventajas,Industria,Comercio,Servicios,Intensidad,Entropía
colonia_cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1360.0,0.653676,3.549108,,,,,
1,2009.0,0.549029,3.638010,8.0,87.0,44.0,139.0,-4.072303
2,2067.0,0.758587,1.045746,15.0,112.0,55.0,182.0,-3.803105
3,426.0,0.676056,2.574354,2.0,22.0,7.0,31.0,-4.161488
4,1367.0,0.628383,2.640318,9.0,55.0,31.0,95.0,-3.661970
...,...,...,...,...,...,...,...,...
1820,1756.0,0.801822,0.232755,45.0,255.0,213.0,513.0,-3.651524
1821,6610.0,0.821936,1.529263,135.0,1074.0,733.0,1942.0,-3.852911
1822,241.0,1.298755,1.942715,1.0,18.0,12.0,31.0,-4.484460
1823,2269.0,0.759806,1.923533,28.0,166.0,107.0,301.0,-3.644880


## Ajuste de modelos

Creamos capas de análisis para ambos conjuntos de datos y ajustamos los modelos

In [None]:
# One analysis layer per covariate set; both are built from the same Y and the
# same 'colonia_cve' argument, in the order (var_m1, var_m2).
ca1, ca2 = (
    CapaDeAnalisis(Y, covariables, 'colonia_cve')
    for covariables in (var_m1, var_m2)
)

In [None]:
# Fit one GLM per analysis layer, each with its own NegativeBinomial family
# instance; r1 and r2 hold the fitted results.
# NOTE(review): statsmodels' NegativeBinomial family uses a fixed dispersion
# parameter alpha (default 1.0) that GLM does not estimate — confirm the
# default is appropriate for these counts.
m1 = ModeloGLM(ca1, sm.families.NegativeBinomial())
r1 = m1.fit()
m2 = ModeloGLM(ca2, sm.families.NegativeBinomial())
r2 = m2.fit()

In [None]:
# Summary table for the first model (housing covariates + disadvantage index).
r1.summary()

0,1,2,3
Dep. Variable:,Q('ROBO A CASA HABITACION SIN VIOLENCIA'),No. Observations:,1809.0
Model:,GLM,Df Residuals:,1805.0
Model Family:,NegativeBinomial,Df Model:,3.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-4885.6
Date:,"Wed, 30 Mar 2022",Deviance:,1249.0
Time:,00:40:31,Pearson chi2:,1010.0
No. Iterations:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.3299,0.148,-2.230,0.026,-0.620,-0.040
Q('VIVPAR_HAB'),0.0007,2.07e-05,33.445,0.000,0.001,0.001
Q('VPH_3YMASC'),1.0757,0.146,7.353,0.000,0.789,1.362
Q('Concentración de Desventajas'),-0.0229,0.027,-0.859,0.391,-0.075,0.029


In [None]:
# Summary table for the second model (first-model covariates + land use).
r2.summary()

0,1,2,3
Dep. Variable:,Q('ROBO A CASA HABITACION SIN VIOLENCIA'),No. Observations:,1623.0
Model:,GLM,Df Residuals:,1615.0
Model Family:,NegativeBinomial,Df Model:,7.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-4542.9
Date:,"Wed, 30 Mar 2022",Deviance:,907.37
Time:,00:40:40,Pearson chi2:,697.0
No. Iterations:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.8717,0.226,-3.859,0.000,-1.314,-0.429
Q('VIVPAR_HAB'),0.0005,2.7e-05,19.733,0.000,0.000,0.001
Q('VPH_3YMASC'),0.6635,0.166,3.988,0.000,0.337,0.990
Q('Concentración de Desventajas'),-0.0291,0.028,-1.028,0.304,-0.085,0.026
Q('Industria'),0.0015,0.001,1.549,0.121,-0.000,0.003
Q('Comercio'),-0.0011,0.000,-3.209,0.001,-0.002,-0.000
Q('Servicios'),0.0004,0.000,0.906,0.365,-0.000,0.001
Q('Intensidad'),0.0007,0.000,2.675,0.007,0.000,0.001
Q('Entropía'),-0.2750,0.048,-5.781,0.000,-0.368,-0.182
