# Data Lesiones Homicidios

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from datetime import datetime
import os
from unidecode import unidecode

In [2]:
files_1 = ['./data/homicidios/' + str(i) + '.xlsx' for i in range(2010, 2013)]
files_2 = ['./data/homicidios/' + str(i) + '.xls' for i in range(2013, 2020)]
files = files_1+files_2
print(files)

['./data/homicidios/2010.xlsx', './data/homicidios/2011.xlsx', './data/homicidios/2012.xlsx', './data/homicidios/2013.xls', './data/homicidios/2014.xls', './data/homicidios/2015.xls', './data/homicidios/2016.xls', './data/homicidios/2017.xls', './data/homicidios/2018.xls', './data/homicidios/2019.xls']


In [3]:
all_dfs = []

for file in files:
    temp = pd.read_excel(file)
    
    # Eliminar acentos en el nombre de las columnas
    temp.columns = [unidecode(str(col)) for col in temp.columns]

    # Reemplazar espacios por _
    temp.columns = temp.columns.str.replace(' ', '_')
    
    # Strip
    temp.columns = temp.columns.str.strip()

    # Reemplazar espacios por _
    temp.columns = temp.columns.str.lower()

    # Reemplazar NaN, None
    temp.replace('-', np.nan, inplace=True)

    # Set axis name
    temp.rename_axis('id', inplace=True)
    
    all_dfs.append(temp)

In [4]:
renaming={
    'clase_sitio':'clase_de_sitio',
    'pais_nace':'pais_de_nacimiento',
    'clase_empleado':'clase_de_empleado'
}

for i in range(len(all_dfs)):
    all_dfs[i].rename(columns=renaming, inplace=True)
    
df_f = pd.concat(all_dfs, axis=0)

In [5]:
df_f # also try: .describe

Unnamed: 0_level_0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,...,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,2010,2011,2012,profesion,cantidad
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2010-01-01 00:00:00,ANTIOQUIA,AMAGÁ,Viernes,14:00:00,MALABRIGO PARTE ALTA,RURAL,FINCAS Y SIMILARES,ARMA BLANCA,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,5030000.0,1.0,,,,
1,2010-01-01 00:00:00,ANTIOQUIA,COPACABANA,Viernes,08:30:00,LA MISERICORDIA,URBANA,FRENTE A RESIDENCIAS - VIA PUBLICA,ARMA DE FUEGO,A PIE,...,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,5212000.0,1.0,,,,
2,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,02:00:00,LAS GRANJAS C-3,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,5001000.0,1.0,,,,
3,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,...,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,5001000.0,1.0,,,,
4,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,5001000.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12512,2019-12-31 00:00:00,VALLE,TULUÁ,Martes,23:50:00,Villa Liliana,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,BICICLETA,...,COLOMBIA,EMPLEADO PARTICULAR,,SECUNDARIA,76834000.0,,,,,1.0
12513,,,,,,,,,,,...,,,,,,,,,,12558.0
12514,FUENTE: DIJIN-POLICÍA NACI...,,,,,,,,,,...,,,,,,,,,,
12515,Elaboró: PT. Andrés Felipe Taborda,,,,,,,,,,...,,,,,,,,,,


In [6]:
df_f.isnull().sum()

fecha                      5
departamento              40
municipio                 40
dia                       40
hora                      40
barrio                  5119
zona                      40
clase_de_sitio            40
arma_empleada             51
movil_agresor            712
movil_victima            522
edad                      84
sexo                      40
estado_civil             688
pais_de_nacimiento      1678
clase_de_empleado         40
profesiones            89689
escolaridad              868
codigo_dane               40
2010                  121392
2011                  120606
2012                  120374
profesion             131991
cantidad               46687
dtype: int64

In [7]:
df_f.drop(columns=['2010','2011','2012','profesion'], inplace=True)

In [8]:
df_f

Unnamed: 0_level_0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,movil_victima,edad,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,2010-01-01 00:00:00,ANTIOQUIA,AMAGÁ,Viernes,14:00:00,MALABRIGO PARTE ALTA,RURAL,FINCAS Y SIMILARES,ARMA BLANCA,A PIE,A PIE,49,MASCULINO,CASADO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,5030000.0,
1,2010-01-01 00:00:00,ANTIOQUIA,COPACABANA,Viernes,08:30:00,LA MISERICORDIA,URBANA,FRENTE A RESIDENCIAS - VIA PUBLICA,ARMA DE FUEGO,A PIE,A PIE,32,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,5212000.0,
2,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,02:00:00,LAS GRANJAS C-3,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,A PIE,33,MASCULINO,UNION LIBRE,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,5001000.0,
3,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,5001000.0,
4,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,5001000.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12512,2019-12-31 00:00:00,VALLE,TULUÁ,Martes,23:50:00,Villa Liliana,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,BICICLETA,A PIE,19,MASCULINO,UNION LIBRE,COLOMBIA,EMPLEADO PARTICULAR,,SECUNDARIA,76834000.0,1.0
12513,,,,,,,,,,,,,,,,,,,,12558.0
12514,FUENTE: DIJIN-POLICÍA NACI...,,,,,,,,,,,,,,,,,,,
12515,Elaboró: PT. Andrés Felipe Taborda,,,,,,,,,,,,,,,,,,,


In [9]:
df_f = df_f.reset_index()

In [10]:
df_f

Unnamed: 0,id,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,...,movil_victima,edad,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad
0,0,2010-01-01 00:00:00,ANTIOQUIA,AMAGÁ,Viernes,14:00:00,MALABRIGO PARTE ALTA,RURAL,FINCAS Y SIMILARES,ARMA BLANCA,...,A PIE,49,MASCULINO,CASADO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,5030000.0,
1,1,2010-01-01 00:00:00,ANTIOQUIA,COPACABANA,Viernes,08:30:00,LA MISERICORDIA,URBANA,FRENTE A RESIDENCIAS - VIA PUBLICA,ARMA DE FUEGO,...,A PIE,32,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,5212000.0,
2,2,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,02:00:00,LAS GRANJAS C-3,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,...,A PIE,33,MASCULINO,UNION LIBRE,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,5001000.0,
3,3,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,...,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,5001000.0,
4,4,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,...,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,5001000.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136338,12512,2019-12-31 00:00:00,VALLE,TULUÁ,Martes,23:50:00,Villa Liliana,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,...,A PIE,19,MASCULINO,UNION LIBRE,COLOMBIA,EMPLEADO PARTICULAR,,SECUNDARIA,76834000.0,1.0
136339,12513,,,,,,,,,,...,,,,,,,,,,12558.0
136340,12514,FUENTE: DIJIN-POLICÍA NACI...,,,,,,,,,...,,,,,,,,,,
136341,12515,Elaboró: PT. Andrés Felipe Taborda,,,,,,,,,...,,,,,,,,,,


In [11]:
df_f.drop(columns=['id'], inplace=True)

In [12]:
df_f

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,movil_victima,edad,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad
0,2010-01-01 00:00:00,ANTIOQUIA,AMAGÁ,Viernes,14:00:00,MALABRIGO PARTE ALTA,RURAL,FINCAS Y SIMILARES,ARMA BLANCA,A PIE,A PIE,49,MASCULINO,CASADO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,5030000.0,
1,2010-01-01 00:00:00,ANTIOQUIA,COPACABANA,Viernes,08:30:00,LA MISERICORDIA,URBANA,FRENTE A RESIDENCIAS - VIA PUBLICA,ARMA DE FUEGO,A PIE,A PIE,32,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,5212000.0,
2,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,02:00:00,LAS GRANJAS C-3,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,A PIE,33,MASCULINO,UNION LIBRE,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,5001000.0,
3,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,5001000.0,
4,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,5001000.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136338,2019-12-31 00:00:00,VALLE,TULUÁ,Martes,23:50:00,Villa Liliana,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,BICICLETA,A PIE,19,MASCULINO,UNION LIBRE,COLOMBIA,EMPLEADO PARTICULAR,,SECUNDARIA,76834000.0,1.0
136339,,,,,,,,,,,,,,,,,,,,12558.0
136340,FUENTE: DIJIN-POLICÍA NACI...,,,,,,,,,,,,,,,,,,,
136341,Elaboró: PT. Andrés Felipe Taborda,,,,,,,,,,,,,,,,,,,


In [13]:
df_f = df_f[df_f.isnull().sum(axis=1) < 5]

In [14]:
df_f

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,movil_victima,edad,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad
0,2010-01-01 00:00:00,ANTIOQUIA,AMAGÁ,Viernes,14:00:00,MALABRIGO PARTE ALTA,RURAL,FINCAS Y SIMILARES,ARMA BLANCA,A PIE,A PIE,49,MASCULINO,CASADO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,5030000.0,
1,2010-01-01 00:00:00,ANTIOQUIA,COPACABANA,Viernes,08:30:00,LA MISERICORDIA,URBANA,FRENTE A RESIDENCIAS - VIA PUBLICA,ARMA DE FUEGO,A PIE,A PIE,32,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,5212000.0,
2,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,02:00:00,LAS GRANJAS C-3,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,A PIE,33,MASCULINO,UNION LIBRE,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,5001000.0,
3,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,5001000.0,
4,2010-01-01 00:00:00,ANTIOQUIA,MEDELLÍN (CT),Viernes,11:10:00,ALEJANDRO ECHAVARRIA C-9,URBANA,ESCENARIOS DEPORTIVOS,ARMA DE FUEGO,A PIE,A PIE,23,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,5001000.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136334,2019-12-31 00:00:00,VALLE,CAICEDONIA,Martes,05:05:00,VALLE DEL CAUCA,URBANA,VIAS PUBLICAS,ARMA BLANCA / CORTOPUNZANTE,A PIE,A PIE,42,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,,PRIMARIA,76122000.0,1.0
136335,2019-12-31 00:00:00,VALLE,CALI (CT),Martes,20:50:00,VILLA DEL PRADO - EL GUABITO E5,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,A PIE,36,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,76001000.0,1.0
136336,2019-12-31 00:00:00,VALLE,CALI (CT),Martes,00:20:00,CIUDADELA FLORALIA E6,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,A PIE,22,MASCULINO,SOLTERO,COLOMBIA,DESEMPLEADO,,PRIMARIA,76001000.0,1.0
136337,2019-12-31 00:00:00,VALLE,CALI (CT),Martes,21:16:00,MARIO CORREA RENGIFO E18,URBANA,PELUQUERIA Y SIMILARES,ARMA DE FUEGO,CONDUCTOR MOTOCICLETA,A PIE,25,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,PRIMARIA,76001000.0,1.0


In [15]:
df_f.isnull().sum()

fecha                     0
departamento              0
municipio                 0
dia                       0
hora                      0
barrio                 5067
zona                      0
clase_de_sitio            0
arma_empleada             9
movil_agresor           627
movil_victima           453
edad                     33
sexo                      0
estado_civil            585
pais_de_nacimiento     1577
clase_de_empleado         0
profesiones           89584
escolaridad             768
codigo_dane               0
cantidad              46654
dtype: int64

In [16]:
df_f.hora = pd.to_datetime(df_f.hora, format='%H:%M:%S')
df_f['year'] = df_f.fecha.dt.year
df_f['hora_int'] = df_f.hora.dt.hour

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [17]:
df_f.edad.replace('NO REPORTADO', np.nan, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [18]:
df_f.edad = df_f.edad.astype(float)

In [19]:
df_f.municipio.replace(' \(CT\)', '', regex=True, inplace=True)
df_f.municipio.replace('BOGOTÁ', 'BOGOTA D.C.', inplace=True)
df_f.municipio.replace('BOGOTÁ D.C.', 'BOGOTA D.C.', inplace=True)
df_f.departamento.replace('BOGOTÁ', 'BOGOTA D.C.', inplace=True)
df_f.departamento.replace('GUAJIRA', 'LA GUAJIRA', inplace=True)
df_f.departamento.replace('VALLE', 'VALLE DEL CAUCA', inplace=True)
df_f.departamento.replace('NARIÑO', 'NARINO', inplace=True)

In [20]:
for col in df_f.columns:
    print(col)
    print(df_f[col].unique())

fecha
[datetime.datetime(2010, 1, 1, 0, 0) datetime.datetime(2010, 1, 2, 0, 0)
 datetime.datetime(2010, 1, 3, 0, 0) ...
 datetime.datetime(2019, 12, 29, 0, 0)
 datetime.datetime(2019, 12, 30, 0, 0)
 datetime.datetime(2019, 12, 31, 0, 0)]
departamento
['ANTIOQUIA' 'ATLÁNTICO' 'BOLÍVAR' 'CALDAS' 'CAQUETÁ' 'CAUCA' 'CESAR'
 'CUNDINAMARCA' 'LA GUAJIRA' 'GUAVIARE' 'HUILA' 'NARINO' 'QUINDÍO'
 'RISARALDA' 'SANTANDER' 'SUCRE' 'TOLIMA' 'VALLE DEL CAUCA' 'CASANARE'
 'CHOCÓ' 'META' 'PUTUMAYO' 'ARAUCA' 'CÓRDOBA' 'NORTE DE SANTANDER'
 'BOYACÁ' 'MAGDALENA' 'VICHADA' 'SAN ANDRÉS' 'AMAZONAS' 'GUAINÍA' 'VAUPÉS']
municipio
['AMAGÁ' 'COPACABANA' 'MEDELLÍN' 'RIONEGRO' 'BARRANQUILLA' 'MALAMBO'
 'CARTAGENA' 'MAGANGUÉ' 'MARÍA LA BAJA' 'MOMPÓS' 'MANIZALES' 'SAN JOSÉ'
 'CARTAGENA DEL CHAIRÁ' 'CURILLO' 'FLORENCIA' 'ARGELIA' 'BALBOA' 'CAJIBÍO'
 'POPAYÁN' 'SANTANDER DE QUILICHAO' 'PAILITAS' 'BOGOTA D.C.' 'GUADUAS'
 'RIOHACHA' 'CALAMAR' 'ACEVEDO' 'NEIVA' 'LEIVA' 'PASTO'
 'SAN ANDRES DE TUMACO' 'TAMINANGO' 'ARMENIA'

## Solo Bogotá:

In [21]:
df_f = df_f[df_f['municipio'] == 'BOGOTA D.C.']

In [22]:
df_f

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,...,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad,year,hora_int
29,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 00:10:00,SANTA CECILIA I E-11,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,0
30,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 01:27:00,VILLA GLORIA E-19,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,1
31,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 02:22:00,LA FAVORITA E-14,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,ECUADOR,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,2
32,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:17:00,QUIRIGUA I E-10,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3
33,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:56:00,SANTA ROSA SUR E-19,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,FEMENINO,SOLTERO,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136284,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 16:46:00,EL PARAISO E-19,URBANA,VIAS PUBLICAS,ARMA BLANCA / CORTOPUNZANTE,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,16
136285,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 18:36:00,SANTA FE E-14,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,18
136310,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 22:00:00,LA FAVORITA E-14,URBANA,VIAS PUBLICAS,ARMA BLANCA / CORTOPUNZANTE,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,22
136311,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 23:45:00,FONTANAR DEL RIO E-11,URBANA,VIAS PUBLICAS,ARMA BLANCA / CORTOPUNZANTE,A PIE,...,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,23


# EDA

In [23]:
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 14
# mpl.rcParams.keys()
sns.set_palette('Accent')

In [24]:
df_f.barrio.unique()

array(['SANTA CECILIA I E-11', 'VILLA GLORIA E-19', 'LA FAVORITA E-14',
       ..., 'AVORIAZ E-1', 'LAS ORQUIDEAS E-1', 'FONTANAR DEL RIO E-11'],
      dtype=object)

In [25]:
df_f.arma_empleada.replace('ARMA BLANCA / CORTOPUNZANTE', 'ARMA BLANCA', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


## Maps of counts

In [26]:
import json

with open('./data/geojson_departamentos.json', 'r') as f:
    departamentos = json.load(f)
    
areas = pd.read_json('./data/areas_departamentos.json',
                     orient='index')
# with open('./data/geoJson_departamentos.json', 'w') as f:
#     json.dump(departamentos, f)

# with open('./data/areas_departamentos.json', 'w') as f:
#     json.dump(areas, f)

In [27]:
import plotly.express as px

In [28]:
df_f.departamento = df_f.departamento.apply(unidecode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [30]:
df_f

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,...,sexo,estado_civil,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad,year,hora_int
29,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 00:10:00,SANTA CECILIA I E-11,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,0
30,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 01:27:00,VILLA GLORIA E-19,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,1
31,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 02:22:00,LA FAVORITA E-14,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,ECUADOR,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,2
32,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:17:00,QUIRIGUA I E-10,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3
33,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:56:00,SANTA ROSA SUR E-19,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,FEMENINO,SOLTERO,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136284,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 16:46:00,EL PARAISO E-19,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,16
136285,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 18:36:00,SANTA FE E-14,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,18
136310,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 22:00:00,LA FAVORITA E-14,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,SOLTERO,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,22
136311,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 23:45:00,FONTANAR DEL RIO E-11,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,MASCULINO,UNION LIBRE,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,23


In [31]:
df_f.barrio.unique()

array(['SANTA CECILIA I E-11', 'VILLA GLORIA E-19', 'LA FAVORITA E-14',
       ..., 'AVORIAZ E-1', 'LAS ORQUIDEAS E-1', 'FONTANAR DEL RIO E-11'],
      dtype=object)

In [32]:
df_f['barrio'] = df_f['barrio'].map(lambda x: str(x)[:-4])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [33]:
df_f.barrio.unique()

array(['SANTA CECILIA I ', 'VILLA GLORIA ', 'LA FAVORITA ', ...,
       'AVORIAZ', 'LAS ORQUIDEAS', 'FONTANAR DEL RIO '], dtype=object)

In [34]:
df_f['barrio'] = df_f['barrio'].map(lambda x: str(x).strip())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [35]:
df_f.barrio.unique().shape

(1635,)

In [36]:
coordenadas = pd.read_csv('coordenadas_barrios_bogota.csv')

In [37]:
coordenadas.rename(columns = {'name':'barrio'}, inplace = True)

In [38]:
cols = ['barrio']
df_f = df_f.join(coordenadas.set_index(cols), on=cols)
#total1['Hosp'].fillna(0, inplace=True)

In [39]:
df_f

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,...,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad,year,hora_int,lat,lon
29,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 00:10:00,SANTA CECILIA I,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,0,,
30,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 01:27:00,VILLA GLORIA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,1,4.549735,-74.152364
31,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 02:22:00,LA FAVORITA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,ECUADOR,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,2,4.609963,-74.078646
32,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:17:00,QUIRIGUA I,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3,4.711938,-74.100686
33,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:56:00,SANTA ROSA SUR,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,INDEPENDIENTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136284,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 16:46:00,EL PARAISO,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,16,4.540863,-74.091920
136285,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 18:36:00,SANTA FE,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,18,4.615873,-74.077311
136310,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 22:00:00,LA FAVORITA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,22,4.609963,-74.078646
136311,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 23:45:00,FONTANAR DEL RIO,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,23,,


In [40]:
df_f.isnull().sum()

fecha                    0
departamento             0
municipio                0
dia                      0
hora                     0
barrio                   0
zona                     0
clase_de_sitio           0
arma_empleada            0
movil_agresor           18
movil_victima           18
edad                     4
sexo                     0
estado_civil             4
pais_de_nacimiento      24
clase_de_empleado        0
profesiones           9709
escolaridad              2
codigo_dane              0
cantidad              4635
year                     0
hora_int                 0
lat                   6555
lon                   6555
dtype: int64

In [41]:
df_f_no_coord = df_f[df_f['lat'].isnull()]

In [42]:
df_f_coord = df_f[df_f['lat'].notnull()]

In [43]:
df_f.barrio.unique().shape

(1635,)

In [44]:
df_f_no_coord.barrio.unique().shape

(1144,)

In [45]:
df_f_coord.barrio.unique().shape

(491,)

In [46]:
df_f_coord_simplified = df_f_coord[]

SyntaxError: invalid syntax (<ipython-input-46-1bc0922f9903>, line 1)

In [47]:
TotalH1=df_f_coord.groupby(['barrio'])['hora_int'].count().reset_index(name='Total')

In [48]:
cols = ['barrio']
df_f_definitivo = TotalH1.join(coordenadas.set_index(cols), on=cols)

In [49]:
df_f_definitivo

Unnamed: 0,barrio,Total,lat,lon
0,ACEVEDO TEJADA,1,4.630691,-74.081465
1,AGUAS CLARAS,10,4.557023,-74.069100
2,ALASKA,4,4.529911,-74.111248
3,ALCALA,5,4.603044,-74.126449
4,ALCAZARES,6,4.662511,-74.070415
...,...,...,...,...
486,VILLAS EL DIAMANTE,9,4.550524,-74.153755
487,VILLEMAR,4,4.670606,-74.140076
488,VITELMA,14,4.575928,-74.077078
489,VOTO NACIONAL,75,4.603330,-74.083741


In [50]:
import folium  #needed for interactive map
from folium.plugins import HeatMap

In [51]:
max_amount = float(df_f_definitivo['Total'].max())

folium_hmap = folium.Map(location=[4.728381, -74.046869],
                        zoom_start=13,
                        tiles='CartoDB positron')

hm_wide = HeatMap( list(zip(df_f_definitivo['lat'], df_f_definitivo['lon'], df_f_definitivo['Total'])),
                   min_opacity=0.2,
                   max_val=max_amount,
                   radius=20, blur=6, 
                   max_zoom=15, 
                 )

folium_hmap.add_child(hm_wide)

In [52]:
df_f_definitivo

Unnamed: 0,barrio,Total,lat,lon
0,ACEVEDO TEJADA,1,4.630691,-74.081465
1,AGUAS CLARAS,10,4.557023,-74.069100
2,ALASKA,4,4.529911,-74.111248
3,ALCALA,5,4.603044,-74.126449
4,ALCAZARES,6,4.662511,-74.070415
...,...,...,...,...
486,VILLAS EL DIAMANTE,9,4.550524,-74.153755
487,VILLEMAR,4,4.670606,-74.140076
488,VITELMA,14,4.575928,-74.077078
489,VOTO NACIONAL,75,4.603330,-74.083741


In [53]:
TotalH1

Unnamed: 0,barrio,Total
0,ACEVEDO TEJADA,1
1,AGUAS CLARAS,10
2,ALASKA,4
3,ALCALA,5
4,ALCAZARES,6
...,...,...
486,VILLAS EL DIAMANTE,9
487,VILLEMAR,4
488,VITELMA,14
489,VOTO NACIONAL,75


In [54]:
df_f_coord

Unnamed: 0,fecha,departamento,municipio,dia,hora,barrio,zona,clase_de_sitio,arma_empleada,movil_agresor,...,pais_de_nacimiento,clase_de_empleado,profesiones,escolaridad,codigo_dane,cantidad,year,hora_int,lat,lon
30,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 01:27:00,VILLA GLORIA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,1,4.549735,-74.152364
31,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 02:22:00,LA FAVORITA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,ECUADOR,EMPLEADO PARTICULAR,NO REPORTADO,SECUNDARIA,11001000.0,,2010,2,4.609963,-74.078646
32,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 03:17:00,QUIRIGUA I,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,ESTUDIANTE,NO REPORTADO,SECUNDARIA,11001000.0,,2010,3,4.711938,-74.100686
38,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 08:10:00,LA VICTORIA,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,DESEMPLEADO,NO REPORTADO,SECUNDARIA,11001000.0,,2010,8,4.551783,-74.092221
39,2010-01-01 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Viernes,1900-01-01 10:35:00,POTOSI,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,EMPLEADO PARTICULAR,NO REPORTADO,PRIMARIA,11001000.0,,2010,10,4.569170,-74.170351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136284,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 16:46:00,EL PARAISO,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,16,4.627185,-74.059534
136284,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 16:46:00,EL PARAISO,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,16,4.540863,-74.091920
136285,2019-12-30 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Lunes,1900-01-01 18:36:00,SANTA FE,URBANA,VIAS PUBLICAS,ARMA DE FUEGO,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,18,4.615873,-74.077311
136310,2019-12-31 00:00:00,CUNDINAMARCA,BOGOTA D.C.,Martes,1900-01-01 22:00:00,LA FAVORITA,URBANA,VIAS PUBLICAS,ARMA BLANCA,A PIE,...,COLOMBIA,INDEPENDIENTE,,SECUNDARIA,11001000.0,1.0,2019,22,4.609963,-74.078646


In [55]:
df_f_coord_KDE = df_f_coord[['lat', 'lon']]

In [56]:
df_f_coord_KDE

Unnamed: 0,lat,lon
30,4.549735,-74.152364
31,4.609963,-74.078646
32,4.711938,-74.100686
38,4.551783,-74.092221
39,4.569170,-74.170351
...,...,...
136284,4.627185,-74.059534
136284,4.540863,-74.091920
136285,4.615873,-74.077311
136310,4.609963,-74.078646


In [57]:
df_f_coord_KDE = df_f_coord_KDE.to_numpy()

In [58]:
df_f_coord_KDE.shape

(7789, 2)

In [59]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_species_distributions
from sklearn.neighbors import KernelDensity

In [60]:
Xtrain = df_f_coord_KDE
ytrain = np.ones(7789)

ygrid = np.linspace(3.80, 4.82, num = 283)
xgrid = np.linspace(-74.38, -74.02, num = 100)


#X, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])

X, Y = np.meshgrid(xgrid, ygrid)

xy = np.vstack([Y.ravel(), X.ravel()]).T

In [61]:
Xtrain[:,0].min()

3.809720128555752

In [62]:
Xtrain[:,0].max()

4.819483937375536

In [63]:
Xtrain[:,1].min()

-74.3732223241509

In [64]:
Xtrain[:,1].max()

-74.01937155645214

In [65]:
# construct a kernel density estimate of the distribution
print(" - computing KDE in spherical coordinates")
kde = KernelDensity(bandwidth=0.01, metric='haversine',
                    kernel='gaussian', algorithm='ball_tree')
kde.fit(Xtrain)

 - computing KDE in spherical coordinates


KernelDensity(algorithm='ball_tree', atol=0, bandwidth=0.01, breadth_first=True,
              kernel='gaussian', leaf_size=40, metric='haversine',
              metric_params=None, rtol=0)

In [66]:
xy.shape

(28300, 2)

In [67]:
Z = kde.score_samples(xy)
#Z = Z.reshape(X.shape)

In [68]:
Z.shape

(28300,)

In [69]:
Z

array([-0.12357967, -0.01160242,  0.01827175, ..., -0.6233181 ,
       -0.64214904, -0.66242962])

In [70]:
np.exp(Z)

array([0.88375123, 0.98846463, 1.01843969, ..., 0.53616244, 0.52616047,
       0.51559711])

In [71]:
max_amount = float(df_f_definitivo['Total'].max())

folium_hmap = folium.Map(location=[4.728381, -74.046869],
                        zoom_start=13,
                        tiles='CartoDB positron')

hm_wide = HeatMap( list(zip(xy[:,0], xy[:,1], np.exp(Z))),
                   min_opacity=0.005,
                   max_val=max_amount,
                   radius=15, blur=6, 
                   max_zoom=15, 
                 )

folium_hmap.add_child(hm_wide)