# FE: Georeferencing

In [None]:
#Run only if there are errors related to missing packages
#!pip install -r requirements.txt

In [None]:
import pandas as pd
import numpy as np
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import googlemaps
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import pingouin as pg

Since the only datasets that contain address information are the one from "aseguramiento" and the one extracted from the PDFs, we will work only with those two sources.

## Preparing the data

We prepare the data from "aseguramiento". Note that we build the column "DIRECCION_COMPLETA" that will be used by the Geocoder.

In [None]:
# Load the "aseguramiento" records.
# NOTE(review): the rendered output shows "NÂ°" inside DIRECCION, which suggests this
# file may actually be UTF-8 rather than latin-1 — confirm before changing the encoding.
aseguramiento = pd.read_csv(
    '/datasets/gdrive/DS4A - Team Project/data/aseguramiento.v3/aseguramiento_final.csv',
    encoding="latin-1",
)

# Build the address string handed to the geocoder:
# "<street>, <municipality>, <department>, <country>".
# A row with any missing component ends up with a missing (NaN) full address.
direccion_completa = aseguramiento["DIRECCION"].str.strip()
for columna in ("MUNICIPIO", "DEPARTAMENTO", "PAIS"):
    direccion_completa = direccion_completa + ", " + aseguramiento[columna]
aseguramiento["DIRECCION_COMPLETA"] = direccion_completa

aseguramiento

Unnamed: 0,RADICADO,FECHA,ASUNTO,DIRECCION,MUNICIPIO,DEPARTAMENTO,PAIS,DIRECCION_COMPLETA
0,20191700062932,2019-05-06 14:24:20.476704,INFORMACION DE CAMBIO DE DIRECCION,CARRERA 14 No. 13-82,SOGAMOSO,BOYACA,COLOMBIA,"CARRERA 14 No. 13-82, SOGAMOSO, BOYACA, COLOMBIA"
1,20191700063412,2019-05-06 16:14:32.807325,DESCARGOS POR PROCESO SANCIONATORIO Y CARGOS D...,KRA 10 A NÂ° 53-68 JUAN JOSE RONDON,SOGAMOSO,BOYACA,COLOMBIA,"KRA 10 A NÂ° 53-68 JUAN JOSE RONDON, SOGAMOSO,..."
2,20191700063562,2019-05-06 17:20:00.604149,PD-1149\r\nRADICADO 1039 DE 24 DE ABRIL DE 2019,CALLE 15 NO. 11-79 OFICINA 302 EDIFICIO EL PIN...,SOGAMOSO,BOYACA,COLOMBIA,CALLE 15 NO. 11-79 OFICINA 302 EDIFICIO EL PIN...
3,20191700063762,2019-05-07 10:48:55.942940,TRASLADO DENUNCIA CON RADICADO INVIMA No. 2019...,CRA 10 N 64-28,BOGOTA,D.C.,COLOMBIA,"CRA 10 N 64-28, BOGOTA, D.C., COLOMBIA"
4,20191700063922,2019-05-07 11:44:11.598638,SOLICITUD INFORMACION,CARRERA 12 No. 16-31 PISO 3,SOGAMOSO,BOYACA,COLOMBIA,"CARRERA 12 No. 16-31 PISO 3, SOGAMOSO, BOYACA,..."
...,...,...,...,...,...,...,...,...
1956,20191700061802,2019-05-03 09:46:20.619884,ACCION DE TUTELA No. 2019-0017\r\nACTE: MARIA ...,CALLE 11 No. 9-30 OFC 202,SOGAMOSO,BOYACA,COLOMBIA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
1957,20191700061812,2019-05-03 09:48:27.418469,ACCION DE TUTELA No. 2019-0019\r\nACTE: MARIA ...,CALLE 11 No. 9-30 OFC 202,SOGAMOSO,BOYACA,COLOMBIA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
1958,20191700062142,2019-05-03 14:31:18.900281,SOLICITUD VISTA SANITARIA,CARRERA 13 No. 4-68 INT. 5,SOGAMOSO,BOYACA,COLOMBIA,"CARRERA 13 No. 4-68 INT. 5, SOGAMOSO, BOYACA, ..."
1959,20191700062312,2019-05-03 15:30:18.361842,RESPUESTA A DERECHO DE PETICION RADICADO INVIM...,CRA 10 N 64-28,BOGOTA,D.C.,COLOMBIA,"CRA 10 N 64-28, BOGOTA, D.C., COLOMBIA"


Now, we prepare the data from the PDFs. Note that we also create the column "DIRECCION_COMPLETA" that will be used by the Geocoder.

In [None]:
# Load the records extracted from the PDFs and give the columns the same
# names used by the "aseguramiento" dataset.
PDF = pd.read_csv('/datasets/gdrive/DS4A - Team Project/data/PDF_Orfeo.v3/data_pdfs_orfeo.csv', encoding="latin-1")
PDF.columns = ["RADICADO", "FECHA", "ASUNTO", "PAIS", "DIRECCION", "TEXTO_PQRSDF"]

# We have verified that all entries in the PDFs are from the same place
PDF["MUNICIPIO"] = "SOGAMOSO"
PDF["DEPARTAMENTO"] = "BOYACA"

# Remove the dot at the end of each address. Only an actual trailing "."
# (optionally followed by whitespace) is removed; blindly slicing off the last
# character would corrupt any address that does not end with a dot.
PDF["DIRECCION"] = PDF["DIRECCION"].str.replace(r"\.\s*$", "", regex=True)

# Full address for the geocoder. In the rows displayed, DIRECCION already ends
# with "SOGAMOSO, BOYACA", so only the country is appended here.
PDF["DIRECCION_COMPLETA"] = PDF["DIRECCION"].str.strip() + ", " + PDF["PAIS"]

PDF

Unnamed: 0,RADICADO,FECHA,ASUNTO,PAIS,DIRECCION,TEXTO_PQRSDF,MUNICIPIO,DEPARTAMENTO,DIRECCION_COMPLETA
0,20182000001012,27 - 10 - 2018 11:09:51,AUTORIZACIÓN CX,COLOMBIA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA CX DE CADERAS,SOGAMOSO,BOYACA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA, COLOMBIA"
1,20182000001072,27 - 10 - 2018 11:32:35,AUTORIZACIÓN AYUDAS DX,COLOMBIA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA","LA EPS NO AUTORIZA LABORATORIOS,CONTROL POR CX.",SOGAMOSO,BOYACA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA, COLOMBIA"
2,20182000001312,11 - 12 - 2018 01:19:06,MEDICAMENTOS,COLOMBIA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA NMEDICAMENTO DABIGATRAN.,SOGAMOSO,BOYACA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA, COLOMBIA"
3,20182000000792,27 - 09 - 2018 03:38:10,QUE LA EPS LE BRINDE UNA ATENCIÓN INTEGRAL YA...,COLOMBIA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA",SOLICITO A LA EPS ME PRESTE MANEJO DOMI...,SOGAMOSO,BOYACA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA, COLOMBIA"
4,20182000000172,15 - 08 - 2018 02:55:09,ASIGNACION DE CITA,COLOMBIA,"calle 7A 14A-02 SOGAMOSO, BOYACA",LLEVO ESPERANDO CITA EN FAMEDIC Y OTORRINONLOG...,SOGAMOSO,BOYACA,"calle 7A 14A-02 SOGAMOSO, BOYACA, COLOMBIA"
...,...,...,...,...,...,...,...,...,...
756,20222000000772,13 - 05 - 2022 06:32:54,INOPORTUNIDAD-EN-LA-ENTREGA,COLOMBIA,"CALLE-47-No.11A-27 SOGAMOSO, BOYACA",USUARIA-QUE-SOLICITA-AGILIDAD-EN-LA-ENTREGA-DE...,SOGAMOSO,BOYACA,"CALLE-47-No.11A-27 SOGAMOSO, BOYACA, COLOMBIA"
757,20222000000592,22 - 04 - 2022 11:18:46,INOPORTUNIDAD-EN-EL-SERVICIO,COLOMBIA,"CARRERA-20-No.7-31 SOGAMOSO, BOYACA",USUARIA-MENOR-DE-EDAD-QUE-MANIFIESTA-INCONFORM...,SOGAMOSO,BOYACA,"CARRERA-20-No.7-31 SOGAMOSO, BOYACA, COLOMBIA"
758,20222000000452,18 - 03 - 2022 06:14:08,INOPORTUNIDAD-EN-EL-SERVICIO,COLOMBIA,"CARRERA-21-15-48 SOGAMOSO, BOYACA",USUARIA-QUE-MANIFIESTA-INCONFORMIDAD-EN-LA-ENT...,SOGAMOSO,BOYACA,"CARRERA-21-15-48 SOGAMOSO, BOYACA, COLOMBIA"
759,20222000000712,13 - 05 - 2022 05:50:49,INOPORTUNIDAD-EN-EL-SERVICIO,COLOMBIA,"CALLE-46-No.12-D-61-CASA-51 SOGAMOSO, BOYACA",PADRE-DEL-MENOR-SOLICITA-QUE-EL-MEDICAMENTO-FO...,SOGAMOSO,BOYACA,"CALLE-46-No.12-D-61-CASA-51 SOGAMOSO, BOYACA, ..."


Finally, we concatenate the two datasets.

In [None]:
# Stack the PDF rows on top of the "aseguramiento" rows under a fresh 0..n-1 index.
# Columns that exist in only one source (e.g. TEXTO_PQRSDF) are left empty for the other.
df = pd.concat(
    [PDF, aseguramiento],
    ignore_index=True,
    verify_integrity=True,
    axis=0,
)
df

Unnamed: 0,RADICADO,FECHA,ASUNTO,PAIS,DIRECCION,TEXTO_PQRSDF,MUNICIPIO,DEPARTAMENTO,DIRECCION_COMPLETA
0,20182000001012,27 - 10 - 2018 11:09:51,AUTORIZACIÓN CX,COLOMBIA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA CX DE CADERAS,SOGAMOSO,BOYACA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA, COLOMBIA"
1,20182000001072,27 - 10 - 2018 11:32:35,AUTORIZACIÓN AYUDAS DX,COLOMBIA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA","LA EPS NO AUTORIZA LABORATORIOS,CONTROL POR CX.",SOGAMOSO,BOYACA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA, COLOMBIA"
2,20182000001312,11 - 12 - 2018 01:19:06,MEDICAMENTOS,COLOMBIA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA NMEDICAMENTO DABIGATRAN.,SOGAMOSO,BOYACA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA, COLOMBIA"
3,20182000000792,27 - 09 - 2018 03:38:10,QUE LA EPS LE BRINDE UNA ATENCIÓN INTEGRAL YA...,COLOMBIA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA",SOLICITO A LA EPS ME PRESTE MANEJO DOMI...,SOGAMOSO,BOYACA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA, COLOMBIA"
4,20182000000172,15 - 08 - 2018 02:55:09,ASIGNACION DE CITA,COLOMBIA,"calle 7A 14A-02 SOGAMOSO, BOYACA",LLEVO ESPERANDO CITA EN FAMEDIC Y OTORRINONLOG...,SOGAMOSO,BOYACA,"calle 7A 14A-02 SOGAMOSO, BOYACA, COLOMBIA"
...,...,...,...,...,...,...,...,...,...
2717,20191700061802,2019-05-03 09:46:20.619884,ACCION DE TUTELA No. 2019-0017\r\nACTE: MARIA ...,COLOMBIA,CALLE 11 No. 9-30 OFC 202,,SOGAMOSO,BOYACA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
2718,20191700061812,2019-05-03 09:48:27.418469,ACCION DE TUTELA No. 2019-0019\r\nACTE: MARIA ...,COLOMBIA,CALLE 11 No. 9-30 OFC 202,,SOGAMOSO,BOYACA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
2719,20191700062142,2019-05-03 14:31:18.900281,SOLICITUD VISTA SANITARIA,COLOMBIA,CARRERA 13 No. 4-68 INT. 5,,SOGAMOSO,BOYACA,"CARRERA 13 No. 4-68 INT. 5, SOGAMOSO, BOYACA, ..."
2720,20191700062312,2019-05-03 15:30:18.361842,RESPUESTA A DERECHO DE PETICION RADICADO INVIM...,COLOMBIA,CRA 10 N 64-28,,BOGOTA,D.C.,"CRA 10 N 64-28, BOGOTA, D.C., COLOMBIA"


## Georeferencing the addresses

We set the API key from Google Maps. (It has been deleted to avoid unwanted charges; ask Fabio if you need it).

In [None]:
# Commented to avoid unwanted executions
# API Key from GMaps
# To run the geocoding step, uncomment the line below and supply a valid key
# (preferably read from an environment variable rather than pasted here).
# get_info_gmaps() in the next cell reads this client from the global name `gmaps`.
#gmaps = googlemaps.Client(key='')

In [None]:
# Function that returns the best match and its lat/long from GMaps
def get_info_gmaps(busqueda):
    """Geocode one address with the global Google Maps client ``gmaps``.

    Parameters
    ----------
    busqueda : str
        Address to search for (e.g. a value of ``DIRECCION_COMPLETA``).

    Returns
    -------
    tuple
        ``(match, lat, lng)`` taken from the first geocoding result: its
        ``formatted_address`` and the ``lat`` / ``lng`` of its
        ``geometry.location``. ``(None, None, None)`` when ``busqueda`` is not
        a non-blank string (e.g. NaN) or when the geocoder returns no result.

    Raises
    ------
    NameError
        If the global ``gmaps`` client has not been created.
    """
    # Missing addresses may arrive as NaN (string concatenation with a missing
    # component yields NaN); do not spend an API call on them.
    if not isinstance(busqueda, str) or not busqueda.strip():
        return None, None, None

    resultados = gmaps.geocode(busqueda)
    # No match yields an empty list; indexing [0] on it would raise IndexError
    # and abort the whole .map() run over the column.
    if not resultados:
        return None, None, None

    mejor = resultados[0]
    ubicacion = mejor["geometry"]["location"]
    return mejor["formatted_address"], ubicacion["lat"], ubicacion["lng"]

And now, the magic of GMaps:

In [None]:
# Commented to avoid unwanted executions
# WARNING: This cell takes several minutes to run
# It maps get_info_gmaps over DIRECCION_COMPLETA and unpacks the resulting
# (match, lat, lng) tuples into the GEOREF, LATITUD and LONGITUD columns.
#df["GEOREF"], df["LATITUD"], df["LONGITUD"] = zip(*df['DIRECCION_COMPLETA'].map(get_info_gmaps))

In [None]:
# Inspect the combined frame. The GEOREF / LATITUD / LONGITUD columns only
# appear here if the (commented-out) geocoding cell above has been run.
df

Unnamed: 0,RADICADO,FECHA,ASUNTO,PAIS,DIRECCION,TEXTO_PQRSDF,MUNICIPIO,DEPARTAMENTO,DIRECCION_COMPLETA
0,20182000001012,27 - 10 - 2018 11:09:51,AUTORIZACIÓN CX,COLOMBIA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA CX DE CADERAS,SOGAMOSO,BOYACA,"CARRERA 34 # 15-145 SOGAMOSO, BOYACA, COLOMBIA"
1,20182000001072,27 - 10 - 2018 11:32:35,AUTORIZACIÓN AYUDAS DX,COLOMBIA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA","LA EPS NO AUTORIZA LABORATORIOS,CONTROL POR CX.",SOGAMOSO,BOYACA,"CALLE 58 DIAGONAL 11 SOGAMOSO, BOYACA, COLOMBIA"
2,20182000001312,11 - 12 - 2018 01:19:06,MEDICAMENTOS,COLOMBIA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA",LA EPS NO AUTORIZA NMEDICAMENTO DABIGATRAN.,SOGAMOSO,BOYACA,"CARRERA 19 # 11 A 06 SOGAMOSO, BOYACA, COLOMBIA"
3,20182000000792,27 - 09 - 2018 03:38:10,QUE LA EPS LE BRINDE UNA ATENCIÓN INTEGRAL YA...,COLOMBIA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA",SOLICITO A LA EPS ME PRESTE MANEJO DOMI...,SOGAMOSO,BOYACA,"CALLE 13 No 229-15 SOGAMOSO, BOYACA, COLOMBIA"
4,20182000000172,15 - 08 - 2018 02:55:09,ASIGNACION DE CITA,COLOMBIA,"calle 7A 14A-02 SOGAMOSO, BOYACA",LLEVO ESPERANDO CITA EN FAMEDIC Y OTORRINONLOG...,SOGAMOSO,BOYACA,"calle 7A 14A-02 SOGAMOSO, BOYACA, COLOMBIA"
...,...,...,...,...,...,...,...,...,...
2717,20191700061802,2019-05-03 09:46:20.619884,ACCION DE TUTELA No. 2019-0017\r\nACTE: MARIA ...,COLOMBIA,CALLE 11 No. 9-30 OFC 202,,SOGAMOSO,BOYACA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
2718,20191700061812,2019-05-03 09:48:27.418469,ACCION DE TUTELA No. 2019-0019\r\nACTE: MARIA ...,COLOMBIA,CALLE 11 No. 9-30 OFC 202,,SOGAMOSO,BOYACA,"CALLE 11 No. 9-30 OFC 202, SOGAMOSO, BOYACA, C..."
2719,20191700062142,2019-05-03 14:31:18.900281,SOLICITUD VISTA SANITARIA,COLOMBIA,CARRERA 13 No. 4-68 INT. 5,,SOGAMOSO,BOYACA,"CARRERA 13 No. 4-68 INT. 5, SOGAMOSO, BOYACA, ..."
2720,20191700062312,2019-05-03 15:30:18.361842,RESPUESTA A DERECHO DE PETICION RADICADO INVIM...,COLOMBIA,CRA 10 N 64-28,,BOGOTA,D.C.,"CRA 10 N 64-28, BOGOTA, D.C., COLOMBIA"


We export this dataset with our results.

In [None]:
# Commented to avoid unwanted executions
# Running it would overwrite geo_data.csv, the file the visualization
# section below reads back.
#df.to_csv(r'/datasets/gdrive/DS4A - Team Project/data/geo_data.csv',encoding='UTF-8', index=False)

## Visualizing the data

In [None]:
from folium.plugins import HeatMap

# Reload the dataset from the path the export cell writes to, so the map can be
# drawn without running the geocoding step again.
df = pd.read_csv('/datasets/gdrive/DS4A - Team Project/data/geo_data.csv', encoding="UTF-8")

# Base map centred on Sogamoso, given as [latitude, longitude].
# NOTE(review): newer folium releases may no longer ship the built-in
# "Stamen Terrain" tile set — verify against the installed version.
sogamoso_center = [5.7160848, -72.93112889999999]
hmap = folium.Map(
    location=sogamoso_center,
    zoom_start=14,
    tiles="Stamen Terrain",
)

In [None]:
# Keep only the coordinate columns and discard every row in which either
# coordinate is missing, since the heat map needs complete lat/long pairs.
# Disabled filter, kept for reference: it would exclude points located exactly
# at the map-centre coordinates — presumably addresses that were resolved only
# to the city itself; TODO confirm.
#df_hm = df_hm[(df_hm['LATITUD'] != 5.7160848) | (df_hm['LONGITUD'] != -72.93112889999999)]
df_hm = df.loc[:, ["LATITUD", "LONGITUD"]].dropna(axis=0)

In [None]:
# Build our list of [latitude, longitude] pairs for the heatmap.
# Converting both columns in one vectorized step avoids creating a Series per
# row, which is what iterrows() does.
heat_data = df_hm[["LATITUD", "LONGITUD"]].values.tolist()

In [None]:
# Rendering options for the heat-map layer.
heatmap_options = dict(min_opacity=0.2, radius=17, blur=15, max_zoom=1)

# Build the layer from the coordinate pairs and attach it to the base map.
hm_layer = HeatMap(heat_data, **heatmap_options)
hm_layer.add_to(hmap)

# Leaving the map as the last expression makes the notebook render it.
hmap

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=746f9757-a4f8-490f-a84c-875a1acb7471' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>