In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import numpy as np

import branca

In [None]:
#Load the map of Colombia at municipio (municipality) granularity.
#All files extracted from the zip are expected under ../data/GeoData/ (relative to this notebook).
geo_df = gpd.read_file("../data/GeoData/MGN_MPIO_POLITICO.shp")

In [None]:
# Show the column names of geo_df to see which attributes the shapefile provides.
geo_df.columns

# Department codes:

05 = Antioquia,
08 = Atlántico,
11 = Bogotá,
13 = Bolívar,
15 = Boyacá,
17 = Caldas,
18 = Caquetá,
19 = Cauca,
20 = Cesar,
23 = Córdoba,
25 = Cundinamarca,
27 = Chocó,
41 = Huila,
44 = La Guajira,
47 = Magdalena,
50 = Meta,
52 = Nariño,
54 = Norte de Santander,
63 = Quindío,
66 = Risaralda,
68 = Santander,
70 = Sucre,
73 = Tolima,
76 = Valle del Cauca,
81 = Arauca,
85 = Casanare,
86 = Putumayo,
88 = Archipiélago de San Andrés, Providencia y Santa Catalina,
91 = Amazonas,
94 = Guainía,
95 = Guaviare,
97 = Vaupés,
99 = Vichada

### Only a slice of the full GeoDataFrame should be plotted with folium. Attempting to plot the entire map of Colombia at municipio granularity would take too long and would probably crash the notebook.

### Suggestion: to plot all of Colombia, use departamentos (that is a separate dataset); to plot by municipio, do it for only one departamento at a time.

In [None]:
# Compute the centroid of every municipio and store its coordinates as plain
# numeric columns (x -> 'longitude', y -> 'latitude').
# GeoSeries.x / GeoSeries.y read the point coordinates directly. The previous
# np.array(point)[0] idiom relied on the array interface of shapely geometries,
# which is no longer available on shapely >= 2.0. Using a local GeoSeries also
# avoids adding (and then dropping) a second geometry column on geo_df.
# NOTE(review): the column names assume the shapefile uses a geographic CRS
# (degrees) -- confirm with geo_df.crs.
municipio_centroids = geo_df['geometry'].centroid
geo_df['longitude'] = municipio_centroids.x
geo_df['latitude'] = municipio_centroids.y

# Select a single departamento. Code '05' is Antioquia (see the department
# codes listed above); change the code to plot a different departamento.
df_dpt = geo_df[geo_df['DPTO_CCDGO'] == '05']

In [None]:
# Colour-scale limits: the 1st and 99th percentiles of municipio area, rounded
# to two decimals.
# Series.round(2) is used because Series.apply(round, 2) hands the 2 to
# apply()'s second positional parameter (convert_dtype) instead of to round(),
# so the limits were silently rounded to whole numbers.
min_cn, max_cn = df_dpt['MPIO_NAREA'].quantile([0.01, 0.99]).round(2)

colormap = branca.colormap.LinearColormap(
    colors=['white', 'green', 'blue'],
    vmin=min_cn,
    vmax=max_cn
)
colormap.caption = "Area of municipality"

# The view is fitted to the plotted layer below, so no hard-coded centre is
# needed (the previous fixed centre did not depend on the selected departamento).
m = folium.Map()


def style_function(feature):
    """Return the folium style for one GeoJSON feature, filled according to its area."""
    return {
        'fillColor': colormap(feature['properties']['MPIO_NAREA']),
        'color': 'black',
        'weight': 2,
        'fillOpacity': 0.5
    }


stategeo = folium.GeoJson(
    df_dpt.to_json(),
    style_function=style_function,  # Controls the fill colour, outline colour, outline weight and opacity
    tooltip=folium.GeoJsonTooltip(  # Controls the pop-out box shown when the mouse hovers over the map
        fields=['MPIO_CCDGO', 'MPIO_NAREA'],
        aliases=['ID', 'MPIO_NAREA'],
        localize=True
    )
).add_to(m)

# Centre and zoom the map on whatever departamento df_dpt contains.
m.fit_bounds(stategeo.get_bounds())

colormap.add_to(m)   # Add the colour scale legend
m

In [None]:
# Plain map of the selected departamento using folium's default styling
# (no colour scale).
# The previously defined-but-unused style_function and the commented-out
# colormap lines were removed; the rendered layer is unchanged by that.
m = folium.Map()

stategeo = folium.GeoJson(
    df_dpt.to_json(),
    name='Municipios'  # layer label (shown if a folium.LayerControl is added)
).add_to(m)

# Centre and zoom on the plotted layer instead of a hard-coded location, so the
# municipios are visible whichever departamento df_dpt holds.
m.fit_bounds(stategeo.get_bounds())
m

## A better and more flexible (although not interactive) option is to use the geopandas `plot` method directly. This allows easy plotting of the data with pandas syntax.

In [None]:
# Static choropleth: municipios of Antioquia (DPTO_CCDGO == '05') shaded by area.
in_antioquia = geo_df['DPTO_CCDGO'] == '05'

fig, ax = plt.subplots(figsize=(10, 10))
geo_df.loc[in_antioquia].plot(
    ax=ax,
    column='MPIO_NAREA',
    cmap='Blues',
    legend=True,
)

# Hide the coordinate ticks on both axes.
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# Read the 2018 table used below to count births per municipio; the file is semicolon-separated.
# NOTE(review): presumably one row per registered birth -- confirm against the data dictionary.
nac = pd.read_csv('../data/nac2018.csv',sep=';')
# Preview the first rows to check how the columns were parsed.
nac.head()

In [None]:
def _zero_padded(codes, width):
    """Return `codes` converted to strings and left-padded with zeros to at least `width` characters."""
    return codes.apply(lambda code: str(code).zfill(width))


# Births table: 2-character departamento code + 3-character municipio code,
# concatenated into a single ID per municipio.
nac['COD_DPTO'] = _zero_padded(nac['COD_DPTO'], 2)
nac['COD_MUNIC'] = _zero_padded(nac['COD_MUNIC'], 3)
nac['ID'] = nac['COD_DPTO'] + nac['COD_MUNIC']

# Geo table: build the same three columns from the shapefile's code columns so
# both tables share the 'ID' key.
geo_df['COD_DPTO'] = _zero_padded(geo_df['DPTO_CCDGO'], 2)
geo_df['COD_MUNIC'] = _zero_padded(geo_df['MPIO_CCDGO'], 3)
geo_df['ID'] = geo_df['COD_DPTO'] + geo_df['COD_MUNIC']

In [None]:
# Add a column with the total number of births per municipio to the geo data.
# groupby(...).size() counts every row of each ID. The previous count() on
# 'SIT_PARTO' only counted rows where that particular field was non-null, so
# births with a missing SIT_PARTO would have been left out of the total.
births_per_id = (
    nac.groupby('ID')
    .size()
    .rename('BIRTHS_2018')
    .to_frame()
)

# Outer merge on the municipio ID: keeps municipios without any birth record
# (BIRTHS_2018 is NaN) as well as birth IDs with no matching geometry.
new_df = births_per_id.merge(geo_df, how='outer', left_index=True, right_on='ID')

new_df = gpd.GeoDataFrame(new_df)  # The merge result is converted back to a GeoDataFrame
new_df.shape

In [None]:
# Visualize the new BIRTHS_2018 variable for the municipios of departamento '05'
# (Antioquia).
# Municipios with 35,000 births or more are left out of the plot.
# NOTE(review): presumably this removes a single very large municipio so the
# colour scale stays readable -- confirm which rows are excluded.
# Rows whose BIRTHS_2018 is NaN (no birth records matched) are excluded by the
# comparison as well.
MAX_BIRTHS_SHOWN = 35000

is_antioquia = new_df['COD_DPTO'] == '05'
below_cap = new_df['BIRTHS_2018'] < MAX_BIRTHS_SHOWN

fig, ax = plt.subplots(figsize=(10, 10))
new_df[is_antioquia & below_cap].plot(column='BIRTHS_2018', cmap='Blues', legend=True, ax=ax)

# The title names the departamento actually plotted (it previously said "Colombia").
ax.set_title('Number of births per municipio in Antioquia (2018)')
ax.set_xticks([])
ax.set_yticks([])
plt.show()