In [1]:
import pandas as pd
import json
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
from folium import plugins

# Data Wrangling

In [106]:
df_it = pd.read_csv('covid19-ita-province.csv')
df_it

Unnamed: 0.1,Unnamed: 0,date,state,region_code,region,province_code,province,province_ISO,lat,long,total_cases,note_it,note_en
0,0,2020-02-24T18:00:00,ITA,13,Abruzzo,69,Chieti,CH,42.351032,14.167546,0,,
1,1,2020-02-24T18:00:00,ITA,13,Abruzzo,66,L'Aquila,AQ,42.351222,13.398438,0,,
2,2,2020-02-24T18:00:00,ITA,13,Abruzzo,68,Pescara,PE,42.464584,14.213648,0,,
3,3,2020-02-24T18:00:00,ITA,13,Abruzzo,67,Teramo,TE,42.658918,13.704400,0,,
4,4,2020-02-24T18:00:00,ITA,13,Abruzzo,979,In fase di definizione/aggiornamento,,0.000000,0.000000,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7419,7419,2020-04-21T17:00:00,ITA,5,Veneto,26,Treviso,TV,45.667546,12.245074,2351,,
7420,7420,2020-04-21T17:00:00,ITA,5,Veneto,27,Venezia,VE,45.434905,12.338452,2191,,
7421,7421,2020-04-21T17:00:00,ITA,5,Veneto,23,Verona,VR,45.438390,10.993527,4070,,
7422,7422,2020-04-21T17:00:00,ITA,5,Veneto,24,Vicenza,VI,45.547497,11.545971,2390,,


In [None]:
#How many rows and series are in
df_it.shape

(7424, 13)

In [None]:
#How many null values are in each serie
df_it.isnull().sum()

Unnamed: 0          0
date                0
state               0
region_code         0
region              0
province_code       0
province            0
province_ISO     1276
lat                 0
long                0
total_cases         0
note_it          7419
note_en          7419
dtype: int64

In [107]:
#Reducing series
df_it = df_it[['Unnamed: 0', 'date', 'region_code', 'region', 'province_code',
       'province', 'lat', 'long', 'total_cases']]

In [108]:
#Which region got most cases
df_it['region'].value_counts()

Lombardia                754
Toscana                  638
Sicilia                  580
Emilia-Romagna           580
Piemonte                 522
Veneto                   464
Puglia                   406
Sardegna                 348
Marche                   348
Lazio                    348
Campania                 348
Calabria                 348
Liguria                  290
Friuli Venezia Giulia    290
Abruzzo                  290
Basilicata               174
Molise                   174
Umbria                   174
P.A. Bolzano             116
P.A. Trento              116
Valle d'Aosta            116
Name: region, dtype: int64

In [109]:
#Lombardia dataframe. A smaller DataFrame
df_lom = df_it[(df_it['region'] == 'Lombardia')]

# Map with Folium

**Tiles**
”OpenStreetMap”
”Stamen Terrain”, “Stamen Toner”, “Stamen Watercolor”
”CartoDB positron”, “CartoDB dark_matter”
”Mapbox Bright”, “Mapbox Control Room” (Limited zoom)
”Cloudmade” (Must pass API key)
”Mapbox” (Must pass API key)

## Map with Markers

In [None]:
#Create a map

map_ = folium.Map(location=[df_lom.iloc[0]['lat'], df_lom.iloc[0]['long']], tiles='OpenStreetMap', zoom_start = 11)

#Display
map_

In [None]:
#Add points as red icons to the map

for index, row in df_lom.iterrows():
    Marker((row['lat'], row['long']),
    icon=folium.Icon(color='red')).add_to(map_)

#Display

map_

In [None]:
map_.save('output/map_markers')

## Map with customised markers

In [None]:
#Create a map

map_virus = folium.Map(location=[df_lom.iloc[0]['lat'], df_lom.iloc[0]['long']], tiles='OpenStreetMap', zoom_start = 11)

#Display
map_virus

In [None]:
#Customised icon
icon_url = 'https://img.icons8.com/ios/50/000000/virus.png'


#Add points, customised icon and popups to the map

for index, row in df_lom.iterrows():
    icon = folium.features.CustomIcon(icon_url,icon_size=(28, 30))
    tooltip=row['province']
    Marker((row['lat'], row['long']), popup=row['province'], tooltip=tooltip, icon=icon).add_to(map_virus)

#Display
map_virus

In [None]:
map_.save('output/map_markers_virus')

## Heatmap

In [None]:
#Create a map
heatmap_ = folium.Map(location=[df_lom.iloc[0]['lat'], df_lom.iloc[0]['long']], tiles='OpenStreetMap', zoom_start = 11)

#Display
heatmap_

In [None]:
#Create a list of nested lists with lat and long for each point
data_heatmap = [(row['lat'], row['long']) for index, row in df_lom.iterrows()]

In [None]:
#Add data
hm = plugins.HeatMap(data_heatmap, name='Number of COVID-19 cases')
hm.add_to(heatmap_)
heatmap_

All points are the same because there is a bug with Folium and weighted rows to print a heatmap.

## Choropleth

In [None]:
df_it

In [2]:
# Have to use geojson regions.
#https://github.com/codeforamerica/click_that_hood/blob/master/public/data/europe-capitals.geojson

# communities_geo = r'italy-provinces.geojson'
communities_geo = r'chile_regions_v1.geojson'

In [3]:
import json

# Carga el archivo GeoJSON
with open(communities_geo) as f:
    data = json.load(f)
# Imprime el objeto GeoJSON
# print(data)

In [4]:
pd.io.json.json_normalize(data['features'])[['geometry.coordinates']].iloc[0]

  pd.io.json.json_normalize(data['features'])[['geometry.coordinates']].iloc[0]


geometry.coordinates    [[[-68.951698, -18.93816], [-68.99942, -18.957...
Name: 0, dtype: object

In [5]:
pd.io.json.json_normalize(data['features'])['properties.label_es'].values.tolist()

  pd.io.json.json_normalize(data['features'])['properties.label_es'].values.tolist()


['Arica y Parinacota',
 'Tarapacá',
 'Antofagasta',
 'Atacama',
 'Coquimbo',
 'Región Metropolitana de Santiago',
 "O'Higgins",
 'Maule',
 'Biobío',
 'La Araucanía',
 'Los Ríos',
 'Los Lagos',
 'Aysén del General Carlos Ibáñez del Campo',
 'Magallanes y de la Antártica Chilena',
 'Valparaíso',
 'Ñuble']

In [None]:
import requests
import pandas as pd
# url = 'https://github.com/holtzy/The-Python-Graph-Gallery/blob/master/static/data/france.geojson'
# url = 'https://github.com/fcortes/Chile-GeoJSON/blob/master/Regional.geojson'
url = 'https://github.com/patricrp/covid19-geospatial-analysis/blob/master/input/italy-provinces.geojson'
response = requests.get(url)
response
data = response.json()
data
# df = pd.io.json.json_normalize(data['payload'])
# df

In [130]:
communities_geo

'italy-provinces.geojson'

In [None]:
# pd.read_excel("/content/drive/MyDrive/MAPAS_CHILE/DATA_ENTEL/Información_ventas_Entel_móvil.xlsx")

In [4]:
import numpy as np

In [145]:
df_it

Unnamed: 0.1,Unnamed: 0,date,region_code,region,province_code,province,lat,long,total_cases
0,0,2020-02-24T18:00:00,13,Abruzzo,69,Chieti,42.351032,14.167546,0
1,1,2020-02-24T18:00:00,13,Abruzzo,66,L'Aquila,42.351222,13.398438,0
2,2,2020-02-24T18:00:00,13,Abruzzo,68,Pescara,42.464584,14.213648,0
3,3,2020-02-24T18:00:00,13,Abruzzo,67,Teramo,42.658918,13.704400,0
4,4,2020-02-24T18:00:00,13,Abruzzo,979,In fase di definizione/aggiornamento,0.000000,0.000000,0
...,...,...,...,...,...,...,...,...,...
7419,7419,2020-04-21T17:00:00,5,Veneto,26,Treviso,45.667546,12.245074,2351
7420,7420,2020-04-21T17:00:00,5,Veneto,27,Venezia,45.434905,12.338452,2191
7421,7421,2020-04-21T17:00:00,5,Veneto,23,Verona,45.438390,10.993527,4070
7422,7422,2020-04-21T17:00:00,5,Veneto,24,Vicenza,45.547497,11.545971,2390


In [150]:
#Using the complete DataFrame to plot Italy totally
df_gr = df_it.groupby(['province', 'lat', 'long']).agg({'total_cases':'sum'}).reset_index()
# df_gr[['']]

In [5]:
#@title información de regiones
df_g = pd.DataFrame()
# df_g['Region'] = ['Arica y Parinacota', 'Tarapacá', 'Antofagasta', 'Atacama',
#                   'Coquimbo', 'Región Metropolitana de Santiago', "O'Higgins",
#                   'Maule', 'Biobío', 'La Araucanía', 'Los Ríos', 'Los Lagos',
#                   'Aysén del General Carlos Ibáñez del Campo',
#                   'Magallanes y de la Antártica Chilena',
#                   'Valparaíso', 'Ñuble']
df_g['Region'] = ['Antofagasta',
                  'Región Metropolitana de Santiago', "O'Higgins",
                  'Maule', 'Biobío', 'La Araucanía', 'Los Lagos',
                  'Valparaíso']
df_g['total_cases'] = np.nan
df_g['lat'] = np.nan
df_g['long'] = np.nan
# df_g
df_g['total_cases'][df_g['Region']=='Región Metropolitana de Santiago'] = 38.35
df_g['long'][df_g['Region']=='Región Metropolitana de Santiago'] = -70.64827
df_g['lat'][df_g['Region']=='Región Metropolitana de Santiago'] = -33.45694

df_g['total_cases'][df_g['Region']=='Valparaíso'] = 10.23
df_g['long'][df_g['Region']=='Valparaíso'] = -70.9735356
df_g['lat'][df_g['Region']=='Valparaíso'] = -34.2175512

df_g['total_cases'][df_g['Region']=='Biobío'] = 8.75
df_g['long'][df_g['Region']=='Biobío'] = -70.7219141
df_g['lat'][df_g['Region']=='Biobío'] = -34.1508318

df_g['total_cases'][df_g['Region']=='Maule'] = 6.18
df_g['long'][df_g['Region']=='Maule'] = -71.6918891
df_g['lat'][df_g['Region']=='Maule'] = -35.521347

df_g['total_cases'][df_g['Region']=="O'Higgins"] = 5.74
df_g['long'][df_g['Region']=="O'Higgins"] = -71.5320529
df_g['lat'][df_g['Region']=="O'Higgins"] = -32.9709015

df_g['total_cases'][df_g['Region']=="La Araucanía"] = 5.42
df_g['long'][df_g['Region']=="La Araucanía"] = -73.157326
df_g['lat'][df_g['Region']=="La Araucanía"] = -40.9071314

df_g['total_cases'][df_g['Region']=="Los Lagos"] = 5.20
df_g['long'][df_g['Region']=="Los Lagos"] = -72.9504926
df_g['lat'][df_g['Region']=="Los Lagos"] = -41.3273284

df_g['total_cases'][df_g['Region']=="Antofagasta"] = 4.84
df_g['long'][df_g['Region']=="Antofagasta"] = -72.739914
df_g['lat'][df_g['Region']=="Antofagasta"] = -38.1496722


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_g['total_cases'][df_g['Region']=='Región Metropolitana de Santiago'] = 38.35
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_g['long'][df_g['Region']=='Región Metropolitana de Santiago'] = -70.64827
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_g['lat'][df_g['Region']=='Región Metropolitana de Santiago'] = -33.45694
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [230]:
df_g

Unnamed: 0,Region,total_cases
0,Antofagasta,4.84
1,Región Metropolitana de Santiago,38.35
2,O'Higgins,5.74
3,Maule,6.18
4,Biobío,8.75
5,La Araucanía,5.42
6,Los Lagos,5.2
7,Valparaíso,10.23


In [6]:
# @title información de comunas
communities_geo_comuna = r'comunas.geojson'
# Carga el archivo GeoJSON
with open(communities_geo_comuna) as f:
    data_ = json.load(f)
lista_comunas = pd.io.json.json_normalize(data_['features'])['properties.Comuna'].values.tolist()


  lista_comunas = pd.io.json.json_normalize(data_['features'])['properties.Comuna'].values.tolist()


In [None]:
pd.DataFrame(lista_comunas)[0].str.contains('Santiago').sum()

1

In [7]:
df_gc = pd.DataFrame()
df_gc['Comuna'] = ['Talca', 'Santiago']
df_gc['total_cases'] = np.nan
df_gc['total_cases'][df_gc['Comuna']=='Talca'] = 27.8
df_gc['total_cases'][df_gc['Comuna']=='Santiago'] = 9

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_gc['total_cases'][df_gc['Comuna']=='Talca'] = 27.8
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_gc['total_cases'][df_gc['Comuna']=='Santiago'] = 9


In [None]:
df_gc

Unnamed: 0,Comuna,total_cases
0,Talca,27.8


In [8]:
import folium

In [338]:
!pip install colorbrewer

Collecting colorbrewer
  Downloading colorbrewer-0.2.0-py3-none-any.whl (9.4 kB)
Installing collected packages: colorbrewer
Successfully installed colorbrewer-0.2.0


In [9]:
bins = list(df_g['total_cases'].quantile([0, 0.25, 0.5, 0.75, 1]).round(2))#[1:]
bins

[4.84, 5.36, 5.96, 9.12, 38.35]

In [9]:
#Create a map
choromap_ = folium.Map(location=[-33.45, -70.6667], zoom_start=5)

#Add choropleth info layer
choromap_.choropleth(
  geo_data=communities_geo,
  data=df_g,
  columns=['Region', 'total_cases'],
  key_on='feature.properties.label_es',
  fill_color='YlOrRd_r',
  fill_opacity=0.3,
  line_opacity=0.4,
  nan_fill_opacity= 0,
  nan_line_opacity=0,
  legend_name='chile',
  nan_fill_color = 'None',
  bins = [4, 5, 6,  9, 11,  38.35],
  smooth_factor=0)

#Add choropleth info layer
choromap_.choropleth(
  geo_data=communities_geo_comuna,
  data=df_gc,
  columns=['Comuna', 'total_cases'],
  key_on='feature.properties.Comuna',
  fill_color='BuGn',
  fill_opacity=0.8,
  line_opacity=0.2,
  nan_fill_opacity= 0,
  nan_line_opacity=0,
  legend_name='comuns chila',
  nan_fill_color = 'None',
  bins = [4, 5, 6,  9, 11,  38.35],
  smooth_factor=0)


from branca.element import Element
from branca.colormap import LinearColormap



#Display
choromap_



In [10]:
e = Element("""
  var ticks = document.querySelectorAll('div.legend g.tick text')
  for(var i = 0; i < ticks.length; i++) {
    var value = parseFloat(ticks[i].textContent.replace(',', ''))
    var newvalue = Math.pow(10.0, value).toFixed(0).toString()
    ticks[i].textContent = newvalue
  }
""")

NameError: ignored

In [412]:
choromap_

In [375]:
icon_url = "senales.png"

for index, row in df_g.iterrows():
  # print(index)
  # print(index)
  popup=row['Region'], row['total_cases']
  icon = folium.features.CustomIcon(icon_url, icon_size=(25, 45))
  Marker((row['lat'], row['long']), popup=popup,icon=icon).add_to(choromap_)

In [376]:
choromap_


Output hidden; open in https://colab.research.google.com to view.

  pd.io.json.json_normalize(data_['features'])#['properties.label_es'].values.tolist()


Unnamed: 0,type,properties.objectid,properties.shape_leng,properties.dis_elec,properties.cir_sena,properties.cod_comuna,properties.codregion,properties.st_area_sh,properties.st_length_,properties.Region,properties.Comuna,properties.Provincia,geometry.type,geometry.coordinates
0,Feature,48,170038.6241,16,8,6204,6,9.685774e+08,206184.2716,Región del Libertador Bernardo O'Higgins,Marchigüe,Cardenal Caro,Polygon,"[[[-71.8007, -34.2072], [-71.7295, -34.3146], ..."
1,Feature,29,125730.1047,15,8,6102,6,4.157446e+08,151911.5768,Región del Libertador Bernardo O'Higgins,Codegua,Cachapoal,Polygon,"[[[-70.3529, -33.9526], [-70.3276, -34.0103], ..."
2,Feature,30,63026.0844,15,8,6103,6,1.448565e+08,76355.3261,Región del Libertador Bernardo O'Higgins,Coinco,Cachapoal,Polygon,"[[[-70.9005, -34.237], [-70.8704, -34.2681], [..."
3,Feature,31,89840.9035,15,8,6104,6,3.256572e+08,108874.6231,Región del Libertador Bernardo O'Higgins,Coltauco,Cachapoal,Polygon,"[[[-71.0249, -34.1705], [-70.9657, -34.1761], ..."
4,Feature,78,122626.4932,23,11,9121,9,6.990727e+08,156680.4106,Región de La Araucanía,Cholchol,Cautín,Polygon,"[[[-72.9589, -38.4687], [-72.9212, -38.4925], ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
341,Feature,255,130240.5558,19,10,16305,16,8.751006e+08,160459.1379,Región de Ñuble,San Nicolás,Punilla,Polygon,"[[[-72.242, -36.3522], [-72.0513, -36.4192], [..."
342,Feature,253,311061.9324,19,10,16304,16,2.393007e+09,388109.6138,Región de Ñuble,San Fabián,Punilla,Polygon,"[[[-71.2483, -36.4559], [-71.2249, -36.4626], ..."
343,Feature,245,159257.9064,19,10,16303,16,7.638173e+08,195593.9337,Región de Ñuble,Ñiquén,Punilla,Polygon,"[[[-72.099, -36.1371], [-72.0754, -36.1311], [..."
344,Feature,243,157467.0496,19,10,16104,16,1.044405e+09,196962.6643,Región de Ñuble,El Carmen,Ñuble,Polygon,"[[[-71.7422, -36.8288], [-71.6045, -36.9016], ..."


In [318]:
colormap = LinearColormap(
    colors=['white', 'yellow', 'green'],
    index=[0, df_g['total_cases'].mean(), df_g['total_cases'].max()],
    vmin=df_g['total_cases'].min(),
    vmax=df_g['total_cases'].max()
).to_step(5)
colormap

In [256]:
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, rgb2hex

color_inicio = '#461e7d'  # Color de inicio en formato hexadecimal
color_fin = '#fd6600'#Naranja E"#fd6600"#Morado E'#ff006e'     # Color de fin en formato hexadecimal

# Crear una lista de colores para el cmap
colores = [color_inicio, color_fin]

# Crear una lista de posiciones para el cmap
posiciones = [0.0, 1.0]

# Crear el cmap con el degradado de 10 tonos entre los colores
N =8
cmap = LinearSegmentedColormap.from_list('CustomCmap', list(zip(posiciones, colores)), N=N)

In [263]:
cmap(np.linspace(0, 1, cmap.N))

array([[0.2745098 , 0.11764706, 0.49019608, 1.        ],
       [0.37703081, 0.15798319, 0.42016807, 1.        ],
       [0.47955182, 0.19831933, 0.35014006, 1.        ],
       [0.58207283, 0.23865546, 0.28011204, 1.        ],
       [0.68459384, 0.2789916 , 0.21008403, 1.        ],
       [0.78711485, 0.31932773, 0.14005602, 1.        ],
       [0.88963585, 0.35966387, 0.07002801, 1.        ],
       [0.99215686, 0.4       , 0.        , 1.        ]])

In [None]:
choromap_.save('output/choromap')

## Choropleth with quartiles

In [None]:
#Including quantiles to map
bins = list(df_gr['total_cases'].quantile([0, 0.25, 0.5, 0.75, 1]))

#Customised colors

'''https://github.com/python-visualization/folium/blob/v0.2.0/folium/utilities.py#L104'''

#Create a map
c_ = folium.Map(location=[df_gr.iloc[0]['lat'], df_gr.iloc[0]['long']], zoom_start=5)



#Add choropleth info layer
c_.choropleth(
    geo_data=communities_geo,
    data=df_gr,
    columns=['province', 'total_cases'],
    key_on='feature.properties.name',
    fill_color='Oranges',
    fill_opacity=0.6,
    line_opacity=1,
    legend_name='COVID-19 Italy',
    bins=bins,
    smooth_factor=0)

#Display
c_

Output hidden; open in https://colab.research.google.com to view.

In [None]:
choromap_.save('output/cuantiles_choromap')

## Choropleth by quartiles with markers and popups

In [None]:
df_gr

Unnamed: 0,province,lat,long,total_cases
0,Agrigento,37.309711,13.584575,3377
1,Alessandria,44.912974,8.615401,60028
2,Ancona,43.616760,13.518875,44501
3,Aosta,45.737503,7.320149,24901
4,Arezzo,43.466428,11.882288,12712
...,...,...,...,...
103,Vercelli,45.323981,8.423234,20729
104,Verona,45.438390,10.993527,89257
105,Vibo Valentia,38.676241,16.101574,1722
106,Vicenza,45.547497,11.545971,53595


In [None]:
#Testing popups in choropleth. Add as child
cp_ = folium.Map(location=[df_gr.iloc[0]['lat'], df_gr.iloc[0]['long']], zoom_start=5)

cp_.choropleth(
    geo_data=communities_geo,
    data=df_gr,
    columns=['province', 'total_cases'],
    key_on='feature.properties.name',
    fill_color='Oranges',
    fill_opacity=0.6,
    line_opacity=1,
    legend_name='COVID-19 Italy',
    bins=bins,
    smooth_factor=0)

icon_url = "senales.png"

for index, row in df_gr.iterrows():
    popup=row['province'], row['total_cases']
    icon = folium.features.CustomIcon(icon_url,icon_size=(14, 15))
    Marker((row['lat'], row['long']), popup=popup,icon=icon).add_to(cp_)

#Display

cp_

Output hidden; open in https://colab.research.google.com to view.

In [None]:
choromap_.save('output/cuantiles_choromap_markers')