In [1]:
# Common external libraries

import pandas as pd
import numpy as np
import sklearn # scikit-learn
import requests
from bs4 import BeautifulSoup
from time import sleep


In [3]:
# Visualization libraries

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
plt.rcParams['figure.figsize'] = [15, 5]
from IPython import display
from ipywidgets import interact, widgets

OSError: [Errno 121] Remote I/O error

In [None]:
# Common standard libraries

import datetime
import time
import os

In [None]:
# Setting plot appearance
# See here for more options: https://matplotlib.org/users/customizing.html

# Ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format='retina'
sns.set() # Apply seaborn's default theme (this is not a revert to matplotlib defaults)
# The rcParams below are assigned after sns.set(); the figure size replaces the
# [15, 5] value assigned in the visualization-imports cell.
plt.rcParams['figure.figsize'] = (9, 6)
plt.rcParams['axes.labelpad'] = 10
sns.set_style("darkgrid")
# sns.set_context("poster", font_scale=1.0)

In [None]:
# Managing maps
import folium
from folium import plugins
from folium.plugins import MarkerCluster

In [None]:
# Silence Python warnings for the rest of the session.
# NOTE(review): this filters every warning category (deprecation notices
# included); it does not redirect or hide stderr itself.

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Data-source switch read by the loading cell in Step 1.
# Truthy: reuse the local .xlsx data file when it exists; the generator
#         notebook is run only if that file is missing.
# Falsy:  always run the generator notebook before loading.

LOCAL = 1 # check for local data file and use it if it exists
#LOCAL = 0 # don't check for local data file, but download from Zenodo


# Glass Submarine Project
## Mortality Rate Analysis in Italy (2015-2020)
## Time and Geographical plots generator

This notebook uses the mortality rates data file from a subset of municipalities in the ANPR system<br>
and the municipality census information to produce a set of customizable time plots and maps.

Version: 1.0-20200504

## Step 1: Load the data from Zenodo

In [None]:
# if local is specified, then try to use a local file if it exists, else call the generator to produce a fresh dataset
import os.path

if LOCAL:
    if not os.path.isfile('mortalita_giornaliero_comune_20200515.xlsx'):
        %run ./Italy_mortality_rates_dataset_generator_20200515.ipynb
else:
    %run ./Italy_mortality_rates_dataset_generator_20200515.ipynb
    
print('Loading aggregated data file...')
df_2020 = pd.read_excel(io='mortalita_giornaliero_comune_20200515.xlsx')
df_2020 = df_2020.drop(labels='Unnamed: 0', axis=1)
print('Loaded %d records' %len(df_2020))
    
df_2020.head()

## Step 2: Reduced data set (debugging)

In [None]:
# Columns carried forward from df_2020 into the working frame `comuni`.
cols = [
    'NOME_REGIONE', 'NOME_PROVINCIA', 'NOME_COMUNE',
    'CL_ETA', 'GE',
    'T_15', 'T_16', 'T_17', 'T_18', 'T_19', 'T_20',
    'POPULATION', 'LONGITUDE', 'LATITUDE',
]
comuni = df_2020[cols]
comuni.head()


## Step 3: Consolidate at regional level (debugging)
Time plot

In [None]:
### Get Regionwise Data
def countrydata(df_cleaned,varia):
    """Total ``varia`` for every (region, GE) combination.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Frame holding the 'NOME_REGIONE' and 'GE' columns plus the
        column(s) named by ``varia``.
    varia : str or list of str
        Column label (or list of labels) to sum, e.g. 'T_20'.

    Returns
    -------
    pandas.DataFrame
        One column per summed label, indexed by a
        ('NOME_REGIONE', 'GE') MultiIndex in ascending order.
    """
    value_cols = varia if isinstance(varia, list) else [varia]
    # Selecting with a list keeps the aggregate a DataFrame, so the former
    # reset_index()/set_index() round-trip and the no-op set_levels() call are
    # not needed; sort_index() orders the rows by the index itself.
    return df_cleaned.groupby(['NOME_REGIONE', 'GE'])[value_cols].sum().sort_index()
  
# Yearly totals per (region, GE): one single-column frame per year.
dfbyregion15 = countrydata(comuni, 'T_15')
dfbyregion16 = countrydata(comuni, 'T_16')
dfbyregion17 = countrydata(comuni, 'T_17')
dfbyregion18 = countrydata(comuni, 'T_18')
dfbyregion19 = countrydata(comuni, 'T_19')
dfbyregion20 = countrydata(comuni, 'T_20')

# Left-join the yearly frames on their shared (NOME_REGIONE, GE) index,
# starting from 2015, so the result has one column per year.
RegionConsolidated = dfbyregion15
for yearly_totals in (dfbyregion16, dfbyregion17, dfbyregion18,
                      dfbyregion19, dfbyregion20):
    RegionConsolidated = pd.merge(RegionConsolidated, yearly_totals,
                                  how='left', left_index=True, right_index=True)

# Totals over all regions for each GE value. Grouping on the index level keeps
# the NOME_REGIONE strings out of the sum (reset_index() used to turn them
# into an ordinary column that took part in the aggregation).
GlobalTotals = RegionConsolidated.groupby(level='GE').sum()

# (column, legend label, colour) for each yearly trace, in drawing order.
year_traces = [
    ('T_15', 'Deaths 2015', 'royalblue'),
    ('T_16', 'Deaths 2016', 'yellow'),
    ('T_17', 'Deaths 2017', 'black'),
    ('T_18', 'Deaths 2018', 'green'),
    ('T_19', 'Deaths 2019', 'magenta'),
    ('T_20', 'Deaths 2020', 'red'),
]

fig = go.Figure()
for column, label, colour in year_traces:
    fig.add_trace(go.Scatter(x=GlobalTotals.index, y=GlobalTotals[column],
                             mode='markers',
                             name=label,
                             line=dict(color=colour, width=2)))

# Last expression of the cell: update_layout returns the figure, which the
# notebook then displays.
fig.update_layout(showlegend=True, yaxis_type="log")

## Step 4: Top 10 Regions comparison 2019-2020

In [None]:
# Rank regions by total deaths, separately for 2019 and for 2020, and show the
# two top-10 lists in one bar chart.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=...).sum() is the replacement and gives the same totals.
region_totals = RegionConsolidated.groupby(level=0).sum()
excluded_labels = ['ALL', 'Others']

TotalCasesCountry = region_totals[['T_19']].sort_values(by='T_19', ascending=False)
TotalCasesCountry = TotalCasesCountry[~TotalCasesCountry.index.isin(excluded_labels)]
Top10countriesbycases = TotalCasesCountry.head(10)
TotalCasesCountrytop10 = Top10countriesbycases  # second name kept for existing references

TotalCasesCountry20 = region_totals[['T_20']].sort_values(by='T_20', ascending=False)
TotalCasesCountry20 = TotalCasesCountry20[~TotalCasesCountry20.index.isin(excluded_labels)]
Top10countriesbycases20 = TotalCasesCountry20.head(10)
TotalCasesCountrytop1020 = Top10countriesbycases20  # second name kept for existing references

fig = go.Figure()

# 2020 is added first, then 2019 (same trace order as before).
for top10, column, label in ((Top10countriesbycases20, 'T_20', '2020'),
                             (Top10countriesbycases, 'T_19', '2019')):
    fig.add_trace(go.Bar(x=top10.index, y=top10[column],
                         text=top10[column],
                         textposition='outside',
                         name=label))

fig.update_layout(title_text='Top 10 Regions by Deaths in 2019 and in 2020')
# The value is printed on each bar, so the y tick labels are hidden.
fig.update_yaxes(showticklabels=False)

fig.show()

## Step 5: Top 10 Provinces comparison 2019-2020

In [None]:
### Get Provincewise Data
def provinciadata(df_cleaned,varia):
    """Total ``varia`` for every (region, province, GE) combination.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Frame holding the 'NOME_REGIONE', 'NOME_PROVINCIA' and 'GE' columns
        plus the column(s) named by ``varia``.
    varia : str or list of str
        Column label (or list of labels) to sum, e.g. 'T_20'.

    Returns
    -------
    pandas.DataFrame
        One column per summed label, indexed by a
        ('NOME_REGIONE', 'NOME_PROVINCIA', 'GE') MultiIndex in ascending order.
    """
    value_cols = varia if isinstance(varia, list) else [varia]
    group_keys = ['NOME_REGIONE', 'NOME_PROVINCIA', 'GE']
    # Selecting with a list keeps the aggregate a DataFrame, so the former
    # reset_index()/set_index() round-trip and the no-op set_levels() call are
    # not needed; sort_index() orders the rows by the index itself.
    return df_cleaned.groupby(group_keys)[value_cols].sum().sort_index()

  
# Yearly totals per (region, province, GE): one single-column frame per year.
dfbyprovincia15 = provinciadata(comuni, 'T_15')
dfbyprovincia16 = provinciadata(comuni, 'T_16')
dfbyprovincia17 = provinciadata(comuni, 'T_17')
dfbyprovincia18 = provinciadata(comuni, 'T_18')
dfbyprovincia19 = provinciadata(comuni, 'T_19')
dfbyprovincia20 = provinciadata(comuni, 'T_20')

# Left-join the yearly frames on their shared index, starting from 2015.
ProvinciaConsolidated = dfbyprovincia15
for yearly_totals in (dfbyprovincia16, dfbyprovincia17, dfbyprovincia18,
                      dfbyprovincia19, dfbyprovincia20):
    ProvinciaConsolidated = pd.merge(ProvinciaConsolidated, yearly_totals,
                                     how='left', left_index=True, right_index=True)

# Totals per province name (index level 1).
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=...).sum() is the replacement and gives the same totals.
province_totals = ProvinciaConsolidated.groupby(level=1).sum()
excluded_labels = ['ALL', 'Others']

TotalCasesProvince = province_totals[['T_19']].sort_values(by='T_19', ascending=False)
TotalCasesProvince = TotalCasesProvince[~TotalCasesProvince.index.isin(excluded_labels)]
Top10Provincebycases = TotalCasesProvince.head(10)
TotalCasesProvincetop10 = Top10Provincebycases  # second name kept for existing references

TotalCasesProvince20 = province_totals[['T_20']].sort_values(by='T_20', ascending=False)
TotalCasesProvince20 = TotalCasesProvince20[~TotalCasesProvince20.index.isin(excluded_labels)]
Top10Provincebycases20 = TotalCasesProvince20.head(10)
TotalCasesProvincetop1020 = Top10Provincebycases20  # second name kept for existing references

fig = go.Figure()

# 2020 is added first, then 2019 (same trace order as before).
for top10, column, label in ((Top10Provincebycases20, 'T_20', '2020'),
                             (Top10Provincebycases, 'T_19', '2019')):
    fig.add_trace(go.Bar(x=top10.index, y=top10[column],
                         text=top10[column],
                         textposition='outside',
                         name=label))

fig.update_layout(title_text='Top 10 Provinces by Deaths in 2019 and in 2020')
# The value is printed on each bar, so the y tick labels are hidden.
fig.update_yaxes(showticklabels=False)

fig.show()

## Step 6: Local data for geographical representation

In [None]:
### Get Municipality-wise Data
def comunedata(df_cleaned,varia):
    """Total ``varia`` for every municipality and GE combination.

    Rows are grouped by region, province and municipality name together with
    the municipality's LONGITUDE, LATITUDE and POPULATION values, and by GE.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Frame holding the seven grouping columns listed in ``group_keys``
        plus the column(s) named by ``varia``.
    varia : str or list of str
        Column label (or list of labels) to sum, e.g. 'T_20'.

    Returns
    -------
    pandas.DataFrame
        One column per summed label, indexed by a MultiIndex over the seven
        grouping columns, in ascending order.
    """
    value_cols = varia if isinstance(varia, list) else [varia]
    group_keys = ['NOME_REGIONE', 'NOME_PROVINCIA', 'NOME_COMUNE',
                  'LONGITUDE', 'LATITUDE', 'POPULATION', 'GE']
    # Selecting with a list keeps the aggregate a DataFrame, so the former
    # reset_index()/set_index() round-trip and the no-op set_levels() call are
    # not needed; sort_index() orders the rows by the index itself.
    return df_cleaned.groupby(group_keys)[value_cols].sum().sort_index()

  
# Yearly totals per municipality and GE: one single-column frame per year.
dfbycomune15 = comunedata(comuni, 'T_15')
dfbycomune16 = comunedata(comuni, 'T_16')
dfbycomune17 = comunedata(comuni, 'T_17')
dfbycomune18 = comunedata(comuni, 'T_18')
dfbycomune19 = comunedata(comuni, 'T_19')
dfbycomune20 = comunedata(comuni, 'T_20')

# Left-join the yearly frames on their shared index, starting from 2015.
ComuniConsolidated = dfbycomune15
for yearly_totals in (dfbycomune16, dfbycomune17, dfbycomune18,
                      dfbycomune19, dfbycomune20):
    ComuniConsolidated = pd.merge(ComuniConsolidated, yearly_totals,
                                  how='left', left_index=True, right_index=True)

# Per-municipality totals over all GE values (GE is not a grouping key here).
# .sum() on the selected columns replaces agg({...: sum}); passing the builtin
# `sum` to agg is deprecated in recent pandas releases.
municipality_keys = ['NOME_REGIONE', 'NOME_PROVINCIA', 'NOME_COMUNE',
                     'LONGITUDE', 'LATITUDE', 'POPULATION']
year_columns = ['T_15', 'T_16', 'T_17', 'T_18', 'T_19', 'T_20']
df_new = comuni.groupby(municipality_keys)[year_columns].sum()

# ComuniConsolidated.head()
df_new.head()

## Step 7: Prepare to plot maps

In [None]:
# Turn the grouping keys of df_new back into ordinary columns.
df_geo = df_new.reset_index()

# Keep only rows whose longitude and latitude are both strictly positive.
has_positive_coords = (df_geo.LONGITUDE > 0) & (df_geo.LATITUDE > 0)
df_clean = df_geo[has_positive_coords]

# After reset_index() the columns at positions 3 and 4 are LONGITUDE and
# LATITUDE; they are selected by name here.
long19 = list(df_clean['LONGITUDE'])
lat19 = list(df_clean['LATITUDE'])

# Quick sanity scatter of the retained coordinates (markers only, no line).
plt.plot(long19, lat19, marker='o', linestyle='None')
plt.gca().set(title='Italian places', xlabel='Longitude', ylabel='Latitude')
plt.axis('equal')
plt.show()


## Step 8: First map

In [None]:
# Circle-marker map: one circle per row of df_clean, centred on [42, 12].
# A circle has a non-zero radius only where 2020 deaths exceed 2019 deaths.
my_map1 = folium.Map(location=[42, 12], zoom_start=6)

# Popup template. Plain "(" and "%" replace the former "\(" and "\%", which
# are not valid Python escape sequences.
popup_template = ("{}<br> Population: {}<br> Total Deaths 2020: {} ({} %) "
                  "<br> Increase 2019-20 : {} %")

HIGH_RATE_COLOR = "#E37222"  # tangerine: 2020 deaths above 1 % of population
LOW_RATE_COLOR = "#0A8A9F"   # teal: everything else

for index, row in df_clean.iterrows():
    # 2020 deaths as a percentage of the population.
    perc_deaths = 100*(row["T_20"]/row["POPULATION"])

    # NOTE(review): the change is expressed relative to the 2020 total, not the
    # 2019 baseline - confirm this is the intended definition of "increase".
    if row["T_20"] > 0:
        increase = 100*((row["T_20"]-row["T_19"])/row["T_20"])
    else:
        increase = 0.00

    # Radius is T_20/1000 only when deaths rose; otherwise 0.
    radio = row["T_20"]/1000. if increase > 0 else 0

    # Text shown when the circle is clicked.
    popup_text = popup_template.format(row["NOME_COMUNE"],
                                       row["POPULATION"],
                                       row["T_20"],
                                       round(perc_deaths, 2),
                                       round(increase, 2))

    color = HIGH_RATE_COLOR if perc_deaths > 1. else LOW_RATE_COLOR

    folium.CircleMarker(location=(row["LATITUDE"], row["LONGITUDE"]),
                        radius=radio,
                        color=color,
                        popup=popup_text,
                        fill=True).add_to(my_map1)

my_map1

## Step 9: Second map with groups

In [None]:


# Map with two switchable layers: circles sized by 2019 deaths and circles
# sized by 2020 deaths, both under one parent feature group.
m = folium.Map(location=[42, 12], zoom_start=6)

fg = folium.FeatureGroup(name='groups')
m.add_child(fg)

g1 = plugins.FeatureGroupSubGroup(fg, '2019')
m.add_child(g1)

g2 = plugins.FeatureGroupSubGroup(fg, '2020')
m.add_child(g2)

# Popup template shared by both layers; the first placeholder after
# "Total Deaths" is the year. Plain "(" and "%" replace the former "\(" and
# "\%", which are not valid Python escape sequences.
popup_template = ("{}<br> Population: {}<br> Total Deaths {}: {} ({} %) "
                  "<br> Increase 2019-20 : {} %")

HIGH_RATE_COLOR = "#E37222"  # tangerine: 2020 deaths above 1 % of population
LOW_RATE_COLOR = "#0A8A9F"   # teal: everything else

for index, row in df_clean.iterrows():
    # Deaths as a percentage of the population, per year. The 2019 popup used
    # to show the 2020 percentage next to the 2019 death count.
    perc_deaths = 100*(row["T_20"]/row["POPULATION"])
    perc_deaths19 = 100*(row["T_19"]/row["POPULATION"])

    # NOTE(review): the change is expressed relative to the 2020 total, not the
    # 2019 baseline - confirm this is the intended definition of "increase".
    if row["T_20"] > 0:
        increase = 100*((row["T_20"]-row["T_19"])/row["T_20"])
    else:
        increase = 0.00

    # 2020 radius is T_20/1000 only when deaths rose; 2019 radius is always
    # T_19/1000.
    radio = row["T_20"]/1000. if increase > 0 else 0
    radio2 = row["T_19"]/1000

    popup_text = popup_template.format(row["NOME_COMUNE"],
                                       row["POPULATION"],
                                       2020,
                                       row["T_20"],
                                       round(perc_deaths, 2),
                                       round(increase, 2))

    popup_text2 = popup_template.format(row["NOME_COMUNE"],
                                        row["POPULATION"],
                                        2019,
                                        row["T_19"],
                                        round(perc_deaths19, 2),
                                        round(increase, 2))

    color = HIGH_RATE_COLOR if perc_deaths > 1. else LOW_RATE_COLOR

    # 2020 layer
    folium.CircleMarker(location=(row["LATITUDE"], row["LONGITUDE"]),
                        radius=radio,
                        color=color,
                        popup=popup_text,
                        fill=True).add_to(g2)

    # 2019 layer
    folium.CircleMarker(location=(row["LATITUDE"], row["LONGITUDE"]),
                        radius=radio2,
                        color='red',
                        popup=popup_text2,
                        fill=True).add_to(g1)

folium.LayerControl(collapsed=False).add_to(m)
# m.save(os.path.join('results', 'Plugins_8.html'))
m


# my_map1


In [None]:
from folium.plugins import HeatMap

# Clustered markers (one per row of df_clean) plus a heat map weighted by
# 2020 deaths divided by population.
my_map2 = folium.Map(location=[42, 12], zoom_start=6)

# Heat-map weight per row, computed once and reused for the scale maximum.
death_rate = df_clean['T_20']/df_clean['POPULATION']
pesi = [[la, lo, peso]
        for la, lo, peso in zip(df_clean.LATITUDE, df_clean.LONGITUDE, death_rate)]

locations = list(zip(df_clean.LATITUDE, df_clean.LONGITUDE))
icons = [folium.Icon(icon="ambulance", prefix="fa") for _ in locations]

# Popup template. Plain "(" and "%" replace the former "\(" and "\%", which
# are not valid Python escape sequences.
popup_template = ("{}<br> Population: {}<br> Total Deaths 2020: {} ({} %) "
                  "<br> Increase 2019-20 : {} %")

# One popup per marker. The percentages are computed from the current row;
# they used to be read from variables left behind by the previous cell's loop,
# so every popup repeated the same two numbers and a fresh kernel raised
# NameError.
popuplist = []
for index, row in df_clean.iterrows():
    perc_deaths = 100*(row["T_20"]/row["POPULATION"])
    # NOTE(review): the change is expressed relative to the 2020 total, not the
    # 2019 baseline - confirm this is the intended definition of "increase".
    if row["T_20"] > 0:
        increase = 100*((row["T_20"]-row["T_19"])/row["T_20"])
    else:
        increase = 0.00
    popuplist.append(popup_template.format(row["NOME_COMUNE"],
                                           row["POPULATION"],
                                           row["T_20"],
                                           round(perc_deaths, 2),
                                           round(increase, 2)))

cluster = MarkerCluster(locations=locations, popups=popuplist, icons=icons)
my_map2.add_child(cluster)

max_amount = float(death_rate.max())
print(max_amount)

# NOTE(review): newer folium releases may ignore `max_val` and compute the
# maximum intensity themselves - confirm against the installed version.
hm_wide = HeatMap(pesi,
                  min_opacity=0.2,
                  max_val=max_amount,
                  radius=10, blur=5,
                  max_zoom=1,
                  ).add_to(my_map2)

my_map2