<a href="https://colab.research.google.com/github/Datangels/COVID19_analysis/blob/master/CoronaVirusITALY_PROVINCES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **DEFINE LIBRARIES**

In [0]:
# Data handling
import pandas as pd
import numpy as np
# In-memory text buffer and HTTP client used to fetch the remote CSV files
import io
import requests
# Colab helper for downloading files from the runtime to the local machine
from google.colab import files
# Disable pandas' chained-assignment warning for the whole notebook
pd.options.mode.chained_assignment = None
# Shell command: install geopandas into the runtime (it is imported in a later cell)
! pip install geopandas

# **DOWNLOAD PROVINCE DATA**

In [0]:
# Common prefix of the per-day province CSV files; the file name '<YYYYMMDD>.csv' is appended to it.
url_first_part = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province-'

# One file name per calendar day in the analysed window (both ends included).
# Extend the end date here to pull in more recent reports.
file_dates = pd.date_range('2020-02-24', '2020-03-24', freq='D').strftime('%Y%m%d.csv').tolist()

# One DataFrame per downloaded daily file, concatenated once after the loop.
full_data_list = []

for file_date in file_dates:
  url_complete = url_first_part + file_date
  # A timeout prevents the cell from hanging forever on a stalled connection.
  http_response = requests.get(url_complete, timeout=30)
  # Fail loudly on a missing file / server error instead of parsing an error page as CSV.
  http_response.raise_for_status()
  response = http_response.content
  temp_df = pd.read_csv(io.StringIO(response.decode('utf-8')))
  full_data_list.append(temp_df)

# Stack all daily files into a single frame with a fresh 0..n-1 index.
province_df = pd.concat(full_data_list, ignore_index=True)

# Optional export of the raw data (uncomment to save and download it from Colab):
# province_df.to_csv('province_df.csv')
# files.download('province_df.csv')

# **FIX AND ADAPT DATA**

In [0]:
# Work on an independent copy so the downloaded frame (province_df) is left untouched
# and this cell can be re-run without repeating the download.
province_df_to_manage = province_df.copy()

# Parse the report timestamps once; values that cannot be parsed become NaT.
# Timestamps are truncated to midnight so the subtraction below counts whole calendar
# days: `.dt.days` floors, so reports carrying different times of day would otherwise
# produce off-by-one (and possibly duplicated) day offsets.
report_days = pd.to_datetime(province_df_to_manage['data'], errors='coerce').dt.normalize()

# Age of each row in days relative to the most recent report (0 = latest report).
province_df_to_manage['days_from_last_obs'] = (report_days.max() - report_days).dt.days

# Drop the rows whose province name is the placeholder
# 'In fase di definizione/aggiornamento' (Italian for "being defined/updated").
province_df_to_manage = province_df_to_manage[province_df_to_manage['denominazione_provincia'] != 'In fase di definizione/aggiornamento']

# **ANALYZE DATA**

In [0]:
# Columns for which per-province day-over-day changes are computed.
kpi_columns = ['totale_casi']

# Descending days_from_last_obs puts the oldest report first, so the differences
# below are taken between a row and the previous report of the same province.
temp_province_daily_df = province_df_to_manage.sort_values(by=['denominazione_regione', 'days_from_last_obs'], ascending=[False, False])

for kpi in kpi_columns:
  kpi_by_province = temp_province_daily_df.groupby(['denominazione_provincia'])[kpi]
  # d_<kpi>: absolute change versus the previous row of the same province (0 for the first row).
  temp_province_daily_df['d_' + kpi] = kpi_by_province.diff().fillna(0).replace([np.inf, -np.inf], 0)
  # dp_<kpi>: percentage change versus the previous row; NaN and +/-inf
  # (e.g. growth from a zero baseline) are reported as 0.
  temp_province_daily_df['dp_' + kpi] = kpi_by_province.pct_change().apply(lambda x: round(x, 2)* 100).fillna(0).replace([np.inf, -np.inf], 0)

# Final per-province daily frame used by the following cells.
# (DataFrame.append was removed in pandas 2.0; appending to an empty frame was just a copy.)
province_df_to_manage_daily = temp_province_daily_df.copy()

# **SINGLE PROVINCE RECAP**

In [22]:
# Parameters of the recap: which province, how many of the most recent days, which columns.
RECAP_PROVINCE = 'Varese'
LAST_n_DAYS = 10
COLUMNS_TO_DISPLAY = [
    'days_from_last_obs',
    'data',
    'denominazione_provincia',
    'totale_casi',
    'd_totale_casi',
    'dp_totale_casi',
]

province_df_to_manage_daily_recap = province_df_to_manage_daily[COLUMNS_TO_DISPLAY]

# Row filters: the chosen province, restricted to offsets 0 .. LAST_n_DAYS - 1.
is_recap_province = province_df_to_manage_daily_recap['denominazione_provincia'].eq(RECAP_PROVINCE)
is_recent_enough = province_df_to_manage_daily_recap['days_from_last_obs'].lt(LAST_n_DAYS)

# Last expression of the cell: rendered as the output table.
province_df_to_manage_daily_recap[is_recap_province & is_recent_enough]

Unnamed: 0,days_from_last_obs,data,denominazione_provincia,totale_casi,d_totale_casi,dp_totale_casi
2619,9,2020-03-15 17:00:00,Varese,184.0,26.0,16.0
2747,8,2020-03-16 17:00:00,Varese,202.0,18.0,10.0
2875,7,2020-03-17 17:00:00,Varese,234.0,32.0,16.0
3003,6,2020-03-18 17:00:00,Varese,265.0,31.0,13.0
3131,5,2020-03-19 17:00:00,Varese,310.0,45.0,17.0
3259,4,2020-03-20 17:00:00,Varese,338.0,28.0,9.0
3387,3,2020-03-21 17:00:00,Varese,359.0,21.0,6.0
3515,2,2020-03-22 17:00:00,Varese,386.0,27.0,8.0
3643,1,2020-03-23 17:00:00,Varese,421.0,35.0,9.0
3771,0,2020-03-24 17:00:00,Varese,450.0,29.0,7.0


## **MULTI PROVINCE RECAP**

# **SHOW PIVOT**

In [0]:
# KPI shown in the table, provinces to compare (one row each) and number of most recent days to show.
PIVOT_MAIN_KPI = 'totale_casi'

PIVOT_PROVINCIES = ['Varese', 'Milano', 'Bergamo', 'Brescia', 'Firenze']

LAST_n_DAYS = 20

# Keep a single row per (province, day offset): pivot() raises on duplicated index/column pairs.
province_df_to_manage_daily_for_pivot = province_df_to_manage_daily.groupby(['denominazione_provincia', 'days_from_last_obs']).last().reset_index()

# Rows = provinces, columns = (KPI, day offset); combinations with no data are shown as 0.
selected_provinces_df = province_df_to_manage_daily_for_pivot[province_df_to_manage_daily_for_pivot['denominazione_provincia'].isin(PIVOT_PROVINCIES)]
province_df_to_manage_daily_pivot = selected_provinces_df.pivot(index='denominazione_provincia', columns='days_from_last_obs', values=[PIVOT_MAIN_KPI]).fillna(0)

# Reverse the column order (assuming pivot returns the day offsets ascending, this puts
# the oldest day first) and keep the trailing LAST_n_DAYS columns.
# The start index is clamped at 0: a negative start would silently return only part
# of the history when LAST_n_DAYS exceeds the number of available days.
reversed_day_columns = province_df_to_manage_daily_pivot.columns[::-1]
first_kept_position = max(len(reversed_day_columns) - LAST_n_DAYS, 0)
province_df_to_manage_daily_pivot[reversed_day_columns[first_kept_position:]]

# **PLOT GEODATA**

In [0]:
import os
import folium
from folium import plugins
import geopandas as gpd

In [0]:
# Fetch the province boundaries (GeoJSON) and lower-case the province names,
# so they can be matched against the lower-cased names built in the map cell.
file_name = 'https://raw.githubusercontent.com/openpolis/geojson-italy/master/geojson/limits_IT_provinces.geojson'
province_geo_data = gpd.read_file(file_name).assign(
    prov_name=lambda geo_df: geo_df['prov_name'].str.lower()
)

In [0]:
import branca.colormap as cm

# Map parameters: the KPI used to colour the provinces and the day offset to display.
KPI_MAP = 'dp_totale_casi'
DAYS_AGO = 20 # 0 is the last data point

# Base map; the initial view is centred on the given latitude/longitude.
map_italy = folium.Map([41.9028, 12.4964], zoom_start=6)

# Rows of the selected day, limited to provinces with more than 100 total cases.
on_selected_day = province_df_to_manage_daily['days_from_last_obs'] == DAYS_AGO
over_100_cases = province_df_to_manage_daily['totale_casi'] > 100

# Province name + KPI, with the name column renamed and lower-cased to match the geodata key.
popups = (
    province_df_to_manage_daily
    .loc[on_selected_day & over_100_cases, ['denominazione_provincia', KPI_MAP]]
    .rename(columns={'denominazione_provincia': 'prov_name'})
    .assign(prov_name=lambda df: df['prov_name'].str.lower())
)

# Attach the KPI to the province geometries (default inner join: only matched provinces remain).
province_geo_data_short = province_geo_data.merge(popups, on="prov_name")[['prov_name', KPI_MAP, 'geometry']]

# Colour-coded layer: one polygon per province, shaded by the KPI value.
choropleth_layer = folium.Choropleth(
    geo_data=province_geo_data_short,
    data=province_geo_data_short,
    columns=['prov_name', KPI_MAP],
    key_on='feature.properties.prov_name',
    fill_color='YlOrRd'
)
choropleth_layer.add_to(map_italy)


def style_function(feature):
    """Return the default (nearly transparent) style applied to every province of the overlay."""
    return {
        'fillColor': '#ffffff',
        'color': '#000000',
        'fillOpacity': 0.1,
        'weight': 0.1,
    }


def highlight_function(feature):
    """Return the darker style applied to a province while the pointer is over it."""
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 0.50,
        'weight': 0.1,
    }


# Tooltip listing the province name and the KPI value.
province_tooltip = folium.features.GeoJsonTooltip(
    fields=['prov_name', KPI_MAP],
    aliases=['prov_name: ', KPI_MAP + ' : '],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
)

# Overlay drawn on top of the choropleth; it carries the highlight style and the tooltip.
NIL = folium.features.GeoJson(
    province_geo_data_short,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=province_tooltip
)
map_italy.add_child(NIL)
map_italy.keep_in_front(NIL)

# Last expression of the cell: renders the map.
map_italy