<a href="https://colab.research.google.com/github/Datangels/COVID19_analysis/blob/master/CoronaVirusITALY_REGIONS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **DEFINE LIBRARIES**

In [0]:
import pandas as pd
import io
import numpy as np
import requests
from google.colab import files
# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
# Shell command: install geopandas into the runtime (it is imported in the geodata section below).
! pip install geopandas

# **DOWNLOAD REGION DATA**

In [0]:
# Common URL prefix of the daily regional CSV files hosted in the
# pcm-dpc/COVID-19 GitHub repository; the per-day suffix is appended below.
url_first_part = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-'

# One entry per daily file: date in YYYYMMDD form plus the '.csv' extension.
file_dates = [
              '20200224.csv', '20200225.csv', '20200226.csv', '20200227.csv', '20200228.csv', '20200229.csv',
              '20200301.csv', '20200302.csv', '20200303.csv', '20200304.csv', '20200305.csv', '20200306.csv',
              '20200307.csv', '20200308.csv', '20200309.csv', '20200310.csv', '20200311.csv', '20200312.csv',
              '20200313.csv', '20200314.csv', '20200315.csv', '20200316.csv', '20200317.csv', '20200318.csv',
              '20200319.csv', '20200320.csv', '20200321.csv', '20200322.csv', '20200323.csv', '20200324.csv',
              '20200325.csv', '20200326.csv', '20200327.csv', '20200328.csv']

# Seconds to wait for each HTTP request before giving up instead of hanging the notebook.
REQUEST_TIMEOUT_SECONDS = 30

full_data_list = []

for file_date in file_dates:
  url_complete = url_first_part + file_date
  http_reply = requests.get(url_complete, timeout=REQUEST_TIMEOUT_SECONDS)
  # Fail loudly on 4xx/5xx: otherwise an error page would be decoded and parsed as CSV.
  http_reply.raise_for_status()
  response = http_reply.content
  temp_df = pd.read_csv(io.StringIO(response.decode('utf-8')))
  full_data_list.append(temp_df)

# Stack all daily files into a single frame with a fresh 0..n-1 index.
df = pd.concat(full_data_list, ignore_index=True)

# Optional export of the raw concatenated data (kept disabled):
# df.to_csv('df.csv')
# files.download('df.csv')

# **FIX AND ADAPT DATA**

In [0]:
# Rename specific regions so they match the names used by the geospatial data.
# 'P.A. Trento' and 'P.A. Bolzano' both receive the same region name; their rows
# are then summed together by the groupby below.
is_trentino_province = df['denominazione_regione'].isin(['P.A. Trento', 'P.A. Bolzano'])
df.loc[is_trentino_province, 'denominazione_regione'] = 'trentino-alto adige/sudtirol'
df.loc[df['denominazione_regione'] == 'Emilia Romagna', 'denominazione_regione'] = 'emilia-romagna'

# Columns that are summed when rows sharing the same region and timestamp are merged.
SUMMED_COLUMNS = [
    'ricoverati_con_sintomi',
    'terapia_intensiva',
    'totale_ospedalizzati',
    'isolamento_domiciliare',
    'totale_attualmente_positivi',
    'nuovi_attualmente_positivi',
    'dimessi_guariti',
    'deceduti',
    'totale_casi',
    'tamponi']

# Aggregate to one row per (region, timestamp); only the grouping keys and the
# summed columns survive this step.
df = (
    df.groupby(['denominazione_regione', 'data'])
      .agg({column: 'sum' for column in SUMMED_COLUMNS})
      .reset_index()
)

# Work on a real copy so columns added to region_df later do not leak back into df
# (a plain assignment would only create a second name for the same object).
region_df = df.copy()

# **ADD NEW INTERESTING COLUMNS**

In [0]:
# Ratio of 'tamponi' to 'totale_casi' for each row.
region_df['tamponi/tot_casi'] = region_df['tamponi'].div(region_df['totale_casi'])

# 'totale_casi' minus both 'dimessi_guariti' and 'deceduti'.
closed_cases = region_df['dimessi_guariti'] + region_df['deceduti']
region_df['positivi'] = region_df['totale_casi'] - closed_cases

# **ANALYZE DATA**

In [0]:
# KPI columns are every column except the identifiers and the helper day counter.
NON_KPI_COLUMNS = ['denominazione_regione', 'data', 'days_from_last_obs']
kpi_columns = [item for item in region_df.columns if item not in NON_KPI_COLUMNS]

# Whole days between each observation and the most recent one (0 = latest data point).
observation_dates = pd.to_datetime(region_df['data'], errors='coerce')
region_df['days_from_last_obs'] = (pd.to_datetime(region_df['data'].max()) - observation_dates).dt.days

# Descending 'days_from_last_obs' puts each region's rows oldest-first, so the
# per-region diff / pct_change below compare every row with the previous observation.
temp_region_df = region_df.sort_values(by=['denominazione_regione', 'days_from_last_obs'], ascending=[False, False])

for kpi in kpi_columns:
  # Normalise the KPI itself to integers (NaN and +/-inf become 0).
  temp_region_df[kpi] = temp_region_df[kpi].fillna(0).replace([np.inf, -np.inf], 0).astype(int)

  kpi_by_region = temp_region_df.groupby(['denominazione_regione'])[kpi]

  # Absolute change versus the previous row of the same region (first row -> 0).
  absolute_change = kpi_by_region.diff().fillna(0).astype(int)

  # Percentage change versus the previous row; undefined (NaN) or infinite values
  # (previous value 0) are reported as 0.
  relative_change = kpi_by_region.pct_change().replace([np.inf, -np.inf], np.nan).fillna(0)

  temp_region_df['d_' + kpi] = absolute_change
  # Scale to percent first, then round, then cast: rounding the fraction and multiplying
  # afterwards can leave values such as 28.999999999999996 that int() truncates to 28.
  temp_region_df['dp_' + kpi] = (relative_change * 100).round().astype(int)

# DataFrame.append no longer exists in pandas 2.x, and appending to an empty frame was
# redundant anyway; an independent copy gives the same rows and index.
ok_region_df = temp_region_df.copy()

# **SHOW RECAP**

In [0]:
# Columns shown in the recap table.
COLUMNS_TO_DISPLAY = ['denominazione_regione', 'days_from_last_obs', 'positivi', 'totale_attualmente_positivi']

# Which observation to show, counted back from the latest one.
DAYS_AGO = 0 # 0 is the last data point

ok_region_df_recap = ok_region_df[COLUMNS_TO_DISPLAY]

# Keep only the rows of the requested day and list them alphabetically by region.
is_requested_day = ok_region_df_recap['days_from_last_obs'] == DAYS_AGO
ok_region_df_recap.loc[is_requested_day].sort_values(by='denominazione_regione')

# **SHOW PIVOT**

In [0]:
# KPI shown in the pivot cells.
PIVOT_MAIN_KPI = 'd_deceduti'

# Number of most recent days (columns) to display.
LAST_n_DAYS = 10

# One row per (region, days_from_last_obs), keeping the last row of each group.
ok_region_df_for_pivot = ok_region_df.groupby(['denominazione_regione', 'days_from_last_obs']).last().reset_index()

# Regions as rows, days_from_last_obs as columns; missing combinations become 0.
ok_region_df_pivot = ok_region_df_for_pivot.pivot(index='denominazione_regione', columns='days_from_last_obs', values=[PIVOT_MAIN_KPI]).fillna(0)

# Reverse the column order and keep the trailing LAST_n_DAYS columns.
reversed_day_columns = ok_region_df_pivot.columns[::-1]
# Clamp at 0: when LAST_n_DAYS exceeds the number of available days, a negative start
# index would silently select only a tail subset instead of every available day.
first_shown_position = max(len(reversed_day_columns) - LAST_n_DAYS, 0)
ok_region_df_pivot[reversed_day_columns[first_shown_position:]]

# **PLOT GEODATA**

In [0]:
import os
import folium
from folium import plugins
import geopandas as gpd

In [0]:
# Download Geodata
file_name = 'https://raw.githubusercontent.com/stefanocudini/leaflet-geojson-selector/master/examples/italy-regions.json'
regional_geo_data = gpd.read_file(file_name)
regional_geo_data['reg_name'] = regional_geo_data['name']

In [33]:
# KPI used to colour the regions and shown in the tooltip.
KPI_MAP = 'dp_totale_casi'
# Which observation to map, counted back from the latest one.
DAYS_AGO = 0 # 0 is the last data point

map_italy = folium.Map([41.9028, 12.4964], zoom_start=6)

# One row per region for the chosen day: lower-cased region name (as 'reg_name') plus the KPI.
popups = (
    ok_region_df
    .loc[ok_region_df['days_from_last_obs'] == DAYS_AGO, ['denominazione_regione', KPI_MAP]]
    .rename(columns={'denominazione_regione': 'reg_name'})
)
popups['reg_name'] = popups['reg_name'].str.lower()

# Attach the KPI to the shapes (default merge keeps only names present on both sides)
# and keep just the columns the map layers need.
regional_geo_data_short = regional_geo_data.merge(popups, on="reg_name")[['reg_name', KPI_MAP, 'geometry']]

# Coloured layer: regions shaded by the KPI value.
choropleth_layer = folium.Choropleth(
    geo_data=regional_geo_data_short,
    data=regional_geo_data_short,
    columns=['reg_name', KPI_MAP],
    key_on='feature.properties.reg_name',
    fill_color='YlOrRd'
)
choropleth_layer.add_to(map_italy)


def style_function(feature):
  """Return the near-transparent default style applied to every region of the overlay."""
  return {
      'fillColor': '#ffffff',
      'color': '#000000',
      'fillOpacity': 0.1,
      'weight': 0.1}


def highlight_function(feature):
  """Return the darker style applied to the highlighted region."""
  return {
      'fillColor': '#000000',
      'color': '#000000',
      'fillOpacity': 0.50,
      'weight': 0.1}


# Tooltip listing the region name and the KPI value.
region_tooltip = folium.features.GeoJsonTooltip(
    fields=['reg_name', KPI_MAP],
    aliases=['reg_name: ', KPI_MAP + ' : '],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
)

# Overlay carrying the highlight behaviour and the tooltip.
NIL = folium.features.GeoJson(
    regional_geo_data_short,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=region_tooltip
)
map_italy.add_child(NIL)
map_italy.keep_in_front(NIL)

map_italy