In [1]:
import glob
import os
import pandas as pd 
import matplotlib.pyplot as plt
from random import choice
from datetime import date

from bokeh.plotting import figure, output_notebook, show
from bokeh.models.tools import HoverTool, PanTool
from bokeh.colors import named

In [2]:
# Configure Bokeh so that every later show() call renders inline in the notebook.
output_notebook()

In [3]:
# Shell escape: pull this repository and fetch its submodules before any CSV is read.
!git pull --recurse-submodules

Already up to date.
Submodule path 'COVID-19': checked out 'd3f0e79175a481bbc34832f714d7660f39c409e4'
Submodule path 'covid19-opendata-vaccini': checked out '8819058280658bec77f0b90506ed347fccd14723'


Fetching submodule COVID-19
Fetching submodule covid19-opendata-vaccini


In [4]:
# Shell escape: move each submodule to the newest commit of its remote, so the
# figures below depend on the day the notebook is run.
!git submodule update --remote

Submodule path 'COVID-19': checked out '1d0ea9a242574477fab976ca99ca03b963922d1d'
Submodule path 'covid19-opendata-vaccini': checked out '8d74adc4258502d72db2a4d851814235890991af'


# COVID-19 INFECTIONS TREND ANALYSIS

This section analyses COVID-19 infections, both cumulatively and day by day, for:

- Italy as a whole;
- some Italian regions;
- some provinces;
- hospitalized, recovered and deceased people

In [5]:
# Glob pattern for the national-trend CSV files inside the COVID-19 submodule.
nat_path = "COVID-19/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale-*.csv"

In [6]:
# Load every CSV matching the national pattern into a single frame.
# Files are read in sorted order so the row order is deterministic, and exact
# duplicate rows are dropped.
# NOTE(review): the "-*.csv" pattern presumably also matches a "-latest.csv"
# snapshot repeating the newest day -- confirm against the submodule contents.
nat_files = sorted(glob.glob(nat_path))
if not nat_files:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"no CSV file matches {nat_path!r}; is the COVID-19 submodule checked out?")
df = pd.concat(map(pd.read_csv, nat_files)).drop_duplicates()

In [7]:
# Order the rows by the 'data' column. Rebinding (instead of inplace=True)
# follows the pandas recommendation and keeps the statement chainable.
df = df.sort_values(by='data')

In [8]:
# Add 'only_data': the calendar date extracted from the 'data' timestamps.
df = df.assign(only_data=pd.to_datetime(df['data']).dt.date)

## Italy globally considered

In [9]:
# Line chart of 'totale_positivi' (titled "Total current infected") over time.
fig = figure(title='Total current infected',
             x_axis_label='date',
             y_axis_label='infected',
             width=800,
             height=400,
             x_axis_type='datetime')

fig.line(df['only_data'],
         df['totale_positivi'],
         line_alpha=0.8,
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('total current infected', '@y')],
    formatters={'@x': 'datetime'}
))
# Start the x axis at the first date present in the data.
fig.x_range.start = df['only_data'].min()

show(fig)

In [10]:
# Line chart of 'nuovi_positivi' (new positives reported per day) over time.
fig = figure(title='New daily infected trend',
             x_axis_label='date',
             y_axis_label='new infected',
             width=800,
             height=400,
             x_axis_type='datetime')

fig.line(df['only_data'],
         df['nuovi_positivi'],
         line_alpha=0.8,
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('new infected', '@y')],
    formatters={'@x': 'datetime'}
))
# Add a horizontal-only pan tool, but leave no drag tool active by default.
fig.add_tools(PanTool(dimensions='width'))
fig.toolbar.active_drag = None
# Start the x axis at the first date present in the data.
fig.x_range.start = df['only_data'].min()

show(fig)

In [11]:
# Latest date in the national data and the 'nuovi_positivi' value of its first row.
max_date = df['only_data'].max()
new_infected_today = df.loc[df['only_data'] == max_date, 'nuovi_positivi'].iloc[0]

In [12]:
# Report the national new-case figure for the most recent date in the data.
print(f'new infected in Italy for date {max_date} are {new_infected_today}') 

new infected in Italy for date 2021-05-05 are 10585


In [13]:
# Columns drawn in the cumulative chart and the legend label used for each.
columns_to_show = ['totale_casi', 'dimessi_guariti', 'deceduti']
legend = {'totale_casi': 'total infected', 'dimessi_guariti': 'healed people', 'deceduti': 'dead people'}

# Pick one distinct random named colour per column.
# Each candidate is drawn exactly once, and that same value is both tested and
# stored; drawing a second time for the append let duplicates slip through.
all_colors = named.__all__
colors = []
while len(colors) < len(columns_to_show):
    candidate = choice(all_colors)
    if candidate not in colors:
        colors.append(candidate)

In [14]:
# One line per entry of columns_to_show, each with its own colour and legend label.
fig = figure(title='Cumulative trend for dead people, healed people and total infected',
             x_axis_label='date',
             y_axis_label='count',
             width=800,
             height=400,
             x_axis_type='datetime')

# enumerate pairs each column with its colour by position, without a list.index() lookup.
for position, cl in enumerate(columns_to_show):
    fig.line(df['only_data'],
             df[cl],
             color=colors[position],
             legend_label=legend[cl],
             line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('count', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first date present in the data.
fig.x_range.start = df['only_data'].min()

show(fig)

## analysis of the hospitalizations trend

In [15]:
# Two lines over time: 'ricoverati_con_sintomi' (red) and 'terapia_intensiva' (blue).
fig = figure(title='hospitalizations trend',
             x_axis_label='date',
             y_axis_label='count',
             width=800,
             height=400,
             x_axis_type='datetime')

fig.line(df['only_data'],
         df['ricoverati_con_sintomi'],
         legend_label='hospitalized with symptoms',
         color='red',
         line_width=2)
fig.line(df['only_data'],
         df['terapia_intensiva'],
         legend_label='intensive care unit',
         color='blue',
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('count', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first date present in the data.
fig.x_range.start = df['only_data'].min()

# show the figure
show(fig)

## trend for regions

In [16]:
# Glob pattern for the per-region CSV files inside the COVID-19 submodule.
reg_path = "COVID-19/dati-regioni/dpc-covid19-ita-regioni-*.csv"

In [17]:
# Load every CSV matching the regional pattern into a single frame.
# Files are read in sorted order so the row order is deterministic, and exact
# duplicate rows are dropped so a day cannot be counted twice in later sums.
# NOTE(review): the "-*.csv" pattern presumably also matches a "-latest.csv"
# snapshot repeating the newest day -- confirm against the submodule contents.
reg_files = sorted(glob.glob(reg_path))
if not reg_files:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"no CSV file matches {reg_path!r}; is the COVID-19 submodule checked out?")
df_reg = pd.concat(map(pd.read_csv, reg_files)).drop_duplicates()

In [18]:
# Parse the 'data' column once and derive both helper columns from it
# (it was previously parsed three times).
reg_timestamps = pd.to_datetime(df_reg['data'])
# Calendar date without the time component.
df_reg['only_data'] = reg_timestamps.dt.date
# "<year>_<week number>" label, e.g. "2021_18" (%W counts weeks starting on Monday).
df_reg['week'] = reg_timestamps.dt.strftime("%Y_%W")

In [19]:
# 'nuovi_positivi' of the first Puglia row dated max_date.
new_infected_puglia = df_reg.loc[
    (df_reg['only_data'] == max_date) & (df_reg['denominazione_regione'] == "Puglia"),
    'nuovi_positivi',
].iloc[0]

In [20]:
# Report the Puglia new-case figure for the same date used in the national report.
print(f'new infected in Puglia for date {max_date} are {new_infected_puglia}') 

new infected in Puglia for date 2021-05-05 are 1171


This analysis covers the regions 'Puglia', 'Sicilia' and 'Sardegna'. To analyze other regions, add them to the "regioni" list below.

In [21]:
# Regions drawn in the regional charts; edit this list to analyse others.
regioni = ['Puglia', 'Sicilia', 'Sardegna']

# Pick one distinct random named colour per region.
# Each candidate is drawn exactly once, and that same value is both tested and
# stored; drawing a second time for the append let duplicates slip through.
all_colors = named.__all__
colors = []
while len(colors) < len(regioni):
    candidate = choice(all_colors)
    if candidate not in colors:
        colors.append(candidate)

In [22]:
# One line of daily 'nuovi_positivi' per region listed in regioni.
fig = figure(title='new daily infected for some regions',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='new infected',
             x_axis_type='datetime')

# enumerate pairs each region with its colour by position, without a list.index() lookup.
for position, p in enumerate(regioni):
    # Rows of this region only, in chronological order so the line is drawn left to right.
    df_reg_filtered = df_reg.query('denominazione_regione==@p').sort_values(by='only_data')
    fig.line(df_reg_filtered['only_data'], df_reg_filtered['nuovi_positivi'],
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('new infected', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first date present in the regional data.
fig.x_range.start = df_reg['only_data'].min()

# Show the figure
show(fig)

In [23]:
# One line of weekly 'nuovi_positivi' totals per region listed in regioni.
fig = figure(title='New weekly infected for some regions',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='new infected',
             x_axis_type='datetime')

# enumerate pairs each region with its colour by position, without a list.index() lookup.
for position, p in enumerate(regioni):
    df_reg_filtered = df_reg.query('denominazione_regione==@p')[['only_data', 'nuovi_positivi']]
    # resample needs a datetime index; 'W' buckets the daily values into weeks and sums them.
    df_reg_filtered = df_reg_filtered.set_index(pd.to_datetime(df_reg_filtered['only_data']))['nuovi_positivi'].resample('W').sum()
    fig.line(df_reg_filtered.index, df_reg_filtered,
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('new infected', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first date present in the regional data.
fig.x_range.start = df_reg['only_data'].min()

# Show the figure
show(fig)

### New weekly infections per 100,000 inhabitants in Puglia

In [24]:
# Weekly totals of 'nuovi_positivi' for Puglia, indexed by a datetime 'only_data'.
df_reg_filtered = (
    df_reg.loc[df_reg['denominazione_regione'] == "Puglia", ['only_data', 'nuovi_positivi']]
    .assign(only_data=lambda frame: pd.to_datetime(frame['only_data']))
    .set_index('only_data')['nuovi_positivi']
    .resample('W')
    .sum()
)

In [25]:
# Weekly new cases in Puglia scaled to 100000 inhabitants, with a fixed reference line.

# NOTE(review): presumably the resident population of Puglia -- confirm the figure and its reference year.
PUGLIA_INHABITANTS = 3_926_931
# NOTE(review): presumably an alert threshold for weekly cases per 100000 inhabitants -- confirm.
WEEKLY_THRESHOLD_PER_100K = 250

fig = figure(title='new weekly infected trend on 100000 inhabitants in Puglia',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='new infected on 100000 inhabitants',
             x_axis_type='datetime')

# Recomputed here so the cell does not depend on which cell last assigned df_reg_filtered.
df_reg_filtered = df_reg.query('denominazione_regione=="Puglia"')[['only_data', 'nuovi_positivi']]
# resample needs a datetime index; 'W' buckets the daily values into weeks and sums them.
df_reg_filtered = df_reg_filtered.set_index(pd.to_datetime(df_reg_filtered['only_data']))['nuovi_positivi'].resample('W').sum()
fig.line(df_reg_filtered.index,
         df_reg_filtered*100_000/PUGLIA_INHABITANTS,
         color='blue',
         legend_label="Puglia",
         line_width=1.5)

# Explicit y values (instead of a bare scalar) give the reference line its own
# 'y' column, so the '@y' hover field resolves on it as well.
fig.line(df_reg_filtered.index,
         [WEEKLY_THRESHOLD_PER_100K] * len(df_reg_filtered),
         color='red',
         line_width=1)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('new infected', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first weekly bucket.
fig.x_range.start = df_reg_filtered.index.min()

# Show the figure
show(fig)

## Trend for some provinces

In [26]:
# Glob pattern for the per-province CSV files inside the COVID-19 submodule.
prov_path = "COVID-19/dati-province/dpc-covid19-ita-province-*.csv"

In [27]:
# Load every CSV matching the provincial pattern into a single frame.
# Files are read in sorted order so the row order is deterministic, and exact
# duplicate rows are dropped.
# NOTE(review): the "-*.csv" pattern presumably also matches a "-latest.csv"
# snapshot repeating the newest day -- confirm against the submodule contents.
prov_files = sorted(glob.glob(prov_path))
if not prov_files:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"no CSV file matches {prov_path!r}; is the COVID-19 submodule checked out?")
df_prov = pd.concat(map(pd.read_csv, prov_files)).drop_duplicates()

In [28]:
# Add 'only_data': the calendar date extracted from the 'data' timestamps.
df_prov = df_prov.assign(only_data=pd.to_datetime(df_prov['data']).dt.date)

This analysis covers the provinces 'Taranto', 'Lecce', 'Messina', 'Cagliari' and 'Bari'. To analyze other provinces, add them to the "province" list below.

In [29]:
# Provinces drawn in the provincial chart; edit this list to analyse others.
province = ['Taranto', 'Lecce', 'Messina', 'Cagliari', 'Bari']

# Pick one distinct random named colour per province.
# Each candidate is drawn exactly once, and that same value is both tested and
# stored; drawing a second time for the append let duplicates slip through.
all_colors = named.__all__
colors = []
while len(colors) < len(province):
    candidate = choice(all_colors)
    if candidate not in colors:
        colors.append(candidate)

In [30]:
# One line of 'totale_casi' over time per province listed in province.
fig = figure(title='Total infected trend for some provinces',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='total infected',
             x_axis_type='datetime')

# enumerate pairs each province with its colour by position, without a list.index() lookup.
for position, p in enumerate(province):
    # Rows of this province only, in chronological order so the line is drawn left to right.
    df_prov_filtered = df_prov.query('denominazione_provincia==@p').sort_values(by='only_data')
    fig.line(df_prov_filtered['only_data'], df_prov_filtered['totale_casi'],
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('total infected', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first date present in the provincial data.
fig.x_range.start = df_prov['only_data'].min()

# Show the figure
show(fig)

# Analysis on the use of vaccines

This section analyses the use of COVID-19 vaccines, both cumulatively and day by day, for:

- Italy as a whole;
- some Italian regions;
- each supplier;
- each age range

In [31]:
# Load the vaccine-administration CSV from the covid19-opendata-vaccini submodule.
dati_vaccini = pd.read_csv('covid19-opendata-vaccini/dati/somministrazioni-vaccini-latest.csv')

In [32]:
# Replace 'data_somministrazione' with plain calendar dates parsed from its values.
dati_vaccini = dati_vaccini.assign(
    data_somministrazione=pd.to_datetime(dati_vaccini['data_somministrazione']).dt.date
)

In [33]:
# Add 'totale_somministrazioni': row-wise sum of the 'sesso_maschile' and 'sesso_femminile' columns.
dati_vaccini = dati_vaccini.assign(
    totale_somministrazioni=dati_vaccini['sesso_maschile'].add(dati_vaccini['sesso_femminile'])
)

## Italy globally considered

In [34]:
# Line chart of 'totale_somministrazioni' summed per administration date.
fig = figure(title='trend for the daily vaccines done',
             x_axis_label='date',
             y_axis_label='total',
             width=800,
             height=400,
             x_axis_type='datetime')

# A dedicated name is used so the national frame held in `df` is not overwritten.
daily_doses = dati_vaccini[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum()
fig.line(daily_doses.index,
         daily_doses['totale_somministrazioni'],
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

show(fig)

In [35]:
# Two lines: 'prima_dose' (red) and 'seconda_dose' (blue), each summed per administration date.
fig = figure(title='trend for the daily vaccines done as first and second dose',
             x_axis_label='data',
             y_axis_label='total',
             width=800,
             height=400,
             x_axis_type='datetime')

# A dedicated name is used so the national frame held in `df` is not overwritten.
daily_by_dose = dati_vaccini[['data_somministrazione', 'prima_dose', 'seconda_dose']].groupby('data_somministrazione').sum()
fig.line(daily_by_dose.index,
         daily_by_dose['prima_dose'],
         legend_label='only first dose',
         color='red',
         line_width=2)
fig.line(daily_by_dose.index,
         daily_by_dose['seconda_dose'],
         legend_label='also second dose',
         color='blue',
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

show(fig)

In [36]:
# Two lines: running totals of 'prima_dose' (red) and 'seconda_dose' (blue) by administration date.
fig = figure(title='trend for the total vaccines done as first and second dose',
             x_axis_label='data',
             y_axis_label='total',
             width=800,
             height=400,
             x_axis_type='datetime')

# Per-day sums turned into running totals with cumsum().
# A dedicated name is used so the national frame held in `df` is not overwritten.
cumulative_by_dose = dati_vaccini[['data_somministrazione', 'prima_dose', 'seconda_dose']].groupby('data_somministrazione').sum().cumsum()
fig.line(cumulative_by_dose.index,
         cumulative_by_dose['prima_dose'],
         legend_label='only first dose',
         color='red',
         line_width=2)
fig.line(cumulative_by_dose.index,
         cumulative_by_dose['seconda_dose'],
         legend_label='also second dose',
         color='blue',
         line_width=2)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('date', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))
fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# show the figure
show(fig)

## Regional trend

In [37]:
# Regions drawn in the regional vaccine charts; edit this list to analyse others.
regioni_to_show = ['Puglia', 'Sicilia', 'Lombardia']

# Pick one distinct random named colour per region.
# Each candidate is drawn exactly once, and that same value is both tested and
# stored; drawing a second time for the append let duplicates slip through.
all_colors = named.__all__
colors = []
while len(colors) < len(regioni_to_show):
    candidate = choice(all_colors)
    if candidate not in colors:
        colors.append(candidate)

In [38]:
# One line of 'totale_somministrazioni' summed per date for each region in regioni_to_show.
fig = figure(title='trend for the daily vaccines done',
             width=1000,
             height=400,
             x_axis_label='data',
             y_axis_label='total',
             x_axis_type='datetime')

# enumerate pairs each region with its colour by position, without a list.index() lookup.
for position, p in enumerate(regioni_to_show):
    # Rows whose 'nome_area' equals this region, summed per administration date.
    df_filtered = dati_vaccini.query('nome_area==@p')[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum()
    fig.line(df_filtered.index,
             df_filtered['totale_somministrazioni'],
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('totale_somministrazioni', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# Show the figure
show(fig)

In [39]:
# One line with the running total of 'totale_somministrazioni' for each region in regioni_to_show.
fig = figure(title='cumulative trend for the total vaccines done',
             width=1000,
             height=400,
             x_axis_label='data',
             y_axis_label='total',
             x_axis_type='datetime')

# enumerate pairs each region with its colour by position, without a list.index() lookup.
for position, p in enumerate(regioni_to_show):
    # Per-day sums for this region turned into running totals with cumsum().
    df_filtered = dati_vaccini.query('nome_area==@p')[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum().cumsum()
    fig.line(df_filtered.index,
             df_filtered['totale_somministrazioni'],
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# Show the figure
show(fig)

## analysis on all types of vaccine

In [40]:
# Distinct suppliers found in the 'fornitore' column, in order of first appearance.
vaccini = list(dati_vaccini.fornitore.unique())
# One colour per supplier. The first four entries are the original palette; the
# extra ones and the modulo wrap-around keep the list as long as `vaccini`, so
# indexing by supplier position cannot raise IndexError when more than four
# suppliers are present in the data.
base_colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'brown', 'black']
colors = [base_colors[position % len(base_colors)] for position in range(len(vaccini))]

In [41]:
# One line of 'totale_somministrazioni' summed per date for each supplier in vaccini.
fig = figure(title='trend for the daily vaccines done by supplier',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='total',
             x_axis_type='datetime')

for position, p in enumerate(vaccini):
    # Rows whose 'fornitore' equals this supplier, summed per administration date.
    df_filtered = dati_vaccini.query('fornitore==@p')[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum()
    # The modulo wraps around the colour list, so having more suppliers than
    # colours reuses a colour instead of raising IndexError.
    fig.line(df_filtered.index, df_filtered['totale_somministrazioni'],
             color=colors[position % len(colors)],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# Show the figure
show(fig)

In [42]:
# One line with the running total of 'totale_somministrazioni' for each supplier in vaccini.
fig = figure(title='cumulative trend for the total vaccines done by supplier',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='total',
             x_axis_type='datetime')

for position, p in enumerate(vaccini):
    # Per-day sums for this supplier turned into running totals with cumsum().
    df_filtered = dati_vaccini.query('fornitore==@p')[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum().cumsum()
    # The modulo wraps around the colour list, so having more suppliers than
    # colours reuses a colour instead of raising IndexError.
    fig.line(df_filtered.index, df_filtered['totale_somministrazioni'],
             color=colors[position % len(colors)],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('totale_somministrazioni', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# Show the figure
show(fig)

## analysis based on people age

In [43]:
# Distinct values of the 'fascia_anagrafica' column, in order of first appearance.
fasce = list(dati_vaccini.fascia_anagrafica.unique())

# Pick one distinct random named colour per age range.
# Each candidate is drawn exactly once, and that same value is both tested and
# stored; drawing a second time for the append let duplicates slip through.
all_colors = named.__all__
colors = []
while len(colors) < len(fasce):
    candidate = choice(all_colors)
    if candidate not in colors:
        colors.append(candidate)

In [44]:
# One line with the running total of 'totale_somministrazioni' for each age range in fasce.
# The plotted series is cumulative (cumsum), so the title says "cumulative"
# rather than "daily" to describe what is actually drawn.
fig = figure(title='cumulative trend for the total vaccines done by range of age',
             width=1000,
             height=400,
             x_axis_label='date',
             y_axis_label='total',
             x_axis_type='datetime')

# enumerate pairs each age range with its colour by position, without a list.index() lookup.
for position, p in enumerate(fasce):
    # Per-day sums for this age range turned into running totals with cumsum().
    df_filtered = dati_vaccini.query('fascia_anagrafica==@p')[['data_somministrazione', 'totale_somministrazioni']].groupby('data_somministrazione').sum().cumsum()
    fig.line(df_filtered.index,
             df_filtered['totale_somministrazioni'],
             color=colors[position],
             legend_label=p,
             line_width=1.5)

# The formatter key has to be the full field spec '@x' (Bokeh >= 2.0);
# a bare 'x' key leaves the '@x{%F}' date unformatted.
fig.add_tools(HoverTool(
    tooltips=[('data', '@x{%F}'), ('total', '@y')],
    formatters={'@x': 'datetime'}
))

fig.add_tools(PanTool(dimensions='width'))
fig.legend.location = 'top_left'
# Start the x axis at the first administration date.
fig.x_range.start = dati_vaccini['data_somministrazione'].min()

# Show the figure
show(fig)