# Italy Region/Province-Level Data Loader

Data from: https://github.com/pcm-dpc/COVID-19

In [1]:
import pandas as pd
import cufflinks as cf
cf.go_offline()

In [2]:
italy_province_url_base = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province-%s.csv"
italy_region_url_base = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-%s.csv"
def gather_daily_italy_reports(date_rng, province=True):
    collected = pd.DataFrame()
    #Iterate over dates in provided range
    print("Fetching %s data, from %s to %s please wait..." % (
        "province" if province else "region", 
        date_rng[0].strftime("%m-%d-%Y"), 
        date_rng[-1].strftime("%m-%d-%Y"))
     )
    for date in date_rng:
        date_str = date.strftime("%Y%m%d")
        #print("Collecting data from: %s" % date_str)
        try:
            #Try to fetch the day's .csv data from github
            daily_report = pd.read_csv((italy_province_url_base if province else italy_region_url_base) % date_str)
            #Insert a column for the date which this data was collected 
            daily_report.insert(0, "date", date)
            #Append day's data to aggregated dataframe 
            collected = pd.concat([collected, daily_report], axis=0, ignore_index=True)
        except:
            print("FAILED TO FETCH DATA FOR: %s" % date.strftime("%m-%d-%Y"))
    #Renaming columns
    collected.rename(columns={
        'denominazione_regione':'region',
        'denominazione_provincia': 'province',
        'ricoverati_con_sintomi': 'hospitalized with symptoms',
        'intensive care	': 'hospitalized in intensive care',
        'totale_ospedalizzati': 'total hospitalized',
        'isolamento_domiciliare': 'in home isolation',
        'totale_attualmente_positivi': 'total currently positive',
        'nuovi_attualmente_positivi': 'total newly positive',
        'dimessi_guariti': 'recovered',
        'deceased': 'dead',
        'totale_casi': 'total positive cases',
        'swabs': 'tests performed'
    }, inplace=True)
    return collected
#Fetch region-level data
raw_region_data = gather_daily_italy_reports(pd.date_range("02-24-2020", "03-25-2020").to_pydatetime(), province=False)
raw_region_data.to_csv("output/italy_region_report_data.csv", index=0)

#Fetch province-level data
raw_province_data = gather_daily_italy_reports(pd.date_range("02-24-2020", "03-25-2020").to_pydatetime(), province=True)
raw_province_data.to_csv("output/italy_province_report_data.csv", index=0)
print("Finished fetching data!")

Fetching region data, from 02-24-2020 to 03-25-2020 please wait...
Fetching province data, from 02-24-2020 to 03-25-2020 please wait...
Finished fetching data!


## Province-Level Data

In [3]:
clean_province_data = pd.read_csv("output/italy_province_report_data.csv")
province_name = "Milano"
selected_province_data = clean_province_data[clean_province_data["province"] == province_name]
selected_province_data["total positive cases"].iplot(title="Outbreak in Province: " + province_name, xTitle="Date", yTitle="Confirmed Cases")

In [4]:
top_provinces = clean_province_data.groupby("province")["total positive cases"].max().sort_values(ascending=False).head(15)
print(top_provinces)
top_province_data = clean_province_data[clean_province_data["province"].isin(top_provinces.keys())]
province_plot_data = top_province_data.pivot(index="date", columns="province", values="total positive cases")
province_plot_data.iplot(title="Outbreak in Worst Hit Provinces of Italy", xTitle="Date", yTitle="Confirmed Cases")

province
Bergamo                  7072.0
Brescia                  6597.0
Milano                   6074.0
Cremona                  3156.0
Torino                   2813.0
Piacenza                 2122.0
Lodi                     1884.0
Padova                   1636.0
Monza e della Brianza    1587.0
Reggio nell'Emilia       1586.0
Pavia                    1578.0
Modena                   1533.0
Parma                    1525.0
Pesaro e Urbino          1432.0
Roma                     1428.0
Name: total positive cases, dtype: float64


## Region-Level Data

In [5]:
clean_region_data = pd.read_csv("output/italy_region_report_data.csv")
clean_region_data

Unnamed: 0,date,data,stato,codice_regione,region,lat,long,hospitalized with symptoms,terapia_intensiva,total hospitalized,in home isolation,total currently positive,total newly positive,recovered,deceduti,total positive cases,tamponi,note_it,note_en
0,2020-02-24,2020-02-24T18:00:00,ITA,13,Abruzzo,42.351222,13.398438,0,0,0,0,0,0,0,0,0,5,,
1,2020-02-24,2020-02-24T18:00:00,ITA,17,Basilicata,40.639471,15.805148,0,0,0,0,0,0,0,0,0,0,,
2,2020-02-24,2020-02-24T18:00:00,ITA,4,P.A. Bolzano,46.499335,11.356624,0,0,0,0,0,0,0,0,0,1,,
3,2020-02-24,2020-02-24T18:00:00,ITA,18,Calabria,38.905976,16.594402,0,0,0,0,0,0,0,0,0,1,,
4,2020-02-24,2020-02-24T18:00:00,ITA,15,Campania,40.839566,14.250850,0,0,0,0,0,0,0,0,0,10,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,2020-03-25,2020-03-25T17:00:00,ITA,9,Toscana,43.769231,11.255889,999,251,1250,1526,2776,257,54,142,2972,17868,,
647,2020-03-25,2020-03-25T17:00:00,ITA,4,P.A. Trento,46.068935,11.121231,308,65,373,685,1058,83,90,74,1222,4114,,
648,2020-03-25,2020-03-25T17:00:00,ITA,10,Umbria,43.106758,12.388247,123,44,167,519,686,62,5,19,710,4707,,
649,2020-03-25,2020-03-25T17:00:00,ITA,2,Valle d'Aosta,45.737503,7.320149,70,25,95,280,375,-4,2,24,401,1200,,


In [6]:
top_regions = clean_region_data.groupby("region")["total positive cases"].max().sort_values(ascending=False).head(10)
print(top_regions)
top_region_data = clean_region_data[clean_region_data["region"].isin(top_regions.keys())]
region_plot_data = top_region_data.pivot(index="date", columns="region", values="total positive cases")
region_plot_data.iplot(title="Outbreak in Worst Hit Regions of Italy", xTitle="Date", yTitle="Confirmed Cases")

region
Lombardia         32346
Emilia Romagna    10054
Veneto             6442
Piemonte           6024
Toscana            2972
Marche             2934
Liguria            2305
Lazio              1901
P.A. Trento        1222
Campania           1199
Name: total positive cases, dtype: int64
