## The Data

#### Import dependencies.

In [1]:
import pandas as pd
import json

# Show every column when a dataframe is displayed (no column limit)
pd.options.display.max_columns = None

## Covid-19 Data from Dipartimento della Protezione Civile (DPC)

Let's take a look at the **regional** data.  
  
The data is in JSON format; let's convert it to a dataframe.  

In [2]:
# Load the regional JSON file. The encoding is given explicitly so the result
# does not depend on the platform's default encoding: JSON is exchanged as
# UTF-8, and the `note` field contains non-ASCII characters (e.g. "è").
with open('../../data/Covid/dpc-covid19-ita-regioni-latest.json', encoding='utf-8') as regions_file:
    regions = json.load(regions_file)
# Have a quick look at the data
regions

[{'data': '2023-05-04T17:00:00',
  'stato': 'ITA',
  'codice_regione': 13,
  'denominazione_regione': 'Abruzzo',
  'lat': 42.35122196,
  'long': 13.39843823,
  'ricoverati_con_sintomi': 92,
  'terapia_intensiva': 4,
  'totale_ospedalizzati': 96,
  'isolamento_domiciliare': 3323,
  'totale_positivi': 3419,
  'variazione_totale_positivi': -47,
  'nuovi_positivi': 131,
  'dimessi_guariti': 650744,
  'deceduti': 3960,
  'casi_da_sospetto_diagnostico': None,
  'casi_da_screening': None,
  'totale_casi': 658123,
  'tamponi': 7483582,
  'casi_testati': 1372234,
  'note': "Il dato ''incremento casi confermati'' è composto da 131 , cioè 76 ''nuovi positivi'' e 55 ''reinfezioni''.",
  'ingressi_terapia_intensiva': 0,
  'note_test': None,
  'note_casi': None,
  'totale_positivi_test_molecolare': 250615,
  'totale_positivi_test_antigenico_rapido': 407508,
  'tamponi_test_molecolare': 2605598,
  'tamponi_test_antigenico_rapido': 4877984,
  'codice_nuts_1': 'ITF',
  'codice_nuts_2': 'ITF1'},
 {'data

Create a dataframe from the `json` file.

In [3]:
# Build a dataframe from the parsed JSON records (a list of dicts)
regions_df = pd.DataFrame(data=regions)

Let's take a look at the dataframe.

In [4]:
# Preview the first rows, still with the original Italian column names
regions_df.head()

Unnamed: 0,data,stato,codice_regione,denominazione_regione,lat,long,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,note,ingressi_terapia_intensiva,note_test,note_casi,totale_positivi_test_molecolare,totale_positivi_test_antigenico_rapido,tamponi_test_molecolare,tamponi_test_antigenico_rapido,codice_nuts_1,codice_nuts_2
0,2023-05-04T17:00:00,ITA,13,Abruzzo,42.351222,13.398438,92,4,96,3323,3419,-47,131,650744,3960,,,658123,7483582,1372234,Il dato ''incremento casi confermati'' è compo...,0,,,250615,407508,2605598,4877984,ITF,ITF1
1,2023-05-04T17:00:00,ITA,17,Basilicata,40.639471,15.805148,26,2,28,8390,8418,1,11,191039,1027,,,200484,1341696,404577,Il dato relativo al numero dei “Casi in isolam...,0,,,71210,129274,702845,638851,ITF,ITF5
2,2023-05-04T17:00:00,ITA,18,Calabria,38.905976,16.594402,108,5,113,718,831,-106,89,632556,3412,,,636799,4337111,3371574,,1,,,202620,434179,1916246,2420865,ITF,ITF6
3,2023-05-04T17:00:00,ITA,15,Campania,40.839566,14.25085,174,8,182,19737,19919,0,347,2431492,11889,,,2463300,20827201,5408549,,0,,,955713,1507587,9602412,11224789,ITF,ITF3
4,2023-05-04T17:00:00,ITA,8,Emilia-Romagna,44.494367,11.341721,506,18,524,3181,3705,-57,305,2128147,19436,,,2151288,19545251,2982360,,3,,,1092316,1058972,10726333,8818918,ITH,ITH5


Translate the column names to English.

In [5]:
# Italian -> English column names.
# Keys that do not exist in the frame are skipped by `rename` without raising
# ('codice_nuts_3' is not among the regional columns shown above).
# NOTE(review): 'symptons_hospitalised' is misspelled ("symptoms"), but a later
# cell drops the column by this exact name, so the spelling is kept as is.
column_translations = {
    'data': 'date',
    'stato': 'state',
    'codice_regione': 'reg_code',
    'denominazione_regione': 'reg_name',
    'ricoverati_con_sintomi': 'symptons_hospitalised',
    'terapia_intensiva': 'in_intensive_care',
    'totale_ospedalizzati': 'hospitalised',
    'isolamento_domiciliare': 'home_isolation',
    'totale_positivi': 'positive',
    'variazione_totale_positivi': 'variance',
    'nuovi_positivi': 'new_cases',
    'dimessi_guariti': 'discharged',
    'deceduti': 'deaths',
    'casi_da_sospetto_diagnostico': 'suspected',
    'casi_da_screening': 'screened',
    'totale_casi': 'total_cases',
    'tamponi': 'swabs',
    'casi_testati': 'tested_cases',
    'note': 'notes',
    'ingressi_terapia_intensiva': 'intensive_care_entrances',
    'note_test': 'test_notes',
    'note_casi': 'cases_notes',
    'totale_positivi_test_molecolare': 'molecular_positive',
    'totale_positivi_test_antigenico_rapido': 'antigen_positive',
    'tamponi_test_molecolare': 'molecular_swabs',
    'tamponi_test_antigenico_rapido': 'antigen_swabs',
    'codice_nuts_1': 'nuts_1_code',
    'codice_nuts_2': 'nuts_2_code',
    'codice_nuts_3': 'nuts_3_code',
}
regions_df = regions_df.rename(columns=column_translations)
regions_df.head()

Unnamed: 0,date,state,reg_code,reg_name,lat,long,symptons_hospitalised,in_intensive_care,hospitalised,home_isolation,positive,variance,new_cases,discharged,deaths,suspected,screened,total_cases,swabs,tested_cases,notes,intensive_care_entrances,test_notes,cases_notes,molecular_positive,antigen_positive,molecular_swabs,antigen_swabs,nuts_1_code,nuts_2_code
0,2023-05-04T17:00:00,ITA,13,Abruzzo,42.351222,13.398438,92,4,96,3323,3419,-47,131,650744,3960,,,658123,7483582,1372234,Il dato ''incremento casi confermati'' è compo...,0,,,250615,407508,2605598,4877984,ITF,ITF1
1,2023-05-04T17:00:00,ITA,17,Basilicata,40.639471,15.805148,26,2,28,8390,8418,1,11,191039,1027,,,200484,1341696,404577,Il dato relativo al numero dei “Casi in isolam...,0,,,71210,129274,702845,638851,ITF,ITF5
2,2023-05-04T17:00:00,ITA,18,Calabria,38.905976,16.594402,108,5,113,718,831,-106,89,632556,3412,,,636799,4337111,3371574,,1,,,202620,434179,1916246,2420865,ITF,ITF6
3,2023-05-04T17:00:00,ITA,15,Campania,40.839566,14.25085,174,8,182,19737,19919,0,347,2431492,11889,,,2463300,20827201,5408549,,0,,,955713,1507587,9602412,11224789,ITF,ITF3
4,2023-05-04T17:00:00,ITA,8,Emilia-Romagna,44.494367,11.341721,506,18,524,3181,3705,-57,305,2128147,19436,,,2151288,19545251,2982360,,3,,,1092316,1058972,10726333,8818918,ITH,ITH5


Create a new dataframe with only the required columns:  
  
- date
- state
- reg_code
- reg_name
- lat
- long
- total_cases
- nuts_1_code
- nuts_2_code

In [6]:
# Columns that are not needed for the rest of the analysis
unused_columns = [
    'symptons_hospitalised',
    'in_intensive_care',
    'hospitalised',
    'home_isolation',
    'positive',
    'variance',
    'new_cases',
    'discharged',
    'deaths',
    'suspected',
    'screened',
    'swabs',
    'tested_cases',
    'notes',
    'intensive_care_entrances',
    'test_notes',
    'cases_notes',
    'molecular_positive',
    'antigen_positive',
    'molecular_swabs',
    'antigen_swabs',
]
# `drop` returns a new frame, so `regions_df` keeps all of its columns
regions_df2 = regions_df.drop(unused_columns, axis='columns')
regions_df2.head()

Unnamed: 0,date,state,reg_code,reg_name,lat,long,total_cases,nuts_1_code,nuts_2_code
0,2023-05-04T17:00:00,ITA,13,Abruzzo,42.351222,13.398438,658123,ITF,ITF1
1,2023-05-04T17:00:00,ITA,17,Basilicata,40.639471,15.805148,200484,ITF,ITF5
2,2023-05-04T17:00:00,ITA,18,Calabria,38.905976,16.594402,636799,ITF,ITF6
3,2023-05-04T17:00:00,ITA,15,Campania,40.839566,14.25085,2463300,ITF,ITF3
4,2023-05-04T17:00:00,ITA,8,Emilia-Romagna,44.494367,11.341721,2151288,ITH,ITH5


Let's explore the regions data.  
  
Using `.info()` will help us identify the data types, size of the data, and any `Null` values.

In [7]:
# Print the index range, per-column non-null counts and dtypes, and memory usage
regions_df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         21 non-null     object 
 1   state        21 non-null     object 
 2   reg_code     21 non-null     int64  
 3   reg_name     21 non-null     object 
 4   lat          21 non-null     float64
 5   long         21 non-null     float64
 6   total_cases  21 non-null     int64  
 7   nuts_1_code  21 non-null     object 
 8   nuts_2_code  21 non-null     object 
dtypes: float64(2), int64(2), object(5)
memory usage: 1.6+ KB


Using `.describe()` will help us understand the values in the data.

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
regions_df2.describe()

Unnamed: 0,reg_code,lat,long,total_cases
count,21.0,21.0,21.0,21.0
mean,11.857143,43.046293,12.225955,1229010.0
std,6.42873,2.550241,2.724611,1091577.0
min,1.0,38.115697,7.320149,50802.0
25%,7.0,41.125596,11.121231,442756.0
50%,12.0,43.61676,12.388247,666306.0
75%,17.0,45.434905,13.768136,1825465.0
max,22.0,46.499335,16.867367,4154840.0


### Conclusion

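The data included in the `json` file is a single snapshot per region (dated 2023-05-04 in this file), so `total_cases` is the cumulative **sum** of Covid-19 cases up to that date, not a breakdown by year.  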
  
For our analysis we need more granular data.