# Overview
This notebook is associated with Trello card https://trello.com/c/y4Nv52JN, specifically the Spain data. 

## Discovery
Spain data was initially found on https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Spain. Following links from this page, led to the following:
- https://www.rtve.es/noticias/20200329/mapa-del-coronavirus-espana/2004681.shtml
- https://www.mscbs.gob.es/profesionales/saludPublica/ccayes/alertasActual/nCov-China/situacionActual.htm
- https://covid19.isciii.es/

## Read Data

In [112]:
import pandas as pd
from datetime import datetime

In [113]:
url = "https://covid19.isciii.es/resources/serie_historica_acumulados.csv"
df = pd.read_csv(url, encoding="latin_1")
df.shape


(761, 7)

In [114]:
df[df['CCAA Codigo ISO']=='AN'].head(10)

Unnamed: 0,CCAA Codigo ISO,Fecha,Casos,Hospitalizados,UCI,Fallecidos,Recuperados
0,AN,20/02/2020,,,,,
19,AN,21/02/2020,,,,,
38,AN,22/02/2020,,,,,
57,AN,23/02/2020,,,,,
76,AN,24/02/2020,,,,,
95,AN,25/02/2020,,,,,
114,AN,26/02/2020,1.0,,,,
133,AN,27/02/2020,6.0,,,,
152,AN,28/02/2020,8.0,,,,
171,AN,29/02/2020,12.0,,,,


In [115]:
df.rename(columns={'CCAA Codigo ISO': 'ccaa-iso-code', 'Fecha': 'date', 'Casos ': 'cases', 'Hospitalizados': 'hospitalized', 'Fallecidos': 'deceased', 'Recuperados': 'recovered'}, inplace=True)
print(df.head())

  ccaa-iso-code        date  cases  hospitalized  UCI  deceased  recovered
0            AN  20/02/2020    NaN           NaN  NaN       NaN        NaN
1            AR  20/02/2020    NaN           NaN  NaN       NaN        NaN
2            AS  20/02/2020    NaN           NaN  NaN       NaN        NaN
3            IB  20/02/2020    1.0           NaN  NaN       NaN        NaN
4            CN  20/02/2020    1.0           NaN  NaN       NaN        NaN


In [116]:
df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,ccaa-iso-code,date,cases,hospitalized,UCI,deceased,recovered
0,AN,2020-02-20,,,,,
1,AR,2020-02-20,,,,,
2,AS,2020-02-20,,,,,
3,IB,2020-02-20,1.0,,,,
4,CN,2020-02-20,1.0,,,,


In [132]:
df.fillna(value=0, inplace=True)
df.head()

Unnamed: 0,ccaa-iso-code,date,cases,hospitalized,UCI,deceased,recovered
0,AN,2020-02-20 00:00:00,0.0,0.0,0.0,0.0,0.0
1,AR,2020-02-20 00:00:00,0.0,0.0,0.0,0.0,0.0
2,AS,2020-02-20 00:00:00,0.0,0.0,0.0,0.0,0.0
3,IB,2020-02-20 00:00:00,1.0,0.0,0.0,0.0,0.0
4,CN,2020-02-20 00:00:00,1.0,0.0,0.0,0.0,0.0


In [135]:
c = df.copy()
c.drop(['date', 'hospitalized', 'UCI', 'deceased', 'recovered'], axis=1, inplace=True)
c[c['ccaa-iso-code']=='AN'].head(30)

d = c.groupby(level=0).diff().fillna(c).reset_index()
d[d['ccaa-iso-code']=='AN'].head(30)
# # d[d['ccaa-iso-code']=='AN'].head(30)
# # # df_new.head()

Unnamed: 0,index,ccaa-iso-code,cases
0,0,AN,0.0
19,19,AN,0.0
38,38,AN,0.0
57,57,AN,0.0
76,76,AN,0.0
95,95,AN,0.0
114,114,AN,1.0
133,133,AN,6.0
152,152,AN,8.0
171,171,AN,12.0


In [19]:
df.head(30)

Unnamed: 0,ccaa-iso-code,date,cases,hospitalized,UCI,deceased,recovered
0,AN,2020-02-20,,,,,
1,AR,2020-02-20,,,,,
2,AS,2020-02-20,,,,,
3,IB,2020-02-20,1.0,,,,
4,CN,2020-02-20,1.0,,,,
5,CB,2020-02-20,,,,,
6,CM,2020-02-20,,,,,
7,CL,2020-02-20,0.0,,,,
8,CT,2020-02-20,,,,,
9,CE,2020-02-20,,,,,
