# European Centre for Disease Prevention and Control Dataset

In [None]:
import pandas as pd
import datetime
import pycountry
import re
import os
import numpy as np

In [None]:
# papermill parameters
# Directory the exported CSV is written to; it is prefixed to the output
# file name when the dataframe is saved at the end of the notebook.
output_folder = "../output/"

### Fetch data

In [None]:
# Download the ECDC case-distribution CSV and load it into a dataframe.
# NOTE(review): read_csv's default NA handling turns the literal text "NA"
# into a missing value; confirm no geoId in the feed relies on that string.
ecdc_csv_url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv"
df = pd.read_csv(ecdc_csv_url)

### Parse date

In [None]:
# dateRep arrives as day/month/year text; convert it to real datetimes so it
# can be compared and sorted chronologically.
report_dates = pd.to_datetime(df["dateRep"], format="%d/%m/%Y")
df["dateRep"] = report_dates

### Add difference

In [None]:
# Day-over-day change in reported cases and deaths within each country.
# diff() subtracts the *preceding row* of the group, so the rows must be in
# chronological order for that row to be the previous day; sort by date first
# instead of relying on whatever order the download happens to use.
# The sorted frame keeps the original index labels, so assigning the result
# back aligns row-by-row and leaves df's own row order untouched.
_chronological_groups = (
    df.sort_values('dateRep', kind='stable')
      .groupby(['countriesAndTerritories', 'continentExp'])
)
# The first row of every group has no predecessor (NaN) -> report a change of 0.
df['CASES_SINCE_PREV_DAY'] = _chronological_groups['cases'].diff().fillna(0).astype(int)
df['DEATHS_SINCE_PREV_DAY'] = _chronological_groups['deaths'].diff().fillna(0).astype(int)

### Drop cols

In [None]:
# Discard the source columns that are not part of the exported dataset.
unused_columns = ["day", "month", "year", "countryterritoryCode"]
df = df.drop(unused_columns, axis="columns")

In [None]:
# Rows tagged with the pseudo geoId "JPG11668" are not a real country: blank
# out their code, population and continent and give them a descriptive name.
#
# Index labels of the affected rows are captured *before* geoId is cleared.
int_conveyance = df.index[df["geoId"] == "JPG11668"]

# Assign through a single df.loc[...] call. The previous chained form
# (df[col].iloc[...] = value) writes to an intermediate Series, which triggers
# SettingWithCopy warnings and does not update df under pandas Copy-on-Write;
# it also fed index labels to the positional .iloc indexer.
df.loc[int_conveyance, ["geoId", "popData2018", "continentExp"]] = np.nan
df.loc[int_conveyance, "countriesAndTerritories"] = "Cases on an international conveyance Japan"

### Resolve Country/Region name

In [None]:
# Replace the source country label with pycountry's name wherever geoId is a
# code pycountry recognises as ISO 3166-1 alpha-2; all other rows keep the
# label they already have.
#
# Missing geoIds are dropped up front: NaN is not a meaningful lookup key and
# should never be handed to pycountry.
country_codes = df["geoId"].dropna().unique()

# code -> resolved name, only for codes pycountry knows.
resolved_names = {}
for code in country_codes:
    pyc = pycountry.countries.get(alpha_2=code)
    if pyc:
        resolved_names[code] = pyc.name

# One vectorised assignment on df itself. The previous chained form
# (df[col].loc[mask] = value) writes to an intermediate Series, which triggers
# SettingWithCopy warnings and does not update df under pandas Copy-on-Write.
# Codes without a mapping come back as NaN from map() and fall through to the
# existing label via fillna().
df["countriesAndTerritories"] = (
    df["geoId"].map(resolved_names).fillna(df["countriesAndTerritories"])
)

### Set Last Update Date and Last Reported Flag

In [None]:
# Stamp every row with the time of this run, in UTC.
# datetime.utcnow() is deprecated; take an aware "now" in UTC and strip the
# tzinfo so the stored value stays a naive UTC timestamp, exactly as before
# (no "+00:00" suffix appears in the exported CSV).
df["LAST_UPDATE_DATE"] = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

# Flag the rows that belong to the most recent report date in the dataset.
latest_report_date = df["dateRep"].max()
df["LAST_REPORTED_FLAG"] = df["dateRep"] == latest_report_date

### Rename Cols

In [None]:
# Translate the source column names into the names used by the export.
export_names = {
    "dateRep": "DATE",
    "countriesAndTerritories": "COUNTRY_REGION",
    "geoId": "ISO3166_1",
    "popData2018": "POPULATION",
}
df = df.rename(export_names, axis="columns")

In [None]:
# Eyeball five randomly chosen rows as a sanity check of the transformed data.
df.sample(n=5)

In [None]:
# Show the dtype of every column to verify the conversions above before export.
df.dtypes

### Save dataframe

In [None]:
# Write the final dataset to <output_folder>/ECDC_GLOBAL.csv.
# Only the columns listed here are exported, in this order; the dataframe
# index is omitted.
export_columns = [
    "COUNTRY_REGION",
    "continentExp",
    "ISO3166_1",
    "cases",
    "deaths",
    "CASES_SINCE_PREV_DAY",
    "DEATHS_SINCE_PREV_DAY",
    "POPULATION",
    "DATE",
    "LAST_UPDATE_DATE",
    "LAST_REPORTED_FLAG",
]
# os.path.join instead of string concatenation: output_folder is a papermill
# parameter and may be supplied without a trailing path separator.
df.to_csv(
    os.path.join(output_folder, "ECDC_GLOBAL.csv"),
    index=False,
    columns=export_columns,
)