In [None]:
import pandas as pd
import datetime as dt
import pycurl
from io import BytesIO

### Load Data ###

In [None]:
# Download the CSSEGISandData COVID-19 "confirmed global" time series
# and store it locally as data/source_data.csv.
with open('data/source_data.csv', 'wb') as f:
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
        curl.setopt(curl.WRITEDATA, f)
        # Fail loudly on HTTP status >= 400 instead of silently saving the
        # server's error page as if it were the CSV payload.
        curl.setopt(curl.FAILONERROR, True)
        curl.perform()
    finally:
        # Release the handle even when the transfer raises.
        curl.close()

# Load the downloaded file, shorten the country column name, and discard
# the columns that the rest of the notebook does not use.
covid_data = pd.read_csv("data/source_data.csv").rename(columns={'Country/Region': 'Country'})
covid_data = covid_data.drop(columns=['Province/State', 'Lat', 'Long'])
covid_data.head()

### Scandinavia ###

In [None]:
# Keep the rows for Denmark, Sweden, Norway and Finland, then remove row
# labels 92 and 93.
# NOTE(review): 92 and 93 are hard-coded row labels of the downloaded file —
# presumably extra Denmark entries; confirm against the current upstream
# data. `drop` raises KeyError if either label is missing from the selection.
covid_data_scandinavia = (
    covid_data
    .query('Country == "Denmark" | Country == "Sweden" | Country == "Norway" | Country == "Finland"')
    .drop([92, 93])
)
covid_data_scandinavia.to_csv('data/covid_data_scandinavia.csv')
covid_data_scandinavia.head()

### Reshape Data

In [None]:
# Unpivot the wide table and index it by country and date
def reshape_data(raw_df):
    """Melt a wide per-country frame into long form indexed by (Country, Date).

    Every column other than 'Country' is unpivoted: its label becomes the
    'Date' index level and its values land in the single 'Cases' column.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame with a 'Country' column plus one column per date.

    Returns
    -------
    pandas.DataFrame
        One 'Cases' column with a ('Country', 'Date') MultiIndex.
    """
    return (
        raw_df
        .melt(id_vars=['Country'], var_name='Date', value_name='Cases')
        .set_index(['Country', 'Date'])
    )

# Reshape the Scandinavian subset to long form and persist the result
covid_data_scandinavia_reshaped = covid_data_scandinavia.pipe(reshape_data)
covid_data_scandinavia_reshaped.to_csv(path_or_buf='data/covid_data_scandinavia_reshaped.csv')
covid_data_scandinavia_reshaped.head()

### Calculate and Merge Data

In [None]:
def country_data(cleaned_df, old_name, new_name):
    """Sum cases per (Country, Date), parse the dates, sort, and rename a column.

    Parameters
    ----------
    cleaned_df : pandas.DataFrame
        Frame exposing 'Country' and 'Date' as group keys and a 'Cases' column.
    old_name : str
        Column label to rename in the result. The aggregated column is always
        'Cases'; this argument only feeds the final rename.
    new_name : str
        Replacement label for `old_name`.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('Country', 'Date') with the 'Date' level converted by
        `pd.to_datetime`, sorted ascending by country and then date.
    """
    totals = cleaned_df.groupby(['Country', 'Date'])['Cases'].sum()
    per_country = totals.reset_index().set_index(['Country', 'Date'])

    # Replace the labels of the 'Date' level (position 1) with parsed
    # timestamps; the 'Country' level is left as it is.
    parsed_dates = pd.to_datetime(per_country.index.levels[1])
    per_country.index = per_country.index.set_levels(parsed_dates, level=1)

    return (
        per_country
        .sort_values(['Country', 'Date'], ascending=True)
        .rename(columns={old_name: new_name})
    )
    
# Aggregated case counts per country and date, with the column relabelled
cases_country = country_data(
    cleaned_df=covid_data_scandinavia_reshaped,
    old_name='Cases',
    new_name='Total Confirmed Cases',
)

def daily_data(country_df, old_name, new_name):
    """Compute row-to-row differences within each group of the first index level.

    Parameters
    ----------
    country_df : pandas.DataFrame
        Frame whose first index level identifies the group.
    old_name : str
        Column label to rename in the result.
    new_name : str
        Replacement label for `old_name`.

    Returns
    -------
    pandas.DataFrame
        Same index as `country_df`; each group's first row, which has no
        predecessor to subtract, is filled with 0.
    """
    day_over_day = country_df.groupby(level=0).diff()
    return day_over_day.fillna(0).rename(columns={old_name: new_name})

# Day-over-day change derived from the per-country totals
new_cases_country = daily_data(cases_country, 'Total Confirmed Cases', 'Daily New Cases')

# Both frames carry the same (Country, Date) index, so align on the index
merged_country_data = pd.merge(cases_country, new_cases_country, how='left', left_index=True, right_index=True)
merged_country_data.to_csv('data/merged_data_scandinavia.csv')

# Preview goes last: only a cell's final expression is rendered, so the
# previous mid-cell bare expression never displayed anything.
merged_country_data.head()

### Long Form ###
Long-form data (one row per country and date) fits best with Altair charts.

In [None]:
# Every column after the first one (expected to be 'Country') is a date column
dates = list(covid_data_scandinavia.columns[1:])

# Unpivot to one row per (Country, Date) and save without the row index
long_form = pd.melt(
    covid_data_scandinavia,
    id_vars=['Country'],
    value_vars=dates,
    var_name='Date',
    value_name='Cases',
)
long_form.to_csv('long_form_scandinavia.csv', index=False)

In [None]:
# Build a wide table: one row per date, one column per country.
#
# Rows are looked up by country name rather than by hard-coded row labels
# (94, 106, 175, 205), which only matched one particular revision of the
# upstream file and break or mis-select as soon as rows are added upstream.
countries = ['Denmark', 'Finland', 'Norway', 'Sweden']
cases_by_country = covid_data_scandinavia.set_index('Country')
assert cases_by_country.index.is_unique, "expected exactly one row per country"

# With 'Country' moved into the index, every remaining column is a date
dates = cases_by_country.columns.tolist()
country_series = {name: cases_by_country.loc[name] for name in countries}
denmark, finland, norway, sweden = (country_series[name] for name in countries)
data = {'Date': dates, **country_series}

# NOTE: this rebinds covid_data_scandinavia_reshaped, which an earlier cell
# assigned from reshape_data(); re-running that earlier section afterwards
# requires re-running its reshape cell first.
covid_data_scandinavia_reshaped = pd.DataFrame(data, columns=['Date', *countries]).set_index('Date')

In [None]:
# Persist the date-indexed wide table.
# NOTE(review): an earlier cell writes covid_data_scandinavia to this same
# path, so this call replaces that file — confirm this is intended.
covid_data_scandinavia_reshaped.to_csv(path_or_buf='data/covid_data_scandinavia.csv')

### Denmark ###

In [None]:
# Keep only the Denmark rows, then remove row labels 92 and 93.
# NOTE(review): 92 and 93 are hard-coded row labels of the downloaded file —
# presumably extra Denmark entries; confirm against the current upstream
# data. `drop` raises KeyError if either label is missing from the selection.
covid_data_denmark = (
    covid_data
    .query('Country == "Denmark"')
    .drop([92, 93])
)
covid_data_denmark

In [None]:
# Build a date-indexed table with a single 'Denmark' column.
#
# The row is looked up by country name rather than by the hard-coded row
# label 94, which only matched one particular revision of the upstream file.
denmark_cases = covid_data_denmark.set_index('Country')
assert denmark_cases.index.is_unique, "expected exactly one Denmark row"

# With 'Country' moved into the index, every remaining column is a date
dates = denmark_cases.columns.tolist()
denmark = denmark_cases.loc['Denmark']
data = {'Date': dates,
        'Denmark': denmark}

covid_data_denmark_reshaped = pd.DataFrame(data, columns=['Date', 'Denmark']).set_index('Date')
covid_data_denmark_reshaped.to_csv('denmark.csv')