In [65]:
import pandas as pd
import numpy as np
from functools import reduce

In [56]:
# Shared filename prefix of the input CSVs; '<alias>.csv' is appended when loading.
data_path = 'novel_corona_virus_2019_dataset/time_series_covid_19_'

# One alias per input file; also interpolated into the 'Number of <alias> cases' column names.
csv_paths_alias = ('confirmed', 'deaths', 'recovered')

# Identifier columns kept fixed while every other column is melted into rows.
pivot_keys = ('Province/State', 'Country/Region', 'Lat', 'Long')

# Join keys for the long-format tables: the identifier columns plus the melted 'Date' column.
table_keys = pivot_keys + ('Date',)

In [58]:
def pivot_table(dataset, variable_new_name, value_new_name, id_vars=None):
    """Reshape a wide table into long format with ``pd.melt``.

    Every column that is not an identifier column is unpivoted: its header
    ends up in the ``variable_new_name`` column and its cell values in the
    ``value_new_name`` column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Wide table to unpivot.
    variable_new_name : str
        Name of the output column holding the former column headers.
    value_new_name : str
        Name of the output column holding the former cell values.
    id_vars : column label(s), optional
        Identifier columns to keep as-is. When omitted, the module-level
        ``pivot_keys`` is used.

    Returns
    -------
    pandas.DataFrame
        The long-format table.
    """
    if id_vars is None:
        id_vars = pivot_keys
    # Name the output columns directly instead of renaming melt's defaults
    # afterwards: the default variable column is not always called
    # 'variable' (melt uses ``dataset.columns.name`` when it is set), and a
    # post-hoc rename would also hit identifier columns that happen to be
    # named 'variable' or 'value'.
    return pd.melt(
        dataset,
        id_vars=id_vars,
        var_name=variable_new_name,
        value_name=value_new_name,
    )

In [62]:
# Load every per-alias CSV and reshape it to long format; one frame per alias,
# collected in the same order as csv_paths_alias.
datasets = []

for alias in csv_paths_alias:
    # The value column is named after the alias so the three tables stay
    # distinguishable once they are merged.
    value_column = 'Number of {} cases'.format(alias)
    dataset = pd.read_csv(data_path + alias + '.csv')
    pivoted = pivot_table(dataset, 'Date', value_column)
    datasets.append(pivoted)

In [63]:
def merge_tables(left, right, keys=table_keys, join='outer'):
    """Join two tables on shared key columns via ``pd.merge``.

    ``keys`` is forwarded as ``on`` (the columns to match on) and ``join`` as
    ``how`` (the merge strategy). With the default ``'outer'`` strategy, rows
    present on only one side are kept.
    """
    merge_options = {'on': keys, 'how': join}
    return pd.merge(left, right, **merge_options)

In [78]:
# Fold the loaded tables left to right into a single frame:
# merge_tables(merge_tables(datasets[0], datasets[1]), datasets[2]), using
# merge_tables' default keys and join strategy.
merged = reduce(merge_tables, datasets)

In [82]:
# Replace missing counts with 0 in the three case-count columns (an outer
# merge leaves NaN where one table has no row for a given key).
# The result is reassigned instead of using inplace=True so that any other
# reference to the same frame is not silently mutated.
merged = merged.fillna(value={'Number of confirmed cases': 0,
                              'Number of deaths cases': 0,
                              'Number of recovered cases': 0})

In [83]:
# Cast the three case-count columns to integers; all other columns keep their dtype.
count_dtypes = {
    'Number of confirmed cases': 'int',
    'Number of deaths cases': 'int',
    'Number of recovered cases': 'int',
}
merged = merged.astype(count_dtypes)

In [88]:
# Parse the 'Date' column (month/day/two-digit year, per the format string) into datetimes.
merged['Date'] = pd.to_datetime(merged['Date'], format='%m/%d/%y')

In [90]:
# Write the merged long-format table to CSV at a path relative to the working
# directory; the 'dataset/' directory is not created by this notebook.
# NOTE(review): to_csv defaults to index=True, so the frame's index is written
# as an extra unnamed leading column — confirm downstream readers expect that.
merged.to_csv('dataset/global_covid_19_time_series.csv')