In [1]:
import csv
import pandas as pd
import numpy as np

In [2]:
#the aim of this program is to build a CSV file with all measures of the pandemic for countries
def _read_time_series(path):
    """Read one time-series CSV file into a DataFrame, skipping malformed rows.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in pandas 2.0;
    its replacement is ``on_bad_lines``. ``"warn"`` corresponds to the former
    ``error_bad_lines=False`` with the default ``warn_bad_lines=True``: bad rows
    are reported and skipped instead of aborting the load.
    """
    # NOTE(review): open() decodes with the platform's default text encoding;
    # confirm that this matches the encoding of the source files.
    try:
        with open(path) as f:
            return pd.read_csv(f, on_bad_lines="warn")
    except TypeError:
        # pandas < 1.3 does not accept on_bad_lines: fall back to the old keyword.
        with open(path) as f:
            return pd.read_csv(f, error_bad_lines=False)


confirmed_data = _read_time_series("Sources/COVID-19-master/time_series_covid19_confirmed_global.csv")
deaths_data = _read_time_series("Sources/COVID-19-master/time_series_covid19_deaths_global.csv")
recovered_data = _read_time_series("Sources/COVID-19-master/time_series_covid19_recovered_global.csv")

In [3]:
#the data is grouped by country instead of region: the rows of all regions of a country are summed
#NOTE(review): assumes the first four columns are metadata and every later column is a day - confirm
#NOTE: the three names are rebound to the grouped tables ('Country/Region' becomes the index),
#so this cell can only be run once after the loading cell
columns = ["Country/Region", *confirmed_data.columns[4:]]
confirmed_data, deaths_data, recovered_data = (
    frame[columns].groupby("Country/Region").sum()
    for frame in (confirmed_data, deaths_data, recovered_data)
)

In [4]:
#this function returns a count as a fraction of the confirmed cases
def rate_country_by_day(confirmed, deaths):
    """Return ``deaths / confirmed``, or NaN when ``confirmed`` equals 0.

    Despite its name, ``deaths`` can be any count that should be expressed
    relative to ``confirmed``; the function only divides the two values.
    """
    # Without any confirmed case the ratio is undefined.
    return np.nan if confirmed == 0 else deaths / confirmed

#this function builds the dataframe which contains all measures
def construction_daily_data_by_country(confirmed_data, deaths_data, recovered_data):
    """Build one row per (day, country) from three country-indexed tables.

    Parameters
    ----------
    confirmed_data, deaths_data, recovered_data : pandas.DataFrame
        One row per country (the index holds the country label) and one column
        per day. Countries are matched by label against ``confirmed_data``;
        days are matched by column position.

    Returns
    -------
    pandas.DataFrame
        Columns ``Day_number`` (0-based column position), ``Country``,
        ``Confirmed``, ``Deaths``, ``Recovered``, ``Death_rate`` and
        ``Recovery_rate``. Rows are ordered by day, then by the country order
        of ``confirmed_data``. Both rates are NaN where ``Confirmed`` is 0.
        The frame is empty when ``confirmed_data`` has no day column.

    Raises
    ------
    KeyError
        If a country of ``confirmed_data`` is missing from another table.
    IndexError
        If another table has fewer day columns than ``confirmed_data``.
    """
    columns = ['Day_number', 'Country', 'Confirmed', 'Deaths', 'Recovered', 'Death_rate', 'Recovery_rate']
    countries = confirmed_data.index
    days_nb = confirmed_data.shape[1]

    # Align the rows on the country labels once, instead of transposing all
    # three tables again for every (day, country) pair.
    deaths_by_country = deaths_data.loc[countries]
    recovered_by_country = recovered_data.loc[countries]

    measures_list = []
    for day in range(days_nb):
        # .iloc is explicitly positional; series[day] with an integer on a
        # label-indexed Series is deprecated in recent pandas versions.
        confirmed = confirmed_data.iloc[:, day].to_numpy()
        deaths = deaths_by_country.iloc[:, day].to_numpy()
        recovered = recovered_by_country.iloc[:, day].to_numpy()

        # A rate is undefined without confirmed cases: dividing by NaN yields NaN.
        confirmed_or_nan = np.where(confirmed == 0, np.nan, confirmed)
        death_rate = deaths / confirmed_or_nan
        recovery_rate = recovered / confirmed_or_nan

        measures_list.extend(
            [day, *measures]
            for measures in zip(countries, confirmed, deaths, recovered, death_rate, recovery_rate)
        )

    # The rows go to pandas directly: np.array() on rows mixing text and
    # numbers would coerce every value to str. dtype=object keeps the result
    # in a single object block like before (row-wise `.values` access stays
    # cheap), but the cells now hold real numbers.
    return pd.DataFrame(measures_list, columns=columns, dtype=object)

#the three per-country tables are combined into one table with a row per day and country
daily_data_by_country = construction_daily_data_by_country(
    confirmed_data=confirmed_data,
    deaths_data=deaths_data,
    recovered_data=recovered_data,
)

In [5]:
#the DataFrame is written to a CSV file so that other Python files can load it
#NOTE(review): fields containing a comma are quoted with '|' instead of the usual '"';
#readers of this file have to pass quotechar='|' - confirm that they do before changing it
#NOTE(review): the file is written with the platform's default text encoding - confirm
#that the readers expect it
with open("CSV_Creation/daily_data_by_country.csv", 'w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    # Header line: the column labels of the table.
    csv_writer.writerow(list(daily_data_by_country.columns))
    # One pass over the rows; evaluating `.values` again for every row can
    # rebuild the whole array each time when the columns have different dtypes.
    csv_writer.writerows(daily_data_by_country.itertuples(index=False, name=None))