In [46]:
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

In [9]:
def load_file(path):
    file = pd.read_csv(path)
    return file

In [14]:
def getDictList(path):
    reader = csv.DictReader(open(path))
    reader = [dict(line) for line in reader]
    return reader

In [42]:
#testing efficiencies (will use pd since efficiency difference does not seem to be significant)
start_time = time.time()
confirmed_US = load_file("./csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
apr1 = load_file("./csse_covid_19_data/csse_covid_19_daily_reports/04-01-2020.csv")
print(f"DictReader took {time.time()-start_time} seconds")
start_time = time.time()
confirmed_US = load_file("./csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
apr1 = load_file("./csse_covid_19_data/csse_covid_19_daily_reports/04-01-2020.csv")
print(f"pd took {time.time()-start_time} seconds")

DictReader took 0.03635597229003906 seconds
pd took 0.0299069881439209 seconds


In [28]:
def printData():
    confirmed = deaths = recovered = 0
    usC = usD = usR = 0
    wdC = wdD = wdR = 0
    for i in range(len(apr1)):
        if apr1.loc[i, 'Province_State'] == 'Georgia':
            confirmed += apr1.loc[i, 'Confirmed']
            deaths += apr1.loc[i, 'Deaths']
            recovered += apr1.loc[i, 'Recovered']
        if apr1.loc[i, 'Country_Region'] == 'US':
            usC += apr1.loc[i, 'Confirmed']
            usD += apr1.loc[i, 'Deaths']
            usR += apr1.loc[i, 'Recovered']
        wdC += apr1.loc[i, 'Confirmed']
        wdD += apr1.loc[i, 'Deaths']
        wdR += apr1.loc[i, 'Recovered'] 
    dr = deaths/confirmed*100
    udr = usD/usC*100
    wdr = wdD/wdC*100
    print(f'GA confirmed: {confirmed}, deaths: {deaths}, recovered: {recovered}, mr: {dr}')
    print(f'US confirmed: {usC}, deaths: {usD}, recovered: {usR}, mr: {udr}')
    print(f'World confirmed: {wdC}, deaths: {wdD}, recovered: {wdR}, mr: {wdr}')

In [29]:
printData()

GA confirmed: 4638, deaths: 139, recovered: 0, mr: 2.9969814575247953
US confirmed: 213372, deaths: 4757, recovered: 8474, mr: 2.229439664060889
World confirmed: 932605, deaths: 46809, recovered: 193177, mr: 5.019166742618793


In [64]:
#using try except is more expensive
def getCountryStats1():
    countries = {}
    for i in range(len(apr1)):
        confirmed = apr1.loc[i, 'Confirmed']
        deaths = apr1.loc[i, 'Deaths']
        country = apr1.loc[i, 'Country_Region']
        try:
            countries[country]['confirmed'] += confirmed
            countries[country]['deaths'] += deaths
        except KeyError:
            countries[country] = {'confirmed': confirmed, 'deaths': deaths}
    return countries

def getCountryStats2():
    countries = {}
    for i in range(len(apr1)):
        confirmed = apr1.loc[i, 'Confirmed']
        deaths = apr1.loc[i, 'Deaths']
        country = apr1.loc[i, 'Country_Region']
        mortality = 0
        if country in countries:
            countries[country]['confirmed'] += confirmed
            countries[country]['deaths'] += deaths
            countries[country]['mortality rate'] = 0
        else:
            countries[country] = {'confirmed': confirmed, 'deaths': deaths, 'mortality rate': mortality}
    for country in countries:
        c = countries[country]['confirmed']
        d = countries[country]['deaths']
        countries[country]['mortality rate'] = d/c*100
    return countries

In [65]:
start = time.time()
getCountryStats1()
print(f'time elapsed: {time.time()-start}')
start = time.time()
getCountryStats2()
print(f'time elapsed: {time.time()-start}')

time elapsed: 0.06132102012634277
time elapsed: 0.05044889450073242


In [66]:
print(getCountryStats2())

{'US': {'confirmed': 213372, 'deaths': 4757, 'mortality rate': 2.229439664060889}, 'Canada': {'confirmed': 9560, 'deaths': 109, 'mortality rate': 1.1401673640167365}, 'United Kingdom': {'confirmed': 29865, 'deaths': 2357, 'mortality rate': 7.892181483341703}, 'China': {'confirmed': 82361, 'deaths': 3316, 'mortality rate': 4.026177438350675}, 'Netherlands': {'confirmed': 13696, 'deaths': 1175, 'mortality rate': 8.579147196261683}, 'Australia': {'confirmed': 4862, 'deaths': 20, 'mortality rate': 0.4113533525298231}, 'Denmark': {'confirmed': 3290, 'deaths': 104, 'mortality rate': 3.1610942249240126}, 'France': {'confirmed': 57749, 'deaths': 4043, 'mortality rate': 7.000987030078443}, 'Afghanistan': {'confirmed': 237, 'deaths': 4, 'mortality rate': 1.6877637130801686}, 'Albania': {'confirmed': 259, 'deaths': 15, 'mortality rate': 5.7915057915057915}, 'Algeria': {'confirmed': 847, 'deaths': 58, 'mortality rate': 6.8476977567886665}, 'Andorra': {'confirmed': 390, 'deaths': 14, 'mortality rat