In [16]:
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt

In [17]:
def load_file(path):
    file = pd.read_csv(path)
    return file

In [18]:
def getDictList(path):
    reader = csv.DictReader(open(path))
    reader = [dict(line) for line in reader]
    return reader

In [19]:
#testing efficiencies (will use pd since efficiency difference does not seem to be significant)
start_time = time.time()
confirmed_US = load_file("./csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
apr1 = load_file("./csse_covid_19_data/csse_covid_19_daily_reports/04-01-2020.csv")
print(f"DictReader took {time.time()-start_time} seconds")
start_time = time.time()
confirmed_US = load_file("./csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
apr1 = load_file("./csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")
print(f"pd took {time.time()-start_time} seconds")

DictReader took 0.034603118896484375 seconds
pd took 0.03330397605895996 seconds


In [20]:
def printData():
    confirmed = deaths = recovered = 0
    usC = usD = usR = 0
    wdC = wdD = wdR = 0
    for i in range(len(apr1)):
        if apr1.loc[i, 'Province_State'] == 'Georgia':
            confirmed += apr1.loc[i, 'Confirmed']
            deaths += apr1.loc[i, 'Deaths']
            recovered += apr1.loc[i, 'Recovered']
        if apr1.loc[i, 'Country_Region'] == 'US':
            usC += apr1.loc[i, 'Confirmed']
            usD += apr1.loc[i, 'Deaths']
            usR += apr1.loc[i, 'Recovered']
        wdC += apr1.loc[i, 'Confirmed']
        wdD += apr1.loc[i, 'Deaths']
        wdR += apr1.loc[i, 'Recovered'] 
    dr = deaths/confirmed*100
    udr = usD/usC*100
    wdr = wdD/wdC*100
    print(f'GA confirmed: {confirmed}, deaths: {deaths}, recovered: {recovered}, mr: {dr}')
    print(f'US confirmed: {usC}, deaths: {usD}, recovered: {usR}, mr: {udr}')
    print(f'World confirmed: {wdC}, deaths: {wdD}, recovered: {wdR}, mr: {wdr}')

In [21]:
printData()

GA confirmed: 5348, deaths: 163, recovered: 0, mr: 3.0478683620044875
US confirmed: 243453, deaths: 5926, recovered: 9001, mr: 2.4341453997280786
World confirmed: 1013157, deaths: 52983, recovered: 210263, mr: 5.2294955273467


In [22]:
#using try except is more expensive
def getCountryStats1():
    countries = {}
    for i in range(len(apr1)):
        confirmed = apr1.loc[i, 'Confirmed']
        deaths = apr1.loc[i, 'Deaths']
        country = apr1.loc[i, 'Country_Region']
        try:
            countries[country]['confirmed'] += confirmed
            countries[country]['deaths'] += deaths
        except KeyError:
            countries[country] = {'confirmed': confirmed, 'deaths': deaths}
    return countries

def getCountryStats2():
    countries = {}
    for i in range(len(apr1)):
        confirmed = apr1.loc[i, 'Confirmed']
        deaths = apr1.loc[i, 'Deaths']
        country = apr1.loc[i, 'Country_Region']
        mortality = 0
        if country in countries:
            countries[country]['confirmed'] += confirmed
            countries[country]['deaths'] += deaths
            countries[country]['mortality rate'] = 0
        else:
            countries[country] = {'confirmed': confirmed, 'deaths': deaths, 'mortality rate': mortality}
    for country in countries:
        c = countries[country]['confirmed']
        d = countries[country]['deaths']
        countries[country]['mortality rate'] = d/c*100
    return countries

In [23]:
start = time.time()
getCountryStats1()
print(f'time elapsed: {time.time()-start}')
start = time.time()
countries = getCountryStats2()
print(f'time elapsed: {time.time()-start}')

time elapsed: 0.0661478042602539
time elapsed: 0.05374574661254883


In [24]:
print(countries)

{'US': {'confirmed': 243453, 'deaths': 5926, 'mortality rate': 2.4341453997280786}, 'Canada': {'confirmed': 11284, 'deaths': 139, 'mortality rate': 1.2318326834455866}, 'United Kingdom': {'confirmed': 34173, 'deaths': 2926, 'mortality rate': 8.562315278143563}, 'China': {'confirmed': 82432, 'deaths': 3322, 'mortality rate': 4.029988354037267}, 'Netherlands': {'confirmed': 14788, 'deaths': 1341, 'mortality rate': 9.068163375710036}, 'Australia': {'confirmed': 5116, 'deaths': 24, 'mortality rate': 0.46911649726348714}, 'Denmark': {'confirmed': 3573, 'deaths': 123, 'mortality rate': 3.4424853064651555}, 'France': {'confirmed': 59929, 'deaths': 5398, 'mortality rate': 9.007325334979727}, 'Afghanistan': {'confirmed': 273, 'deaths': 6, 'mortality rate': 2.197802197802198}, 'Albania': {'confirmed': 277, 'deaths': 16, 'mortality rate': 5.776173285198556}, 'Algeria': {'confirmed': 986, 'deaths': 86, 'mortality rate': 8.72210953346856}, 'Andorra': {'confirmed': 428, 'deaths': 15, 'mortality rate

In [25]:
country_list = [country for country in countries.keys()]
print(country_list)

['US', 'Canada', 'United Kingdom', 'China', 'Netherlands', 'Australia', 'Denmark', 'France', 'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Central African Republic', 'Chad', 'Chile', 'Colombia', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Holy See', 'Honduras', 'Hungary', 

In [27]:
confirmed_list = [countries[country]['confirmed'] for country in countries.keys()]
death_list = [countries[country]['deaths'] for country in countries.keys()]
print(confirmed_list)
print(death_list)

[243453, 11284, 34173, 82432, 14788, 5116, 3573, 59929, 273, 277, 986, 428, 8, 9, 1133, 663, 11129, 400, 24, 643, 56, 46, 304, 15348, 3, 13, 5, 123, 533, 4, 8044, 133, 457, 288, 20, 3, 6, 110, 306, 3, 8, 3404, 1161, 22, 134, 396, 194, 1011, 233, 356, 3858, 712, 40, 12, 1380, 3163, 865, 41, 15, 22, 858, 9, 29, 7, 1518, 21, 4, 134, 84794, 204, 1544, 10, 47, 52, 9, 19, 16, 7, 219, 585, 1319, 2543, 1790, 50468, 772, 3849, 6857, 115242, 47, 2495, 299, 435, 110, 9976, 125, 342, 116, 10, 458, 494, 6, 11, 75, 649, 2487, 9, 59, 3, 3116, 19, 36, 196, 6, 169, 1378, 505, 60, 14, 144, 708, 10, 14, 6, 797, 5, 98, 184, 384, 5147, 231, 2421, 1317, 1, 77, 1414, 2633, 2946, 9034, 949, 2738, 3548, 84, 9, 13, 2, 245, 1885, 195, 1171, 10, 2, 1049, 426, 897, 5, 1462, 112065, 151, 8, 10, 5568, 18827, 16, 339, 20, 1875, 1, 39, 94, 455, 18135, 45, 897, 1024, 350, 205, 146, 233, 161, 39, 9]
[5926, 139, 2926, 3322, 1341, 24, 123, 5398, 6, 16, 86, 15, 2, 0, 36, 7, 158, 5, 1, 4, 6, 0, 4, 1011, 0, 0, 0, 8, 16, 1, 3

In [29]:
lethality_list = [death/confirmed*100 for confirmed, death in zip(confirmed_list, death_list)]
print(lethality_list)

[2.4341453997280786, 1.2318326834455866, 8.562315278143563, 4.029988354037267, 9.068163375710036, 0.46911649726348714, 3.4424853064651555, 9.007325334979727, 2.197802197802198, 5.776173285198556, 8.72210953346856, 3.5046728971962615, 25.0, 0.0, 3.1774051191526915, 1.0558069381598794, 1.4197142600413335, 1.25, 4.166666666666666, 0.6220839813374806, 10.714285714285714, 0.0, 1.3157894736842104, 6.587177482408131, 0.0, 0.0, 0.0, 6.504065040650407, 3.0018761726078798, 25.0, 4.027846842366982, 0.7518796992481203, 2.1881838074398248, 5.555555555555555, 5.0, 0.0, 16.666666666666664, 0.0, 2.287581699346405, 0.0, 0.0, 0.5287896592244419, 1.636520241171404, 9.090909090909092, 9.701492537313433, 0.5050505050505051, 0.5154639175257731, 0.6923837784371909, 2.575107296137339, 2.8089887640449436, 1.1404872991187145, 1.544943820224719, 0.0, 0.0, 4.3478260869565215, 3.7938665823585205, 6.705202312138728, 4.878048780487805, 0.0, 0.0, 1.282051282051282, 0.0, 0.0, 0.0, 1.251646903820817, 4.761904761904762,