In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# direct link to John Hopkins timeset data 
# dataset has CHANGED location as of 3/23/20
data = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
covid = pd.read_csv(data, sep=",")

# preview our data
covid.head(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,2,4,4
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,2,2,2,2,2,4,5,5,6,8
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,7,9,11,15,17,17,19,21,25,26
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,3,3
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
today_date = datetime.now()
yesterday = today_date - timedelta(1)
date = yesterday
date = date.strftime('%m/%d/%y').lstrip("0").replace(" 0", " ")
date

'4/03/20'

In [4]:
country_locations = covid[['Country/Region', 'Lat', 'Long']]
country_locations

Unnamed: 0,Country/Region,Lat,Long
0,Afghanistan,33.000000,65.000000
1,Albania,41.153300,20.168300
2,Algeria,28.033900,1.659600
3,Andorra,42.506300,1.521800
4,Angola,-11.202700,17.873900
...,...,...,...
244,Saint Kitts and Nevis,17.357822,-62.782998
245,Canada,64.825500,-124.845700
246,Canada,64.282300,-135.000000
247,Kosovo,42.602636,20.902977


In [5]:
current_totals = covid[['Country/Region', date]]
current_totals

Unnamed: 0,Country/Region,3/27/20
0,Afghanistan,4
1,Albania,8
2,Algeria,26
3,Andorra,3
4,Angola,0
...,...,...
244,Saint Kitts and Nevis,0
245,Canada,0
246,Canada,0
247,Kosovo,1


In [17]:
countries_set = set(current_totals['Country/Region'])
country_totals = {'country': list(countries_set), 'deaths': []} 
for country in countries_set:
    df = current_totals[current_totals['Country/Region'] == country]
    country_totals['deaths'].append(df[date].sum())
country_totals = pd.DataFrame(country_totals)
country_totals = country_totals.set_index('country')
country_totals

Unnamed: 0_level_0,deaths
country,Unnamed: 1_level_1
Ireland,22
Estonia,1
"Korea, South",139
Timor-Leste,0
South Africa,1
...,...
Senegal,0
Sri Lanka,0
West Bank and Gaza,1
Iran,2378


## Merge Datasets

In [18]:
populations = pd.read_csv('data/TotalPopulationBySex.csv')
populations = populations[populations["Time"] == 2020]
populations.head()

Unnamed: 0,LocID,Location,VarID,Variant,Time,MidPeriod,PopMale,PopFemale,PopTotal,PopDensity
70,4,Afghanistan,2,Medium,2020,2020.5,19976.265,18952.076,38928.341,59.627
151,4,Afghanistan,3,High,2020,2020.5,19976.265,18952.076,38928.341,59.627
232,4,Afghanistan,4,Low,2020,2020.5,19976.265,18952.076,38928.341,59.627
313,4,Afghanistan,5,Constant fertility,2020,2020.5,19976.265,18952.076,38928.341,59.627
394,4,Afghanistan,6,Instant replacement,2020,2020.5,19976.265,18952.076,38928.341,59.627


In [19]:
countries_a = list(country_totals.index)
countries_b = populations['Location'].unique()

shared = [country_name for country_name in countries_a if country_name in countries_b]
not_shared_a = [country_name for country_name in countries_a if country_name not in countries_b]
not_shared_b = [country_name for country_name in countries_b if country_name not in countries_a]

In [20]:
not_shared_a.sort()
not_shared_a

['Bolivia',
 'Brunei',
 'Burma',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 "Cote d'Ivoire",
 'Diamond Princess',
 'Iran',
 'Korea, South',
 'Kosovo',
 'Laos',
 'Moldova',
 'Russia',
 'Syria',
 'Taiwan*',
 'Tanzania',
 'US',
 'Venezuela',
 'Vietnam',
 'West Bank and Gaza']

In [21]:
not_shared_b.sort()
not_shared_b

['Africa',
 'African Group',
 'African Union',
 'African Union: Central Africa',
 'African Union: Eastern Africa',
 'African Union: Northern Africa',
 'African Union: Southern Africa',
 'African Union: Western Africa',
 'African, Caribbean and Pacific (ACP) Group of States',
 'American Samoa',
 'Andean Community',
 'Anguilla',
 'Aruba',
 'Asia',
 'Asia-Pacific Economic Cooperation (APEC)',
 'Asia-Pacific Group',
 'Association of Southeast Asian Nations (ASEAN)',
 'Australia/New Zealand',
 'BRIC',
 'BRICS',
 'Belt-Road Initiative (BRI)',
 'Belt-Road Initiative: Africa',
 'Belt-Road Initiative: Asia',
 'Belt-Road Initiative: Europe',
 'Belt-Road Initiative: Latin America and the Caribbean',
 'Belt-Road Initiative: Pacific',
 'Bermuda',
 'Black Sea Economic Cooperation (BSEC)',
 'Bolivarian Alliance for the Americas (ALBA)',
 'Bolivia (Plurinational State of)',
 'Bonaire, Sint Eustatius and Saba',
 'Botswana',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Burundi',
 'Caribbean',
 'Ca

In [22]:
spelling_changes = {
    'Bahamas': 'Bahamas, The',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Brunei Darussalam': 'Brunei',
    'Côte d\'Ivoire': 'Cote d\'Ivoire',
    'Gambia': 'Gambia, The',
    'Iran (Islamic Republic of)': 'Iran',
    'Republic of Korea': 'Korea, South',
    'Russian Federation': 'Russia',
    'Syrian Arab Republic': 'Syria',
    'China, Taiwan Province of China': 'Taiwan*',
    'United Republic of Tanzania': 'Tanzania',
    'United States of America': 'US',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Viet Nam': 'Vietnam'
}

In [23]:
current_populations = {'country': [], 'population': []}
for country_name in populations['Location'].unique():
    if country_name in spelling_changes.keys():
        key_name = spelling_changes[country_name]
    elif country_name not in shared:
        continue
    else:
        key_name = country_name
    current_populations['country'].append(key_name)
    current_populations['population'].append(populations[populations['Location'] == country_name]['PopTotal'].iloc[0]) 
current_populations = pd.DataFrame(current_populations)
current_populations = current_populations.set_index('country')
current_populations

Unnamed: 0_level_0,population
country,Unnamed: 1_level_1
Afghanistan,38928.341
Albania,2877.800
Algeria,43851.043
Andorra,77.265
Angola,32866.268
...,...
Uzbekistan,33469.199
Venezuela,28435.943
Vietnam,97338.583
Zambia,18383.956


In [24]:
country_totals = country_totals.join(current_populations)
country_totals

Unnamed: 0_level_0,deaths,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Ireland,22,4937.796
Estonia,1,1326.539
"Korea, South",139,51269.183
Timor-Leste,0,1318.442
South Africa,1,59308.690
...,...,...
Senegal,0,16743.930
Sri Lanka,0,21413.250
West Bank and Gaza,1,
Iran,2378,83992.953


In [25]:
country_totals['death rate'] = country_totals['deaths'] / country_totals['population']
country_totals = country_totals.sort_values(by=['death rate'], ascending=False).reset_index()

In [26]:
country_totals

Unnamed: 0,country,deaths,population,death rate
0,San Marino,21,33.938,0.618775
1,Italy,9134,60461.828,0.151071
2,Spain,5138,46754.783,0.109893
3,Andorra,3,77.265,0.038827
4,Netherlands,547,17134.873,0.031923
...,...,...,...,...
171,Laos,0,,
172,Congo (Kinshasa),3,,
173,Bahamas,0,,
174,Moldova,2,,


In [28]:
missing = country_totals[(country_totals['population'].isna())]
missing

Unnamed: 0,country,deaths,population,death rate
166,Gambia,1,,
167,Diamond Princess,10,,
168,Kosovo,1,,
169,Congo (Brazzaville),0,,
170,Burma,0,,
171,Laos,0,,
172,Congo (Kinshasa),3,,
173,Bahamas,0,,
174,Moldova,2,,
175,West Bank and Gaza,1,,


In [29]:
# read in EIU raw_data after running script

EIU = pd.read_csv("test.csv", sep=",")
EIU.head()

Unnamed: 0,Country name,Overall Score,Rank,Electoral process and pluralism,Functioning of government,Political participation,Political culture,Civil liberties
0,Norway,9.87,1,10.0,9.64,10.0,10.0,9.71
1,Iceland,9.58,2,10.0,9.29,8.89,10.0,9.71
2,Sweden,9.39,3,9.58,9.64,8.33,10.0,9.41
3,New Zealand,9.26,4,10.0,9.29,8.89,8.13,10.0
4,Finland,9.25,5,10.0,8.93,8.89,8.75,9.71


In [52]:
EIU['Country name'].unique()

array(['Norway', 'Iceland', 'Sweden', 'New Zealand', 'Finland', 'Ireland',
       'Denmark', 'Canada', 'Australia', 'Switzerland', 'Netherlands',
       'Luxembourg', 'Germany', 'United Kingdom', 'Uruguay', 'Austria',
       'Spain', 'Mauritius', 'Costa Rica', 'France', 'Chile', 'Portugal',
       'Korea, South', 'Japan', 'US', 'Malta', 'Estonia', 'Israel',
       'Botswana', 'Cabo Verde', 'Taiwan*', 'Czechia', 'Belgium',
       'Cyprus', 'Italy', 'Slovenia', 'Lithuania', 'Latvia', 'Greece',
       'South Africa', 'Timor-Leste', 'Slovakia', 'Malaysia',
       'Trinidad and Tobago', 'Colombia', 'Panama', 'Bulgaria',
       'Argentina', 'Suriname', 'Jamaica', 'India', 'Brazil', 'Tunisia',
       'Philippines', 'Ghana', 'Hungary', 'Poland', 'Peru', 'Croatia',
       'Dominican Republic', 'Lesotho', 'Mongolia', 'Romania',
       'Indonesia', 'Namibia', 'Serbia', 'Ecuador', 'Thailand',
       'Sri Lanka', 'Paraguay', 'El Salvador', 'Guyana', 'Mexico',
       'Papua New Guinea', 'Hong Kong',

In [53]:
EIU.sort_values(by="Overall Score", ascending=True).head(15)

Unnamed: 0,Country name,Overall Score,Rank,Electoral process and pluralism,Functioning of government,Political participation,Political culture,Civil liberties
166,North Korea,1.08,167,0.0,2.5,1.67,1.25,0.0
165,Congo (Kinshasa),1.13,166,0.0,0.0,1.67,3.13,0.88
164,Central African Republic,1.32,165,1.25,0.0,1.11,1.88,2.35
163,Syria,1.43,164,0.0,0.0,2.78,4.38,0.0
162,Chad,1.61,163,0.0,0.0,1.67,3.75,2.65
161,Turkmenistan,1.72,162,0.0,0.79,2.22,5.0,0.59
160,Equatorial Guinea,1.92,161,0.0,0.43,3.33,4.38,1.47
159,Tajikistan,1.93,159,0.08,0.79,1.67,6.25,0.88
158,Saudi Arabia,1.93,159,0.0,2.86,2.22,3.13,1.47
157,Yemen,1.95,158,0.0,0.0,3.89,5.0,0.88


In [None]:
replace_dict = {'Bosnia and Hercegovina':'Bosnia and Herzegovina', 
                'Democratic Republic of Congo':'Congo (Kinshasa)', 
                'Czech Republic':'Czechia',
                'South Korea':'Korea, South', 
                'Taiwan':'Taiwan*', 'United States of America':'US'}

In [67]:
EIU.replace({"Country name": replace_dict},inplace=True)

In [68]:
left = country_totals
right = EIU
politics_deaths = pd.merge(left, right, 
              how='outer', on=None, 
              left_on="country", right_on="Country name")
politics_deaths

Unnamed: 0,country,deaths,population,death rate,Country name,Overall Score,Rank,Electoral process and pluralism,Functioning of government,Political participation,Political culture,Civil liberties
0,San Marino,21.0,33.938,0.618775,,,,,,,,
1,Italy,9134.0,60461.828,0.151071,Italy,7.52,35.0,9.58,6.07,7.78,6.25,7.94
2,Spain,5138.0,46754.783,0.109893,Spain,8.29,16.0,9.58,7.14,7.78,8.13,8.82
3,Andorra,3.0,77.265,0.038827,,,,,,,,
4,Netherlands,547.0,17134.873,0.031923,Netherlands,9.01,11.0,9.58,9.29,8.33,8.75,9.12
...,...,...,...,...,...,...,...,...,...,...,...,...
186,,,,,Burundi,2.15,154.0,0.00,0.07,3.33,5.00,2.35
187,,,,,Yemen,1.95,158.0,0.00,0.00,3.89,5.00,0.88
188,,,,,Tajikistan,1.93,159.0,0.08,0.79,1.67,6.25,0.88
189,,,,,Turkmenistan,1.72,162.0,0.00,0.79,2.22,5.00,0.59


In [69]:
politics_deaths.isna().sum()

country                            15
deaths                             15
population                         25
death rate                         25
Country name                       24
Overall Score                      24
Rank                               24
Electoral process and pluralism    24
Functioning of government          24
Political participation            24
Political culture                  24
Civil liberties                    24
dtype: int64

In [72]:
no_replaceA = politics_deaths[politics_deaths['country'].isna()]
no_replaceA

Unnamed: 0,country,deaths,population,death rate,Country name,Overall Score,Rank,Electoral process and pluralism,Functioning of government,Political participation,Political culture,Civil liberties
176,,,,,Botswana,7.81,29.0,9.17,7.14,6.11,7.5,9.12
177,,,,,Lesotho,6.54,60.0,9.17,4.5,6.67,5.63,6.76
178,,,,,Hong Kong,6.02,75.0,3.58,4.36,6.11,7.5,8.53
179,,,,,Malawi,5.5,87.0,6.08,4.29,5.0,6.25,5.88
180,,,,,Sierra Leone,4.86,102.0,6.58,2.86,3.33,6.25,5.29
181,,,,,Côte d’Ivoire,4.05,111.0,4.33,2.86,3.33,5.63,4.12
182,,,,,Palestine,3.89,117.0,3.33,0.14,7.78,4.38,3.82
183,,,,,Myanmar,3.55,122.0,3.08,3.93,2.78,5.63,2.35
184,,,,,Comoros,3.15,131.0,2.08,2.21,3.89,3.75,3.82
185,,,,,eSwatini,3.14,132.0,0.92,2.86,2.78,5.63,3.53


In [73]:
no_replaceB = politics_deaths[politics_deaths['Country name'].isna()]
no_replaceB

Unnamed: 0,country,deaths,population,death rate,Country name,Overall Score,Rank,Electoral process and pluralism,Functioning of government,Political participation,Political culture,Civil liberties
0,San Marino,21.0,33.938,0.618775,,,,,,,,
3,Andorra,3.0,77.265,0.038827,,,,,,,,
108,Somalia,0.0,15893.219,0.0,,,,,,,,
114,Holy See,0.0,0.809,0.0,,,,,,,,
115,Grenada,0.0,112.519,0.0,,,,,,,,
116,Liechtenstein,0.0,38.137,0.0,,,,,,,,
118,Barbados,0.0,287.371,0.0,,,,,,,,
126,Eswatini,0.0,1160.164,0.0,,,,,,,,
128,Brunei,0.0,437.483,0.0,,,,,,,,
133,Saint Lucia,0.0,183.629,0.0,,,,,,,,
