In [1]:
#Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

covid_path = "Resources/COVID_19.csv"
GDP_path = "Resources/Global_GDP.csv"
cities_path = "Resources/worldcities.csv"

COVID_data = pd.read_csv(covid_path)
GDP_data = pd.read_csv(GDP_path)
cities_data = pd.read_csv(cities_path)

In [65]:
gdp_2019 = GDP_data[['Country Name', 'Country Code', '2019']]
gdp_2019 = gdp_2019.rename(columns={'Country Name':'Country'})
gdp_2019.set_index('Country')

Unnamed: 0_level_0,Country Code,2019
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Aruba,ABW,
Afghanistan,AFG,1.910135e+10
Angola,AGO,9.463542e+10
Albania,ALB,1.527808e+10
Andorra,AND,3.154058e+09
...,...,...
Kosovo,XKX,7.926108e+09
"Yemen, Rep.",YEM,
South Africa,ZAF,3.514320e+11
Zambia,ZMB,2.306472e+10


In [54]:
COVID_data = COVID_data[['dateRep', 'cases', 'deaths', 'Country', 'countryterritoryCode', 'popData2019' ]]
COVID_data = COVID_data.rename(columns={"countriesAndTerritories": "Country"})
COVID_data.set_index("Country")

covid_country_groups = COVID_data.groupby("Country")

covid_death_totals = covid_country_groups['deaths'].sum()
covid_infection_totals = covid_country_groups['cases'].sum()
covid_countries = COVID_data['Country'].unique()
country_pops = COVID_data['popData2019'].unique()

cleaned_covid = pd.DataFrame({
    "Cases" : covid_infection_totals,
    "Deaths" : covid_death_totals,
    "Population" : country_pops,
})

cleaned_covid.head()

Unnamed: 0_level_0,Cases,Deaths,Population
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,39616,1470,38041757.0
Albania,14730,407,2862427.0
Algeria,52520,1779,43053054.0
Andorra,2568,53,76177.0
Angola,5725,211,31825299.0


In [66]:
cities_data.head()
cities_data.set_index('country')
capitals = cities_data.loc[cities_data['capital'] == 'primary']
capitals = capitals[['country', 'city', 'lat', 'lng']]
capitals = capitals.rename(columns={'country':'Country'})
capitals.set_index('Country')

Unnamed: 0_level_0,city,lat,lng
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Japan,Tokyo,35.6850,139.7514
Mexico,Mexico City,19.4424,-99.1310
Bangladesh,Dhaka,23.7231,90.4086
Argentina,Buenos Aires,-34.6025,-58.3975
Egypt,Cairo,30.0500,31.2500
...,...,...,...
West Bank,Al Quds,31.7764,35.2269
Sint Maarten,Philipsburg,18.0255,-63.0450
Burundi,Gitega,-3.4271,29.9246
Kosovo,Pristina,42.6666,21.1724


In [71]:
gdp_lat = pd.merge(capitals, gdp_2019, on='Country', how='outer')
gdp_lat

Unnamed: 0,Country,city,lat,lng,Country Code,2019
0,Japan,Tokyo,35.6850,139.7514,JPN,5.081770e+12
1,Mexico,Mexico City,19.4424,-99.1310,MEX,1.258290e+12
2,Bangladesh,Dhaka,23.7231,90.4086,BGD,3.025710e+11
3,Argentina,Buenos Aires,-34.6025,-58.3975,ARG,4.496630e+11
4,Egypt,Cairo,30.0500,31.2500,,
...,...,...,...,...,...,...
301,"Venezuela, RB",,,,VEN,
302,British Virgin Islands,,,,VGB,
303,Virgin Islands (U.S.),,,,VIR,
304,World,,,,WLD,8.769750e+13


In [73]:
merged = pd.merge(gdp_lat, cleaned_covid, on='Country', how='outer')
merged

Unnamed: 0,Country,city,lat,lng,Country Code,2019,Cases,Deaths,Population
0,Japan,Tokyo,35.6850,139.7514,JPN,5.081770e+12,87020.0,1613.0,126860299.0
1,Mexico,Mexico City,19.4424,-99.1310,MEX,1.258290e+12,774020.0,80083.0,127575529.0
2,Bangladesh,Dhaka,23.7231,90.4086,BGD,3.025710e+11,373151.0,5440.0,163046173.0
3,Argentina,Buenos Aires,-34.6025,-58.3975,ARG,4.496630e+11,840902.0,22226.0,44780675.0
4,Egypt,Cairo,30.0500,31.2500,,,104035.0,6010.0,100388076.0
...,...,...,...,...,...,...,...,...,...
358,United_Kingdom,,,,,,544275.0,42515.0,66647112.0
359,United_Republic_of_Tanzania,,,,,,509.0,21.0,58005461.0
360,United_States_Virgin_Islands,,,,,,1327.0,20.0,329064917.0
361,United_States_of_America,,,,,,7549794.0,211801.0,104579.0


In [76]:
merged['Country'].unique()

array(['Japan', 'Mexico', 'Bangladesh', 'Argentina', 'Egypt', 'China',
       'Philippines', 'Russia', 'France', 'Korea, South', 'Indonesia',
       'United Kingdom', 'Peru', 'Iran', 'Congo (Kinshasa)', 'Colombia',
       'Taiwan', 'Thailand', 'Chile', 'Spain', 'United States',
       'Singapore', 'Angola', 'Iraq', 'Sudan', 'Saudi Arabia', 'Vietnam',
       'Burma', 'Côte D’Ivoire', 'Brazil', 'Turkey', 'Germany', 'Algeria',
       'Italy', 'Korea, North', 'Afghanistan', 'Greece', 'South Africa',
       'Ethiopia', 'Kenya', 'Venezuela', 'Tanzania', 'Portugal',
       'Ukraine', 'Senegal', 'Syria', 'Tunisia', 'Austria', 'Libya',
       'Uzbekistan', 'Cuba', 'Dominican Republic', 'Azerbaijan', 'Ghana',
       'Kuwait', 'Yemen', 'Haiti', 'Romania', 'Paraguay', 'Lebanon',
       'Belarus', 'Belgium', 'Poland', 'Morocco', 'Ecuador', 'Madagascar',
       'Hungary', 'Cameroon', 'Bolivia', 'Nigeria', 'Zimbabwe', 'Uruguay',
       'Mali', 'Guinea', 'Cambodia', 'Togo', 'Qatar', 'Malaysia',
      