In [9]:
import pandas as pd
import seaborn as sns
import plotly.express as px
# Cap DataFrame previews at 10 rows so rendered tables stay short.
pd.set_option("display.max_rows", 10)

# Load the GDP table; the path is resolved relative to the working directory.
gdp = pd.read_csv("gdp.csv")

In [11]:
# Add a "gdp" column: the constant-2010-US$ figure divided by one billion,
# which keeps the numbers readable in tables and on plot axes.
gdp["gdp"] = gdp["GDP (constant 2010 US$)"].div(1_000_000_000)

# Preview the first five rows (the default for head()).
gdp.head()

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
0,Afghanistan,AFG,2002,8013233000.0,8.013233
1,Afghanistan,AFG,2003,8689884000.0,8.689884
2,Afghanistan,AFG,2004,8781610000.0,8.78161
3,Afghanistan,AFG,2005,9762979000.0,9.762979
4,Afghanistan,AFG,2006,10305230000.0,10.305228


In [17]:
# One line per entity showing GDP over time. The "gdp" column is the
# constant-2010-US$ figure divided by one billion, so the axis label spells
# out the unit instead of showing the bare column name.
px.line(
    gdp,
    x="Year",
    y="gdp",
    color="Entity",
    title="GDP over time by entity",
    labels={"gdp": "GDP (billions, constant 2010 US$)"},
)

In [20]:
# Load the population table; the path is resolved relative to the working
# directory.
pop = pd.read_csv(filepath_or_buffer="population.csv")

# Display the frame (truncated by the display.max_rows option).
pop

Unnamed: 0,Entity,Code,Year,"Total population (Gapminder, HYDE & UN)"
0,Afghanistan,AFG,1800,3280000
1,Afghanistan,AFG,1801,3280000
2,Afghanistan,AFG,1802,3280000
3,Afghanistan,AFG,1803,3280000
4,Afghanistan,AFG,1804,3280000
...,...,...,...,...
46878,Zimbabwe,ZWE,2015,13815000
46879,Zimbabwe,ZWE,2016,14030000
46880,Zimbabwe,ZWE,2017,14237000
46881,Zimbabwe,ZWE,2018,14439000


In [22]:
# Plot population over time, restricted to years after 1900 so the recent
# history is readable.
# Note: the data includes entities that are not countries, e.g. Africa and World.
px.line(
    pop.query("Year > 1900"),
    x="Year",
    y="Total population (Gapminder, HYDE & UN)",
    color="Entity",
    title="Population by entity since 1900",
    # Replace the long source column name with a short axis label.
    labels={"Total population (Gapminder, HYDE & UN)": "Population"},
)

In [24]:
# Give the population column a short name that is easier to reference later.
pop = pop.rename(
    {"Total population (Gapminder, HYDE & UN)": "population"},
    axis="columns",
)

In [25]:
# Display the frame to confirm the column is now called "population".
pop

Unnamed: 0,Entity,Code,Year,population
0,Afghanistan,AFG,1800,3280000
1,Afghanistan,AFG,1801,3280000
2,Afghanistan,AFG,1802,3280000
3,Afghanistan,AFG,1803,3280000
4,Afghanistan,AFG,1804,3280000
...,...,...,...,...
46878,Zimbabwe,ZWE,2015,13815000
46879,Zimbabwe,ZWE,2016,14030000
46880,Zimbabwe,ZWE,2017,14237000
46881,Zimbabwe,ZWE,2018,14439000


In [31]:
# set aside 2017 gdp
gdp2017 = gdp.query("Year == 2017")
gdp2017

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
15,Afghanistan,AFG,2017,2.196941e+10,21.969414
53,Albania,ALB,2017,1.398856e+10,13.988556
111,Algeria,DZA,2017,1.993674e+11,199.367414
174,Andorra,AND,2017,3.382068e+09,3.382068
212,Angola,AGO,2017,1.037860e+11,103.785984
...,...,...,...,...,...
8597,Uzbekistan,UZB,2017,6.577995e+10,65.779950
8636,Vanuatu,VUT,2017,8.120250e+08,0.812025
8725,Vietnam,VNM,2017,1.752841e+11,175.284081
8810,Zambia,ZMB,2017,2.813940e+10,28.139397


In [33]:
# set aside 2017 pop
pop2017 = pop.query("Year == 2017")
pop2017

Unnamed: 0,Entity,Code,Year,population
217,Afghanistan,AFG,2017,36296000
339,Africa,,2017,1244221952
559,Albania,ALB,2017,2884000
779,Algeria,DZA,2017,41389000
849,American Samoa,ASM,2017,56000
...,...,...,...,...
46069,Western Sahara,ESH,2017,553000
46220,World,OWID_WRL,2017,7547858944
46440,Yemen,YEM,2017,27835000
46660,Zambia,ZMB,2017,16854000


In [36]:
# outer join the two datasets
gdp_and_pop_2017 = pd.merge(left = gdp2017, right = pop2017, left_on = "Entity", right_on = "Entity", how = "outer")
gdp_and_pop_2017

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population
0,Afghanistan,AFG,2017.0,2.196941e+10,21.969414,AFG,2017.0,3.629600e+07
1,Albania,ALB,2017.0,1.398856e+10,13.988556,ALB,2017.0,2.884000e+06
2,Algeria,DZA,2017.0,1.993674e+11,199.367414,DZA,2017.0,4.138900e+07
3,Andorra,AND,2017.0,3.382068e+09,3.382068,AND,2017.0,7.700000e+04
4,Angola,AGO,2017.0,1.037860e+11,103.785984,AGO,2017.0,2.981700e+07
...,...,...,...,...,...,...,...,...
238,Venezuela,,,,,VEN,2017.0,2.940200e+07
239,Wallis and Futuna,,,,,WLF,2017.0,1.200000e+04
240,Western Sahara,,,,,ESH,2017.0,5.530000e+05
241,World,,,,,OWID_WRL,2017.0,7.547859e+09


In [41]:
# GDP per person: the unscaled constant-2010-US$ GDP divided by population.
# Rows missing either value produce NaN here.
gdp_and_pop_2017["gdp_per_capita"] = gdp_and_pop_2017[
    "GDP (constant 2010 US$)"
].div(gdp_and_pop_2017["population"])
gdp_and_pop_2017

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population,gdp_per_capita
0,Afghanistan,AFG,2017.0,2.196941e+10,21.969414,AFG,2017.0,3.629600e+07,605.284718
1,Albania,ALB,2017.0,1.398856e+10,13.988556,ALB,2017.0,2.884000e+06,4850.400743
2,Algeria,DZA,2017.0,1.993674e+11,199.367414,DZA,2017.0,4.138900e+07,4816.917876
3,Andorra,AND,2017.0,3.382068e+09,3.382068,AND,2017.0,7.700000e+04,43922.964124
4,Angola,AGO,2017.0,1.037860e+11,103.785984,AGO,2017.0,2.981700e+07,3480.765485
...,...,...,...,...,...,...,...,...,...
238,Venezuela,,,,,VEN,2017.0,2.940200e+07,
239,Wallis and Futuna,,,,,WLF,2017.0,1.200000e+04,
240,Western Sahara,,,,,ESH,2017.0,5.530000e+05,
241,World,,,,,OWID_WRL,2017.0,7.547859e+09,


In [45]:
# Drop every row that has a NaN in any column, leaving only complete records.
# Despite the "_nan" suffix, this frame holds the rows that remain after the
# NaN-containing rows are removed.
gdp_and_pop_2017_nan = gdp_and_pop_2017.dropna(axis="index", how="any")
gdp_and_pop_2017_nan

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population,gdp_per_capita
0,Afghanistan,AFG,2017.0,2.196941e+10,21.969414,AFG,2017.0,36296000.0,605.284718
1,Albania,ALB,2017.0,1.398856e+10,13.988556,ALB,2017.0,2884000.0,4850.400743
2,Algeria,DZA,2017.0,1.993674e+11,199.367414,DZA,2017.0,41389000.0,4816.917876
3,Andorra,AND,2017.0,3.382068e+09,3.382068,AND,2017.0,77000.0,43922.964124
4,Angola,AGO,2017.0,1.037860e+11,103.785984,AGO,2017.0,29817000.0,3480.765485
...,...,...,...,...,...,...,...,...,...
180,Uzbekistan,UZB,2017.0,6.577995e+10,65.779950,UZB,2017.0,31960000.0,2058.196177
181,Vanuatu,VUT,2017.0,8.120250e+08,0.812025,VUT,2017.0,285000.0,2849.210379
182,Vietnam,VNM,2017.0,1.752841e+11,175.284081,VNM,2017.0,94601000.0,1852.877677
183,Zambia,ZMB,2017.0,2.813940e+10,28.139397,ZMB,2017.0,16854000.0,1669.597527


In [47]:
# Bar chart of 2017 GDP per capita, one bar per entity. Bars are sorted from
# highest to lowest so the ranking is readable; alphabetical order hides it
# with this many entities on the axis.
px.bar(
    gdp_and_pop_2017_nan.sort_values("gdp_per_capita", ascending=False),
    x="Entity",
    y="gdp_per_capita",
    title="GDP per capita by entity, 2017",
    # gdp_per_capita is GDP in constant 2010 US$ divided by population.
    labels={"gdp_per_capita": "GDP per capita (constant 2010 US$)"},
)