In [35]:
import pandas as pd
import seaborn as sns
import plotly.express as px

# Keep DataFrame reprs short: pandas truncates frames longer than 10 rows.
pd.options.display.max_rows = 10

# Load the GDP table and print it as read from disk.
gdp = pd.read_csv("data/gdp.csv")
print(gdp)

# Add a "gdp" column holding GDP divided by one billion, which gives a more
# readable scale on the chart axis.
gdp["gdp"] = gdp["GDP (constant 2010 US$)"] / 1_000_000_000

# Jupyter only renders the value of a cell's LAST expression. More code follows
# this figure in the same cell, so render it explicitly instead of silently
# discarding it.
px.line(gdp, x="Year", y="gdp", color="Entity").show()


# Next goal: GDP per capita. The GDP data set has no population column, so
# population is loaded from a second data set.
pop = pd.read_csv("data/population.csv")
print(pop)

# Jupyter only renders the value of a cell's LAST expression. The two figures and
# the renamed frame below sit in the middle of the cell, so they are shown
# explicitly instead of being silently discarded.
px.line(pop, x="Year", y="Total population (Gapminder, HYDE & UN)", color="Entity").show()
# Same chart restricted to more recent history (years after 1900).
px.line(pop.query("Year > 1900"), x="Year", y="Total population (Gapminder, HYDE & UN)", color="Entity").show()

# Give the population column a short name for the calculations that follow.
pop = pop.rename(columns={"Total population (Gapminder, HYDE & UN)": "population"})
# `display` is provided by IPython/Jupyter without an import.
display(pop)

# Slice both tables down to their 2017 rows.
gdp2017 = gdp[gdp["Year"] == 2017]
pop2017 = pop[pop["Year"] == 2017]

# Dividing one frame by the other (gdp2017 / pop2017) does not work; the two
# tables have to be merged first. The outer join on Entity keeps entities that
# appear in only one of the two tables.
gdp_and_pop_2017 = pd.merge(gdp2017, pop2017, on="Entity", how="outer")

# With GDP and population on the same row, GDP per capita is a column division.
gdp_and_pop_2017["gdp per capita"] = gdp_and_pop_2017["GDP (constant 2010 US$)"].div(
    gdp_and_pop_2017["population"]
)
gdp_and_pop_2017

           Entity Code  Year  GDP (constant 2010 US$)
0     Afghanistan  AFG  2002             8.013233e+09
1     Afghanistan  AFG  2003             8.689884e+09
2     Afghanistan  AFG  2004             8.781610e+09
3     Afghanistan  AFG  2005             9.762979e+09
4     Afghanistan  AFG  2006             1.030523e+10
...           ...  ...   ...                      ...
8864     Zimbabwe  ZWE  2013             1.418193e+10
8865     Zimbabwe  ZWE  2014             1.448359e+10
8866     Zimbabwe  ZWE  2015             1.472830e+10
8867     Zimbabwe  ZWE  2016             1.481899e+10
8868     Zimbabwe  ZWE  2017             1.532981e+10

[8869 rows x 4 columns]
            Entity Code  Year  Total population (Gapminder, HYDE & UN)
0      Afghanistan  AFG  1800                                  3280000
1      Afghanistan  AFG  1801                                  3280000
2      Afghanistan  AFG  1802                                  3280000
3      Afghanistan  AFG  1803              

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population,gdp per capita
0,Afghanistan,AFG,2017.0,2.196941e+10,21.969414,AFG,2017.0,3.629600e+07,605.284718
1,Albania,ALB,2017.0,1.398856e+10,13.988556,ALB,2017.0,2.884000e+06,4850.400743
2,Algeria,DZA,2017.0,1.993674e+11,199.367414,DZA,2017.0,4.138900e+07,4816.917876
3,Andorra,AND,2017.0,3.382068e+09,3.382068,AND,2017.0,7.700000e+04,43922.964124
4,Angola,AGO,2017.0,1.037860e+11,103.785984,AGO,2017.0,2.981700e+07,3480.765485
...,...,...,...,...,...,...,...,...,...
238,Venezuela,,,,,VEN,2017.0,2.940200e+07,
239,Wallis and Futuna,,,,,WLF,2017.0,1.200000e+04,
240,Western Sahara,,,,,ESH,2017.0,5.530000e+05,
241,World,,,,,OWID_WRL,2017.0,7.547859e+09,


In [36]:
# Order entities from lowest to highest 2017 GDP per capita; rows with a missing
# value are placed at the end.
gdp_and_pop_2017.sort_values(by="gdp per capita", ascending=True)

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population,gdp per capita
27,Burundi,BDI,2017.0,2.309167e+09,2.309167,BDI,2017.0,1.082700e+07,213.278588
32,Central African Republic,CAF,2017.0,1.560913e+09,1.560913,CAF,2017.0,4.596000e+06,339.624311
94,Liberia,LBR,2017.0,1.666984e+09,1.666984,LBR,2017.0,4.702000e+06,354.526588
121,Niger,NER,2017.0,8.503683e+09,8.503683,NER,2017.0,2.160200e+07,393.652575
44,Democratic Republic of Congo,COD,2017.0,3.327760e+10,33.277602,COD,2017.0,8.139900e+07,408.820771
...,...,...,...,...,...,...,...,...,...
238,Venezuela,,,,,VEN,2017.0,2.940200e+07,
239,Wallis and Futuna,,,,,WLF,2017.0,1.200000e+04,
240,Western Sahara,,,,,ESH,2017.0,5.530000e+05,
241,World,,,,,OWID_WRL,2017.0,7.547859e+09,


In [37]:
# Bar chart of 2017 GDP per capita, one bar per entity, in increasing order.
px.bar(
    data_frame=gdp_and_pop_2017.sort_values(by="gdp per capita"),
    x="Entity",
    y="gdp per capita",
)

In [38]:
# The previous result is not what we want, and it only covers a single year.
# Try merging the complete tables, matching rows on Entity alone.
gdp_and_pop = gdp.merge(pop, on="Entity", how="inner")
gdp_and_pop

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population
0,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1800,3280000
1,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1801,3280000
2,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1802,3280000
3,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1803,3280000
4,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1804,3280000
...,...,...,...,...,...,...,...,...
1904165,Zimbabwe,ZWE,2017,1.532981e+10,15.329811,ZWE,2015,13815000
1904166,Zimbabwe,ZWE,2017,1.532981e+10,15.329811,ZWE,2016,14030000
1904167,Zimbabwe,ZWE,2017,1.532981e+10,15.329811,ZWE,2017,14237000
1904168,Zimbabwe,ZWE,2017,1.532981e+10,15.329811,ZWE,2018,14439000


In [39]:
# Look at a single entity. Note in the output that merging on Entity alone gives
# duplicate entries: each GDP year is paired with every population year.
gdp_and_pop[gdp_and_pop["Entity"] == "Afghanistan"]

Unnamed: 0,Entity,Code_x,Year_x,GDP (constant 2010 US$),gdp,Code_y,Year_y,population
0,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1800,3280000
1,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1801,3280000
2,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1802,3280000
3,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1803,3280000
4,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,1804,3280000
...,...,...,...,...,...,...,...,...
3515,Afghanistan,AFG,2017,2.196941e+10,21.969414,AFG,2015,34414000
3516,Afghanistan,AFG,2017,2.196941e+10,21.969414,AFG,2016,35383000
3517,Afghanistan,AFG,2017,2.196941e+10,21.969414,AFG,2017,36296000
3518,Afghanistan,AFG,2017,2.196941e+10,21.969414,AFG,2018,37172000


In [40]:
# To fix the duplicates, join on both Entity and Year so each GDP row is paired
# only with the population of the same entity in the same year.
# The outer join is shown for comparison. It is displayed explicitly because
# Jupyter renders only a cell's last expression, and an unassigned mid-cell
# merge would otherwise be computed and thrown away. `display` is provided by
# IPython/Jupyter without an import.
display(pd.merge(left=gdp, right=pop, on=["Entity", "Year"], how="outer"))

# Use a left join for the real table: it keeps exactly the entity-years present
# in the GDP table and attaches population where a match exists.
gdp_and_pop_real = pd.merge(left=gdp, right=pop, on=["Entity", "Year"], how="left")
gdp_and_pop_real

Unnamed: 0,Entity,Code_x,Year,GDP (constant 2010 US$),gdp,Code_y,population
0,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,22601000.0
1,Afghanistan,AFG,2003,8.689884e+09,8.689884,AFG,23681000.0
2,Afghanistan,AFG,2004,8.781610e+09,8.781610,AFG,24727000.0
3,Afghanistan,AFG,2005,9.762979e+09,9.762979,AFG,25654000.0
4,Afghanistan,AFG,2006,1.030523e+10,10.305228,AFG,26433000.0
...,...,...,...,...,...,...,...
8864,Zimbabwe,ZWE,2013,1.418193e+10,14.181927,ZWE,13350000.0
8865,Zimbabwe,ZWE,2014,1.448359e+10,14.483588,ZWE,13587000.0
8866,Zimbabwe,ZWE,2015,1.472830e+10,14.728302,ZWE,13815000.0
8867,Zimbabwe,ZWE,2016,1.481899e+10,14.818986,ZWE,14030000.0


In [41]:
# GDP per capita for every entity-year: total GDP divided by population.
gdp_and_pop_real["gdp_per_capita"] = gdp_and_pop_real["GDP (constant 2010 US$)"].div(
    gdp_and_pop_real["population"]
)
gdp_and_pop_real

Unnamed: 0,Entity,Code_x,Year,GDP (constant 2010 US$),gdp,Code_y,population,gdp_per_capita
0,Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,22601000.0,354.552149
1,Afghanistan,AFG,2003,8.689884e+09,8.689884,AFG,23681000.0,366.955940
2,Afghanistan,AFG,2004,8.781610e+09,8.781610,AFG,24727000.0,355.142564
3,Afghanistan,AFG,2005,9.762979e+09,9.762979,AFG,25654000.0,380.563610
4,Afghanistan,AFG,2006,1.030523e+10,10.305228,AFG,26433000.0,389.862222
...,...,...,...,...,...,...,...,...
8864,Zimbabwe,ZWE,2013,1.418193e+10,14.181927,ZWE,13350000.0,1062.316603
8865,Zimbabwe,ZWE,2014,1.448359e+10,14.483588,ZWE,13587000.0,1065.988675
8866,Zimbabwe,ZWE,2015,1.472830e+10,14.728302,ZWE,13815000.0,1066.109450
8867,Zimbabwe,ZWE,2016,1.481899e+10,14.818986,ZWE,14030000.0,1056.235654


In [42]:
# With per-capita values available, chart the history of a single entity.
px.line(
    data_frame=gdp_and_pop_real[gdp_and_pop_real["Entity"] == "India"],
    x="Year",
    y="gdp_per_capita",
)

In [43]:
# Next question: how much has each economy grown since 1960?
# Start by labelling every row with its entity name as the index.
gdp_and_pop_by_entity = gdp_and_pop_real.set_index(keys="Entity")
gdp_and_pop_by_entity

Unnamed: 0_level_0,Code_x,Year,GDP (constant 2010 US$),gdp,Code_y,population,gdp_per_capita
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,22601000.0,354.552149
Afghanistan,AFG,2003,8.689884e+09,8.689884,AFG,23681000.0,366.955940
Afghanistan,AFG,2004,8.781610e+09,8.781610,AFG,24727000.0,355.142564
Afghanistan,AFG,2005,9.762979e+09,9.762979,AFG,25654000.0,380.563610
Afghanistan,AFG,2006,1.030523e+10,10.305228,AFG,26433000.0,389.862222
...,...,...,...,...,...,...,...
Zimbabwe,ZWE,2013,1.418193e+10,14.181927,ZWE,13350000.0,1062.316603
Zimbabwe,ZWE,2014,1.448359e+10,14.483588,ZWE,13587000.0,1065.988675
Zimbabwe,ZWE,2015,1.472830e+10,14.728302,ZWE,13815000.0,1066.109450
Zimbabwe,ZWE,2016,1.481899e+10,14.818986,ZWE,14030000.0,1056.235654


In [44]:
# Baseline frame: each entity's GDP per capita in 1960.
# Values for all other years will be divided by this baseline.
gdp_per_capitas_1960 = gdp_and_pop_by_entity.loc[
    gdp_and_pop_by_entity["Year"] == 1960, ["gdp_per_capita"]
]
gdp_per_capitas_1960

Unnamed: 0_level_0,gdp_per_capita
Entity,Unnamed: 1_level_1
Algeria,2480.954892
Argentina,5642.704253
Australia,19452.581069
Austria,12987.795692
Bahamas,17659.437251
...,...
United States,16484.868935
Uruguay,5473.869009
Venezuela,12456.963693
Zambia,1495.596123


In [45]:
# Ratio of each row's GDP per capita to the same entity's 1960 value. Both
# operands are indexed by Entity, so pandas matches rows by entity name; entities
# without a 1960 baseline get NaN.
gdp_and_pop_by_entity["gdp_per_capita_ratio"] = gdp_and_pop_by_entity["gdp_per_capita"].div(
    gdp_per_capitas_1960["gdp_per_capita"]
)
gdp_and_pop_by_entity

Unnamed: 0_level_0,Code_x,Year,GDP (constant 2010 US$),gdp,Code_y,population,gdp_per_capita,gdp_per_capita_ratio
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Afghanistan,AFG,2002,8.013233e+09,8.013233,AFG,22601000.0,354.552149,
Afghanistan,AFG,2003,8.689884e+09,8.689884,AFG,23681000.0,366.955940,
Afghanistan,AFG,2004,8.781610e+09,8.781610,AFG,24727000.0,355.142564,
Afghanistan,AFG,2005,9.762979e+09,9.762979,AFG,25654000.0,380.563610,
Afghanistan,AFG,2006,1.030523e+10,10.305228,AFG,26433000.0,389.862222,
...,...,...,...,...,...,...,...,...
Zimbabwe,ZWE,2013,1.418193e+10,14.181927,ZWE,13350000.0,1062.316603,1.192968
Zimbabwe,ZWE,2014,1.448359e+10,14.483588,ZWE,13587000.0,1065.988675,1.197091
Zimbabwe,ZWE,2015,1.472830e+10,14.728302,ZWE,13815000.0,1066.109450,1.197227
Zimbabwe,ZWE,2016,1.481899e+10,14.818986,ZWE,14030000.0,1056.235654,1.186139


In [47]:
# Turn the Entity index back into an ordinary column (the old index is kept as
# data, not dropped).
gdp_and_pop_by_entity_2 = gdp_and_pop_by_entity.reset_index(drop=False)

In [48]:
# Keep only the rows that have a growth ratio to plot. A bare dropna() would also
# discard rows whose only missing value is in an unrelated column (for example a
# country code), removing data the ratio chart could still show.
gdp_and_pop_by_entity_3 = gdp_and_pop_by_entity_2.dropna(subset=["gdp_per_capita_ratio"])


In [50]:
# One line per entity: GDP per capita relative to that entity's 1960 value, by year.
px.line(
    data_frame=gdp_and_pop_by_entity_3,
    x="Year",
    y="gdp_per_capita_ratio",
    color="Entity",
)