### Indexing and Multi-Indexing

In [43]:
import pandas as pd
import numpy as np
import plotly.express as px
pd.options.display.max_rows = 10

# read the gdp data from gdp.csv
df = pd.read_csv("gdp.csv")

# create a newdgp column which is in billions
df["gdp"] = df["GDP (constant 2010 US$)"] / 1_000_000_000

In [3]:
# What fraction of world GDP did each country each year?
df

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
0,Afghanistan,AFG,2002,8.013233e+09,8.013233
1,Afghanistan,AFG,2003,8.689884e+09,8.689884
2,Afghanistan,AFG,2004,8.781610e+09,8.781610
3,Afghanistan,AFG,2005,9.762979e+09,9.762979
4,Afghanistan,AFG,2006,1.030523e+10,10.305228
...,...,...,...,...,...
8864,Zimbabwe,ZWE,2013,1.418193e+10,14.181927
8865,Zimbabwe,ZWE,2014,1.448359e+10,14.483588
8866,Zimbabwe,ZWE,2015,1.472830e+10,14.728302
8867,Zimbabwe,ZWE,2016,1.481899e+10,14.818986


In [5]:
total_gdp = df.groupby("Year").agg(sum)
total_gdp

  total_gdp = df.groupby("Year").agg(sum)


Unnamed: 0_level_0,GDP (constant 2010 US$),gdp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1960,9.025835e+12,9025.834854
1961,9.434022e+12,9434.021757
1962,9.959005e+12,9959.004555
1963,1.047782e+13,10477.823331
1964,1.117623e+13,11176.230223
...,...,...
2013,7.095199e+13,70951.987117
2014,7.296737e+13,72967.372451
2015,7.462795e+13,74627.950281
2016,7.641749e+13,76417.485962


In [6]:
# Observe... this does not work!
df/total_gdp

# This happends because df and total_gdp dont have the same index

Unnamed: 0,Code,Entity,GDP (constant 2010 US$),Year,gdp
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
...,...,...,...,...,...
8864,,,,,
8865,,,,,
8866,,,,,
8867,,,,,


In [30]:
# get only chinese dgp, indexed by year
chinese_gdp_indexed_by_year = df.query("Entity == 'China'").set_index("Year")


In [31]:
chinese_gdp_indexed_by_year

Unnamed: 0_level_0,Entity,Code,GDP (constant 2010 US$),gdp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1960,China,CHN,1.279381e+11,127.938142
1961,China,CHN,9.304941e+10,93.049411
1962,China,CHN,8.785725e+10,87.857254
1963,China,CHN,9.690655e+10,96.906551
1964,China,CHN,1.145242e+11,114.524162
...,...,...,...,...
2013,China,CHN,7.766513e+12,7766.512756
2014,China,CHN,8.333287e+12,8333.286913
2015,China,CHN,8.908301e+12,8908.300778
2016,China,CHN,9.505157e+12,9505.156931


In [32]:
chinese_gdp_indexed_by_year/total_gdp

Unnamed: 0_level_0,Code,Entity,GDP (constant 2010 US$),gdp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1960,,,0.014175,0.014175
1961,,,0.009863,0.009863
1962,,,0.008822,0.008822
1963,,,0.009249,0.009249
1964,,,0.010247,0.010247
...,...,...,...,...
2013,,,0.109462,0.109462
2014,,,0.114206,0.114206
2015,,,0.119369,0.119369
2016,,,0.124385,0.124385


In [33]:
# plot only the ratio above, showing only gdp column
px.line((chinese_gdp_indexed_by_year/total_gdp)["gdp"])

In [35]:
# use set_index to change the index of df
df = df.set_index("Year")

In [36]:
df

Unnamed: 0_level_0,Entity,Code,GDP (constant 2010 US$),gdp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2002,Afghanistan,AFG,8.013233e+09,8.013233
2003,Afghanistan,AFG,8.689884e+09,8.689884
2004,Afghanistan,AFG,8.781610e+09,8.781610
2005,Afghanistan,AFG,9.762979e+09,9.762979
2006,Afghanistan,AFG,1.030523e+10,10.305228
...,...,...,...,...
2013,Zimbabwe,ZWE,1.418193e+10,14.181927
2014,Zimbabwe,ZWE,1.448359e+10,14.483588
2015,Zimbabwe,ZWE,1.472830e+10,14.728302
2016,Zimbabwe,ZWE,1.481899e+10,14.818986


In [44]:
# use set_index to change the index of df
df.set_index("Year")

Unnamed: 0_level_0,Entity,Code,GDP (constant 2010 US$),gdp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2002,Afghanistan,AFG,8.013233e+09,8.013233
2003,Afghanistan,AFG,8.689884e+09,8.689884
2004,Afghanistan,AFG,8.781610e+09,8.781610
2005,Afghanistan,AFG,9.762979e+09,9.762979
2006,Afghanistan,AFG,1.030523e+10,10.305228
...,...,...,...,...
2013,Zimbabwe,ZWE,1.418193e+10,14.181927
2014,Zimbabwe,ZWE,1.448359e+10,14.483588
2015,Zimbabwe,ZWE,1.472830e+10,14.728302
2016,Zimbabwe,ZWE,1.481899e+10,14.818986


In [None]:
# compute the ratio of the df indexed by year by the total_gdp (also indexed by year)
# observe that the names are getting divided by the total_gdp values...

In [64]:
# show multi_intexing by Year and Entity
df_indexed_by_year_and_entity = df.set_index(["Year","Entity"])
df_indexed_by_year_and_entity

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDP (constant 2010 US$),gdp
Year,Entity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2002,Afghanistan,AFG,8.013233e+09,8.013233
2003,Afghanistan,AFG,8.689884e+09,8.689884
2004,Afghanistan,AFG,8.781610e+09,8.781610
2005,Afghanistan,AFG,9.762979e+09,9.762979
2006,Afghanistan,AFG,1.030523e+10,10.305228
...,...,...,...,...
2013,Zimbabwe,ZWE,1.418193e+10,14.181927
2014,Zimbabwe,ZWE,1.448359e+10,14.483588
2015,Zimbabwe,ZWE,1.472830e+10,14.728302
2016,Zimbabwe,ZWE,1.481899e+10,14.818986


In [65]:
# show results of dividing indexed by year_and_entity by total gdp
share_of_world_economy_ny_year_and_entity = (df_indexed_by_year_and_entity/total_gdp)[["gdp"]]
share_of_world_economy_ny_year_and_entity

Unnamed: 0_level_0,Unnamed: 1_level_0,gdp
Year,Entity,Unnamed: 2_level_1
2002,Afghanistan,0.000155
2003,Afghanistan,0.000163
2004,Afghanistan,0.000158
2005,Afghanistan,0.000169
2006,Afghanistan,0.000171
...,...,...
2013,Zimbabwe,0.000200
2014,Zimbabwe,0.000198
2015,Zimbabwe,0.000197
2016,Zimbabwe,0.000194


In [53]:
# reset the index
share_of_world_economy = share_of_world_economy_ny_year_and_entity.reset_index()
share_of_world_economy

Unnamed: 0,Year,Entity,gdp
0,2002,Afghanistan,0.000155
1,2003,Afghanistan,0.000163
2,2004,Afghanistan,0.000158
3,2005,Afghanistan,0.000169
4,2006,Afghanistan,0.000171
...,...,...,...
8864,2013,Zimbabwe,0.000200
8865,2014,Zimbabwe,0.000198
8866,2015,Zimbabwe,0.000197
8867,2016,Zimbabwe,0.000194


In [55]:
px.line(share_of_world_economy, x="Year", y="gdp", color = "Entity")

In [57]:
list_of_countries = ["China", "United States", "India",
                     "Japan", "Germany", "United Kingdom"]
top6_share = share_of_world_economy.query("Entity in @list_of_countries")
top6_share

Unnamed: 0,Year,Entity,gdp
1726,1960,China,0.014175
1727,1961,China,0.009863
1728,1962,China,0.008822
1729,1963,China,0.009249
1730,1964,China,0.010247
...,...,...,...
8490,2013,United States,0.222726
8491,2014,United States,0.222138
8492,2015,United States,0.223411
8493,2016,United States,0.221420


In [58]:
px.line(top6_share, x="Year", y="gdp", color = "Entity")