### Reading in Data, Basic Queries

In [1]:
import pandas as pd
# Cap DataFrame displays at 10 rows so large frames render as a truncated preview
pd.set_option("display.max_rows", 10)

In [2]:
# Load the GDP dataset from gdp.csv into a DataFrame
gdp_csv_path = "gdp.csv"
df = pd.read_csv(gdp_csv_path)

In [5]:
# Peek at 5 randomly chosen rows of the dataframe
# (df.head() / df.tail() would show the first few / last few rows instead)
df.sample(n=5)

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$)
5559,Myanmar,MMR,2001,17798050000.0
8727,Yemen,YEM,1991,12464470000.0
6081,Oman,OMN,1982,14935260000.0
7485,South Korea,KOR,2008,1020510000000.0
2949,Gabon,GAB,1989,10063450000.0


In [18]:
# Create a new "gdp" column: GDP (constant 2010 US$) expressed in billions
BILLION = 1_000_000_000
df["gdp"] = df["GDP (constant 2010 US$)"] / BILLION

In [8]:
# Display the dataframe to check the newly added billions column; the output is
# truncated because display.max_rows was set to 10 in the first cell
df

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
0,Afghanistan,AFG,2002,8.013233e+09,8.013233
1,Afghanistan,AFG,2003,8.689884e+09,8.689884
2,Afghanistan,AFG,2004,8.781610e+09,8.781610
3,Afghanistan,AFG,2005,9.762979e+09,9.762979
4,Afghanistan,AFG,2006,1.030523e+10,10.305228
...,...,...,...,...,...
8864,Zimbabwe,ZWE,2013,1.418193e+10,14.181927
8865,Zimbabwe,ZWE,2014,1.448359e+10,14.483588
8866,Zimbabwe,ZWE,2015,1.472830e+10,14.728302
8867,Zimbabwe,ZWE,2016,1.481899e+10,14.818986


In [9]:
# Show only China's rows by filtering with a boolean mask on Entity
is_china = df["Entity"] == "China"
df[is_china]

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
1726,China,CHN,1960,1.279381e+11,127.938142
1727,China,CHN,1961,9.304941e+10,93.049411
1728,China,CHN,1962,8.785725e+10,87.857254
1729,China,CHN,1963,9.690655e+10,96.906551
1730,China,CHN,1964,1.145242e+11,114.524162
...,...,...,...,...,...
1779,China,CHN,2013,7.766513e+12,7766.512756
1780,China,CHN,2014,8.333287e+12,8333.286913
1781,China,CHN,2015,8.908301e+12,8908.300778
1782,China,CHN,2016,9.505157e+12,9505.156931


In [10]:
# GDP of China in 2017, combining two boolean masks with &
is_china = df["Entity"] == "China"
is_2017 = df["Year"] == 2017
df[is_china & is_2017]

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
1783,China,CHN,2017,10161010000000.0,10161.012759


In [12]:
# Same lookup (China, 2017) written as a query expression instead of boolean masks
china_2017_expr = 'Entity == "China" and Year == 2017'
df.query(china_2017_expr)

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
1783,China,CHN,2017,10161010000000.0,10161.012759


In [16]:
# Keep only the rows whose Entity appears in a list of countries, using isin,
# then preview 5 random rows of the result
list_of_countries = [
    "China",
    "United States",
    "India",
    "Japan",
    "Germany",
    "United Kingdom",
]
in_list = df["Entity"].isin(list_of_countries)
top6 = df[in_list]
top6.sample(n=5)

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
3736,India,IND,1964,166279000000.0,166.279034
1734,China,CHN,1968,133923400000.0,133.92343
8396,United Kingdom,GBR,1977,1162624000000.0,1162.624076
4216,Japan,JPN,1972,2214794000000.0,2214.793986
4204,Japan,JPN,1960,796213200000.0,796.213204


In [17]:
# Same country filter, this time via .query(); the @ prefix makes the expression
# refer to the Python variable list_of_countries rather than a column
countries_expr = 'Entity in @list_of_countries'
top6 = df.query(countries_expr)
top6.sample(n=5)

Unnamed: 0,Entity,Code,Year,GDP (constant 2010 US$),gdp
8392,United Kingdom,GBR,1973,1147632000000.0,1147.632113
8492,United States,USA,2015,16672690000000.0,16672.691918
8407,United Kingdom,GBR,1988,1589728000000.0,1589.728156
1765,China,CHN,1999,2061987000000.0,2061.986772
3104,Germany,DEU,1991,2699846000000.0,2699.846419
