In [None]:
#!pip install wbdata

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import wbdata


In [3]:
wbdata.search_indicators("GDP")

id                          name
--------------------------  -------------------------------------------------------------------------------------------------------------------------------------------------------------------------
5.51.01.10.gdp              Per capita GDP growth
6.0.GDP_current             GDP (current $)
6.0.GDP_growth              GDP growth (annual %)
6.0.GDP_usd                 GDP (constant 2005 $)
6.0.GDPpc_constant          GDP per capita, PPP (constant 2011 international $)
BG.GSR.NFSV.GD.ZS           Trade in services (% of GDP)
BG.KAC.FNEI.GD.PP.ZS        Gross private capital flows (% of GDP, PPP)
BG.KAC.FNEI.GD.ZS           Gross private capital flows (% of GDP)
BG.KLT.DINV.GD.PP.ZS        Gross foreign direct investment (% of GDP, PPP)
BG.KLT.DINV.GD.ZS           Gross foreign direct investment (% of GDP)
BI.WAG.TOTL.GD.ZS           Wage bill as a percentage of GDP
BM.GSR.MRCH.ZS              Merchandise imports (BOP): percentage of GDP (%)
BM.KLT.DINV.GD

In [4]:
indicators = {'Indicator Name': [
        'Gross Domestic Product (GDP)',
        'Gross National Income (GNI)',
        'Inflation rate',
        'Unemployment rate',
        'Poverty rate',
        'Life expectancy',
        'Literacy rate',
        'Access to electricity',
        'Mobile phone subscriptions',
        'Government expenditure on education',
        'Foreign direct investment (FDI)',
        'Exports of goods and services',
        'Imports of goods and services',
        'Gross capital formation',
        'Agricultural land area',
        'Agricultural production index'
    ],
    'Indicator ID': [
        'NY.GDP.MKTP.CD',
        'NY.GNP.MKTP.CD',
        'FP.CPI.TOTL.ZG',
        'SL.UEM.TOTL.ZS',
        'SI.POV.NAHC',
        'SP.DYN.LE00.IN',
        'SE.ADT.LITR.ZS',
        'EG.ELC.ACCS.ZS',
        'IT.CEL.SETS.P2',
        'SE.XPD.TOTL.GB.ZS',
        'BX.KLT.DINV.WD.GD.ZS',
        'NE.EXP.GNFS.ZS',
        'NE.IMP.GNFS.ZS',
        'NE.GDI.TOTL.ZS',
        'AG.LND.AGRI.ZS',
        'AG.PRD.FOOD.XD'
    ]
             }

In [5]:
indicators

{'Indicator Name': ['Gross Domestic Product (GDP)',
  'Gross National Income (GNI)',
  'Inflation rate',
  'Unemployment rate',
  'Poverty rate',
  'Life expectancy',
  'Literacy rate',
  'Access to electricity',
  'Mobile phone subscriptions',
  'Government expenditure on education',
  'Foreign direct investment (FDI)',
  'Exports of goods and services',
  'Imports of goods and services',
  'Gross capital formation',
  'Agricultural land area',
  'Agricultural production index'],
 'Indicator ID': ['NY.GDP.MKTP.CD',
  'NY.GNP.MKTP.CD',
  'FP.CPI.TOTL.ZG',
  'SL.UEM.TOTL.ZS',
  'SI.POV.NAHC',
  'SP.DYN.LE00.IN',
  'SE.ADT.LITR.ZS',
  'EG.ELC.ACCS.ZS',
  'IT.CEL.SETS.P2',
  'SE.XPD.TOTL.GB.ZS',
  'BX.KLT.DINV.WD.GD.ZS',
  'NE.EXP.GNFS.ZS',
  'NE.IMP.GNFS.ZS',
  'NE.GDI.TOTL.ZS',
  'AG.LND.AGRI.ZS',
  'AG.PRD.FOOD.XD']}

In [6]:
indicators = pd.DataFrame(indicators)

In [7]:
indicators

Unnamed: 0,Indicator Name,Indicator ID
0,Gross Domestic Product (GDP),NY.GDP.MKTP.CD
1,Gross National Income (GNI),NY.GNP.MKTP.CD
2,Inflation rate,FP.CPI.TOTL.ZG
3,Unemployment rate,SL.UEM.TOTL.ZS
4,Poverty rate,SI.POV.NAHC
5,Life expectancy,SP.DYN.LE00.IN
6,Literacy rate,SE.ADT.LITR.ZS
7,Access to electricity,EG.ELC.ACCS.ZS
8,Mobile phone subscriptions,IT.CEL.SETS.P2
9,Government expenditure on education,SE.XPD.TOTL.GB.ZS


In [8]:
from datetime import datetime

In [9]:
start_date ='2015-01-01'
end_date ='2020-12-31'

start_date_obj = datetime.strptime(start_date,"%Y-%m-%d")
end_date_obj = datetime.strptime(end_date,"%Y-%m-%d")

data_date = (start_date_obj, end_date_obj)

In [10]:
data_date

(datetime.datetime(2015, 1, 1, 0, 0), datetime.datetime(2020, 12, 31, 0, 0))

In [11]:
new_df = pd.DataFrame()

In [15]:
pd.DataFrame(wbdata.get_data('NY.GDP.MKTP.CD', country='all', data_date=data_date, pandas=True))

Unnamed: 0_level_0,Unnamed: 1_level_0,value
country,date,Unnamed: 2_level_1
Africa Eastern and Southern,2020,9.274845e+11
Africa Eastern and Southern,2019,1.001017e+12
Africa Eastern and Southern,2018,1.007240e+12
Africa Eastern and Southern,2017,1.021119e+12
Africa Eastern and Southern,2016,8.827213e+11
...,...,...
Zimbabwe,2019,2.183223e+10
Zimbabwe,2018,3.415607e+10
Zimbabwe,2017,1.758489e+10
Zimbabwe,2016,2.054868e+10


In [21]:
indicators[indicators['Indicator ID'] == 'NY.GDP.MKTP.CD']['Indicator Name'].values[0]

'Gross Domestic Product (GDP)'

In [22]:
df=pd.DataFrame()

In [23]:
for i in indicators['Indicator ID']:
    new_df = wbdata.get_data(indicator=i, country='all', data_date=data_date, pandas=True)
    new_df = pd.DataFrame(new_df).reset_index()
    new_df.columns = ['country','year',indicators[indicators['Indicator ID'] == i]['Indicator Name'].values[0]]
    df = pd.concat([df,new_df],axis=1)

In [27]:
df.head()

Unnamed: 0,country,year,Gross Domestic Product (GDP),country.1,year.1,Gross National Income (GNI),country.2,year.2,Inflation rate,country.3,...,Imports of goods and services,country.4,year.3,Gross capital formation,country.5,year.4,Agricultural land area,country.6,year.5,Agricultural production index
0,Africa Eastern and Southern,2020,927484500000.0,Africa Eastern and Southern,2020,905026800000.0,Africa Eastern and Southern,2020,6.362961,Africa Eastern and Southern,...,23.672022,Africa Eastern and Southern,2020,20.97849,Africa Eastern and Southern,2020,44.248721,Africa Eastern and Southern,2020,
1,Africa Eastern and Southern,2019,1001017000000.0,Africa Eastern and Southern,2019,971895600000.0,Africa Eastern and Southern,2019,4.120246,Africa Eastern and Southern,...,26.459442,Africa Eastern and Southern,2019,22.520138,Africa Eastern and Southern,2019,44.194846,Africa Eastern and Southern,2019,
2,Africa Eastern and Southern,2018,1007240000000.0,Africa Eastern and Southern,2018,976376100000.0,Africa Eastern and Southern,2018,4.720811,Africa Eastern and Southern,...,28.257503,Africa Eastern and Southern,2018,21.861204,Africa Eastern and Southern,2018,44.168345,Africa Eastern and Southern,2018,
3,Africa Eastern and Southern,2017,1021119000000.0,Africa Eastern and Southern,2017,989645800000.0,Africa Eastern and Southern,2017,6.399343,Africa Eastern and Southern,...,24.81549,Africa Eastern and Southern,2017,24.546074,Africa Eastern and Southern,2017,44.045088,Africa Eastern and Southern,2017,
4,Africa Eastern and Southern,2016,882721300000.0,Africa Eastern and Southern,2016,859814700000.0,Africa Eastern and Southern,2016,6.571396,Africa Eastern and Southern,...,26.196798,Africa Eastern and Southern,2016,23.806263,Africa Eastern and Southern,2016,43.901452,Africa Eastern and Southern,2016,


In [28]:
df_1 = df.copy()

In [29]:
df_1 = df_1.drop(['country','year'],axis=1)

In [30]:
df_1.head()

Unnamed: 0,Gross Domestic Product (GDP),Gross National Income (GNI),Inflation rate,Unemployment rate,Poverty rate,Life expectancy,Literacy rate,Access to electricity,Mobile phone subscriptions,Government expenditure on education,Foreign direct investment (FDI),Exports of goods and services,Imports of goods and services,Gross capital formation,Agricultural land area,Agricultural production index
0,927484500000.0,905026800000.0,6.362961,7.517253,,63.313856,71.889908,45.609604,71.060007,14.56409,1.349968,22.01363,23.672022,20.97849,44.248721,
1,1001017000000.0,971895600000.0,4.120246,6.873493,,63.755674,71.574051,44.073912,68.75224,15.35272,1.502256,23.651724,26.459442,22.520138,44.194846,
2,1007240000000.0,976376100000.0,4.720811,6.695958,,63.365858,70.42025,42.880977,72.279077,17.306705,1.345315,25.133678,28.257503,21.861204,44.168345,
3,1021119000000.0,989645800000.0,6.399343,6.687952,,62.922385,69.999451,40.092163,64.226496,17.150761,0.983252,21.57095,24.81549,24.546074,44.045088,
4,882721300000.0,859814700000.0,6.571396,6.613772,,62.444045,70.059601,38.733352,64.284955,17.198811,1.892862,21.568935,26.196798,23.806263,43.901452,


In [31]:
df.iloc[:,0:2]

Unnamed: 0,country,year
0,Africa Eastern and Southern,2020
1,Africa Eastern and Southern,2019
2,Africa Eastern and Southern,2018
3,Africa Eastern and Southern,2017
4,Africa Eastern and Southern,2016
...,...,...
1591,Zimbabwe,2019
1592,Zimbabwe,2018
1593,Zimbabwe,2017
1594,Zimbabwe,2016


In [32]:
df_1 = pd.concat([df.iloc[:,0:2],df_1],axis=1)

In [33]:
df_1.head()

Unnamed: 0,country,year,Gross Domestic Product (GDP),Gross National Income (GNI),Inflation rate,Unemployment rate,Poverty rate,Life expectancy,Literacy rate,Access to electricity,Mobile phone subscriptions,Government expenditure on education,Foreign direct investment (FDI),Exports of goods and services,Imports of goods and services,Gross capital formation,Agricultural land area,Agricultural production index
0,Africa Eastern and Southern,2020,927484500000.0,905026800000.0,6.362961,7.517253,,63.313856,71.889908,45.609604,71.060007,14.56409,1.349968,22.01363,23.672022,20.97849,44.248721,
1,Africa Eastern and Southern,2019,1001017000000.0,971895600000.0,4.120246,6.873493,,63.755674,71.574051,44.073912,68.75224,15.35272,1.502256,23.651724,26.459442,22.520138,44.194846,
2,Africa Eastern and Southern,2018,1007240000000.0,976376100000.0,4.720811,6.695958,,63.365858,70.42025,42.880977,72.279077,17.306705,1.345315,25.133678,28.257503,21.861204,44.168345,
3,Africa Eastern and Southern,2017,1021119000000.0,989645800000.0,6.399343,6.687952,,62.922385,69.999451,40.092163,64.226496,17.150761,0.983252,21.57095,24.81549,24.546074,44.045088,
4,Africa Eastern and Southern,2016,882721300000.0,859814700000.0,6.571396,6.613772,,62.444045,70.059601,38.733352,64.284955,17.198811,1.892862,21.568935,26.196798,23.806263,43.901452,


In [34]:
countries=['Argentina', 'Australia','Brazil', 'Canada', 'China', 'France', 'Germany', 'India', 'Indonesia', 'Italy', 'Japan', 'Republic of Korea', 'Mexico', 'Russia', 'Saudi Arabia', 'South Africa', 'Turkiye', 'United Kingdom', 'United States']

In [35]:
dff = df_1[df_1['country'].isin(countries)]

In [36]:
dff.head()

Unnamed: 0,country,year,Gross Domestic Product (GDP),Gross National Income (GNI),Inflation rate,Unemployment rate,Poverty rate,Life expectancy,Literacy rate,Access to electricity,Mobile phone subscriptions,Government expenditure on education,Foreign direct investment (FDI),Exports of goods and services,Imports of goods and services,Gross capital formation,Agricultural land area,Agricultural production index
336,Argentina,2020,385540200000.0,375535800000.0,,11.46,42.0,75.892,,100.0,121.600189,11.9169,1.224943,16.610872,13.608927,14.130595,39.603229,109.82
337,Argentina,2019,447754600000.0,430166700000.0,,9.84,35.5,77.284,,100.0,125.940981,12.53007,1.485007,17.924878,14.705737,14.21003,39.603229,112.94
338,Argentina,2018,524819700000.0,506093900000.0,,9.22,32.0,76.999,99.003868,99.989578,131.937169,12.52334,2.232532,14.436686,16.32585,16.613885,39.603229,97.41
339,Argentina,2017,643628700000.0,627200700000.0,,8.35,25.7,76.833,,100.0,140.501461,13.26392,1.789364,11.320283,13.969318,18.212567,40.647424,106.07
340,Argentina,2016,557531400000.0,545250700000.0,,8.111,30.3,76.308,99.125008,99.849579,145.917957,13.35554,0.58475,12.527095,13.566793,17.663228,41.716051,100.94


In [37]:
dff.shape

(102, 18)

In [39]:
dff.reset_index(drop=True, inplace=True)

In [40]:
dff

Unnamed: 0,country,year,Gross Domestic Product (GDP),Gross National Income (GNI),Inflation rate,Unemployment rate,Poverty rate,Life expectancy,Literacy rate,Access to electricity,Mobile phone subscriptions,Government expenditure on education,Foreign direct investment (FDI),Exports of goods and services,Imports of goods and services,Gross capital formation,Agricultural land area,Agricultural production index
0,Argentina,2020,3.855402e+11,3.755358e+11,,11.460,42.0,75.892000,,100.000000,121.600189,11.91690,1.224943,16.610872,13.608927,14.130595,39.603229,109.82
1,Argentina,2019,4.477546e+11,4.301667e+11,,9.840,35.5,77.284000,,100.000000,125.940981,12.53007,1.485007,17.924878,14.705737,14.210030,39.603229,112.94
2,Argentina,2018,5.248197e+11,5.060939e+11,,9.220,32.0,76.999000,99.003868,99.989578,131.937169,12.52334,2.232532,14.436686,16.325850,16.613885,39.603229,97.41
3,Argentina,2017,6.436287e+11,6.272007e+11,,8.350,25.7,76.833000,,100.000000,140.501461,13.26392,1.789364,11.320283,13.969318,18.212567,40.647424,106.07
4,Argentina,2016,5.575314e+11,5.452507e+11,,8.111,30.3,76.308000,99.125008,99.849579,145.917957,13.35554,0.584750,12.527095,13.566793,17.663228,41.716051,100.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,United States,2019,2.138098e+13,2.176454e+13,1.812210,3.670,,78.787805,,100.000000,106.414019,13.38055,1.472075,11.872470,14.579479,21.319232,44.363367,100.09
98,United States,2018,2.053306e+13,2.093736e+13,2.442583,3.900,,78.639024,,100.000000,104.847944,13.14618,1.045704,12.360989,15.249390,21.205917,44.363367,103.35
99,United States,2017,1.947734e+13,1.988555e+13,2.130110,4.360,,78.539024,,100.000000,103.129789,13.86467,1.955211,12.261738,15.017423,20.813431,44.363367,101.95
100,United States,2016,1.869511e+13,1.902048e+13,1.261583,4.870,,78.539024,,100.000000,103.370250,13.15421,2.537498,11.939539,14.647461,20.566778,44.303705,104.85
