# Retrieval of Macroeconomic Predictors from the World Bank and Yahoo Finance

In [18]:
import wbgapi as wb
import pandas as pd
import yfinance as yf

In [19]:
# Search the World Bank series catalogue by keyword to look up the id of the
# GDP growth indicator.
search_term = 'gdp growth'
indicators = wb.series.info(q=search_term)
indicators

id,value
NY.GDP.MKTP.KD.ZG,GDP growth (annual %)
,1 elements


In [20]:

# GDP growth rate (World Bank series NY.GDP.MKTP.KD.ZG) for 2006-2020.
# The download is a wide table with one column per year; melt it into a long
# economy/year panel.
gdp = wb.data.DataFrame('NY.GDP.MKTP.KD.ZG', time=range(2006, 2021), labels=True).reset_index()
gdp = gdp.melt(id_vars=['economy', 'Country'], var_name='Year', value_name='GDP_growth_rate')
# The melted year labels carry a two-character prefix: strip it and store the
# year as an integer. Overwriting the column via assign keeps it in the same
# position, replacing the former insert-temp / drop / rename / astype sequence.
gdp = gdp.assign(Year=gdp['Year'].str[2:].astype('int64'))
gdp = gdp.sort_values(['economy', 'Year'])
# Rows without a value are removed, so an economy's series is not guaranteed
# to contain every year afterwards.
gdp = gdp.dropna()
print(gdp.shape)
# DataFrame.info() writes its report itself and returns None; wrapping it in
# print() appended a stray "None" line to the cell output.
gdp.info()

(3829, 4)
<class 'pandas.core.frame.DataFrame'>
Index: 3829 entries, 207 to 3724
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   economy          3829 non-null   object 
 1   Country          3829 non-null   object 
 2   Year             3829 non-null   int64  
 3   GDP_growth_rate  3829 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 149.6+ KB
None


In [21]:
# Preview the first five rows of the long-format GDP panel
gdp.head(n=5)

Unnamed: 0,economy,Country,Year,GDP_growth_rate
207,ABW,Aruba,2006,1.127412
473,ABW,Aruba,2007,3.089544
739,ABW,Aruba,2008,1.835756
1005,ABW,Aruba,2009,-11.677742
1271,ABW,Aruba,2010,-2.733456


In [22]:
# create gdp growth rate lag variable
# Rows with missing values were dropped earlier, so an economy's series may skip
# years. A plain shift(1) would then pair a year with a value from two or more
# years before. Shift Year alongside the value and keep the lag only when the
# previous row really is the preceding calendar year; otherwise leave it NaN.
# Relies on gdp already being sorted by ['economy', 'Year'].
previous_row = gdp.groupby('economy')[['Year', 'GDP_growth_rate']].shift(1)
is_consecutive_year = previous_row['Year'].eq(gdp['Year'] - 1)
gdp['GDP_growth_rate_lag'] = previous_row['GDP_growth_rate'].where(is_consecutive_year)

gdp.head(18)

Unnamed: 0,economy,Country,Year,GDP_growth_rate,GDP_growth_rate_lag
207,ABW,Aruba,2006,1.127412,
473,ABW,Aruba,2007,3.089544,1.127412
739,ABW,Aruba,2008,1.835756,3.089544
1005,ABW,Aruba,2009,-11.677742,1.835756
1271,ABW,Aruba,2010,-2.733456,-11.677742
1537,ABW,Aruba,2011,3.369238,-2.733456
1803,ABW,Aruba,2012,-1.0408,3.369238
2069,ABW,Aruba,2013,6.431482,-1.0408
2335,ABW,Aruba,2014,-1.586575,6.431482
2601,ABW,Aruba,2015,-0.623626,-1.586575


In [10]:
# One-time environment setup, kept commented out so that Run All does not
# reinstall the package. Uncomment only if yfinance is missing; %pip installs
# into the running kernel's environment, and the pin matches the version shown
# in this cell's install log.
# %pip install yfinance==0.2.40

Collecting yfinance
  Downloading yfinance-0.2.40-py2.py3-none-any.whl.metadata (11 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting lxml>=4.9.1 (from yfinance)
  Downloading lxml-5.2.2-cp39-cp39-win_amd64.whl.metadata (3.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.4-cp39-cp39-win_amd64.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.5.tar.gz (3.0 MB)
     ---------------------------------------- 0.0/3.0 MB ? eta -:--:--
     - -------------------------------------- 0.1/3.0 MB 2.6 MB/s eta 0:00:02
     --- ------------------------------------ 0.2/3.0 MB 2.8 MB/s eta 0:00:01
     ---- ----------------------------------- 0.3/3.0 MB 2.5 MB/s eta 0:00:02
     ----- ---------------------------------- 0.4/3.0 MB 2.1 MB/s eta 0:00:02
     ------ --------------------------------- 0.5/3.0 MB 2.2 MB/s eta 0:00:02
     ------- -------------



In [11]:
# Download daily ^VIX quotes from Yahoo Finance.
# yfinance treats `start` as inclusive and `end` as exclusive.
vix = yf.download(tickers='^VIX', start='2006-12-01', end='2020-12-01')

# Collapse to one row per calendar month, keeping the last available
# observation of each month.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the installed pandas version before changing it.
vix_monthly = vix.resample(rule='M').last()

# Show the first rows of the monthly table
print(vix_monthly.head(5))

[*********************100%%**********************]  1 of 1 completed

                 Open       High    Low  Close  Adj Close  Volume
Date                                                             
2007-01-31  11.090000  11.260000  10.27  10.42      10.42       0
2007-02-28  17.209999  17.290001  14.50  15.42      15.42       0
2007-03-31  14.940000  15.820000  14.14  14.64      14.64       0
2007-04-30  12.900000  14.310000  12.78  14.22      14.22       0
2007-05-31  12.780000  13.180000  12.62  13.05      13.05       0





In [12]:
# Turn the date index into a regular 'Date' column, keep only the closing
# value, and expose it under the name 'VIX'.
vix_monthly = (
    vix_monthly
    .reset_index()
    .loc[:, ['Date', 'Close']]
    .rename(columns={'Close': 'VIX'})
)

vix_monthly.head(5)

Unnamed: 0,Date,VIX
0,2007-01-31,10.42
1,2007-02-28,15.42
2,2007-03-31,14.64
3,2007-04-30,14.22
4,2007-05-31,13.05


In [13]:
# Snap every date to the first day of its month: convert to a monthly period,
# then back to a timestamp.
month_periods = vix_monthly['Date'].dt.to_period('M')
vix_monthly['Date'] = month_periods.dt.to_timestamp()

vix_monthly.head(5)

Unnamed: 0,Date,VIX
0,2007-01-01,10.42
1,2007-02-01,15.42
2,2007-03-01,14.64
3,2007-04-01,14.22
4,2007-05-01,13.05


In [14]:
# One-month lag: each row receives the VIX value of the preceding monthly row
# (the first row has no predecessor and stays NaN).
previous_month_vix = vix_monthly['VIX'].shift(periods=1)
vix_monthly['VIX_lag'] = previous_month_vix

vix_monthly.head(5)

Unnamed: 0,Date,VIX,VIX_lag
0,2007-01-01,10.42,
1,2007-02-01,15.42,10.42
2,2007-03-01,14.64,15.42
3,2007-04-01,14.22,14.64
4,2007-05-01,13.05,14.22
