In [1]:
import pandas as pd
import datetime as dt
import numpy as np
import statsmodels.tsa.filters.hp_filter as hpfilter
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

In [2]:
# Sample window (both ends inclusive) and the number of annual observations it spans.
start_year, end_year = 1970, 2023
year_obs = (end_year - start_year) + 1

In [3]:
# Read the source workbook: every column is parsed as float64, '.' is treated as
# a missing value, and the last three rows of the sheet are discarded.
# NAM and YRS are then cast to int64.
dfWolf = (
    pd.read_excel("../CBRDATA/LDTEST1.XLS", dtype=np.float64, na_values='.')[:-3]
    .astype({'NAM': 'int64', 'YRS': 'int64'})
)

# Distinct NAM codes present in the data (a set, despite the name).
wolf_country_list = set(dfWolf['NAM'])
print(len(wolf_country_list))
dfWolf

167


Unnamed: 0,NAM,YRS,NGDP,NGDP_R,NGDPD,PPPPC,BCA,GCB,GGB,NX,...,Unnamed: 246,Unnamed: 247,Unnamed: 248,Unnamed: 249,Unnamed: 250,Unnamed: 251,Unnamed: 252,Unnamed: 253,Unnamed: 254,Unnamed: 255
0,111,1970,1040.0,3578.0,1040.00,4968.0,2.329,-13.23,-11.400,56.98,...,0.0,,,0.0,0.0,0.0,,0.0,,0.0
1,111,1971,1129.0,3698.0,1129.00,5325.0,-1.435,-21.73,-19.250,59.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,111,1972,1240.0,3898.0,1240.00,5791.0,-5.797,-17.25,-3.825,66.23,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,111,1973,1386.0,4123.0,1386.00,6409.0,7.138,-6.50,6.950,91.75,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,111,1974,1501.0,4099.0,1501.00,6879.0,1.958,-11.55,-4.450,124.30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5506,968,1998,373800.0,371300.0,42.09,6138.0,-2.986,-11420.00,-20190.000,87720.00,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
5507,968,1999,545700.0,366800.0,35.73,6094.0,-1.429,-18780.00,-19650.000,158500.00,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
5508,968,2000,800300.0,373400.0,36.90,6339.0,-1.439,-15580.00,-31860.000,272600.00,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
5509,968,2001,1154000.0,393200.0,39.64,6811.0,-2.339,-20920.00,-38640.000,418300.00,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Per-capita variants: each source column is divided by LP (presumably
# population -- confirm against the data dictionary) and the result is placed in
# the column immediately to the right of its source.
per_capita_series = {'NGDP': 'NGDPPC', 'NGDPD': 'NGDPDPC', 'NGDP_R': 'NGDPRPC'}

for source_col, target_col in per_capita_series.items():
    position = dfWolf.columns.get_loc(source_col) + 1
    dfWolf.insert(position, target_col, dfWolf[source_col] / dfWolf['LP'])

In [5]:
# Natural logarithm of NGDP_R, stored in the column right after its source.
natural_log_series = {'NGDP_R': 'NGDP_R_LN'}

for source_col, log_col in natural_log_series.items():
    position = dfWolf.columns.get_loc(source_col) + 1
    dfWolf.insert(position, log_col, np.log(dfWolf[source_col]))

In [6]:
# Deflated ("_R") variants: each source column is divided by PCPI and the result
# is placed in the column immediately to the right of its source.
real_series = {series: series + '_R' for series in ['FMB', 'NX', 'NM']}

# The loop variable must not be named `real_series`: that would rebind the
# mapping above to a plain string once the loop finishes, so the dict could no
# longer be inspected or reused (unlike the mappings in the neighbouring cells).
for (orig_series, deflated_series) in real_series.items():
    dfWolf.insert(dfWolf.columns.get_loc(orig_series) + 1, deflated_series, dfWolf[orig_series] / dfWolf['PCPI'])

In [7]:
# GAP_US: each country's start-year PPPPC expressed as a percentage of the
# start-year PPPPC of country code 111 (the benchmark; presumably the US given
# the column name -- confirm). The value is constant across a country's rows.
#
# A single start-year lookup table replaces the per-country scan of the frame
# (the benchmark value no longer gets recomputed on every iteration), and a
# country without a start-year row now receives NaN instead of raising
# IndexError.
start_year_pppc = (
    dfWolf.loc[dfWolf['YRS'] == start_year]
    .drop_duplicates('NAM')  # keep the first start-year row per country
    .set_index('NAM')['PPPPC']
)
benchmark_pppc = start_year_pppc.get(111, np.nan)
dfWolf['GAP_US'] = dfWolf['NAM'].map(start_year_pppc) / benchmark_pppc * 100
   

In [8]:
# Year-on-year percentage change ("PCH") of each listed series, inserted in the
# column immediately to the right of its source.
# ('NGDPRPC' used to be listed twice; the dict collapsed the duplicate anyway.)
growth_sources = [
    'NGDP_R', 'NGDPRPC', 'NGDP', 'NGDPPC', 'NGDPD', 'NGDPDPC', 'PCPI',
    'TX_R', 'TXG_R', 'TM_R', 'TMG_R', 'FMB', 'FMB_R', 'LP', 'TT', 'NX_R', 'NM_R',
]
growth_pct_series = {series: series + 'PCH' for series in growth_sources}

for (orig_series, growth_series) in growth_pct_series.items():
    # Lag within each country so a country's first row is never compared with
    # the previous country's last row, even if its data does not begin in
    # start_year.
    previous = dfWolf.groupby('NAM')[orig_series].shift(1)
    # Explicit ratio instead of pct_change(): a missing current or previous
    # value yields NaN directly, with no reliance on pct_change's padding
    # default (deprecated in recent pandas) followed by manual masking.
    growth = (dfWolf[orig_series] / previous - 1) * 100
    # Kept from the original: the start-year observation is always blanked.
    growth = growth.mask(dfWolf['YRS'] == start_year)
    dfWolf.insert(dfWolf.columns.get_loc(orig_series) + 1, growth_series, growth)

In [9]:
# Ex-Post Real Interest Rate: IRATE minus the *following* year's PCPI
# percentage change (PCPIPCH).
# The one-year lead is taken within each country, so a country's final row is
# NaN rather than borrowing the first PCPIPCH of the next country in the frame.
# The previous global shift(-1) was only protected by the end_year mask below,
# which does nothing for a country whose last observation is not end_year.
next_year_inflation = dfWolf.groupby('NAM')['PCPIPCH'].shift(-1)
dfWolf.insert(dfWolf.columns.get_loc('IRATE') + 1, 'RRATE', dfWolf['IRATE'] - next_year_inflation)
dfWolf.loc[dfWolf['YRS'] == end_year, 'RRATE'] = np.nan

In [10]:
# Rescaled series: a percentage change x is mapped to 100 * x / (100 + x) and
# stored in the column right after its source.
rescale_series = {'PCPIPCH': 'PCPIPCH_X', 'FMBPCH': 'FMBPCH_X'}

for source_col, scaled_col in rescale_series.items():
    pct = dfWolf[source_col]
    scaled = 100 * pct / (100 + pct)
    dfWolf.insert(dfWolf.columns.get_loc(source_col) + 1, scaled_col, scaled)

In [11]:
# HP-filter trends, estimated separately for every country code in NAM with
# smoothing parameter lamb=100. Each trend column is created (as NaN) right
# after its source column and then filled country by country.
hp_filter_series = {'NGDP_R': 'NGDP_R_HP', 'NGDP_R_LN': 'NGDP_R_LN_HP'}

for source_col, trend_col in hp_filter_series.items():
    dfWolf.insert(dfWolf.columns.get_loc(source_col) + 1, trend_col, np.nan)
    for country in set(dfWolf['NAM']):
        in_country = dfWolf['NAM'] == country
        # hpfilter returns a (cycle, trend) pair; only the trend is stored.
        trend = hpfilter.hpfilter(dfWolf.loc[in_country, source_col], lamb=100)[1]
        dfWolf.loc[in_country, trend_col] = trend

# Gap columns are defined here as HP trend minus the actual series.
hp_gap_series = {'NGDP_R_LN': 'NGDP_R_LN_GAP'}
for source_col, gap_col in hp_gap_series.items():
    gap = dfWolf[source_col + '_HP'] - dfWolf[source_col]
    dfWolf.insert(dfWolf.columns.get_loc(source_col) + 1, gap_col, gap)


In [12]:
# Persist the augmented panel (index included) next to the notebook.
dfWolf.to_excel(excel_writer='Wolf-base.xlsx')