In [7]:
# Inputs are read from, and results written to, the same generated-data folder.
resource = results = "../../data/generated/"

In [8]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS, compare
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from gmm import *
from linearmodels.asset_pricing import LinearFactorModelGMM
from tabprintin.beautify import *
from statsmodels.sandbox.regression.gmm import GMM
from numpy import hstack, zeros, ones, array, mat, tile, reshape, squeeze, eye, asmatrix, column_stack, roll

In [9]:
# Analysis window; both end points are used inclusively by the sample filters.
# (An alternative start of 1978-01-01 was tried previously.)
start_date = pd.Timestamp('1975-01-01')
end_date = pd.Timestamp('2008-04-30')

In [10]:
ts = pd.read_csv(resource + 'time_series.csv', parse_dates=['date'], index_col=['date'])
ts.index.freq = 'M'

# Dependent variable of the mimicking regression: log growth of industrial
# production over the next 12 months, log(IP[t+12] / IP[t]).
ts['log_indprod_growth_nextyear'] = np.log(ts['ind_prod'].shift(-12) / ts['ind_prod'])

##################
## Base assets ###
##################

# Market return
# ts['ex_mkt'] = ts['ex_mkt'] /100
ts['lag_ex_mkt'] = ts['ex_mkt'].shift(1)

# Compute the excess return of the long-term government bond portfolio (bloomberg data)
# ts['ex_long_gov_ret'] = ts['long_gov_ret'] - ts['rf']
# ts['lag_ex_long_gov_ret'] = ts['ex_long_gov_ret'].shift(1)

# Compute the excess return of the 10 year treasury government bond
ts['ex_b10ret'] = ts['b10ret'] - ts['rf']
ts['lag_ex_b10ret'] = ts['ex_b10ret'].shift(1)

# Compute the excess return of the intermediate-term government bond portfolio (bloomberg data)
# ts['ex_medium_gov_ret'] = ts['medium_gov_ret'] - ts['rf']
# ts['lag_ex_medium_gov_ret'] = ts['ex_medium_gov_ret'].shift(1)

# Compute the excess return of the 5 year treasury government bond
ts['ex_b5ret'] = ts['b5ret'] - ts['rf']
ts['lag_ex_b5ret'] = ts['ex_b5ret'].shift(1)

# Compute the excess return of the high-yield bond portfolio
ts['ex_high_yd_bd_ret'] = ts['high_yd_bd_ret'] - ts['rf']
ts['lag_ex_high_yd_bd_ret'] = ts['ex_high_yd_bd_ret'].shift(1)

# Compute the excess return of the gold index (simple return of the index level minus rf)
ts['ex_gold_ret'] = ts['gold'].pct_change() - ts['rf']
ts['lag_ex_gold_ret'] = ts['ex_gold_ret'].shift(1)

# Create dummies for 1987 (stock market crash) and 1996-2002 (Internet bubble period)
ts['dummy_87'] = (ts.index.year == 1987).astype(int)
ts['dummy_96_02'] = ((ts.index.year >= 1996) & (ts.index.year <= 2002)).astype(int)

#########################
### Control Variables ###
#########################
# All controls are lagged so that they only use information dated t-1 or earlier.

# Compute the 10 year minus 3 month government bond yield
ts['lag_10y_3m_gov_bd_yd'] = (ts['DGS10'] - ts['DTB3']).shift(1)

# Compute the 1 year minus 3 month government bond yield
ts['lag_1y_3m_gov_bd_yd'] = (ts['DGS1'] - ts['DTB3']).shift(1)

# Baa minus Aaa corporate bond yield
ts['lag_Baa_Aaa_bd_yd'] = (ts['BAA'] - ts['AAA']).shift(1)

# Compute the dividend yield on the S&P 500 index
ts['lag_sp_div_yd'] = (ts['sp_div'] / ts['sp_price']).shift(1)

# Log growth of industrial production over the 12 months ending at t-1,
# log(IP[t-1] / IP[t-13]). The ratio used to be inverted (IP[t-13] / IP[t-1]),
# which is the negative of the growth rate and disagreed with the conventions of
# 'log_indprod_growth_nextyear' and 'infl_lastyear' (later date in the numerator).
# Only the sign of this regressor's coefficient changes; fitted values do not.
ts['log_indprod_growth_lastyear'] = np.log(ts['ind_prod'].shift(1) / ts['ind_prod'].shift(13))

# Compute the (log) inflation over the 12 months ending at t-1
ts['infl_lastyear'] = np.log(ts['cpi'].shift(1) / ts['cpi'].shift(13))

# Compute the market portfolio excess return over last 12 months: a 13-month
# window whose last (current) observation is dropped, i.e. the compounded
# return over t-12 .. t-1.
# [Flagged by the author: "check this, it is wrong"]
# NOTE(review): the (x + 100) / 100 conversion assumes 'ex_mkt' is quoted in
# percent - confirm against the CSV; the other excess returns are built from
# pct_change()/rf and look like decimals.
ts['ex_mkt_lastyear'] = (((ts['ex_mkt'] + 100)/100).rolling(13).apply(lambda x: x[:-1].prod()) - 1) * 100

# Interactions: let the market slope differ in 1987 and in 1996-2002
ts['slope_ex_mkt_87'] = ts['ex_mkt'] * ts['dummy_87']
ts['slope_ex_mkt_9602'] = ts['ex_mkt'] * ts['dummy_96_02']

ts['lag_slope_ex_mkt_87'] = ts['slope_ex_mkt_87'].shift(1)
ts['lag_slope_ex_mkt_9602'] = ts['slope_ex_mkt_9602'].shift(1)

In [11]:
# Set analysis period
ts1 = ts.loc[(ts.index >= start_date) & (ts.index <= end_date)]
# ts1 = ts.loc[(ts.index >= pd.to_datetime('1975-02-01')) & (ts.index <= end_date)]

# with Bloomberg bond portfolio return data
# mod1 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_long_gov_ret+ex_medium_gov_ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear',
#                 data=ts1)

# mod2 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_long_gov_ret+ex_medium_gov_ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear+ lag_ex_mkt+lag_ex_long_gov_ret+lag_ex_medium_gov_ret+lag_ex_high_yd_bd_ret+lag_ex_gold_ret+lag_slope_ex_mkt_87+lag_slope_ex_mkt_9602',
#                 data=ts1)

# mod1 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_b10ret+ex_b5ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear',
#                 data=ts1)

# mod2 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_b10ret+ex_b5ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear+ lag_ex_mkt+lag_ex_b10ret+lag_ex_b5ret+lag_ex_high_yd_bd_ret+lag_ex_gold_ret+lag_slope_ex_mkt_87+lag_slope_ex_mkt_9602',
#                 data=ts1)

mod1 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_b10ret+ex_b5ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear',
                data=ts1)

mod2 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_b10ret+ex_b5ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear+ lag_ex_mkt+lag_ex_b10ret+lag_ex_b5ret+lag_ex_gold_ret+lag_slope_ex_mkt_87+lag_slope_ex_mkt_9602',
                data=ts1)

mimick1 = mod1.fit(cov_type='HAC',cov_kwds={'maxlags':11})
mimick2 = mod2.fit(cov_type='HAC',cov_kwds={'maxlags':11})

%store mimick1
%store mimick2
mimick2.summary()

Stored 'mimick1' (RegressionResultsWrapper)
Stored 'mimick2' (RegressionResultsWrapper)


0,1,2,3
Dep. Variable:,log_indprod_growth_nextyear,R-squared:,0.42
Model:,OLS,Adj. R-squared:,0.39
Method:,Least Squares,F-statistic:,8.595
Date:,"Wed, 10 May 2023",Prob (F-statistic):,5.670000000000001e-21
Time:,17:42:50,Log-Likelihood:,817.04
No. Observations:,400,AIC:,-1592.0
Df Residuals:,379,BIC:,-1508.0
Df Model:,20,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0263,0.015,1.703,0.089,-0.004,0.057
ex_mkt,0.1306,0.086,1.515,0.130,-0.038,0.300
ex_b10ret,-0.4243,0.188,-2.262,0.024,-0.792,-0.057
ex_b5ret,0.4648,0.226,2.057,0.040,0.022,0.908
ex_gold_ret,-0.0481,0.028,-1.699,0.089,-0.104,0.007
slope_ex_mkt_87,-0.0809,0.084,-0.968,0.333,-0.245,0.083
slope_ex_mkt_9602,0.0098,0.104,0.094,0.925,-0.194,0.214
rf,-9.4570,4.197,-2.253,0.024,-17.683,-1.231
lag_10y_3m_gov_bd_yd,-0.1142,0.543,-0.210,0.833,-1.178,0.950

0,1,2,3
Omnibus:,148.221,Durbin-Watson:,0.233
Prob(Omnibus):,0.0,Jarque-Bera (JB):,737.201
Skew:,-1.521,Prob(JB):,8.29e-161
Kurtosis:,8.914,Cond. No.,1070.0


In [12]:
# class mimick(GMM):
#     def momcond(self, params):
#         x = self.exog
#         y = self.endog
#         T,K = x.shape
#         y = reshape(y,(T,1))
#         b = params
#         b = squeeze(array(params)) 
#         b = reshape(b,(K,1))
#         err = (y - x @ b)
#         moments = x * kron(err,ones((1,K)))
#         return moments

# # gmm_mod = mimick(endog=log_indprod_growth_nextyear, exog=macro_factors, instrument=None)
# # gmm_fit8 = gmm_mod.fit(start_params=zeros(23), maxiter=1, inv_weights=eye(N), weights_method='hac', wargs={'maxlag':11}, optim_method='bfgs', optim_args={'gtol': 1e-10, 'maxiter': 1000})
# # gmm_fit8.summary()

# mimick_exog = ts1[['ex_mkt',
#                    'ex_b10ret',
#                    'ex_b5ret',
#                 #    'ex_high_yd_bd_ret',
#                    'ex_gold_ret', 
#                    'slope_ex_mkt_87', 
#                    'slope_ex_mkt_9602', 
#                    'rf', 
#                    'lag_10y_3m_gov_bd_yd',
#                    'lag_1y_3m_gov_bd_yd',
#                    'lag_Baa_Aaa_bd_yd',
#                    'lag_sp_div_yd',
#                    'log_indprod_growth_lastyear',
#                    'infl_lastyear',
#                    'ex_mkt_lastyear',
#                    'lag_ex_mkt',
#                    'lag_ex_b10ret',
#                    'lag_ex_b5ret',
#                    'lag_ex_gold_ret',
#                    'lag_slope_ex_mkt_87',
#                    'lag_slope_ex_mkt_9602'
#                    ]]
# mimick_exog = sm.add_constant(mimick_exog)
# mimick_endog = ts1['log_indprod_growth_nextyear']
# T,K = mimick_exog.shape


# mimick_mod = mimick(endog=mimick_endog, exog=mimick_exog, instrument=None)
# mimick_fit = mimick_mod.fit(start_params=np.zeros(K), maxiter=1, inv_weights=np.eye(K), weights_method='hac', wargs={'maxlag':11}, optim_method='bfgs', optim_args={'gtol': 1e-12, 'maxiter': 1000})
# # gmm_fit8 = gmm_mod.fit(start_params=np.zeros(K), maxiter=1, inv_weights=np.eye(K), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 10000})
# mimick_fit.summary()

Summary statistics

In [13]:
# One-year-ahead industrial production growth expectations factor (MYP):
# the part of the second mimicking regression's fit that loads on the base
# assets only (market, 10y and 5y bonds, gold and the two market-slope dummies).
# Earlier variants also used the Bloomberg bond portfolios and, where available,
# the high-yield bond return.
coef = mimick2.params
base_assets = ['ex_mkt', 'ex_b10ret', 'ex_b5ret', 'ex_gold_ret', 'slope_ex_mkt_87', 'slope_ex_mkt_9602']
myp = coef[base_assets[0]] * ts[base_assets[0]]
for asset in base_assets[1:]:
    myp = myp + coef[asset] * ts[asset]
ts['myp'] = myp

# Unexpected inflation factor: residual of an MA(1) fitted to the change in
# monthly log inflation.
ts['infl'] = np.log(ts['cpi'] / ts['cpi'].shift(1))
ts['delta_infl'] = ts['infl'].diff()
inf_ma1 = sm.tsa.arima.ARIMA(ts['delta_infl'], order=(0,0,1)).fit()
ts['fit_delta_infl'] = inf_ma1.fittedvalues
ts['ui'] = ts['delta_infl'] - ts['fit_delta_infl']

# Change in the aggregate survival probability factor (with my dsv):
# keep 'v_dsv' where it is observed, otherwise use its OLS projection on 'my_dsv'.
# (An alternative projects 'v_dsv' on the CRI series 'cri_dsv' instead.)
mod3 = smf.ols('v_dsv ~ 1 + my_dsv', data=ts).fit()
parm = mod3.params
ts['fit_dsv'] = parm['Intercept'] + parm['my_dsv'] * ts['my_dsv']
ts['dsv'] = np.where(ts['v_dsv'].isna(), ts['fit_dsv'], ts['v_dsv'])

# Change in the average level of the term structure factor
ts['mean_term_structure'] = ts[['DTB3', 'DGS10']].mean(axis=1)
ts['ats'] = ts['mean_term_structure'].diff()

# Change in the slope of the term structure factor
ts['diff_term_structure'] = ts['DGS10'] - ts['DTB3']
ts['sts'] = ts['diff_term_structure'].diff()

# Change in the multilateral US dollar exchange rate factor:
# keep 'TWEXM' where it is observed, otherwise use its OLS projection on
# 'DTWEXAFEGS'; the factor is the first difference of the spliced level
# (a log-change version was considered as an alternative).
mod4 = smf.ols('TWEXM ~ 1 + DTWEXAFEGS', data=ts).fit()
parm = mod4.params
ts['fit_TWEXM'] = parm['Intercept'] + parm['DTWEXAFEGS'] * ts['DTWEXAFEGS']
ts['exchange_rate'] = np.where(ts['TWEXM'].isna(), ts['fit_TWEXM'], ts['TWEXM'])
ts['fx'] = ts['exchange_rate'].diff()


In [14]:
# ts['lag_hml'] = ts['hml'].shift(1)
# ts['lag_smb'] = ts['smb'].shift(1)
# ts['lag_mom'] = ts['mom'].shift(1)

# ts['lag_myp'] = ts['myp'].shift(1)
# ts['lag_ui'] = ts['ui'].shift(1)
# ts['lag_dsv'] = ts['dsv'].shift(1)
# ts['lag_ats'] = ts['ats'].shift(1)
# ts['lag_sts'] = ts['sts'].shift(1)
# ts['lag_fx'] = ts['fx'].shift(1)

In [15]:
# Persist the full frame (raw series plus every derived column built so far).
ts.to_csv(path_or_buf=results + 'time_series_1.csv')

In [16]:
# Set the start and end dates of the analysis period
# start_date = pd.to_datetime('1975-01-01') ##############
# end_date = pd.to_datetime('1999-12-31') ##############
ts2 = ts.loc[(ts.index >= start_date) & (ts.index <= end_date)]

summary = ts2[['myp','ui','dsv','ats','sts','fx']].describe().T  
%store summary

Stored 'summary' (DataFrame)


Granger causality

In [17]:
# Create VAR model with constant term
# model = VAR(ts2[['hml','smb','mom','myp','ui','dsv','ats','sts','fx']])
# var = model.fit(maxlags=1, trend='c')
# %store var
# var.summary()

In [18]:
# class var(GMM):
#     def momcond(self, params):
#         x = self.exog
#         y = self.endog
#         T,K = x.shape
#         T,N = y.shape
#         y = reshape(y,(T,N))
#         x = reshape(x,(T,K))
#         b = params
#         b = squeeze(array(params)) 
#         b = reshape(b,(K,N))
#         err = (y - x @ b)
#         moments = tile(x, (1, N)) * kron(err,ones((1,K)))
#         return moments
    
# var_exog = ts2[['lag_hml','lag_smb','lag_mom','lag_myp','lag_ui','lag_dsv','lag_ats','lag_sts','lag_fx']]
# # var_exog = var_exog.iloc[1:]
# var_exog = sm.add_constant(var_exog)
# var_endog = ts2[['hml','smb','mom','myp','ui','dsv','ats','sts','fx']]
# # var_endog = var_endog.iloc[1:]
# T,K = var_exog.shape
# T,N = var_endog.shape

# var_mod = var(endog=var_endog, exog=var_exog, instrument=None)
# var_fit = var_mod.fit(start_params=zeros((K,N)), maxiter=1, inv_weights=eye(N*K), optim_method='bfgs', optim_args={'gtol': 1e-12, 'maxiter': 1000})
# # var_fit = var_mod.fit(start_params=zeros((K+1),9), maxiter=1, inv_weights=eye(N*(K+1)), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 1000})
# # gmm_fit8 = gmm_mod.fit(start_params=np.zeros(K), maxiter=1, inv_weights=np.eye(K), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 10000})
# print(var_fit.summary())
# coeff = reshape(var_fit.params,(K,N))
# print(coeff)

In [26]:
# Data for the first stage. Column 0 is the dependent variable of the mimicking
# regression, columns 1-6 are the base assets that form MYP, the remaining
# columns are controls and lagged base assets.
mimick = ts2[['log_indprod_growth_nextyear',
              'ex_mkt',
              'ex_b10ret',
              'ex_b5ret',
              #    'ex_high_yd_bd_ret',
              'ex_gold_ret',
              'slope_ex_mkt_87',
              'slope_ex_mkt_9602',
              'rf',
              'lag_10y_3m_gov_bd_yd',
              'lag_1y_3m_gov_bd_yd',
              'lag_Baa_Aaa_bd_yd',
              'lag_sp_div_yd',
              'log_indprod_growth_lastyear',
              'infl_lastyear',
              'ex_mkt_lastyear',
              'lag_ex_mkt',
              'lag_ex_b10ret',
              'lag_ex_b5ret',
              'lag_ex_gold_ret',
              'lag_slope_ex_mkt_87',
              'lag_slope_ex_mkt_9602'
              ]].values
# K counts the dependent variable plus the regressors; that equals the number of
# first-stage coefficients (regressors plus constant) estimated in momcond.
T,K = mimick.shape
# Observed series of the VAR; MYP is generated inside momcond and put first.
var_exog = ts2[['hml','smb','mom','ui','dsv','ats','sts','fx']].values
T,M = var_exog.shape


class var(GMM):
    """Mimicking regression and VAR(1) estimated jointly by GMM.

    Data layout: ``instrument`` holds the first-stage data (dependent variable
    in column 0, regressors after it) and ``exog`` holds the observed VAR
    series. ``endog`` is not used by the moment conditions.

    Parameter layout: the first K entries are the first-stage coefficients
    (constant first); the remaining entries are the VAR coefficients, stored
    row-major as (1 + number of VAR series) x (number of VAR series), constant
    row first.
    """

    def momcond(self, params):
        """Return the per-observation moment matrix, one row per period 2..T."""
        coeff = squeeze(array(params))
        first_stage_data = self.instrument

        # first stage: OLS orthogonality conditions x_t * u_t
        growth = first_stage_data[:, [0]]
        regressors = sm.add_constant(first_stage_data[:, 1:])
        n_first = regressors.shape[1]
        first_coeff = coeff[:n_first].reshape(n_first, 1)
        first_resid = growth - regressors @ first_coeff
        # Broadcasting the (T, 1) residual over the regressor columns gives the
        # same matrix as regressors * kron(residual, ones((1, K))), without
        # depending on a `kron` name leaked by a wildcard import.
        moments1 = regressors * first_resid
        moments1 = moments1[1:]  # drop the first row to match the VAR sample

        # compute myp: fitted contribution of the six base assets
        myp = first_stage_data[:, 1:7] @ first_coeff[1:7]

        # var: regress each series on a constant and the first lag of all series
        system = column_stack((myp, self.exog))
        lagged = sm.add_constant(system[:-1])
        current = system[1:]
        n_obs, n_eq = current.shape
        n_reg = lagged.shape[1]
        var_coeff = coeff[n_first:].reshape(n_reg, n_eq)
        var_resid = current - lagged @ var_coeff
        # Column m * n_reg + n holds resid[:, m] * lagged[:, n], the ordering
        # produced by tile(lagged, (1, n_eq)) * kron(resid, ones((1, n_reg))).
        moments2 = (var_resid[:, :, None] * lagged[:, None, :]).reshape(n_obs, n_eq * n_reg)
        return column_stack((moments1, moments2))

# The system is exactly identified: K first-stage parameters plus
# (M+2) x (M+1) VAR parameters, with as many moments.
var_mod = var(endog=zeros(var_exog.shape[0]), exog=var_exog, instrument=mimick)
var_fit = var_mod.fit(start_params=zeros(K+(M+1)*(M+2)), maxiter=1, inv_weights=eye(K+(M+1)*(M+2)), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 10000})
var_fit.summary()

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 2857
         Function evaluations: 2914
         Gradient evaluations: 2914


0,1,2,3
Dep. Variable:,y,Hansen J:,1.606e-18
Model:,var,Prob (Hansen J):,
Method:,GMM,,
Date:,"Wed, 10 May 2023",,
Time:,18:07:10,,
No. Observations:,400,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,0.0256,0.007,3.825,0.000,0.013,0.039
p 1,0.1195,0.065,1.833,0.067,-0.008,0.247
p 2,-0.4215,0.193,-2.182,0.029,-0.800,-0.043
p 3,0.4659,0.315,1.479,0.139,-0.152,1.083
p 4,-0.0443,0.032,-1.374,0.170,-0.108,0.019
p 5,-0.0657,0.087,-0.759,0.448,-0.235,0.104
p 6,0.0202,0.099,0.205,0.837,-0.173,0.213
p 7,-9.2779,1.946,-4.768,0.000,-13.092,-5.464
p 8,-0.0905,0.247,-0.367,0.714,-0.574,0.393


In [None]:
# First-stage regressors (no constant); columns 0-5 are the base assets that
# form MYP, the remaining columns are controls and lagged base assets.
mimick_exog = ts2[['ex_mkt',
                   'ex_b10ret',
                   'ex_b5ret',
                   #    'ex_high_yd_bd_ret',
                   'ex_gold_ret',
                   'slope_ex_mkt_87',
                   'slope_ex_mkt_9602',
                   'rf',
                   'lag_10y_3m_gov_bd_yd',
                   'lag_1y_3m_gov_bd_yd',
                   'lag_Baa_Aaa_bd_yd',
                   'lag_sp_div_yd',
                   'log_indprod_growth_lastyear',
                   'infl_lastyear',
                   'ex_mkt_lastyear',
                   'lag_ex_mkt',
                   'lag_ex_b10ret',
                   'lag_ex_b5ret',
                   'lag_ex_gold_ret',
                   'lag_slope_ex_mkt_87',
                   'lag_slope_ex_mkt_9602'
                   ]].values
# K is the number of first-stage regressors excluding the constant.
T,K = mimick_exog.shape
mimick_endog = ts2[['log_indprod_growth_nextyear']].values
# Observed series of the VAR; MYP is generated inside momcond and put first.
var_endog = ts2[['hml','smb','mom','ui','dsv','ats','sts','fx']].values
T,M = var_endog.shape


class var(GMM):
    """Mimicking regression and VAR(1) estimated jointly by GMM.

    Data layout: ``exog`` is the first-stage dependent variable, ``instrument``
    the first-stage regressors (a constant is added here) and ``endog`` the
    observed VAR series.

    Parameter layout: the first K+1 entries are the first-stage coefficients
    (constant first); the remaining entries are the VAR coefficients, stored
    row-major as (1 + number of VAR series) x (number of VAR series), constant
    row first.
    """

    def momcond(self, params):
        """Return the per-observation moment matrix, one row per period 2..T."""
        coeff = squeeze(array(params))

        # first stage: OLS orthogonality conditions z_t * u_t
        regressors = sm.add_constant(self.instrument)
        n_all, n_first = regressors.shape
        growth = reshape(self.exog, (n_all, 1))
        first_coeff = reshape(coeff[:n_first], (n_first, 1))
        first_resid = growth - regressors @ first_coeff
        # Broadcasting the (T, 1) residual over the regressor columns gives the
        # same matrix as regressors * kron(residual, ones((1, K))), without
        # depending on a `kron` name leaked by a wildcard import.
        moments1 = regressors * first_resid
        moments1 = moments1[1:]  # drop the first row to match the VAR sample

        # compute myp: fitted contribution of the six base assets
        # (column 0 of `regressors` is the constant)
        myp = regressors[:, 1:7] @ first_coeff[1:7]

        # var: regress each series on a constant and the first lag of all series
        system = column_stack((myp, self.endog))
        lagged = sm.add_constant(system[:-1])
        current = system[1:]
        n_obs, n_eq = current.shape
        n_reg = lagged.shape[1]
        var_coeff = reshape(coeff[n_first:], (n_reg, n_eq))
        var_resid = current - lagged @ var_coeff
        # Column m * n_reg + n holds resid[:, m] * lagged[:, n], the ordering
        # produced by tile(lagged, (1, n_eq)) * kron(resid, ones((1, n_reg))).
        moments2 = (var_resid[:, :, None] * lagged[:, None, :]).reshape(n_obs, n_eq * n_reg)
        return column_stack((moments1, moments2))

# The system is exactly identified: K+1 first-stage parameters plus
# (M+2) x (M+1) VAR parameters, with as many moments.
var_mod = var(endog=var_endog, exog=mimick_endog, instrument=mimick_exog)
var_fit = var_mod.fit(start_params=zeros((K+1)+(M+1)*(M+2)), maxiter=1, inv_weights=eye((K+1)+(M+1)*(M+2)), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 10000})
var_fit.summary()


Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 2857
         Function evaluations: 2914
         Gradient evaluations: 2914


0,1,2,3
Dep. Variable:,"['y1', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8']",Hansen J:,1.606e-18
Model:,var,Prob (Hansen J):,
Method:,GMM,,
Date:,"Tue, 09 May 2023",,
Time:,19:17:05,,
No. Observations:,400,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,0.0256,0.007,3.825,0.000,0.013,0.039
p 1,0.1195,0.065,1.833,0.067,-0.008,0.247
p 2,-0.4215,0.193,-2.182,0.029,-0.800,-0.043
p 3,0.4659,0.315,1.479,0.139,-0.152,1.083
p 4,-0.0443,0.032,-1.374,0.170,-0.108,0.019
p 5,-0.0657,0.087,-0.759,0.448,-0.235,0.104
p 6,0.0202,0.099,0.205,0.837,-0.173,0.213
p 7,-9.2779,1.946,-4.768,0.000,-13.092,-5.464
p 8,-0.0905,0.247,-0.367,0.714,-0.574,0.393


In [None]:
def reshape_varfit(item, nmimick, nvar):
    """Split a stacked GMM result vector into first-stage and VAR parts.

    Parameters
    ----------
    item : 1-D array
        Stacked values (coefficients, t-values, p-values, ...): ``nmimick``
        first-stage entries followed by ``(nvar + 2) * (nvar + 1)`` VAR entries
        in row-major order.
    nmimick : int
        Number of leading first-stage entries.
    nvar : int
        Sets the VAR block shape, ``(nvar + 2, nvar + 1)``.

    Returns
    -------
    (first_stage, var_block)
        ``first_stage`` is ``item[:nmimick]``. ``var_block`` is the VAR matrix
        with its second row moved to row position 4 and its first column moved
        to column position 3.
    """
    first_stage = item[:nmimick]
    block = np.reshape(item[nmimick:], (nvar + 2, nvar + 1))

    # Rows: take out the second row and put it back at position 4.
    moved_row = block[1]
    remaining_rows = np.delete(block, 1, axis=0)
    block = np.insert(remaining_rows, 4, moved_row, axis=0)

    # Columns: the first column goes after the next three.
    block = np.hstack((block[:, 1:4], block[:, :1], block[:, 4:]))
    return first_stage, block

coeff_mimick, coeff_var = reshape_varfit(var_fit.params, K+1, M)
tvalues_mimick, tvalues_var = reshape_varfit(var_fit.tvalues, K+1, M)
pvalues_mimick, pvalues_var = reshape_varfit(var_fit.pvalues, K+1, M)

%store coeff_var
%store tvalues_var
%store pvalues_var

Stored 'coeff_var' (ndarray)
Stored 'tvalues_var' (ndarray)
Stored 'pvalues_var' (ndarray)


GMM

In [38]:
port = 'bm'
ports_data = pd.read_csv(resource + f'{port}_port.csv', parse_dates=['date'], index_col=['date'])
ts3 = pd.merge(ts, ports_data, on='date', how='left')

# Restrict to the analysis period (both end points inclusive).
# (1984-01-01 .. 1999-12-31 was tried as an alternative window.)
ts3 = ts3.loc[(ts3.index >= start_date) & (ts3.index <= end_date)]


# Data for the first stage. Column 0 is the dependent variable of the mimicking
# regression, columns 1-6 are the base assets that form MYP, the remaining
# columns are controls and lagged base assets.
mimick = ts3[['log_indprod_growth_nextyear',
              'ex_mkt',
              'ex_b10ret',
              'ex_b5ret',
              #    'ex_high_yd_bd_ret',
              'ex_gold_ret',
              'slope_ex_mkt_87',
              'slope_ex_mkt_9602',
              'rf',
              'lag_10y_3m_gov_bd_yd',
              'lag_1y_3m_gov_bd_yd',
              'lag_Baa_Aaa_bd_yd',
              'lag_sp_div_yd',
              'log_indprod_growth_lastyear',
              'infl_lastyear',
              'ex_mkt_lastyear',
              'lag_ex_mkt',
              'lag_ex_b10ret',
              'lag_ex_b5ret',
              'lag_ex_gold_ret',
              'lag_slope_ex_mkt_87',
              'lag_slope_ex_mkt_9602'
              ]].values
# K counts the dependent variable plus the regressors; that equals the number of
# first-stage coefficients (regressors plus constant) estimated in momcond.
T,K = mimick.shape
# Observed factors; MYP is generated inside momcond and put first, so the
# pricing model has M+1 factors.
exog_macro_factors = ts3[['ui','dsv','ats','sts','fx']].values
T,M = exog_macro_factors.shape
riskfree = ts3['rf'].values
portfolios = ts3[['dec_1','dec_2','dec_3','dec_4','dec_5','dec_6','dec_7','dec_8','dec_9','dec_10']].values
T,P = portfolios.shape
excessRet = portfolios - reshape(riskfree,(T,1))

class gmm_lambeta(GMM):
    """Mimicking regression, factor betas and risk prices estimated jointly by GMM.

    Data layout: ``instrument`` holds the first-stage data (dependent variable
    in column 0, regressors after it), ``exog`` the observed factors and
    ``endog`` the portfolio excess returns.

    Parameter layout: K first-stage coefficients (constant first), then the
    betas stored row-major as (portfolios x factors), then one risk price per
    factor. MYP is always the first factor.
    """

    def momcond(self, params):
        """Return the per-observation moment matrix (one row per period)."""
        coeff = squeeze(array(params))
        first_stage_data = self.instrument
        observed_factors = self.exog
        pRets = self.endog

        # first stage: OLS orthogonality conditions x_t * u_t
        growth = first_stage_data[:, [0]]
        regressors = sm.add_constant(first_stage_data[:, 1:])
        n_first = regressors.shape[1]
        first_coeff = coeff[:n_first].reshape(n_first, 1)
        first_resid = growth - regressors @ first_coeff
        # Broadcasting the (T, 1) residual over the regressor columns gives the
        # same matrix as regressors * kron(residual, ones((1, K))), without
        # depending on a `kron` name leaked by a wildcard import.
        moments1 = regressors * first_resid

        # compute myp: fitted contribution of the six base assets
        # (column 0 of `regressors` is the constant)
        myp = regressors[:, 1:7] @ first_coeff[1:7]

        # gmm: betas and risk prices
        factors = column_stack((myp, observed_factors))
        n_obs, n_port = pRets.shape
        n_fac = factors.shape[1]
        betalam_params = coeff[n_first:]
        beta = betalam_params[:n_port * n_fac].reshape(n_port, n_fac)
        lam = betalam_params[n_port * n_fac:].reshape(n_fac, 1)
        resid = pRets - factors @ beta.T
        # E[(R^{ex,i} - beta^i*FF)*FF] = 0 (orthogonality conditions of the
        # time-series regressions, which have no intercept). Column
        # i * n_fac + j holds resid[:, i] * factors[:, j], the ordering
        # produced by kron(resid, ones((1, n_fac))) * tile(factors, n_port).
        moments_beta = (resid[:, :, None] * factors[:, None, :]).reshape(n_obs, n_port * n_fac)
        # E[R^{ex,i} - beta^i*lambda] = 0 (pricing equations using the MPR)
        moments_lam = pRets - (beta @ lam).T

        return column_stack((moments1, moments_beta, moments_lam))

# Parameters: K + P*(M+1) + (M+1) = K + (P+1)*(M+1).
# Moments:    K + P*(M+1) + P     = K + P*(M+2), so there are P - (M+1)
# over-identifying restrictions.
lambeta_mod = gmm_lambeta(endog=excessRet, exog=exog_macro_factors, instrument=mimick)
lambeta_fit = lambeta_mod.fit(start_params=zeros(K+(P+1)*(M+1)), maxiter=1, inv_weights=eye(K+P*(M+2)), optim_method='bfgs', optim_args={'gtol': 1e-15, 'maxiter': 100000})
lambeta_fit.summary()

21 11 6
         Current function value: 0.000000
         Iterations: 61373
         Function evaluations: 64787
         Gradient evaluations: 64775


0,1,2,3
Dep. Variable:,"['y1', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8', 'y9', 'y10']",Hansen J:,1.158e-06
Model:,gmm_lambeta,Prob (Hansen J):,
Method:,GMM,,
Date:,"Thu, 11 May 2023",,
Time:,14:12:54,,
No. Observations:,400,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,0.0285,0.007,4.141,0.000,0.015,0.042
p 1,0.1566,0.067,2.342,0.019,0.026,0.288
p 2,-1.0335,0.207,-4.994,0.000,-1.439,-0.628
p 3,1.3332,0.310,4.307,0.000,0.727,1.940
p 4,-0.0616,0.034,-1.814,0.070,-0.128,0.005
p 5,-0.1505,0.093,-1.612,0.107,-0.333,0.033
p 6,-0.0139,0.097,-0.143,0.886,-0.205,0.177
p 7,-7.2378,1.955,-3.702,0.000,-11.070,-3.406
p 8,0.2057,0.263,0.781,0.435,-0.311,0.722


In [43]:
# prova
port = 'bm'
ports_data = pd.read_csv(resource + f'{port}_port.csv', parse_dates=['date'], index_col=['date'])
ts3 = pd.merge(ts, ports_data, on='date', how='left')

# Set the start and end dates of the analysis period
# start_date = pd.to_datetime('1984-01-01') ##############
# end_date = pd.to_datetime('1999-12-31') ##############
ts3 = ts3.loc[(ts3.index >= start_date) & (ts3.index <= end_date)]


mimick = ts3[['log_indprod_growth_nextyear',
                'ex_mkt',
                   'ex_b10ret',
                   'ex_b5ret',
                #    'ex_high_yd_bd_ret',
                   'ex_gold_ret', 
                   'slope_ex_mkt_87', 
                   'slope_ex_mkt_9602', 
                   'rf', 
                   'lag_10y_3m_gov_bd_yd',
                   'lag_1y_3m_gov_bd_yd',
                   'lag_Baa_Aaa_bd_yd',
                   'lag_sp_div_yd',
                   'log_indprod_growth_lastyear',
                   'infl_lastyear',
                   'ex_mkt_lastyear',
                   'lag_ex_mkt',
                   'lag_ex_b10ret',
                   'lag_ex_b5ret',
                   'lag_ex_gold_ret',
                   'lag_slope_ex_mkt_87',
                   'lag_slope_ex_mkt_9602'
                   ]].values
T,K = mimick.shape
exog_macro_factors = ts3[['ui','dsv','ats','sts','fx']].values
T,M = exog_macro_factors.shape
riskfree = ts3['rf'].values
portfolios = ts3[['dec_1','dec_2','dec_3','dec_4','dec_5','dec_6','dec_7','dec_8','dec_9','dec_10']].values
T,P = portfolios.shape
excessRet = portfolios - reshape(riskfree,(T,1))

class gmm_lambeta(GMM):
    """One-system GMM for a mimicking-portfolio factor plus beta/lambda pricing.

    The model is fed three arrays through the standard ``GMM`` constructor:

    * ``endog``      -- (T, P) test-portfolio returns,
    * ``exog``       -- (T, M) exogenous factors,
    * ``instrument`` -- (T, 1 + R) array whose column 0 is the first-stage
      dependent variable and whose remaining R columns are the first-stage
      regressors (a constant is prepended inside ``momcond``).

    The parameter vector is laid out as
    ``[first-stage coefficients (K) | betas (P * F, row-major by portfolio) | lambdas (F)]``
    where ``K`` is the number of first-stage regressors including the constant
    and ``F = M + 1`` is the number of pricing factors (fitted mimicking factor
    followed by the exogenous factors).
    """

    # Columns of the constant-augmented first-stage regressor matrix whose
    # fitted contribution forms the mimicking factor: the first six regressors
    # after the constant (the base-asset returns in this notebook's layout).
    base_asset_cols = slice(1, 7)

    def momcond(self, params):
        """Return the (T, K + P*F + P) matrix of per-observation moment conditions.

        Parameters
        ----------
        params : array_like
            Stacked parameter vector, see the class docstring for the layout.

        Returns
        -------
        numpy.ndarray
            Column blocks, in order: first-stage OLS orthogonality conditions
            (K), time-series orthogonality conditions (P * F) and pricing
            errors (P).

        Raises
        ------
        ValueError
            If ``params`` does not contain exactly ``K + P*F + F`` elements.
        """
        fRets = self.exog
        pRets = self.endog
        mimick = self.instrument
        coeff = squeeze(array(params))

        # ---- first stage: linear projection of column 0 on the other columns
        mimick_endog = reshape(mimick[:, 0], (-1, 1))
        # NOTE(review): add_constant's default skips the constant when a
        # regressor is already constant, which would shift every column
        # index used below -- confirm no regressor is constant in-sample.
        mimick_exog = sm.add_constant(mimick[:, 1:])
        T, K = mimick_exog.shape

        P = pRets.shape[1]
        F = fRets.shape[1] + 1  # exogenous factors + fitted mimicking factor
        expected = K + P * F + F
        if coeff.size != expected:
            raise ValueError(
                f"params has {coeff.size} elements, expected {expected} "
                f"(K={K} first-stage + P*F={P * F} betas + F={F} lambdas)"
            )

        mimick_coeff = reshape(coeff[:K], (K, 1))
        mimick_err = mimick_endog - mimick_exog @ mimick_coeff
        # Each regressor times the first-stage residual; broadcasting the
        # (T, 1) residual across columns equals kron(err, ones((1, K))).
        moments1 = mimick_exog * mimick_err

        # ---- mimicking factor: fitted contribution of the base assets only
        cols = self.base_asset_cols
        myp = mimick_exog[:, cols] @ mimick_coeff[cols]

        # ---- beta / lambda system
        full_fRets = column_stack((myp, fRets))
        betalam_params = coeff[K:]
        beta = reshape(betalam_params[:P * F], (P, F))
        lam = reshape(betalam_params[P * F:], (F, 1))

        e = pRets - full_fRets @ beta.T
        # E[(R^{ex,i} - beta^i*FF)*FF] = 0 (orthogonality conditions of the
        # time-series regression). Column i*F + k holds e[:, i] * FF[:, k],
        # the same ordering as kron(e, ones((1, F))) * tile(FF, P).
        moments_beta = np.repeat(e, F, axis=1) * tile(full_fRets, (1, P))
        # E[R^{ex,i} - beta^i*lambda] = 0 (pricing equations using the MPR)
        moments_lam = pRets - (beta @ lam).T

        return column_stack((moments1, moments_beta, moments_lam))
    
# Problem size. K = mimick.shape[1]; inside momcond the dependent-variable
# column is dropped and a constant is added, so K is also the number of
# first-stage coefficients. The pricing block uses M + 1 factors (fitted
# mimicking factor + the M exogenous macro factors).
n_params = K + (P + 1) * (M + 1)   # K first-stage + P*(M+1) betas + (M+1) lambdas
n_moments = K + P * (M + 2)        # K first-stage + P*(M+1) time-series + P pricing

# Pass the true sizes to GMM: left to itself it takes the number of moments
# from instrument.shape[1] and the number of parameters from exog.shape[1],
# neither of which matches this system (and both feed the J-test degrees of
# freedom).
lambeta_mod = gmm_lambeta(endog=excessRet, exog=exog_macro_factors, instrument=mimick,
                          k_moms=n_moments, k_params=n_params)
# maxiter=1: a single GMM step starting from the identity weighting matrix;
# the HAC settings are used for the weighting matrix computed afterwards.
lambeta_fit = lambeta_mod.fit(start_params=zeros(n_params), maxiter=1, inv_weights=eye(n_moments), weights_method='hac', wargs={'maxlag':11}, optim_method='bfgs', optim_args={'gtol': 1e-12, 'maxiter': 100000})
lambeta_fit.summary()

KeyboardInterrupt: 

In [None]:
# premia_port = []
# premia_t_stat_port = []
# beta_port = []
# beta_t_stat_port = []

# # For book-to-market portfolios
# for index,ports in enumerate(['bm','size','mom']):
#     ports_data = pd.read_csv(resource + f'{ports}_port.csv', parse_dates=['date'], index_col=['date'])
#     ts3 = pd.merge(ts, ports_data, on='date', how='left')

#     # Set the start and end dates of the analysis period
#     # start_date = pd.to_datetime('1984-01-01') ##############
#     # end_date = pd.to_datetime('1999-12-31') ##############
#     ts3 = ts3.loc[(ts3.index >= start_date) & (ts3.index <= end_date)]

#     macro_factors = ts3[['myp','ui','dsv','ats','sts','fx']].values
#     financial_factors = ts3[['ex_mkt','smb','hml','mom']].values
#     riskfree = ts3['rf'].values
#     portfolios = ts3[['dec_1','dec_2','dec_3','dec_4','dec_5','dec_6','dec_7','dec_8','dec_9','dec_10']].values

#     T,N = portfolios.shape
#     excessRet = portfolios - np.reshape(riskfree,(T,1))
#     K = np.size(macro_factors,1)

#     # Starting values for the factor loadings and risk premia are estimated using OLS and simple means.
#     betas = []
#     for i in range(N):
#         res = sm.OLS(excessRet[:,i],sm.add_constant(macro_factors)).fit()
#         betas.append(res.params[1:])

#     avgReturn = excessRet.mean(axis=0)
#     avgReturn.shape = N,1
#     betas = array(betas)
#     res = sm.OLS(avgReturn, betas).fit()
#     riskPremia = res.params

#     # Once the starting values are computed, the first-step estimates are found using the non-linear optimizer. The initial weighting matrix is just the identity matrix.
#     riskPremia.shape = K
#     startingVals = np.concatenate((betas.flatten(),riskPremia))

#     Winv = np.eye(N*(K+1))
#     args = (excessRet, macro_factors, Winv)
#     iteration = 0
#     functionCount = 0
#     # step1opt = fmin_bfgs(gmm_objective, startingVals, args=args, callback=iter_print)
#     step1opt = fmin_bfgs(gmm_objective, startingVals, args=args)

#     # Here we look at the risk premia estimates from the first step (inefficient) estimates.
#     premia = step1opt[-K:]
#     premia = Series(premia,index=['myp','ui','dsv','ats','sts','fx'])
#     # print('Annualized Risk Premia (First step)')
#     # print(100 * premia)

#     # Next, the first-step estimates are used to evaluate the moment conditions, which are in turn used to estimate the optimal weighting matrix for the moment conditions. This is then used as an input for the second-step estimates.
#     out = gmm_objective(step1opt, excessRet, macro_factors, Winv, out=True)
#     S = np.cov(out[1].T)
#     Winv2 = inv(S)
#     args = (excessRet, macro_factors, Winv2)

#     iteration = 0
#     functionCount = 0
#     # step2opt = fmin_bfgs(gmm_objective, step1opt, args=args, callback=iter_print)   
#     step2opt = fmin_bfgs(gmm_objective, step1opt, args=args)  

#     # The annualized risk premia.
#     premia = step2opt[-K:]
#     # premia = Series(premia,index=['myp','ui','dsv','ats','sts','fx'])
#     # print('Annualized Risk Premia')
#     # print(100 * premia)

#     # Finally the VCV of the parameter estimates is computed.
#     out = gmm_objective(step2opt, excessRet, macro_factors, Winv2, out=True)
#     G = gmm_G(step2opt, excessRet, macro_factors)
#     S = np.cov(out[1].T)
#     vcv = inv(G @ inv(S) @ G.T)/T
#     premia_vcv = vcv[-K:,-K:]
#     premia_stderr = np.diag(premia_vcv)
#     # premia_stderr = Series(premia_stderr,index=['myp','ui','dsv','ats','sts','fx'])
#     # print('t-stats')
#     # print(premia / premia_stderr)
#     premia_t_stat = premia / premia_stderr

#     beta = reshape(step2opt[:-K],(N,K))
#     beta_vcv = vcv[:-K,:-K]
#     beta_stderr = np.diag(beta_vcv)
#     beta_t_stat = step2opt[:-K] / beta_stderr
#     beta_t_stat = reshape(beta_t_stat,(N,K))

#     premia_port.append(premia)
#     premia_t_stat_port.append(premia_t_stat)
#     beta_port.append(beta)
#     beta_t_stat_port.append(beta_t_stat)

# %store beta_port
# %store beta_t_stat_port
# %store premia_port
# %store premia_t_stat_port

         Current function value: 0.000000
         Iterations: 65
         Function evaluations: 10865
         Gradient evaluations: 162
         Current function value: 1.900360
         Iterations: 210
         Function evaluations: 19576
         Gradient evaluations: 292
         Current function value: 0.000000
         Iterations: 14
         Function evaluations: 5237
         Gradient evaluations: 78
         Current function value: 2.955226
         Iterations: 75
         Function evaluations: 8118
         Gradient evaluations: 121
         Current function value: 0.000000
         Iterations: 13
         Function evaluations: 3695
         Gradient evaluations: 55
         Current function value: 0.209559
         Iterations: 96
         Function evaluations: 9391
         Gradient evaluations: 140
Stored 'beta_port' (list)
Stored 'beta_t_stat_port' (list)
Stored 'premia_port' (list)
Stored 'premia_t_stat_port' (list)


In [None]:
# # model = LinearFactorModelGMM(portfolios, macro_factors)
# model = LinearFactorModelGMM(excessRet, macro_factors, risk_free=False)

# # Estimate the model parameters
# results = model.fit(cov_type='kernel',bandwidth=12)

# # Print the summary of results
# print(results.full_summary)


In [None]:
# results.params
# results.tstats
# results.pvalues

In [None]:
# startingVals = np.zeros((1,K))
# Winv = np.eye(N)
# args = (excessRet, macro_factors, Winv)
# iteration = 0
# functionCount = 0
# opt_b = fmin_bfgs(gmm_objective_b, startingVals, args=args, callback=iter_print)
# sdf_loading = Series(opt_b,index=['myp','ui','dsv','ats','sts','fx'])
# sdf_loading

In [None]:
# # The GMM objective which needs to be minimized (to get factor loading b)
# def moment_b(params, fRets):
#     b = params
#     error = 1 - fRets @ b
#     return error

# def moment_consumption1(params, exog):
#     beta, gamma = params
#     r_forw1, c_forw1, c = exog.T  # unwrap iterable (ndarray)
    
#     # moment condition without instrument    
#     err = 1 - beta * (1 + r_forw1) * np.power(c_forw1 / c, -gamma)
#     return -err

# endog1 = np.zeros(macro_factors.shape[0])    
# mod10 = gmm.NonlinearIVGMM(endog1, macro_factors, excessRet, moment_b)
# w0inv = np.eye(N)
# res10 = mod10.fit(inv_weights=w0inv, maxiter=100, weights_method='hac', wargs={'maxlag':4}) 
# print(res10.summary())