In [1]:
from numpy import hstack, ones, array, mat, tile, reshape, squeeze, eye, asmatrix
from numpy.linalg import inv
from pandas import read_csv, Series 
from scipy.linalg import kron
from scipy.optimize import fmin_bfgs
import numpy as np
import statsmodels.api as sm

In [2]:
# STATIC FUNCTIONS

def gmm_obj(params, pRets, fRets, Winv, out=False):
    global lastValue, functionCount
    T,N = pRets.shape
    T,K = fRets.shape
    beta = squeeze(array(params[:(N*K)]))
    lam = squeeze(array(params[(N*K):]))
    beta = reshape(beta,(N,K))
    lam = reshape(lam,(K,1))
    betalam = beta @ lam
    expectedRet = fRets @ beta.T
    e = pRets - expectedRet
    instr = tile(fRets,N)
    moments1  = kron(e,ones((1,K)))
    moments1 = moments1 * instr
    moments2 = pRets - betalam.T
    moments = hstack((moments1,moments2))

    avgMoment = moments.mean(axis=0)
    
    J = T * mat(avgMoment) * mat(Winv) * mat(avgMoment).T
    J = J[0,0]
    lastValue = J
    functionCount += 1
    if not out:
        return J
    else:
        return J, moments

iteration = 0
lastValue = 0
functionCount = 0

def iter_print(params):
    global iteration, lastValue, functionCount
    iteration += 1
    print('Func value: {0:}, Iteration: {1:}, Function Count: {2:}'.format(lastValue, iteration, functionCount))

In [3]:
from statsmodels.sandbox.regression.gmm import GMM

class gmm(GMM):
    def momcond(self, params):
        endog = self.endog
        exog = self.exog
        inst = self.instrument   

        T,N = endog.shape
        T,K = exog.shape
        beta = squeeze(array(params[:(N*K)]))
        lam = squeeze(array(params[(N*K):]))
        beta = reshape(beta,(N,K))
        lam = reshape(lam,(K,1))
        betalam = beta @ lam
        expectedRet = exog @ beta.T
        e = endog - expectedRet
        instr = tile(exog,N)
        moments1 = kron(e,ones((1,K)))
        moments1 = moments1 * instr
        moments2 = endog - betalam.T

        g = np.column_stack((moments1, moments2))
        self.moments = hstack((moments1,moments2))
        #self.moments = moments2
        return self.moments
    
    def gmm_objective(self, params, Winv, out=False):
        global lastValue, functionCount
        endog = self.endog
        exog = self.exog
        inst = self.instrument
        
        moments = self.momcond(params)
        avgMoment = moments.mean(axis=0)

        J = T * mat(avgMoment) * mat(Winv) * mat(avgMoment).T
        J = J[0,0]
        lastValue = J
        functionCount += 1
        if not out:
            return J
        else:
            return J, moments
    
    def gmm_G(self, params):
        pRets = self.endog
        fRets = self.exog
        
        T,N = pRets.shape
        T,K = fRets.shape
        beta = squeeze(array(params[:(N*K)]))
        lam = squeeze(array(params[(N*K):]))
        beta = reshape(beta,(N,K))
        lam = reshape(lam,(K,1))
        G = np.zeros((N*K+K,N*K+N))
        ffp = (fRets.T @ fRets) / T
        G[:(N*K),:(N*K)]=kron(eye(N),ffp)
        G[:(N*K),(N*K):] = kron(eye(N),-lam)
        G[(N*K):,(N*K):] = -beta.T

        return G

In [4]:
data = read_csv('EA3_data.csv')
# data = data[data['Date'] < '1964']

# write date, factor, riskfree and portfolio columns
dates = data['Date'].values
factors = data[['NdrMinus','Ncf']].values*100
riskfree = data['Rf'].values*100
portfolios = data.iloc[:,6:].values*100

# T,N = portfolios.shape
# portfolios = portfolios[:,np.arange(0,N,2)]
T,N = portfolios.shape
excessRet = portfolios - np.reshape(riskfree,(T,1))
K = np.size(factors,1)

In [5]:
# generate starting values from an ols regression
betas = []
for i in range(N):
    res = sm.OLS(excessRet[:,i],sm.add_constant(factors)).fit()
    betas.append(res.params[1:])

avgReturn = excessRet.mean(axis=0)
avgReturn.shape = N,1
betas = array(betas)
res = sm.OLS(avgReturn, betas).fit()
riskPremia = res.params
riskPremia.shape = 2

# concatenate endog betas with start riskpremia
starting_vals = np.concatenate((betas.flatten(),riskPremia))

# generate a starting weight array using np.eye
starting_weights = np.eye(N*(K+1))

# generate starting values
# args = (excessRet, factors, starting_weights)
# iteration = 0
# functionCount = 0
# step0_vals = fmin_bfgs(gmm_obj, starting_vals, args=args, callback=iter_print)

In [6]:
# tile factors against the length of the portfolios to get instruments
inst = tile(factors,N)

# run GMM
GMM1 = gmm(endog=excessRet, 
          exog=factors, 
          instrument=inst, 
          k_moms=27, 
          k_params=3)
res = GMM1.fit(start_params=starting_vals, inv_weights=starting_weights)
step1_vals = res.params
step1_tvals = res.tvalues
step1_pvals = res.pvalues
_, step1_moments = GMM1.gmm_objective(step1_vals, starting_weights, out=True)
res.summary()

Optimization terminated successfully.
         Current function value: 4.557010
         Iterations: 32
         Function evaluations: 54
         Gradient evaluations: 54
Optimization terminated successfully.
         Current function value: 0.057957
         Iterations: 39
         Function evaluations: 43
         Gradient evaluations: 43
Optimization terminated successfully.
         Current function value: 0.048009
         Iterations: 38
         Function evaluations: 42
         Gradient evaluations: 42
Optimization terminated successfully.
         Current function value: 0.046815
         Iterations: 38
         Function evaluations: 42
         Gradient evaluations: 42
Optimization terminated successfully.
         Current function value: 0.046759
         Iterations: 38
         Function evaluations: 41
         Gradient evaluations: 41
Optimization terminated successfully.
         Current function value: 0.046777
         Iterations: 30
         Function evaluations: 33
  

0,1,2,3
Dep. Variable:,"['y1', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8', 'y9']",Hansen J:,13.66
Model:,gmm,Prob (Hansen J):,0.0575
Method:,GMM,,
Date:,"Sun, 16 Oct 2022",,
Time:,21:49:28,,
No. Observations:,292,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,1.8150,0.183,9.915,0.000,1.456,2.174
p 1,1.7917,0.184,9.740,0.000,1.431,2.152
p 2,1.6213,0.175,9.239,0.000,1.277,1.965
p 3,1.8798,0.205,9.170,0.000,1.478,2.282
p 4,1.6017,0.177,9.040,0.000,1.254,1.949
p 5,2.1830,0.232,9.394,0.000,1.728,2.638
p 6,1.4239,0.104,13.645,0.000,1.219,1.628
p 7,1.2957,0.139,9.298,0.000,1.023,1.569
p 8,1.2147,0.099,12.232,0.000,1.020,1.409


In [7]:
premia = step1_vals[-2:]
tvals = step1_tvals[-2:]
pvals = step1_pvals[-2:]

premia = Series(premia, index=['NdrMinus', 'Ncf'])
tvals = Series(tvals, index=['NdrMinus', 'Ncf'])
pvals = Series(pvals, index=['NdrMinus', 'Ncf'])

print('Annualized Risk Premia')
print(premia*4)
print('T-stats')
print(tvals)
print('P-values')
print(pvals)

Annualized Risk Premia
NdrMinus    -5.967723
Ncf         12.784393
dtype: float64
T-stats
NdrMinus   -1.482939
Ncf         3.989124
dtype: float64
P-values
NdrMinus    0.138091
Ncf         0.000066
dtype: float64


In [8]:
# overidentification test
val, pval, dof = res.jtest()

print(f"Hansen J test with {dof} degrees of freedom returned {round(val,4)}, at {round(pval,4)} significance")

Hansen J test with 7 degrees of freedom returned 13.6637, at 0.0575 significance


In [9]:
# get weights from step 1
step1_weights = np.cov(step1_moments.T)

# run a stage 2 gmm
GMM2 = gmm(endog=excessRet, 
          exog=factors,
          instrument=inst, 
          k_moms=27, 
          k_params=3)
res = GMM2.fit(start_params=step1_vals, inv_weights=inv(step1_weights))
step2_vals = res.params
step2_tvals = res.tvalues
step2_pvals = res.pvalues
_, step2_moments = GMM1.gmm_objective(step2_vals, inv(step1_weights), out=True)
res.summary()

Optimization terminated successfully.
         Current function value: 274.122251
         Iterations: 31
         Function evaluations: 46
         Gradient evaluations: 46
Optimization terminated successfully.
         Current function value: 0.057893
         Iterations: 39
         Function evaluations: 44
         Gradient evaluations: 44
Optimization terminated successfully.
         Current function value: 0.048056
         Iterations: 38
         Function evaluations: 43
         Gradient evaluations: 43
Optimization terminated successfully.
         Current function value: 0.046813
         Iterations: 38
         Function evaluations: 42
         Gradient evaluations: 42
Optimization terminated successfully.
         Current function value: 0.046759
         Iterations: 36
         Function evaluations: 39
         Gradient evaluations: 39
Optimization terminated successfully.
         Current function value: 0.046777
         Iterations: 35
         Function evaluations: 38


0,1,2,3
Dep. Variable:,"['y1', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8', 'y9']",Hansen J:,13.66
Model:,gmm,Prob (Hansen J):,0.0575
Method:,GMM,,
Date:,"Sun, 16 Oct 2022",,
Time:,21:49:33,,
No. Observations:,292,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p 0,1.8150,0.183,9.915,0.000,1.456,2.174
p 1,1.7917,0.184,9.740,0.000,1.431,2.152
p 2,1.6214,0.175,9.239,0.000,1.277,1.965
p 3,1.8798,0.205,9.170,0.000,1.478,2.282
p 4,1.6017,0.177,9.040,0.000,1.254,1.949
p 5,2.1830,0.232,9.395,0.000,1.728,2.638
p 6,1.4239,0.104,13.645,0.000,1.219,1.628
p 7,1.2957,0.139,9.298,0.000,1.023,1.569
p 8,1.2148,0.099,12.232,0.000,1.020,1.409


In [10]:
G = GMM2.gmm_G(step2_vals)
S = np.cov(step2_moments.T)
vcv = inv(G @ inv(S) @ G.T)/T
premia_vcv = vcv[-2:,-2:]
premia_stderr = np.diag(premia_vcv)

In [11]:
premia = step2_vals[-2:]
tvals = step2_tvals[-2:]
pvals = step2_pvals[-2:]

premia = Series(premia, index=['NdrMinus', 'Ncf'])
tvals = Series(tvals, index=['NdrMinus', 'Ncf'])
pvals = Series(pvals, index=['NdrMinus', 'Ncf'])
premia_stderr = Series(premia_stderr,index=['NdrMinus', 'Ncf'])

print('Annualized Risk Premia')
print(premia*4)
print('T-stats')
print(premia_stderr)
print('P-values')
print(pvals)

Annualized Risk Premia
NdrMinus    -5.967574
Ncf         12.784512
dtype: float64
T-stats
NdrMinus    1.514429
Ncf         1.302236
dtype: float64
P-values
NdrMinus    0.138102
Ncf         0.000066
dtype: float64


In [12]:
# overidentification test
val, pval, dof = res.jtest()

print(f"Hansen J test with {dof} degrees of freedom returned {round(val,4)}, at {round(pval,4)} significance")

Hansen J test with 7 degrees of freedom returned 13.6636, at 0.0575 significance
