In [124]:
from numpy import hstack, ones, array, mat, tile, reshape, squeeze, eye, asmatrix
from numpy.linalg import inv
from pandas import read_csv, Series 
from scipy.linalg import kron
from scipy.optimize import fmin_bfgs
import numpy as np
import statsmodels.api as sm

Next a callable function is used to produce iteration-by-iteration output when using the non-linear optimizer.

In [125]:
iteration = 0
lastValue = 0
functionCount = 0

def iter_print(params):
    global iteration, lastValue, functionCount
    iteration += 1
    print('Func value: {0:}, Iteration: {1:}, Function Count: {2:}'.format(lastValue, iteration, functionCount))

The GMM objective, which is minimized, is defined next.

In [126]:
def gmm_objective(params, pRets, fRets, Winv, out=False):
    global lastValue, functionCount
    T,N = pRets.shape
    T,K = fRets.shape
    beta = squeeze(array(params[:(N*K)]))
    lam = squeeze(array(params[(N*K):]))
    beta = reshape(beta,(N,K))
    lam = reshape(lam,(K,1))
    betalam = beta @ lam
    expectedRet = fRets @ beta.T
    e = pRets - expectedRet
    instr = tile(fRets,N)
    moments1  = kron(e,ones((1,K)))
    moments1 = moments1 * instr -1
    moments2 = pRets - betalam.T -1
    moments = hstack((moments1,moments2))

    avgMoment = moments.mean(axis=0)
    
    J = T * mat(avgMoment) * mat(Winv) * mat(avgMoment).T
    J = J[0,0]
    lastValue = J
    functionCount += 1
    if not out:
        return J
    else:
        return J, moments

The G matrix, which is the derivative of the GMM moments with respect to the parameters, is defined.

In [127]:
def gmm_G(params, pRets, fRets):
    T,N = pRets.shape
    T,K = fRets.shape
    beta = squeeze(array(params[:(N*K)]))
    lam = squeeze(array(params[(N*K):]))
    beta = reshape(beta,(N,K))
    lam = reshape(lam,(K,1))
    G = np.zeros((N*K+K,N*K+N))
    ffp = (fRets.T @ fRets) / T
    G[:(N*K),:(N*K)]=kron(eye(N),ffp)
    G[:(N*K),(N*K):] = kron(eye(N),-lam)
    G[(N*K):,(N*K):] = -beta.T
    
    return G

Next, the data is imported and a subset of the test portfolios is selected to make the estimation faster.

In [128]:
data = read_csv('EA3_data.csv')
data.iloc[:,1:] = data.iloc[:,1:]
# data = data[data['Date'] < '1964']
# data.reset_index(inplace=True)

# write date, factor, riskfree and portfolio columns
dates = data['Date'].values
factors = (np.exp(data[['NdrMinus','Ncf']].values)-1)*100
riskfree = data['Rf'].values*100
portfolios = data.iloc[:,6:].values*100

T,N = portfolios.shape
portfolios = portfolios[:,np.arange(0,N,2)]
T,N = portfolios.shape
excessRet = portfolios - np.reshape(riskfree,(T,1))
K = np.size(factors,1)

Starting values for the factor loadings and risk premia are estimated using OLS and simple means.

In [130]:
betas = []
for i in range(N):
    res = sm.OLS(excessRet[:,i],sm.add_constant(factors)).fit()
    betas.append(res.params[1:])

avgReturn = excessRet.mean(axis=0)
avgReturn.shape = N,1
betas = array(betas)
res = sm.OLS(avgReturn, betas).fit()
riskPremia = res.params

The starting values are computed the first step estimates are found using the non-linear optimizer. The initial weighting matrix is just the identity matrix.

In [131]:
riskPremia.shape = 2
startingVals = np.concatenate((betas.flatten(),riskPremia))

Winv = np.eye(N*(K+1))
args = (excessRet, factors, Winv)
iteration = 0
functionCount = 0
step1opt = fmin_bfgs(gmm_objective, startingVals, args=args, callback=iter_print)

Func value: 3274.191552657447, Iteration: 1, Function Count: 39
Func value: 2491.602143941232, Iteration: 2, Function Count: 52
Func value: 1641.1084674191623, Iteration: 3, Function Count: 65
Func value: 1641.0298289376256, Iteration: 4, Function Count: 91
Func value: 1640.9224539714294, Iteration: 5, Function Count: 104
Func value: 1573.5019326731108, Iteration: 6, Function Count: 169
Func value: 1573.490503885541, Iteration: 7, Function Count: 195
Func value: 1573.4716001585937, Iteration: 8, Function Count: 208
Func value: 1562.6283626000377, Iteration: 9, Function Count: 273
Func value: 1182.5550885755856, Iteration: 10, Function Count: 312
Func value: 1177.807254153965, Iteration: 11, Function Count: 338
Func value: 1177.8060160100597, Iteration: 12, Function Count: 364
Func value: 1001.132185792592, Iteration: 13, Function Count: 481
Func value: 1001.1105249226483, Iteration: 14, Function Count: 507
Func value: 949.6303927217804, Iteration: 15, Function Count: 585
Func value: 89

In [132]:
out = gmm_objective(step1opt, excessRet, factors, Winv, out=True)
G = gmm_G(step1opt, excessRet, factors)
S = np.cov(out[1].T)
vcv = inv(G @ inv(S) @ G.T)/T
Winv2 = inv(S)

Here we look at the risk premia estimates from the first step (inefficient) estimates.

In [137]:
# premia = step1opt[-2:]
# premia = Series(premia,index=['NdrMinus', 'Ncf'])
# print('Annualized Risk Premia (First step)')
# print(premia*4)

premia = step1opt[-2:]
premia = Series(premia,index=['NdrMinus', 'Ncf'])
premia_vcv = vcv[-2:,-2:]
print('Annualized Risk Premia')
print(premia*4)

premia_stderr = np.diag(premia_vcv)
premia_stderr = Series(premia_stderr,index=['NdrMinus', 'Ncf'])
print('Annualized T-stats')
print((premia / premia_stderr))

Annualized Risk Premia
NdrMinus   -13.217565
Ncf         16.423608
dtype: float64
Annualized T-stats
NdrMinus   -0.187357
Ncf         0.202521
dtype: float64


Next the first step estimates are used to estimate the moment conditions which are in-turn used to estimate the optimal weighting matrix for the moment conditions. This is then used as an input for the 2nd-step estimates.

In [138]:
args = (excessRet, factors, Winv2)

iteration = 0
functionCount = 0
step2opt = fmin_bfgs(gmm_objective, step1opt, args=args, callback=iter_print)

Func value: 14.5833841056896, Iteration: 1, Function Count: 39
Func value: 12.414886118064993, Iteration: 2, Function Count: 65
Func value: 11.841425793995713, Iteration: 3, Function Count: 91
Func value: 10.925559296951619, Iteration: 4, Function Count: 104
Func value: 10.666982869428539, Iteration: 5, Function Count: 130
Func value: 10.621039614559077, Iteration: 6, Function Count: 143
Func value: 10.532963498338813, Iteration: 7, Function Count: 156
Func value: 10.389445857093197, Iteration: 8, Function Count: 169
Func value: 10.131964207178449, Iteration: 9, Function Count: 182
Func value: 9.758945709784273, Iteration: 10, Function Count: 195
Func value: 9.663282569139307, Iteration: 11, Function Count: 208
Func value: 8.868889507228957, Iteration: 12, Function Count: 234
Func value: 8.225864213524634, Iteration: 13, Function Count: 247
Func value: 8.148280723428998, Iteration: 14, Function Count: 260
Func value: 8.022576089350279, Iteration: 15, Function Count: 273
Func value: 7.8

Finally the VCV of the parameter estimates is computed.

In [139]:
out = gmm_objective(step2opt, excessRet, factors, Winv2, out=True)
G = gmm_G(step2opt, excessRet, factors)
S = np.cov(out[1].T)
vcv = inv(G @ inv(S) @ G.T)/T

The annualized risk premia and their associated t-stats.

In [140]:
premia = step2opt[-2:]
premia = Series(premia,index=['NdrMinus', 'Ncf'])
premia_vcv = vcv[-2:,-2:]
print('Annualized Risk Premia')
print(premia*4)

premia_stderr = np.diag(premia_vcv)
premia_stderr = Series(premia_stderr,index=['NdrMinus', 'Ncf'])
print('T-stats')
print(premia / (premia_stderr))

Annualized Risk Premia
NdrMinus   -22.988643
Ncf         29.974171
dtype: float64
T-stats
NdrMinus   -0.325861
Ncf         0.369614
dtype: float64
