In [1]:
from numpy import hstack, ones, array, mat, tile, reshape, squeeze, eye, asmatrix
from numpy.linalg import inv
from pandas import read_csv, Series 
from scipy.linalg import kron
from scipy.optimize import fmin_bfgs
from scipy import stats
import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.regression.gmm import GMM, IV2SLS

In [2]:
# read in data
data = read_csv('EA3_data.csv')
# # filter to only results from paper
# data = data[data['Date'] < '1964']

In [3]:
# define portfolio returns vector
R = np.array(data.iloc[:,5:].values+1)
T, K = R.shape

# define V vector, vector of ones, Ndr, Ncf entries
V = np.array([np.ones((T)), data['NdrMinus'].values, data['Ncf']]).T

# define D = (1/T)R'V
D = (1/T)*(R.T @ V)

# define g(B) = DBeta-1
# since D is linear we can sovle analytically for B1gmm
# FN b1gmm = inv(D'WD)D'Wi for some weight matrix W
W = np.eye(10)
DWD = inv(D.T @ W @ D)
beta_gmm1 = DWD @ D.T @ W @ np.ones(10)
a,b,c = beta_gmm1

print(f"Estimate for a: {a}")
print(f"Estimate for b: {b}")
print(f"Estimate for c: {c}")

Estimate for a: 0.9920903001072756
Estimate for b: 0.42170025554557355
Estimate for c: -11.294426841715904


In [4]:
# now use the CLT to get an estimate for S using the variance of B
#moment_fn = lambda t, b: (D[t]@b - np.ones(3))*T
#moment_fn = lambda t, b: (R[t] @ (V[t].T @ b) - np.ones(10))
g = np.zeros((10,10))
for t in range(10):
    vb = V[t] @ beta_gmm1
    uhat = R[t]*vb - np.ones(10)
    g[t] = uhat

means = np.outer(np.ones(10), np.mean(g, axis=0))
demeaned = g - means
S = np.cov(demeaned)

var_beta_gmm1 = (1/T) * DWD @ (D.T @ W @ S @ W.T @ D) @ DWD
std_devs = np.sqrt(np.diag(var_beta_gmm1))
std_devs

array([4.34283988e-03, 1.35400621e+00, 8.46181153e+00])

In [5]:
# wald test
restr = np.array([1,1,0])
rtb = restr @ beta_gmm1
wald = rtb.T * (restr @ var_beta_gmm1 @ restr.T)**(-1) * rtb
pval = 1 - stats.chi2.cdf(wald, 1)

print(f"Wald test returns a value of {wald:.2f} with {pval:.3f} significance for a chi-squared distribution with 1 degrees of freedom.")

Wald test returns a value of 1.09 with 0.296 significance for a chi-squared distribution with 1 degrees of freedom.


In [6]:
# gamma estimate
gamma = -c/a

d_gamma = np.array([-c/(a**2),0,-1/a])
std_err = d_gamma @ var_beta_gmm1 @ d_gamma.T
std_err


72.8452124060811

In [7]:
# returning to the initial model, we'll use the inverse of S as the new weight matrix
W = inv(S)
DWD = inv(D.T @ W @ D)
beta_gmm2 = DWD @ D.T @ W @ np.ones(10)
a,b,c = beta_gmm2

g = np.zeros((10,10))
for t in range(10):
    vb = V[t] @ beta_gmm2
    uhat = R[t]*vb - np.ones(10)
    g[t] = uhat

means = np.outer(np.ones(10), np.mean(g, axis=0))
demeaned = g - means
S = np.cov(demeaned)

var_beta_gmm2 = (1/T) * DWD @ (D.T @ W @ S @ W.T @ D) @ DWD
std_devs = np.sqrt(np.diag(var_beta_gmm2))
std_a, std_b, std_c = std_devs

print(f"Estimate for a: {a}\n Std Err for a: {std_a}")
print(f"Estimate for b: {b}\n Std Err for b: {std_b}")
print(f"Estimate for c: {c}\n Std Err for c: {std_c}")

Estimate for a: -0.1552734375
 Std Err for a: 0.2717582364207201
Estimate for b: -13.125
 Std Err for b: 25.495097567963924
Estimate for c: 213.0
 Std Err for c: 91.0823802938856


In [8]:
# second stage wald test
restr = np.array([1,1,0])
rtb = np.matmul(restr, beta_gmm2)
wald = rtb.T * (1/(restr.T @ var_beta_gmm2 @ restr)) * rtb
pval = 1 - stats.chi2.cdf(wald, 1)

print(f"Wald test returns a value of {wald:.2f} with {pval:.3f} significance for a chi-squared distribution with 1 degrees of freedom.")


Wald test returns a value of 0.28 with 0.599 significance for a chi-squared distribution with 1 degrees of freedom.


In [9]:
# second stage standard error
gamma = -c/a
d_gamma = np.array([-c/(a**2),0,-1/a])
std_err = d_gamma @ var_beta_gmm2 @ d_gamma.T
std_err

6526087.192910942

In [10]:
# test for overidentification
gb = (D @ beta_gmm2 - np.ones(10))
J = T *(gb.T @ inv(S) @ gb)
pval = 1 - stats.chi2.cdf(J, 7)

print(f"J test returns a value of {J:.2f} with {pval:.3f} significance for a chi-squared distribution with 7 degrees of freedom.")

J test returns a value of -11887504491255875584.00 with 1.000 significance for a chi-squared distribution with 7 degrees of freedom.
