# Question 3 
## (4)
First we define the functions we need for GMM estimation. 

In [1]:
import numpy as np 
from numpy.linalg import inv 
from scipy.stats import distributions as iid 
from scipy.optimize import minimize

#From here: https://stackoverflow.com/questions/4740172/how-do-you-a-double-factorial-in-python
def doublefactorial(n):
    '''
    Double factorial n!! = n * (n-2) * (n-4) * ..., keeping only the
    strictly positive factors.
    n: number whose double factorial is wanted; any n <= 0 gives 1
    '''
    product = 1
    term = n
    # Walk down in steps of two until the factor is no longer positive
    while term > 0:
        product = product * term
        term = term - 2
    return product

    
def gj(b, x, k):
    '''
    Moment conditions evaluated at a single observation.
    b: [mu, sigma], parameters for normal dist.
    x: a single observation
    k: number of moments
    Returns a list of length k. Entry i-1 holds (x - mu)**i for odd i and
    (x - mu)**i - sigma**i * (i-1)!! for even i.
    '''
    mu, sigma = b
    deviation = x - mu

    def moment(order):
        # Odd orders use the raw centered power; even orders subtract
        # sigma**order times the double factorial of (order - 1)
        centered = deviation ** order
        if order % 2:
            return centered
        return centered - (sigma**order) * doublefactorial(order-1)

    return [moment(order) for order in range(1, k+1)]


def gN(b, x_lst, k):
    '''
    Sample mean of the moment vector gj, taken over every observation.
    b: [mu, sigma], parameters for normal dist.
    x_lst: list of all observations
    k: number of moments
    Returns a length-k array (column means of the per-observation moments).
    '''
    per_observation = [gj(b, obs, k) for obs in x_lst]
    return np.mean(per_observation, axis=0)


def Omegahat(b, x_lst, k):
    '''
    Sample covariance matrix of the moment conditions evaluated at b.
    b: [mu, sigma], parameters for normal dist.
    x_lst: list of all observations
    k: number of moments
    Returns a (k, k) array: the demeaned moments' cross-product divided by
    the number of observations.
    '''
    moments = np.array([gj(b, obs, k) for obs in x_lst])

    # Recenter: the moments have mean zero under the null, so subtract the
    # column means before forming the cross-product.
    centered = moments - moments.mean(axis=0)
    n_obs = centered.shape[0]

    return centered.T @ centered / n_obs


def J(b, W, x_lst, k):
    '''
    GMM criterion at b: sample size times the quadratic form of the
    averaged moments gN in the weighting matrix W.
    b: [mu, sigma], parameters for normal dist.
    W: (k, k) weighting matrix
    x_lst: list of all observations
    k: number of moments
    '''
    g_bar = gN(b, x_lst, k)  # averaged moment conditions at b
    n_obs = len(x_lst)

    # Evaluated left to right: (n_obs * g_bar') @ W @ g_bar
    return n_obs * g_bar.T @ W @ g_bar


def two_step_gmm(x_lst, k):
    '''
    Two-step GMM estimate of [mu, sigma] from k moment conditions.
    x_lst: list of all observations
    k: number of moments
    Returns the scipy OptimizeResult of the second-step minimization;
    .x holds the estimate of [mu, sigma] and .fun the minimized criterion J.

    Note: sigma is not sign-constrained. Only even powers of sigma enter
    the moment conditions, so the optimizer may return a negative sigma.
    '''
    # The second parameter is a standard deviation (gj uses sigma**i), so the
    # starting value is the sample standard deviation, not the variance.
    b0 = [np.mean(x_lst), np.std(x_lst)]

    # First step uses identity weighting matrix; gj returns k moments, so W1 is k x k
    W1 = np.eye(k)
    b1 = minimize(lambda b: J(b, W1, x_lst, k), b0).x

    # Construct 2nd step weighting matrix using first step estimate of beta
    W2 = inv(Omegahat(b1, x_lst, k))

    # Second step restarts from the first-step estimate
    return minimize(lambda b: J(b, W2, x_lst, k), b1)

Then we generate a sample from a normal distribution and show that it passes the test: the $J$ statistic stays below the 5% critical value, so we fail to reject the null. We also show that a sample generated from a uniform distribution does not pass the test. 

In [2]:
# Estimation parameters: sample size N, number of moment conditions k, and
# the (mu, sigma) passed as loc/scale to both samplers below.
# NOTE(review): no random seed is set in this cell, so the printed numbers
# will change from run to run.
N = 1000
k = 4
mu, sigma = [2, 2]

# Limiting distribution of criterion (under null): chi-square with k-2
# degrees of freedom (k moment conditions minus the 2 estimated parameters)
limiting_J = iid.chi2(k-2)

# Normal distribution: the sample satisfies the null, so J is compared with
# the 5% upper-tail critical value limiting_J.isf(0.05)
X_norm = iid.norm.rvs(loc=mu, scale=sigma, size=(N, )) 
soltn = two_step_gmm(X_norm, k)
print(f'Normal distribution: b = {soltn.x}, J = {soltn.fun}, Critical J = {limiting_J.isf(0.05)}')

# Uniform distribution: scipy's uniform is supported on [loc, loc + scale],
# i.e. [2, 4] here, so mu and sigma are NOT its mean and standard deviation
X_uni = iid.uniform.rvs(loc=mu, scale=sigma, size=(N, )) 
soltn_uni = two_step_gmm(X_uni, k)
print(f'Uniform distribution: b = {soltn_uni.x}, J = {soltn_uni.fun}, Critical J = {limiting_J.isf(0.05)}')

Normal distribution: b = [1.92492118 1.98066456], J = 5.659871260160289, Critical J = 5.991464547107983
Uniform distribution: b = [ 2.98350648 -0.43532796], J = 370.80781612264775, Critical J = 5.991464547107983


## (5)
To investigate the optimal choice of $k$, we vary $k$ over a range of values and see how the test performs. 

In [3]:
# Draw one normal sample and reuse it for every k. This rebinds the
# notebook-level N, mu and sigma (and, inside the loop, k, soltn and limiting_J).
N = 1000
mu, sigma = [2, 3]
X = iid.norm.rvs(loc=mu, scale=sigma, size=(N, )) 
# Re-estimate with k = 3, ..., 14 moment conditions
for k in range(3, 15): 
    soltn = two_step_gmm(X, k)
    # The reference distribution changes with k: chi-square with k-2 degrees of freedom
    limiting_J = iid.chi2(k-2)
    print(f'k = {k}: b = {soltn.x}, J = {soltn.fun}, Critical J = {limiting_J.isf(0.05)}')

k = 3: b = [2.06586922 3.01092456], J = 0.02971106397668757, Critical J = 3.8414588206941285
k = 4: b = [2.06564742 3.01077606], J = 0.03070175890851452, Critical J = 5.991464547107983
k = 5: b = [2.05946818 3.00318949], J = 1.9512528961685092, Critical J = 7.814727903251178
k = 6: b = [2.06660942 2.98089532], J = 2.4884012435271545, Critical J = 9.487729036781158
k = 7: b = [2.07951766 2.97968779], J = 2.601178275600883, Critical J = 11.070497693516355
k = 8: b = [2.11100567 2.7943187 ], J = 20.252114183883975, Critical J = 12.59158724374398
k = 9: b = [2.15686754 2.79082664], J = 22.18252060789257, Critical J = 14.067140449340167
k = 10: b = [15.61816404  7.04066872], J = 3.423908921120107e+16, Critical J = 15.507313055865454
k = 11: b = [6.62664504 0.4271115 ], J = 18590.99169981852, Critical J = 16.91897760462045


  return (N*m.T@W@m) # Scale by sample size
  df = fun(x) - f0
  return (N*m.T@W@m) # Scale by sample size
  xk = (x - mu) ** i  - (sigma**i) * doublefactorial(i-1)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  xk = (x - mu) ** i
  xk = (x - mu) ** i  - (sigma**i) * doublefactorial(i-1)


k = 12: b = [-6.24699257e+10 -9.85382325e+03], J = -5.050314449343852e+242, Critical J = 18.30703805327515
k = 13: b = [11.9853072   1.47705737], J = 46765811.728515625, Critical J = 19.67513757268249


  return (N*m.T@W@m) # Scale by sample size


k = 14: b = [17.82378389 53.68739115], J = -1.4308731374060264e+36, Critical J = 21.02606981748307


## (6)
We estimate the parameters $(\mu, \sigma)$ using the maximum likelihood approach and compare them with those from GMM. 

In [4]:
def neg_log_likelihood(b, x_lst):
    '''
    Negative Gaussian log-likelihood of the sample at b.
    b: [mu, sigma], parameters for normal dist.
    x_lst: all observations (list or array)
    Returns -[ -n/2 * log(2*pi*sigma**2) - sum((x - mu)**2) / (2*sigma**2) ].

    sigma enters only through sigma**2, so its sign is not pinned down.
    '''
    mu, sigma = b
    # Coerce so that a plain Python list works as well as an ndarray
    x = np.asarray(x_lst)
    n = len(x_lst)
    ll = -n/2*np.log(2*np.pi*sigma**2) - 1/(2*sigma**2)*np.sum((x-mu)**2)
    return -ll

def MLE(x_lst):
    '''
    Maximum likelihood estimate of [mu, sigma] for a normal sample.
    x_lst: all observations
    Returns the scipy OptimizeResult from minimizing neg_log_likelihood;
    .x holds the estimate of [mu, sigma].
    '''
    # The second parameter is sigma (neg_log_likelihood uses sigma**2), so
    # start from the sample standard deviation rather than the variance.
    initial_guess = [np.mean(x_lst), np.std(x_lst)]
    return minimize(lambda b: neg_log_likelihood(b, x_lst), initial_guess)

# Fit by maximum likelihood on X, the normal sample drawn for the k-sweep
# above (loc=2, scale=3); this rebinds the notebook-level soltn.
soltn = MLE(X)
print(f'MLE: b = {soltn.x}')

MLE: b = [2.06582581 3.0100923 ]
