# Advanced Econometrics II
### Computer Assignment - Weak identification

In [None]:
import pandas as pd
import os
import numpy as np
import statsmodels
import matplotlib.pyplot as plt
# Show the working directory: 'dest.csv' below is resolved relative to it.
print(os.getcwd())

# The file is read with header=None, so the column names are supplied here.
column_names = [
    'age', 'age2', 'ed',
    'exper', 'exper2', 'nearc2',
    'nearc4', 'nearc4a', 'nearc4b',
    'race', 'smsa', 'south', 'wage',
]
data = pd.read_csv('dest.csv', header=None, names=column_names)
print(data.columns.values)

## Question 1

In [None]:
# Question 1: Monte Carlo rejection frequency of the 2SLS t-test of
# H0: beta = beta_0 (two-sided, critical value 1.96), for every combination
# of first-stage strength `a` and error correlation `rho`.
np.random.seed(1211)
rho_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
a_list = [1, 0.6, 0.3, 0.15, 0.07, 0.04, 0.02, 0]
N = 100   # observations per simulated sample
k = 10    # number of instruments
MC = 10   # Monte Carlo replications per (a, rho) cell
Z = np.random.normal(loc=0, scale=1, size=(N, k))
beta_0 = 0
results = np.zeros(MC)
# Rows follow rho_list, columns follow a_list.
R_freq = np.zeros([len(rho_list), len(a_list)])

# The instruments are drawn once and held fixed, so everything that depends
# on Z alone is computed a single time outside the simulation loops.
ZtZ_inv = np.linalg.inv(Z.T @ Z)
Pz = Z @ ZtZ_inv @ Z.T

for a_idx, a in enumerate(a_list):
    # Only the first instrument is relevant; `a` is its first-stage coefficient.
    pi = np.zeros(k)
    pi[0] = a

    for rho_idx, rho in enumerate(rho_list):
        # Unit-variance errors (eps, v) with correlation rho.
        sigma = np.array([[1, rho], [rho, 1]])

        for l in range(MC):
            eps_v = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=N)
            eps = eps_v[:, 0]
            v = eps_v[:, 1]

            X = Z @ pi + v
            Y = X * beta_0 + eps

            # 2SLS with a single endogenous regressor: all quantities are scalars.
            XPzX = X @ Pz @ X
            beta_2SLS = (X @ Pz @ Y) / XPzX
            residuals = Y - X * beta_2SLS

            # Shat = (1/N) * sum_i e_i^2 z_i z_i', summed over all N observations.
            Shat = (Z * residuals[:, None] ** 2).T @ Z / N

            # Sandwich variance:
            # N (X'PzX)^-1 [X'Z (Z'Z)^-1 Shat (Z'Z)^-1 Z'X] (X'PzX)^-1
            first_stage = ZtZ_inv @ (Z.T @ X)
            beta_2SLS_var = N * (first_stage @ Shat @ first_stage) / XPzX ** 2

            tstat = (beta_2SLS - beta_0) / np.sqrt(beta_2SLS_var)
            results[l] = abs(tstat) > 1.96

        R_freq[rho_idx, a_idx] = round(np.mean(results), 3)

In [None]:
# Tabulate the rejection frequencies: one row per rho, one column per a.
df = pd.DataFrame(R_freq, index=rho_list, columns=a_list)
print(df)

# One figure per value of a, showing the rejection frequency as a function
# of rho. Iterating over a_list keeps the number of figures in sync with
# the grid instead of relying on a hard-coded count.
for f, a_value in enumerate(a_list):
    plt.plot(rho_list, R_freq[:, f])
    plt.xlabel('rho')
    plt.ylabel('rej frequency')
    plt.title('a is ' + str(a_value))
    plt.show()


## Question 2

In [None]:
import numpy as np
from numpy.linalg import inv as inv
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def projection_matrix(X):
    """Return the matrix that projects onto the column space of X.

    Parameters
    ----------
    X : array_like
        Either a 1-D vector of length n or a 2-D (n, p) matrix with
        linearly independent columns.

    Returns
    -------
    numpy.ndarray
        The (n, n) matrix X (X'X)^{-1} X'.

    Raises
    ------
    numpy.linalg.LinAlgError
        If X is 2-D and X'X is singular.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        # Dividing (rather than raising X'X to the power -1) also works for
        # integer vectors, where a negative integer power is an error.
        return np.outer(X, X) / (X @ X)
    # Solve (X'X) B = X' instead of forming the explicit inverse.
    return X @ np.linalg.solve(X.T @ X, X.T)
        

def orthogonal_projection_matrix(X):
    """Return I - P_X, with P_X the projection matrix onto X.

    The identity is sized by the first dimension of X, so the result is
    (n, n) for an X with n rows (or n entries when X is 1-D).
    """
    n_rows = X.shape[0]
    return np.eye(n_rows) - projection_matrix(X)

def pi_sim(y, X, betaH0, Z):
    """Estimate the first-stage coefficients under H0: beta = betaH0.

    With eps = y - X * betaH0 and M_Z the residual-maker of Z, the
    (co)variances are estimated as u' M_Z w / (N - k), and the first-stage
    estimate is

        pi = (Z'Z)^{-1} Z' (X - eps * sigma_eps_V / sigma_eps).

    NOTE(review): this looks like the pi-tilde estimator from Kleibergen's
    K/LM statistic -- confirm against the course notes.

    Parameters
    ----------
    y : numpy.ndarray
        1-D dependent variable of length N.
    X : numpy.ndarray
        1-D endogenous regressor of length N.
    betaH0 : float
        Hypothesised value of the structural coefficient.
    Z : numpy.ndarray
        (N, k) instrument matrix.

    Returns
    -------
    tuple
        (pi, sigma_eps_hat, sigma_eps_V_hat, sigma_V_eps_hat, sigma_V_hat)
        where pi has length k and the remaining entries are scalars.
    """
    N, k = Z.shape
    dof = N - k

    eps = y - X * betaH0
    ZtZ = Z.T @ Z

    def _residualize(u):
        # M_Z u = u - Z (Z'Z)^{-1} Z'u, computed without building the
        # N x N matrix M_Z.
        return u - Z @ np.linalg.solve(ZtZ, Z.T @ u)

    Mz_eps = _residualize(eps)
    Mz_X = _residualize(X)

    sigma_eps_hat = eps @ Mz_eps / dof
    sigma_eps_V_hat = eps @ Mz_X / dof
    sigma_V_eps_hat = X @ Mz_eps / dof
    sigma_V_hat = X @ Mz_X / dof

    # Coefficient of V on eps: covariance divided by the variance of eps
    # (the original had this ratio upside down).
    rho_hat = sigma_eps_V_hat / sigma_eps_hat

    pi_tilde = np.linalg.solve(ZtZ, Z.T @ (X - eps * rho_hat))

    return pi_tilde, sigma_eps_hat, sigma_eps_V_hat, sigma_V_eps_hat, sigma_V_hat
    
def k_AR_r_LM_beta(y, X, betaH0, Z):
    """Compute the three statistics k_AR_beta, r_beta and LM_beta at betaH0.

    Using the outputs of pi_sim and the residual eps = y - X * betaH0:

    * k_AR_beta = eps' P_Z eps / sigma_eps_hat
    * r_beta    = pi' Z'Z pi / (sigma_V_hat - sigma_V_eps_hat * sigma_eps_V_hat / sigma_eps_hat)
    * LM_beta   = eps' P_{Z pi} eps / sigma_eps_hat

    Returns the tuple (k_AR_beta, r_beta, LM_beta).
    """
    pi_tilde, s_ee, s_eV, s_Ve, s_VV = pi_sim(y, X, betaH0, Z)

    # Residual under the null, reused by both quadratic forms below.
    resid = y - X * betaH0

    # Variance of V after partialling out eps.
    s_VV_given_eps = s_VV - (s_Ve * s_eV) / s_ee

    inv_s_ee = 1 / s_ee
    k_AR_beta = inv_s_ee * resid.T @ projection_matrix(Z) @ resid
    r_beta = (1 / s_VV_given_eps) * pi_tilde.T @ (Z.T @ Z) @ pi_tilde
    LM_beta = inv_s_ee * resid.T @ projection_matrix(Z @ pi_tilde) @ resid

    return k_AR_beta, r_beta, LM_beta

def LR_beta(y, X, betaH0, Z):
    """Return the likelihood-ratio statistic for H0: beta = betaH0.

    Built from the statistics returned by k_AR_r_LM_beta:

        LR = 0.5 * (k_AR - r + sqrt((k_AR + r)^2 - 4 * r * (k_AR - LM)))

    The factor 0.5 multiplies the whole bracket, including the square-root
    term (the original applied it to (k_AR - r) only).

    Parameters
    ----------
    y, X : numpy.ndarray
        Dependent variable and endogenous regressor, passed on unchanged.
    betaH0 : float
        Hypothesised value of the structural coefficient.
    Z : numpy.ndarray
        Instrument matrix, passed on unchanged.
    """
    k_AR_beta, r_beta, LM_beta = k_AR_r_LM_beta(y, X, betaH0, Z)

    discriminant = (k_AR_beta + r_beta) ** 2 - 4 * r_beta * (k_AR_beta - LM_beta)
    return 0.5 * (k_AR_beta - r_beta + np.sqrt(discriminant))

In [None]:
# Question 2: simulate LR(beta_0) and r(beta_0) for each (a, rho) design.
np.random.seed(1211)
# Only the first grid point is active; the remaining values are kept here
# so the full grid can be restored.
rho_list = [0]  # , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
a_list = [1]  # , 0.6, 0.3, 0.15, 0.07, 0.04, 0.02, 0]
N = 100
k = 10
MC = 5000
Z = np.random.normal(size=(N, k))
beta_0 = 0
# Despite its name, LR_reject stores the LR statistic of each replication,
# not a reject/accept indicator.
LR_reject = np.zeros(MC)
r_beta = np.zeros(MC)
R_freq = pd.DataFrame({'r_beta': [], 'LR': []})

for a in a_list:
    # First-stage coefficients: only the first instrument is relevant.
    pi = np.zeros(k)
    pi[0] = a

    for rho in rho_list:
        # Unit-variance errors (eps, v) with correlation rho.
        sigma = np.array([[1, rho], [rho, 1]])

        for l in range(MC):
            eps_v = np.random.multivariate_normal([0, 0], sigma, N)
            eps, v = eps_v[:, 0], eps_v[:, 1]

            X = Z @ pi + v
            Y = X * beta_0 + eps

            LR_reject[l] = LR_beta(Y, X, beta_0, Z)
            r_beta[l] = k_AR_r_LM_beta(Y, X, beta_0, Z)[1]

        # Aggregation into R_freq is currently disabled:
        #R_freq = R_freq.append(pd.DataFrame({'r_beta' : [k_AR_r_LM_beta(Y, X, beta_0, Z)[1]], 'LR' : np.mean(LR_reject)}),
        #                     ignore_index=True)
        
