In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [13]:
'''Part A'''

def simulation1(N, gamma, beta):

    mu = np.zeros(3)

    sigma = np.array([
        [1, 0, 0],
        [0, 1, 0.5],
        [0, 0.5, 1]
    ])

    samples = np.random.multivariate_normal(mu, sigma, N)
    X, W, U = samples[:, 0], samples[:, 1], samples[:, 2]
    
    Z = np.where(X > 0, 1, 0)
    D = np.where(gamma[0] + gamma[1]*Z + W > 0, 1, 0)
    Y = beta[0] + beta[1]*D + U
    
    results = {
        'Y': Y,
        'D': D,
        'W': W,
        'X': X,
        'Z': Z,
        'U': U
    }

    return results

In [14]:
'''Part B'''

simulate_b = simulation1(10000, [0.25, 0.25], [0.5, 1])
correlation_b = np.corrcoef(simulate_b['D'], simulate_b['U'])[0, 1]
print(f"Correlation between di and ui: {correlation_b}")

Correlation between di and ui: 0.3996441138430648


In [15]:
'''Part C'''
def OLS_est(X, Y):
    beta_hat = np.linalg.inv(X.T @ X) @ (X.T @ Y)
    return beta_hat

beta_hat = OLS_est(simulate_b['D'].reshape(-1, 1), simulate_b['Y'])
print(f"Estimated beta from OLS: {beta_hat[0]}")
print(f"Interpretation: On average, being treated (D=1) increases the outcome Y by {beta_hat[0]} units compared to not being treated (D=0), holding all else constant.")

Estimated beta from OLS: 1.7797111230232294
Interpretation: On average, being treated (D=1) increases the outcome Y by 1.7797111230232294 units compared to not being treated (D=0), holding all else constant.


In [16]:
'''Part D'''
alpha_hat = OLS_est(simulate_b['Z'].reshape(-1, 1), simulate_b['D'])
print(f"Estimated alpha from first stage regression: {alpha_hat[0]}")
print(f"Interpretation: A one-unit increase in the instrument Z is associated with an average increase of {alpha_hat[0]} units in the treatment D.")

Estimated alpha from first stage regression: 0.6905766526019691
Interpretation: A one-unit increase in the instrument Z is associated with an average increase of 0.6905766526019691 units in the treatment D.
