In [17]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

reg = LinearRegression(fit_intercept=False)

In [None]:
'''Part A'''

def simulation1(N, gamma, beta):

    sample_mean = np.zeros(3)

    sample_covariance = np.array([
        [1, 0, 0],
        [0, 1, 0.5],
        [0, 0.5, 1]
    ])

    samples = np.random.multivariate_normal(sample_mean, sample_covariance, N)
    X, W, U = samples[:, 0], samples[:, 1], samples[:, 2]
    
    Z = np.where(X > 0, 1, 0)
    D = np.where(gamma[0] + gamma[1]*Z + W > 0, 1, 0)
    Y = beta[0] + beta[1]*D + U
    
    results = {
        'Y': Y,
        'D': D,
        'W': W,
        'X': X,
        'Z': Z,
        'U': U,
    }

    return results

In [36]:
'''Part B'''

sample_gamma = [0.25, 0.25]
sample_beta = [0.5, 1]

simulate_b = simulation1(10000, sample_gamma, sample_beta)
correlation_b = np.corrcoef(simulate_b['D'], simulate_b['U'])[0, 1]
print(f"Correlation between D and U: {correlation_b:.4f}")

Correlation between D and U: 0.3926


In [37]:
'''Part C'''
def intercept(X):
    return np.column_stack([np.ones(len(X)), X])

reg.fit(intercept(simulate_b['D']), simulate_b['Y'])
beta_0 = reg.coef_[0]
beta_1 = reg.coef_[1]

print(f"β₀ (intercept): {beta_0:.4f}")
print(f"β₁ (treatment effect): {beta_1:.4f}")
print(f"Interpretation: On average, being treated (D=1) increases outcome Y by {beta_1:.4f} units")
print(f"compared to not being treated (D=0). The baseline outcome when D=0 is {beta_0:.4f}.")

β₀ (intercept): -0.0271
β₁ (treatment effect): 1.8157
Interpretation: On average, being treated (D=1) increases outcome Y by 1.8157 units
compared to not being treated (D=0). The baseline outcome when D=0 is -0.0271.


In [38]:
'''Part D'''
reg.fit(intercept(simulate_b['Z']), simulate_b['D'])
alpha_0 = reg.coef_[0]
alpha_1 = reg.coef_[1]

print(f"α₀ (intercept): {alpha_0:.4f}")
print(f"α₁ (instrument effect): {alpha_1:.4f}")
print(f"Interpretation:")
print(f"- α₀: {alpha_0:.1%} of people with Z=0 receive treatment (D=1)")
print(f"- α₁: Having Z=1 increases probability of treatment by {alpha_1:.1%}")
print(f"- Total: {alpha_0 + alpha_1:.1%} of people with Z=1 receive treatment")

α₀ (intercept): 0.6072
α₁ (instrument effect): 0.0769
Interpretation:
- α₀: 60.7% of people with Z=0 receive treatment (D=1)
- α₁: Having Z=1 increases probability of treatment by 7.7%
- Total: 68.4% of people with Z=1 receive treatment


In [39]:
'''Part E'''
reg.fit(intercept(simulate_b['X']), simulate_b['D'])
delta_0 = reg.coef_[0]
delta_1 = reg.coef_[1]

print(f"δ₀ (intercept): {delta_0:.4f}")
print(f"δ₁ (effect of X on D): {delta_1:.4f}")
print(f"Interpretation:")
print(f"- δ₀: {delta_0:.1%} of people with X=0 receive treatment (D=1)")
print(f"- δ₁: A one-unit increase in X increases probability of treatment by {delta_1:.1%}")
print(f"- Total: {delta_0 + delta_1:.1%} of people with X=1 receive treatment")

δ₀ (intercept): 0.6453
δ₁ (effect of X on D): 0.0333
Interpretation:
- δ₀: 64.5% of people with X=0 receive treatment (D=1)
- δ₁: A one-unit increase in X increases probability of treatment by 3.3%
- Total: 67.9% of people with X=1 receive treatment


Comparatively, Z is shown to have a more significant positive effect on D.

In [42]:
'''Part F'''
covZY = np.cov(simulate_b['Z'], simulate_b['Y'])[0, 1]
covZD = np.cov(simulate_b['Z'], simulate_b['D'])[0, 1]
covXY = np.cov(simulate_b['X'], simulate_b['Y'])[0, 1]
covXD = np.cov(simulate_b['X'], simulate_b['D'])[0, 1]

beta_IV_Z = covZD / covZY
beta_IV_X = covXY / covXD

print("IV ESTIMATES:")
print(f"Using Z as instrument: β₁ᴵⱽ = {beta_IV_Z:.4f}")
print(f"Using X as instrument: β₁ᴵⱽ = {beta_IV_X:.4f}")

print()
print("Analysis:")
print(f"Note that true β₁ = {sample_beta[1]}")
print(f"Using Z as instrument gives estimate that differs from true β₁ by {beta_IV_Z - sample_beta[1]:.4f} which is an {'overestimate' if beta_IV_Z > sample_beta[1] else 'underestimate'}")
print(f"Using X as instrument gives estimate that differs from true β₁ by {beta_IV_X - sample_beta[1]:.4f} which is an {'overestimate' if beta_IV_X > sample_beta[1] else 'underestimate'}")

IV ESTIMATES:
Using Z as instrument: β₁ᴵⱽ = 1.5294
Using X as instrument: β₁ᴵⱽ = 0.7692

Analysis:
Note that true β₁ = 1
Using Z as instrument gives estimate that differs from true β₁ by 0.5294 which is an overestimate
Using X as instrument gives estimate that differs from true β₁ by -0.2308 which is an underestimate


#### Part G
For true γ1 with IV zi to have a LATE interpretation, di must meet the monotonicity condition where di(1) >= di(0).
Therefore, γ1 >= 0 must be true. This is ensures that:
- When zi = 1, individual i is more likely to recieve treatment di
- When zi = 0, individual i is less likely to recieve treatment di

In terms of compliance groups, β1 is the estimate for only the compliers. This is for the subgroup of the sample population where the binary treatment status (di) changes with the treatment (zi).