### Ex 12.8.1 Omitted Confounder Bias
Omitted confounder bias arises when an unobserved variable drives variation in both the observed outcome and the treatment variable. This can lead to misattribution of the causal effect: the unobserved variable (the confounder) is causing the outcome, but that effect is attributed to the treatment, which is itself driven by the confounder. For instance, intellectual ability could cause both more education and higher wages; if you then regress wage on education, education will take credit for variation that is really due to ability.

#### Demonstrating Bias caused by unobserved ability
Here we run a regression that shows how the unobserved confounder, ability, induces bias in our estimate of the impact of schooling on wages.

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

### Generate Data
# Simulated structural model; the true coefficient of Schooling in Wage is 1:
#   Ability   = 5 * Uniform(0, 1)              (the confounder)
#   Distance  ~ Normal(0, 1)                   (drawn independently of Ability)
#   Schooling = Distance + 2 * Ability + Normal(0, 1) noise
#   Wage      = Schooling + Ability
N = 100000
A = np.random.uniform(size=N)*5 # Ability
Z = np.random.normal(size=N) # Distance to School
D = Z + A*2 + np.random.normal(size=N) # Schooling
Y = D + A # Wage

data = pd.DataFrame({"Wage": Y, "Schooling":D, "Distance":Z, "Ability":A})


### Naive Regression
# Wage on Schooling only: Ability is left out, so its contribution to Wage is
# absorbed by the Schooling coefficient.
naive = sm.add_constant(data[["Schooling"]])
naive_fit = sm.OLS(data["Wage"], naive).fit()

### Regression With Ability Observed
# Same regression with Ability included as a regressor, as if it were observable.
magic = sm.add_constant(data[["Ability","Schooling"]])
magic_fit = sm.OLS(data["Wage"], magic).fit()

# The subscripts inside the f-strings use single quotes: reusing the enclosing
# double quote inside a replacement field is a SyntaxError before Python 3.12.
print(f"True Effect: {1}")
print(f"Naive OLS, impact of Schooling: {naive_fit.params['Schooling']}")
print(f"Regression With Ability Observed: {magic_fit.params['Schooling']}")


True Effect: 1
Naive OLS, impact of Schooling: 1.4016796762165313
Regression With Ability Observed: 0.9999999999999998


#### Demonstrating sensitivity


In [95]:
def estimate(gamma, delta, n=25000, rng=None):
    """Simulate one dataset and return the naive OLS slope of Wage on Schooling.

    The data are generated as::

        Ability   ~ Normal(0, 1)
        Distance  = 2 * Ability + Normal(0, 1)
        Schooling = Distance + delta * Ability + Normal(0, 1)
        Wage      = Schooling + gamma * Ability

    so the true coefficient of Schooling in the Wage equation is 1. Wage is
    then regressed on Schooling (plus a constant) with Ability left out.

    Parameters
    ----------
    gamma : float
        Coefficient of Ability in the Wage equation.
    delta : float
        Coefficient of Ability in the Schooling equation (in addition to the
        part that enters through Distance, see the note below).
    n : int, optional
        Number of simulated observations. Defaults to 25000.
    rng : optional
        Object exposing ``normal(size=...)``, e.g. the result of
        ``np.random.default_rng(seed)``. When omitted, the global
        ``np.random`` state is used.

    Returns
    -------
    The fitted coefficient on "Schooling" from the naive regression.
    """
    # Fall back to the module-level generator so calls without `rng` keep
    # drawing from (and advancing) the global NumPy random state.
    draw = np.random if rng is None else rng

    A = draw.normal(size=n) # Ability
    # NOTE(review): Distance is built from Ability here, so Ability's total
    # loading on Schooling is (2 + delta), not delta alone -- confirm this is
    # intended, since the first example draws Distance independently.
    Z = A*2 + draw.normal(size=n) # Distance to School
    D = Z + A*delta + draw.normal(size=n) # Schooling
    Y = D + A*gamma # Wage

    # Only the two columns the naive regression reads are materialised.
    data = pd.DataFrame({"Wage": Y, "Schooling": D})
    naive = sm.add_constant(data[["Schooling"]])
    naive_fit = sm.OLS(data["Wage"], naive).fit()

    return naive_fit.params["Schooling"]

# Both confounding strengths are swept over the same 50-point grid,
# starting at -10/40 and advancing in steps of 1/40.
grid = np.arange(0,50)/40 - 10/40
gammas = grid
deltas = grid.copy()

# G varies along columns and D along rows; est[i, j] holds the naive OLS
# estimate obtained at the pair (G[i, j], D[i, j]).
G, D = np.meshgrid(gammas,deltas)
est = np.zeros_like(G)
# np.ndindex walks the grid in row-major order, one simulation per cell.
for i, j in np.ndindex(*G.shape):
    est[i,j] = estimate(G[i,j], D[i,j])

import plotly.graph_objects as go

# Plot the estimate minus 1 as a surface over the (gamma, delta) grid;
# 1 is the coefficient of Schooling in the simulated Wage equation.
bias_surface = go.Surface(z = est-1, x=G, y=D, colorscale="Blues")
fig = go.Figure(data=[bias_surface])

# Axis titles for the 3-D scene.
scene_axes = dict(
    xaxis_title="gamma (Ability to Wage)",
    yaxis_title="delta (Ability to Schooling)",
    zaxis_title="Induced Bias on OLS"
)
fig.update_layout(
    title="Omitted Variable Bias Surface",
    scene=scene_axes,
    autosize=True,
    width=800,
    height=600,
)