In [1]:
import pandas as pd
import numpy as np

from src.models.iv import IV
from src.models.linreg import LinReg
from src.displays.display_linear import display_models

In [2]:
# Load the father-education toy dataset used by every model below.
# The rendered frame shows an 'Unnamed: 0' column — presumably a row index
# that was written out when the CSV was saved; it is not used by any model here.
father_education_csv = '../data/father_education.csv'
magic = pd.read_csv(father_education_csv)
magic

Unnamed: 0,wage,educ,ability,fathereduc
0,146.347807,18.053194,348.240021,17.158322
1,147.599580,15.845486,181.160724,13.988533
2,161.820228,15.105207,337.367725,15.994311
3,105.082941,16.458131,106.458032,21.413172
4,167.562196,18.793815,301.510006,16.457630
...,...,...,...,...
995,157.149509,14.883227,461.290475,11.446059
996,166.003573,14.542572,292.700272,16.201575
997,155.693604,18.022844,278.575512,16.739413
998,199.074623,18.885769,346.645549,15.543786


In [3]:
# Instrumental-variables fit: wage is the outcome, educ is the regressor being
# instrumented, fathereduc is the instrument, and no extra controls are used.
iv = IV(
    df=magic,
    outcome='wage',
    independent=['educ'],
    controls=[],
    instruments=['fathereduc'],
)

iv.summary()

In [10]:
"""Perfect world where we can measure individual ability"""
# Benchmark regression: wage on educ with ability included as a regressor.
# This is the specification the naive model (educ only) cannot match when
# ability is not observed.
perfect_model = LinReg(
    df=magic,
    outcome='wage',
    independent=['ability', 'educ'],
)

perfect_model.summary(content_type='html')

In [11]:
"""Naive model"""

# Regress wage on educ alone, leaving ability out of the specification.
naive_model = LinReg(
    df=magic,
    outcome='wage',
    independent=['educ'],
)

naive_model.summary(content_type='html')

To be valid, an instrument must meet three criteria:

Relevance: Instrument is correlated with policy variable
Exclusion: Instrument is correlated with outcome only through the policy variable
Exogeneity: Instrument isn’t correlated with anything else that affects the outcome but is left out of the model (i.e. omitted variables)

In [12]:
# Relevance check: regress the policy variable (educ) on the instrument
# (fathereduc). The instrument is only useful if it is correlated with educ.
# This is the same specification as the first-stage regression fitted later
# in the notebook.
relevance = LinReg(
    df=magic,
    outcome='educ',
    independent=['fathereduc'],
)

relevance.summary(content_type='html')

In [13]:
"""Exclusion"""

# Regress the outcome (wage) directly on the instrument (fathereduc).
# NOTE(review): this shows the overall wage–fathereduc association; whether
# that association runs only through educ is not something this regression
# alone establishes — confirm the intended interpretation.
exclusion = LinReg(
    df=magic,
    outcome='wage',
    independent=['fathereduc'],
)

exclusion.summary(content_type='html')

In [14]:
"""Luckily in this toy dataset we have ability"""

# Exogeneity check: regress ability — the variable the naive model omits — on
# the instrument (fathereduc). The instrument should not be correlated with it.
# Stored under its own name so it does not overwrite the `exclusion` model
# (wage on fathereduc) fitted in the preceding cell.
exogeneity = LinReg(
    df=magic,
    outcome='ability',
    independent=['fathereduc'],
)

exogeneity.summary(content_type='html')

In [15]:
"""first stage"""

# First stage of the hand-rolled two-stage procedure: regress educ on the
# instrument (fathereduc). Its predictions are used as the regressor in the
# second stage below.
first_stage = LinReg(
    df=magic,
    outcome='educ',
    independent=['fathereduc'],
)

first_stage.summary(content_type='html')

In [16]:
# Feed the instrument values through the fitted first stage and attach the
# resulting predictions of educ to the frame as `independent_hat`, the column
# the second-stage regression reads.
instrument_values = magic['fathereduc'].values
predicted_educ = first_stage.predict(instrument_values)
magic = magic.assign(independent_hat=predicted_educ)
magic

Unnamed: 0,wage,educ,ability,fathereduc,independent_hat
0,146.347807,18.053194,348.240021,17.158322,17.382994
1,147.599580,15.845486,181.160724,13.988533,14.983731
2,161.820228,15.105207,337.367725,15.994311,16.501936
3,105.082941,16.458131,106.458032,21.413172,20.603558
4,167.562196,18.793815,301.510006,16.457630,16.852630
...,...,...,...,...,...
995,157.149509,14.883227,461.290475,11.446059,13.059292
996,166.003573,14.542572,292.700272,16.201575,16.658818
997,155.693604,18.022844,278.575512,16.739413,17.065916
998,199.074623,18.885769,346.645549,15.543786,16.160927


In [18]:
# Manual second stage: regress wage on the first-stage prediction of educ
# (`independent_hat`) instead of on educ itself.
# NOTE(review): standard errors from a hand-run second stage generally do not
# account for `independent_hat` being estimated — presumably why the IV class's
# own second-stage model is displayed alongside; confirm how IV computes them.
second_stage = LinReg(
    df=magic,
    outcome='wage',
    independent=['independent_hat'],
)

# Compare all four fits side by side.
display_models([
    perfect_model,
    naive_model,
    second_stage,
    iv.second_stage_model,
])