In [16]:
import numpy as np
import math
import pandas as pd

In [17]:
# Read the two datasets from CSV (paths are relative to the working directory).
# Later cells copy their 'Y' and 'A' columns next to the propensity scores.
lowDim_dataset = pd.read_csv('../data/lowDim_dataset.csv')
highDim_dataset = pd.read_csv('../data/highDim_dataset.csv')

In [20]:
def stratify(df, n_strata=5):
    """Split ``df`` into strata of (roughly) equal size by propensity score.

    The stratum limits are the ``k / n_strata`` quantiles of the
    ``'propensity_scores'`` column for ``k = 1 .. n_strata``; the last limit
    is therefore the maximum score. Upper limits are inclusive, so a row whose
    score sits exactly on a limit goes to the lower stratum. With tied scores
    the strata can have unequal sizes, and some may be empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'propensity_scores'`` column. No other column is
        required; all columns are carried over into the strata.
    n_strata : int, default 5
        Number of strata. The default reproduces the quintile split.

    Returns
    -------
    list
        ``[strata, Q_ranges]`` where ``strata`` is a list of ``n_strata``
        DataFrames (row subsets of ``df``, lowest scores first) and
        ``Q_ranges`` is ``[None, Q1, ..., Qn]``, the upper limit of each
        stratum preceded by a ``None`` placeholder.

    Raises
    ------
    ValueError
        If ``n_strata`` is smaller than 1.
    """
    if n_strata < 1:
        raise ValueError("n_strata must be a positive integer")

    scores = df['propensity_scores']

    # Upper limit of every stratum (one quantile per stratum).
    limits = [np.quantile(scores, k / n_strata) for k in range(1, n_strata + 1)]

    strata = []
    for k, upper in enumerate(limits):
        if k == 0:
            # Lowest stratum: closed on both sides so the minimum is included.
            members = scores <= upper
        elif k == n_strata - 1:
            # Highest stratum: everything above the previous limit.
            members = scores > limits[k - 1]
        else:
            # Interior strata are half-open: (previous limit, own limit].
            members = (scores > limits[k - 1]) & (scores <= upper)
        strata.append(df[members])

    Q_ranges = [None] + limits

    return [strata, Q_ranges]

In [40]:
def strat_ATE(quintiles, Q_ranges=None):
    """Estimate the average treatment effect (ATE) by stratification.

    For every stratum ``j`` the difference between the mean outcome of the
    treated rows (``A == 1``) and the mean outcome of the control rows
    (``A == 0``) is computed. The differences are combined as a weighted sum
    with weights ``Nj / N``, where ``Nj`` is the number of rows in the stratum
    and ``N`` is the number of rows over all strata.

    Parameters
    ----------
    quintiles : sequence of pandas.DataFrame
        The strata, e.g. the first element returned by ``stratify``. Each
        frame needs an outcome column ``'Y'`` and a treatment column ``'A'``
        coded as 0/1. Any number of strata is accepted.
    Q_ranges : optional
        Not used by the computation; accepted so existing two-argument calls
        keep working.

    Returns
    -------
    float
        The stratified ATE estimate.

    Raises
    ------
    ValueError
        If there are no rows at all, or if a non-empty stratum has no treated
        or no control rows (its treatment effect is then undefined).
    """
    N = sum(len(stratum) for stratum in quintiles)
    if N == 0:
        raise ValueError("Cannot estimate the ATE: the strata contain no rows")

    ate = 0.0
    for j, stratum in enumerate(quintiles, start=1):
        Nj = len(stratum)  # Number of rows in the stratum
        if Nj == 0:
            # An empty stratum (possible with tied scores) has weight 0.
            continue

        outcome = stratum['Y'].to_numpy(dtype=float)
        treatment = stratum['A'].to_numpy()
        treated = treatment == 1
        control = treatment == 0

        N1j = int(treated.sum())  # Number of treated rows
        N0j = int(control.sum())  # Number of control rows
        if N1j == 0 or N0j == 0:
            raise ValueError(
                "Stratum %d has %d treated and %d control rows; "
                "both groups are needed to estimate its treatment effect"
                % (j, N1j, N0j)
            )

        # Difference in mean outcome between treated and control rows.
        effect = outcome[treated].sum() / N1j - outcome[control].sum() / N0j
        ate += Nj / N * effect

    return ate

**lowDim_dataset** — stratified ATE estimate for the low-dimensional dataset

In [33]:
# Load the previously computed propensity scores and place the outcome (Y)
# and treatment (A) columns of the low-dimensional dataset next to them.
lowDim_scores = pd.read_csv('../output/low_dim_propensity_scores.csv')
lowDim_scores.insert(loc=1, column="Y", value=lowDim_dataset['Y'])
lowDim_scores.insert(loc=2, column="A", value=lowDim_dataset['A'])

In [37]:
# Stratify the low-dimensional sample on its propensity scores and report
# the stratified ATE estimate.
quintiles, Q_ranges = stratify(lowDim_scores)
print("Estimated ATE: ", strat_ATE(quintiles=quintiles, Q_ranges=Q_ranges))

Estimated ATE:  2.463529123502176


**highDim_dataset** — stratified ATE estimate for the high-dimensional dataset

In [38]:
# Load the previously computed propensity scores and place the outcome (Y)
# and treatment (A) columns of the high-dimensional dataset next to them.
highDim_scores = pd.read_csv('../output/high_dim_propensity_scores.csv')
highDim_scores.insert(loc=1, column="Y", value=highDim_dataset['Y'])
highDim_scores.insert(loc=2, column="A", value=highDim_dataset['A'])

In [39]:
# Stratify the high-dimensional sample on its propensity scores and report
# the stratified ATE estimate.
quintiles, Q_ranges = stratify(highDim_scores)
print("Estimated ATE: ", strat_ATE(quintiles=quintiles, Q_ranges=Q_ranges))

Estimated ATE:  -3.0103245509000693
