##### Install causalinference toolkit

In [1]:
pip install causalinference

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting causalinference
  Downloading CausalInference-0.1.3-py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: causalinference
Successfully installed causalinference-0.1.3


##### Import pandas and CausalModel

In [2]:
import numpy as np
import pandas as pd
from causalinference import CausalModel

##### Read the law school csv file (law_simplified.csv)
You should have uploaded that file into colab

In [4]:
df = pd.read_csv("law_simplified.csv")
df['sex'] = df['sex']-1
df.head()

Unnamed: 0,sex,race,LSAT,UGPA,ZFYA
0,0,7,39.0,3.1,-0.98
1,0,7,36.0,3.0,0.09
2,1,7,30.0,3.1,-0.35
3,1,3,39.0,2.2,0.58
4,0,7,37.0,3.4,-1.26


##### CausalModel class requires three numpy arrays: 
    A (sensitive attribute with 0 and 1 values), 
    X (for the covariates). For this example consider LSAT and UGPA as covariates
    Y (outcome)

##### Create those arrays from the raw dataframe, and create the CausalModel

In [5]:
#CausalModel takes as inputs three NumPy arrays: 
# Y, an N-vector of observed outcomes;
# A, an N-vector of treatment status indicators;
# and X, an N-by-K matrix of covariates. 

A = df['sex'].values
X = df[['LSAT', 'UGPA']].values
Y = df['ZFYA'].values

causal = CausalModel(Y, A, X)

##### Estimate the causal effect using:
    Regression
    Stratification
    Weighting 
    Matching

Estimate the causal effect using Regression

In [8]:
causal.est_via_ols()
print(causal.estimates)


Treatment Effect Estimates: OLS

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.079      0.012      6.396      0.000      0.054      0.103
           ATC      0.069      0.012      5.602      0.000      0.045      0.094
           ATT      0.086      0.013      6.820      0.000      0.061      0.110



Estimate the causal effect using Stratification

In [9]:
causal.est_propensity_s()
print(causal.propensity)


Estimated Parameters of Propensity Score

                    Coef.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
     Intercept      1.278      0.130      9.833      0.000      1.023      1.532
            X1     -0.838      0.036    -23.386      0.000     -0.909     -0.768
            X0      0.046      0.003     17.235      0.000      0.041      0.051



In [10]:
causal.trim_s()

In [11]:
causal.stratify_s()
print(causal.strata)


Stratification Summary

              Propensity Score         Sample Size     Ave. Propensity   Outcome
   Stratum      Min.      Max.  Controls   Treated  Controls   Treated  Raw-diff
--------------------------------------------------------------------------------
         1     0.260     0.427       853       578     0.392     0.395    -0.046
         2     0.428     0.462       731       618     0.446     0.447     0.074
         3     0.462     0.503      1440      1336     0.484     0.484     0.024
         4     0.504     0.533      1348      1430     0.519     0.519     0.123
         5     0.534     0.561      1202      1485     0.548     0.549     0.119
         6     0.562     0.591      1123      1473     0.576     0.576     0.104
         7     0.591     0.622      1104      1645     0.606     0.606     0.102
         8     0.622     0.666      1003      1732     0.642     0.643     0.066
         9     0.667     0.700       407       914     0.682     0.682     0.147
   

In [12]:
causal.est_via_blocking()
print(causal.estimates)


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.076      0.012      6.151      0.000      0.052      0.100
           ATC      0.072      0.012      5.807      0.000      0.048      0.096
           ATT      0.079      0.013      6.213      0.000      0.054      0.103



  olscoef = np.linalg.lstsq(Z, Y)[0]


Estimate the causal effect using Weighting and Matching

In [13]:
causal.est_via_weighting()
causal.est_via_matching()
print(causal.estimates)

  wlscoef = np.linalg.lstsq(Z_w, Y_w)[0]



Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.076      0.012      6.151      0.000      0.052      0.100
           ATC      0.072      0.012      5.807      0.000      0.048      0.096
           ATT      0.079      0.013      6.213      0.000      0.054      0.103

Treatment Effect Estimates: Weighting

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.077      0.012      6.290      0.000      0.053      0.101

Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.074      0.013      5.570      0.000      0.048   