# Get Started with Causai

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DanielhCarranza/causai/blob/master/notebooks/00_Get_Started_with_Causai.ipynb)

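If you are running in Colab, follow the steps below first (inside a notebook cell, prefix shell commands with `!` and use `%cd` instead of `cd` so the directory change persists):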

```bash
git clone https://github.com/DanielhCarranza/causai.git
cd causai
pip install -r requirements.txt
```


In [26]:
from importlib.util import find_spec
if find_spec("causai") is None:
    import sys
    sys.path.append('..')

import pandas as pd

import causai
# from causai.models import SomeCausalModel
# from causai.interpreter import SomeInterpreter
from causai.datasets.syntheticdataset import SyntheticDataset

%autoreload 2
%matplotlib inline
%reload_ext autoreload

In [13]:
# Generate a synthetic dataset using the generator's default settings.
data = SyntheticDataset().generate_data()

In [14]:
# Preview the first rows of the generated data.
data.head()

Unnamed: 0,blood_pressure,sodium,age,proteinuria
0,147.157602,1,73.820262,46.140554
1,133.339602,1,67.000786,40.970603
2,139.833515,0,69.89369,41.835567
3,152.500558,1,76.204466,46.401798
4,149.644768,1,74.33779,44.082575


In [16]:
from sklearn.linear_model import LinearRegression
def estimate_causal_effect(Xt, y, model=None, treatment_idx=0, regression_coef=False):
    """Estimate the effect of one column of ``Xt`` on ``y`` using a fitted model.

    The model is fit on ``(Xt, y)``. The estimate is then either the fitted
    coefficient of the treatment column, or the mean difference between the
    model's predictions with the treatment column set to 1 and set to 0.

    Parameters
    ----------
    Xt : pandas.DataFrame
        Features: the treatment column plus any covariates to adjust for.
    y : array-like
        Outcome values passed to ``model.fit`` together with ``Xt``.
    model : estimator, optional
        Object exposing ``fit(X, y)`` and ``predict(X)`` (and ``coef_`` when
        ``regression_coef=True``). When omitted, a new ``LinearRegression``
        is created for this call.
    treatment_idx : int, default 0
        Position of the treatment column in ``Xt.columns`` (and in ``coef_``).
    regression_coef : bool, default False
        If True, return ``model.coef_[treatment_idx]`` instead of the
        prediction-difference estimate.

    Returns
    -------
    The coefficient of the treatment column, or the mean of
    ``model.predict(Xt with treatment=1) - model.predict(Xt with treatment=0)``.
    """
    if model is None:
        # Build the default estimator per call. A default created in the
        # signature would be a single instance shared (and refit) by every call.
        model = LinearRegression()

    model.fit(Xt, y)
    if regression_coef:
        return model.coef_[treatment_idx]

    treatment_col = Xt.columns[treatment_idx]
    # Work on copies so the caller's frame is never modified.
    Xt1 = Xt.copy()
    Xt1[treatment_col] = 1
    Xt0 = Xt.copy()
    Xt0[treatment_col] = 0
    return (model.predict(Xt1) - model.predict(Xt0)).mean()

In [19]:
# Generate two datasets: one with binary_treatment=False (continuous treatment)
# and one with the generator's defaults.
# NOTE(review): presumably binary_treatment defaults to True, which is why the
# second frame is named binary_t_df — confirm against SyntheticDataset.
continuous_t_df= SyntheticDataset().generate_data(binary_treatment=False)
binary_t_df = SyntheticDataset().generate_data()

In [33]:
# NOTE(review): module-level placeholders. Nothing in this cell assigns to them
# again — the reporting functions defined below only use local variables and
# declare no `global` — so these remain None after the functions run.
ate_est_naive = None
ate_est_adjust_all = None
ate_est_adjust_age = None

def adjusment_estimates(df):
    """Print prediction-difference estimates of the effect of 'sodium' on 'blood_pressure'.

    Three feature sets are used: 'sodium' alone, 'sodium' with 'age' and
    'proteinuria', and 'sodium' with 'age'. Each estimate comes from
    ``estimate_causal_effect`` with its default ``regression_coef`` setting.
    Nothing is returned; the results are only printed.
    """
    def effect_given(columns):
        # 'sodium' is always the first column, hence treatment_idx=0.
        return estimate_causal_effect(df[columns], df['blood_pressure'], treatment_idx=0)

    naive = effect_given(['sodium'])
    all_covariates = effect_given(['sodium', 'age', 'proteinuria'])
    age_only = effect_given(['sodium', 'age'])

    print('# Adjustment Formula Estimates #')
    report = (
        ('Naive ATE estimate:\t\t\t\t\t\t\t', naive),
        ('ATE estimate adjusting for all covariates:\t', all_covariates),
        ('ATE estimate adjusting for age:\t\t\t\t', age_only),
    )
    for label, value in report:
        print(label, value)
    print()

def regression_coef_estimates(df):
    """Print regression-coefficient estimates of the effect of 'sodium' on 'blood_pressure'.

    Three feature sets are used: 'sodium' alone, 'sodium' with 'age' and
    'proteinuria', and 'sodium' with 'age'. Each estimate is the value
    returned by ``estimate_causal_effect`` with ``regression_coef=True``.
    Nothing is returned; the results are only printed.
    """
    def coefficient_given(columns):
        # 'sodium' is always the first column, hence treatment_idx=0.
        return estimate_causal_effect(
            df[columns],
            df['blood_pressure'],
            treatment_idx=0,
            regression_coef=True,
        )

    naive = coefficient_given(['sodium'])
    all_covariates = coefficient_given(['sodium', 'age', 'proteinuria'])
    age_only = coefficient_given(['sodium', 'age'])

    print('# Regression Coefficient Estimates #')
    report = (
        ('Naive ATE estimate:\t\t\t\t\t\t\t', naive),
        ('ATE estimate adjusting for all covariates:\t', all_covariates),
        ('ATE estimate adjusting for age:\t\t\t\t', age_only),
    )
    for label, value in report:
        print(label, value)
    print()

In [30]:
# Report both kinds of estimate for the binary-treatment dataset.
print('  Binary Data ')
print()
for report in (adjusment_estimates, regression_coef_estimates):
    report(binary_t_df)

Binary Data 

# Adjustment Formula Estimates #
Naive ATE estimate:							 4.923220185643586
ATE estimate adjusting for all covariates:	 0.9282888903232218
ATE estimate adjusting for age:				 1.119149017071768

# Regression Coefficient Estimates #
Naive ATE estimate:							 4.92322018564359
ATE estimate adjusting for all covariates:	 0.9282888903232221
ATE estimate adjusting for age:				 1.1191490170717682



In [32]:
# Report both kinds of estimate for the continuous-treatment dataset.
print('  Continuous Data ')
print()
for report in (adjusment_estimates, regression_coef_estimates):
    report(continuous_t_df)

Continuous Data 

# Adjustment Formula Estimates #
Naive ATE estimate:							 3.4548087255162017
ATE estimate adjusting for all covariates:	 0.8625915379860507
ATE estimate adjusting for age:				 1.0481919455639501

# Regression Coefficient Estimates #
Naive ATE estimate:							 3.4548087255162034
ATE estimate adjusting for all covariates:	 0.8625915379860509
ATE estimate adjusting for age:				 1.0481919455639503

