# **Importing libraries and sample dataset**

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
!pip install bnlearn==0.7.3
!pip install numpy==1.19.5
!pip install pandas==1.3.5

In [4]:
import bnlearn as bn
import pandas as pd
import numpy as np

In [6]:
# Show the pandas and numpy versions actually loaded in this kernel.
# NOTE(review): the recorded output reports numpy 1.21.5 while the install cell
# pins numpy==1.19.5; confirm which version the notebook is meant to run on.
pd.__version__,np.__version__

('1.3.5', '1.21.5')

In [None]:
# Load bnlearn's 'sprinkler' example dataset and preview its first five rows.
df = bn.import_example('sprinkler')
df.head()

[bnlearn] >Import dataset..


Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass
0,0,0,0,0
1,1,0,1,1
2,0,1,0,1
3,1,1,1,1
4,1,1,1,1


# **Step 0: How different are our treatment and control groups?**


In [None]:
# Number of observations for every (Sprinkler, Cloudy) combination.
df.groupby(['Sprinkler', 'Cloudy'])[['Wet_Grass']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Wet_Grass
Sprinkler,Cloudy,Unnamed: 2_level_1
0,0,225
0,1,466
1,0,263
1,1,46


The treated group (Sprinkler = 1) contains far fewer cloudy days (46) than the control group (466), so the two groups are not directly comparable.

# **Step 1: Calculating the propensity scores**

The propensity score is the conditional probability of receiving the treatment given a set of covariates:

$e(x) = P(T = 1 \mid X = x)$

Here, it is the probability of receiving the treatment (Sprinkler = 1) given whether or not the day was cloudy.


In [None]:
# Share of cloudy days on which the sprinkler was on (propensity score for Cloudy == 1)

is_cloudy = df["Cloudy"] == 1
n_cloudy = int(is_cloudy.sum())
n_sprinkler_cloudy = int((is_cloudy & (df["Sprinkler"] == 1)).sum())
e_cloudy = n_sprinkler_cloudy / n_cloudy
print(e_cloudy)

0.08984375


In [None]:
# Share of non-cloudy days on which the sprinkler was on (propensity score for Cloudy == 0)

is_not_cloudy = df["Cloudy"] == 0
n_non_cloudy = int(is_not_cloudy.sum())
n_sprinkler_non_cloudy = int((is_not_cloudy & (df["Sprinkler"] == 1)).sum())
e_non_cloudy = n_sprinkler_non_cloudy / n_non_cloudy
print(e_non_cloudy)

0.5389344262295082


In [None]:
# Attach the propensity score to every row: e_cloudy where Cloudy == 1, e_non_cloudy otherwise.
df["propensity"] = df["Cloudy"].eq(1).map({True: e_cloudy, False: e_non_cloudy})


In [None]:
# Inspect the frame, which now carries the per-row propensity score.
df

Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass,propensity
0,0,0,0,0,0.538934
1,1,0,1,1,0.089844
2,0,1,0,1,0.538934
3,1,1,1,1,0.089844
4,1,1,1,1,0.089844
...,...,...,...,...,...
995,1,0,1,1,0.089844
996,1,0,1,1,0.089844
997,1,0,1,1,0.089844
998,0,0,0,0,0.538934


## **Step 2: Pairing each treated unit (Sprinkler = 1) with a control unit that has a similar propensity score**

In [None]:
## Keep only the 'treated' units (Sprinkler == 1) and renumber the rows from 0,
## so they can later be aligned by position with their matched controls
is_treated = df["Sprinkler"] == 1
treated = df.loc[is_treated].reset_index(drop=True)
treated.head()

Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass,propensity
0,0,1,0,1,0.538934
1,1,1,1,1,0.089844
2,1,1,1,1,0.089844
3,1,1,1,1,0.089844
4,0,1,0,1,0.538934


In [None]:
## Keep only the 'untreated' (control) units, i.e. rows with Sprinkler == 0
untreated = df.loc[df["Sprinkler"].eq(0)]

In [None]:
## Match every treated unit with one randomly drawn control unit that has the same propensity score
MATCHING_SEED = 42
# One shared RandomState makes the matching reproducible. Passing the same integer
# seed to every `.sample()` call instead would return the same control row for all
# treated units that share a propensity score.
matching_rng = np.random.RandomState(MATCHING_SEED)
matched_control = []

def add_matched_control(unit):
    """Draw one untreated unit with the same propensity score as `unit` and record it.

    Controls are drawn independently for every treated unit, so the same control
    row may be matched more than once (matching with replacement).

    Parameters
    ----------
    unit : pandas.Series
        A treated unit; only its "propensity" entry is read.

    Returns
    -------
    None
        The sampled control row is appended to the module-level `matched_control` list.

    Raises
    ------
    ValueError
        If no untreated unit has exactly the same propensity score.
    """
    candidates = untreated[untreated["propensity"] == unit["propensity"]]
    if candidates.empty:
        raise ValueError(
            f"No untreated unit with propensity score {unit['propensity']!r} to match against"
        )
    # `.iloc[0]` turns the sampled one-row frame into a Series, so integer columns
    # show up as floats once the rows are reassembled into a DataFrame.
    control_unit = candidates.sample(n=1, random_state=matching_rng).iloc[0]
    matched_control.append(control_unit)

# Explicit loop: the function is called only for its side effect on `matched_control`.
for _row_label, treated_unit in treated.iterrows():
    add_matched_control(treated_unit)
# Renumber from 0 so row i lines up with row i of `treated`.
matched_control_df = pd.DataFrame(matched_control).reset_index(drop=True)

In [None]:
# Inspect the matched controls: one row per treated unit, in the order the treated units were processed.
matched_control_df

Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass,propensity
0,0.0,0.0,1.0,0.0,0.538934
1,1.0,0.0,0.0,0.0,0.089844
2,1.0,0.0,1.0,1.0,0.089844
3,1.0,0.0,1.0,1.0,0.089844
4,0.0,0.0,1.0,1.0,0.538934
...,...,...,...,...,...
304,0.0,0.0,1.0,1.0,0.538934
305,1.0,0.0,1.0,1.0,0.089844
306,0.0,0.0,0.0,0.0,0.538934
307,1.0,0.0,1.0,1.0,0.089844


In [None]:
# Display the treated units; row i here is later joined with row i of matched_control_df.
treated

Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass,propensity
0,0,1,0,1,0.538934
1,1,1,1,1,0.089844
2,1,1,1,1,0.089844
3,1,1,1,1,0.089844
4,0,1,0,1,0.538934
...,...,...,...,...,...
304,0,1,0,1,0.538934
305,1,1,1,1,0.089844
306,0,1,0,1,0.538934
307,1,1,0,1,0.089844


# **Step 3: Calculating the Average Treatment Effect**

In [None]:
##merging treatment and control functions
paired_sample = treated.join(matched_control_df, rsuffix="_control")

In [None]:
# Inspect the paired table: the treated unit's columns first, then the matched control's "_control" columns.
paired_sample

Unnamed: 0,Cloudy,Sprinkler,Rain,Wet_Grass,propensity,Cloudy_control,Sprinkler_control,Rain_control,Wet_Grass_control,propensity_control
0,0,1,0,1,0.538934,0.0,0.0,1.0,0.0,0.538934
1,1,1,1,1,0.089844,1.0,0.0,0.0,0.0,0.089844
2,1,1,1,1,0.089844,1.0,0.0,1.0,1.0,0.089844
3,1,1,1,1,0.089844,1.0,0.0,1.0,1.0,0.089844
4,0,1,0,1,0.538934,0.0,0.0,1.0,1.0,0.538934
...,...,...,...,...,...,...,...,...,...,...
304,0,1,0,1,0.538934,0.0,0.0,1.0,1.0,0.538934
305,1,1,1,1,0.089844,1.0,0.0,1.0,1.0,0.089844
306,0,1,0,1,0.538934,0.0,0.0,0.0,0.0,0.538934
307,1,1,0,1,0.089844,1.0,0.0,1.0,1.0,0.089844


In [None]:
# Mean of the per-pair outcome difference: treated Wet_Grass minus matched-control Wet_Grass.
# NOTE(review): only treated units are matched, so this estimate is taken over the
# treated group; confirm whether it should be reported as ATE or as ATT.
pairwise_effect = paired_sample["Wet_Grass"].sub(paired_sample["Wet_Grass_control"])
ATE = pairwise_effect.mean()
ATE

0.6245954692556634