# Causal Trees

### Installations

In [4]:
%pip install -q econml

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


### Package Imports

In [9]:
import numpy as np
import pandas as pd
from econml.cate_interpreter import SingleTreeCateInterpreter
from econml.cate_interpreter import SingleTreePolicyInterpreter
from econml.policy import DRPolicyTree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import _tree

### Class Definitions and Functions

In [34]:
class ABCausal:
    """Wrap an econml ``DRPolicyTree`` for analysing a treatment experiment.

    The constructor splits the input frame into outcome (``Y``), treatment
    (``T``) and feature (``X``) parts, optionally holding out a test split.
    The remaining methods build and fit the policy tree and summarise the
    effect read from its leaves.
    """

    def __init__(self, df, test_train_ratio=0.25, effect_variable='Y',
                 treatment_variable='T', redundant_features=None,
                 scale_feature_matrix=False):
        """Split ``df`` and extract the outcome, treatment and feature parts.

        Args:
            df: DataFrame holding the outcome, treatment and feature columns.
            test_train_ratio: Fraction of rows held out as a test split. A
                value that is not greater than ``0`` uses every row for
                training and leaves the ``*_test`` attributes as ``None``.
            effect_variable: Name of the outcome column.
            treatment_variable: Name of the treatment column.
            redundant_features: Column names to exclude from the feature
                matrix. ``None`` (the default) drops no extra columns.
            scale_feature_matrix: If true, the features are standardised with
                ``StandardScaler``.
        """
        # Copy into a fresh list so a shared default is never mutated/reused.
        redundant_features = list(redundant_features or [])

        # Used by define_causal_tree() when no explicit parameters are given.
        self.default_params_causal_tree = {
            'max_depth': 4,
            'min_samples_split': 1_000,
            'min_samples_leaf': 300,
            'honest': True,
            'random_state': 42,
            'mc_iters': 10,
        }

        if test_train_ratio > 0.0:
            self.df_train, self.df_test = train_test_split(
                df, test_size=test_train_ratio, random_state=42)
            self.Y_train, self.T_train, self.X_train = self.return_variables(
                self.df_train, effect_variable, treatment_variable,
                redundant_features=redundant_features,
                scale_feature_matrix=scale_feature_matrix)
            # The test split drops the same columns as the training split so
            # both feature matrices contain identical columns.
            # NOTE(review): with scale_feature_matrix=True each split is
            # standardised by its own freshly fitted StandardScaler.
            self.Y_test, self.T_test, self.X_test = self.return_variables(
                self.df_test, effect_variable, treatment_variable,
                redundant_features=redundant_features,
                scale_feature_matrix=scale_feature_matrix)
        else:
            self.df_train = df
            # No hold-out split: define the test attributes explicitly.
            self.df_test = None
            self.Y_test = self.T_test = self.X_test = None
            self.Y_train, self.T_train, self.X_train = self.return_variables(
                self.df_train, effect_variable, treatment_variable,
                redundant_features=redundant_features,
                scale_feature_matrix=scale_feature_matrix)

    def return_variables(self, df, effect_variable, treatment_variable,
                         redundant_features=None, scale_feature_matrix=False):
        """Return ``(Y, T, X)`` extracted from ``df``.

        Args:
            df: Source DataFrame.
            effect_variable: Name of the outcome column.
            treatment_variable: Name of the treatment column.
            redundant_features: Additional columns to drop from ``X``;
                ``None`` means none.
            scale_feature_matrix: If true, ``X`` is replaced by the output of
                ``StandardScaler().fit_transform(X)``.

        Returns:
            tuple: The outcome column, the treatment column and the feature
            matrix (``df`` with a reset index and without the outcome,
            treatment and redundant columns).
        """
        dropped_columns = [effect_variable, treatment_variable]
        dropped_columns += list(redundant_features or [])

        Y = df[effect_variable]
        T = df[treatment_variable]
        X = df.reset_index(drop=True).drop(dropped_columns, axis=1)
        if scale_feature_matrix:
            X = StandardScaler().fit_transform(X)

        return Y, T, X

    def return_leaf_node_effectsize(self):
        """Summarise the leaf values of the fitted policy tree.

        Terminal nodes are the nodes whose feature index is ``-2``. For every
        leaf whose value in column 1 is negative, the gain over the status quo
        "everyone gets treatment (T=1)" is ``n_samples * -value``. The share
        of leaf samples with a non-positive value is printed as the share
        that should get no treatment.

        Returns:
            tuple: ``(n_groups_with_effect, additional_outcomes)`` - the
            number of leaves with a negative value and the summed gain over
            those leaves (doubled when the tree is honest). The gain is
            ``0.0`` when no leaf has a negative value.
        """
        # NOTE(review): the attribute paths below (``tree_model_`` / ``tree_``)
        # are kept as written; confirm they exist on the policy model of the
        # installed econml version.
        policy_model = self.causal_tree.policy_model_
        tree = policy_model.tree_model_.tree_
        node_values = policy_model.tree_.value
        feature, n_samples = self.return_tree_features(tree)
        leaf_nodes = np.where(feature == -2)[0]

        # Effect over status quo: everyone gets treatment (T=1).
        # (Alternative baseline "everyone gets nothing" would instead sum
        # n_samples * value over leaves with value >= 0.)
        net_effect = [
            n_samples[leaf] * (-1) * node_values[leaf, 1]
            for leaf in leaf_nodes
            if node_values[leaf, 1] < 0
        ]
        n_samples_negative = sum(
            n_samples[leaf] for leaf in leaf_nodes if node_values[leaf, 1] <= 0)
        n_samples_positive = sum(
            n_samples[leaf] for leaf in leaf_nodes if node_values[leaf, 1] > 0)
        percentage_without_email = n_samples_negative / (n_samples_negative + n_samples_positive)
        print(f'{percentage_without_email:.2%} should get no treatment.')

        # sum([]) is the int 0, which cannot be indexed; fall back to 0.0.
        additional_outcomes = sum(net_effect)[0] if net_effect else 0.0
        n_groups_with_effect = len(net_effect)
        # If the tree is honest, only half of the samples are used in the
        # tree evaluation.
        if policy_model.tree_model_.get_params()["honest"]:
            additional_outcomes *= 2
        return n_groups_with_effect, additional_outcomes

    def return_tree_features(self, tree):
        """Return ``(tree.feature, tree.weighted_n_node_samples)``."""
        feature = tree.feature
        n_samples = tree.weighted_n_node_samples
        return feature, n_samples

    def define_causal_tree(self, params=None):
        """Create ``self.causal_tree`` as a ``DRPolicyTree``.

        Args:
            params: Keyword arguments for ``DRPolicyTree``. When empty or
                ``None``, ``self.default_params_causal_tree`` is used.
        """
        if not params:
            params = self.default_params_causal_tree
        self.causal_tree = DRPolicyTree(**params)

    def fit_causal_tree(self):
        """Fit ``self.causal_tree`` on the training split (no controls ``W``)."""
        self.causal_tree.fit(self.Y_train, self.T_train, X=self.X_train, W=None)

    def return_treatment_mean(self):
        """Return the mean of the tree's predictions on the training features."""
        return self.causal_tree.predict(self.X_train).mean()

    def return_outcome_mean(self):
        """Return the mean of the training outcome."""
        return self.Y_train.mean()

    def return_tree_policy(self):
        """Return the ``policy_model_`` of the fitted tree."""
        return self.causal_tree.policy_model_

    def plot_tree(self):
        """Open a wide figure and plot the fitted tree into it."""
        # NOTE(review): ``plt`` (presumably matplotlib.pyplot) is not imported
        # in the visible cells; it must be in scope before this is called.
        plt.figure(figsize=(100, 25))
        self.causal_tree.plot()

    def model_causal_tree(self, params=None):
        """Run the full pipeline: define, fit, report and plot the tree.

        Args:
            params: Optional keyword arguments for ``DRPolicyTree``; forwarded
                to ``define_causal_tree`` (defaults are used when omitted).
        """
        self.define_causal_tree(params=params)
        self.fit_causal_tree()
        print(f"Mean: {self.return_treatment_mean():0.2%}")
        # impact_summary() evaluates the leaves itself, so the leaf statistics
        # are computed (and printed) once.
        self.impact_summary()
        self.plot_tree()

    def impact_summary(self):
        """Print the additional outcomes of the tree policy versus two baselines.

        The baselines are extrapolated from the training split: the outcome
        sum of the treated rows divided by the treated share ("100%
        treatment") and the outcome sum of the untreated rows divided by the
        untreated share ("0% treatment").
        """
        n_groups, additional_outcomes = self.return_leaf_node_effectsize()
        treated_share = self.T_train.mean()
        original_outcomes_treatment = int(self.Y_train[self.T_train == 1].sum() / treated_share)
        original_outcomes_no_treatment = int(self.Y_train[self.T_train == 0].sum() / (1 - treated_share))

        if isinstance(additional_outcomes, list):
            additional_outcomes = additional_outcomes[0]
        perc_increase_treatment = additional_outcomes / original_outcomes_treatment
        perc_increase_no_treatment = (additional_outcomes + original_outcomes_treatment) / original_outcomes_no_treatment - 1

        print(f"Causal impact: {float(additional_outcomes):0.2f} additional desired actions in {n_groups} selected target group(s)!\n"
              f"This represents a percentage increase of {perc_increase_treatment:.2%} over 100% treatment\n"
              f"This represents a percentage increase of {perc_increase_no_treatment:.2%} over 0% treatment\n"
              f"0% treatment: {original_outcomes_no_treatment} vs 100% treatment: {original_outcomes_treatment}"
             )

### Load Data

In [36]:
# Read the raw data, parse the 'day' column as datetimes and derive the
# weekday number from it.
df = (
    pd.read_csv('data.csv')
    .assign(day=lambda frame: pd.to_datetime(frame['day']))
    .assign(weekday=lambda frame: frame['day'].dt.weekday)
)

In [37]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,user_id,day,ch1,ch2,treated,kpi,weekday
0,0,2023-11-17,19,4,0,70,4
1,0,2023-11-18,62,4,0,27,5
2,0,2023-11-19,43,4,0,42,6
3,0,2023-11-20,78,4,0,4,0
4,0,2023-11-21,77,4,0,21,1


In [38]:
# Convert every column with pd.to_numeric; errors='coerce' turns values that
# cannot be parsed into NaN instead of raising.
df = df.apply(pd.to_numeric, errors='coerce')

### Fit Causal Tree

In [43]:
# Build the analysis object: 'kpi' is the outcome, 'treated' the treatment,
# 'day' is excluded from the features and the features are standardised.
abc = ABCausal(df, effect_variable = 'kpi', treatment_variable = 'treated',redundant_features=['day'], scale_feature_matrix = True)

In [45]:
# Create the policy tree with the class's default parameters, then fit it on
# the training split.
abc.define_causal_tree()
abc.fit_causal_tree()

In [None]:
 print(f"Mean: {self.return_treatment_mean():0.2%}")

In [None]:
# Plot the fitted policy tree.
# NOTE(review): the recorded run of this cell raised the AttributeError shown
# in the output below.
abc.causal_tree.plot()

AttributeError: '_PolicyModelFinal' object has no attribute 'model_cate'

In [None]:
# Impact summary: print the additional outcomes of the tree policy compared
# with the 100%-treatment and 0%-treatment baselines.
abc.impact_summary()

In [None]:
# NOTE(review): stray cell - `dd` is not defined anywhere in the visible
# notebook; presumably leftover input that can be removed.
dd