# Estimation de l'ATE/CATE sur des données synthétiques

## Import

In [1]:
import numpy as np
import pandas as pd
#import causalml
from scipy.stats import bernoulli
import scipy as sp
#from scipy import st
from scipy import integrate
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("white")
sns.set_context("paper", font_scale=1.5, rc={"lines.linewidth": 1.5})
plt.rcParams['figure.figsize'] = 10, 8

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor

import warnings
warnings.filterwarnings('ignore')

## Génération de données synthétiques

In [2]:
def treatment_assign(Nobs, d, X, p):
    '''
    Input: 
    
    p : score de propension.
    Nobs : Nombre de lignes da la matrice X i.e. nombre de personnes.
    
    Output:
    
    W : Vecteur de taille Nobs contenant des 0 ou 1 pour désigner l'affectation du traitement.
    '''
    sigmoid = lambda x: 1/(1+np.exp(-x))
    
    omega = np.random.uniform(0, 1, (Nobs, d))
    psi = np.random.uniform(0, 1, (Nobs, 1))

    if p == None:
      p = np.zeros(Nobs)
      for i in range(Nobs):
        p[i] = sigmoid(omega[i] @ X[i])
      W = bernoulli.rvs(p, size = Nobs) 
    else:
      W = bernoulli.rvs(p, size = Nobs) 
    
    return W


def causal_generation(Nobs, dim, beta, bias, f, g, p):
    '''
    Input :
    
    Nobs : Nombre de lignes da la matrice X i.e. nombre de personnes.
    dim : Nombre de colonnes de la matrice X i.e. nombres de caractéristiques (features).
    beta : Vecteur de dimension (2, dim).
    bias : Vecteur de dimension (1, 2).
    W : Vecteur de dimension (1, Nobs) contenant des 0 ou 1 pour désigner 
    l'affectation du traitement.
    f et g sont des fonctions.
    
    Output:
    
    (X, Y, W) : Triplet contenant la matrice X des features, Y le vecteur des 
                résultats potentiels et W le vecteur de l'affectation du traitement.
    '''
    moy = np.zeros(dim)
    var = np.eye(dim)
    X = np.random.multivariate_normal(moy, var, Nobs)
    Y = np.zeros(Nobs)

    W = treatment_assign(Nobs, dim, X, p)

    for i in range(Nobs):
        bruit = np.random.normal(0, 1)
        if W[i] == 0:
            Y[i] = f(beta[0] @ X[i] + bias[0]) + bruit
        if W[i] == 1:
            Y[i] = g(beta[1] @ X[i] + bias[1]) + bruit
            
    return (X, W, Y)

## Métalearners internes

### S-learners

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin

class SLearner(BaseEstimator, ClassifierMixin):
    """ Homemade SLearner class """
    
    def __init__(self, base_estimator):
        # init
        self.estimator = base_estimator
        
    def fit(self, X, W, Y):
        # Initiation des variables
        self.X = X
        self.W = W
        self.Y = Y
        self.features = np.hstack((self.X, self.W[:,np.newaxis]))
        self.clf = self.estimator.fit(self.features, self.Y)

    def predict_CATE(self, x):
        # Complete the method      
        self.Y_0_hat = self.clf.predict(np.c_[x, np.zeros(len(x))])
        self.Y_1_hat = self.clf.predict(np.c_[x, np.ones(len(x))])
        return self.Y_1_hat - self.Y_0_hat

    def predict_ATE(self):
        return (self.Y_1_hat - self.Y_0_hat).mean()

### T-learners

In [4]:
from sklearn.base import BaseEstimator, ClassifierMixin

class TLearner(BaseEstimator, ClassifierMixin):
    """ Homemade TLearner class """
    
    def __init__(self, base_estimator0, base_estimator1):
        # init
        self.estimator0 = base_estimator0
        self.estimator1 = base_estimator1

    def fit(self, X, W, Y):
        # Initiation des variables
        self.X = X
        self.W = W
        self.Y = Y
        self.mu_0 = self.estimator0.fit(X[self.W==0,:], self.Y[self.W==0])
        self.mu_1 = self.estimator1.fit(X[self.W==1,:], self.Y[self.W==1])

    def predict_CATE(self, x):
        # Complete the method         
        self.Y_0_hat = self.mu_0.predict(x)
        self.Y_1_hat = self.mu_1.predict(x)
        return self.Y_1_hat - self.Y_0_hat

    def predict_ATE(self):
        return (self.Y_1_hat - self.Y_0_hat).mean()

### X-Learners

In [5]:
from sklearn.base import BaseEstimator, ClassifierMixin

class XLearner(BaseEstimator, ClassifierMixin):
    """ Homemade XLearner class """
    
    def __init__(self, outcome_learner0, outcome_learner1, effect_learner0, effect_learner1):
        # init
        self.outcome_learner0 = outcome_learner0
        self.outcome_learner1 = outcome_learner1
        self.effect_learner0 = effect_learner0
        self.effect_learner1 = effect_learner1

    def fit(self, X, W, Y):
        # Initiation des variables
        self.X = X
        self.W = W
        self.Y = Y 
        
        #Stage 1 : Estimate the average outcomes μ0(x) and  μ1(x)
        self.mu_0 = self.outcome_learner0.fit(X[self.W==0,:], self.Y[self.W==0])
        self.mu_1 = self.outcome_learner1.fit(X[self.W==1,:], self.Y[self.W==1])
        
        #Stage 2 : Impute the user level treatment effects
        self.D0 = self.mu_1.predict(X[self.W==0,:]) - self.Y[self.W==0] 
        self.D1 = self.Y[self.W==1] - self.mu_0.predict(X[self.W==1,:])    
        
        #estimate τ1(x) = E[D1|X=x], and τ0(x) = E[D0|X=x] using machine learning models:
        self.tau_0 = self.effect_learner0 .fit(X[self.W==0,:], self.D0)
        self.tau_1 = self.effect_learner1 .fit(X[self.W==1,:], self.D1)
        

    def predict_CATE(self, x, p):
        # Complete the method         
        self.CATE_hat = p*self.tau_0.predict(x) + (1-p)*self.tau_1.predict(x)
        return self.CATE_hat

    def predict_ATE(self):
        return (self.CATE_hat).mean()

### DR-Learner

In [6]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV

class DRLearner(BaseEstimator, ClassifierMixin):
    """ Homemade DRLearner class """
    
    def __init__(self, model_regression, model_propensity, model_final):
        # init
        self.model_regression = model_regression
        self.model_propensity = model_propensity
        self.model_final = model_final
        

    def fit(self, X, W, Y):
        # Initiation des variables
        self.X = X
        self.W = W
        self.Y = Y 
        
        #Stage 1 : Regression of the outcomes μ(X,T) = E[Y|X,W,T]
        self.features = np.hstack((self.X, self.W[:,np.newaxis]))
        self.mu = self.model_regression.fit(self.features, self.Y)
        
        #Stage 1 : Model to estimate the propensity_score
        self.model_propensity = CalibratedClassifierCV(self.model_propensity)
        self.model_propensity.fit(self.X, self.W)
        self.propensity = self.model_propensity.predict_proba(X)

        #Stage 1 : predict Y_pred
        self.Y_pred_0 = self.mu.predict(np.hstack((self.X, np.zeros((self.X.shape[0],1)))))
        self.Y_pred_0 += (Y - self.Y_pred_0) * (1 - self.W) / self.propensity[:,0]
        self.Y_pred_1 = self.mu.predict(np.hstack((self.X, np.ones((self.X.shape[0],1)))))
        self.Y_pred_1 += (Y - self.Y_pred_1) * (self.W) / self.propensity[:,1]
        
        #Stage 2 : fit model final
        self.model_final.fit(self.X, self.Y_pred_1 - self.Y_pred_0)
        

    def predict_CATE(self, x):
        # Complete the method         
        self.CATE_hat = self.model_final.predict(x)
        return self.CATE_hat

    def predict_ATE(self):
        return (self.CATE_hat).mean()

## Phase de test

### Initialisation des paramètres

In [7]:
N = 1000
d = 2                                       # d = 2, afin de pouvoir être calculé par intégration et par Monte Carlo
p = 0.7
beta0 = np.random.uniform(1, 30, (1, d))
beta1 = np.random.uniform(2000,4000, (1, d))
beta = np.vstack((beta0,beta0))               # beta0 = beta1           
bias = np.array([100,10])                 # beta0 = beta1, cas simple pour faciliter l'interprétation des résultats                              # Gamma0 != Gamma1, biais différent
f = lambda x:x
g = lambda x:x

### Générations des données

In [8]:
# Génération des données
X, W, Y = causal_generation(N, d, beta, bias, f, g, p)

### Prédictions des métalearners "Team Filrouge"

#### S-Learner

In [9]:
from sklearn.ensemble import RandomForestRegressor

def run_slearner(X, W, Y, baselearner):
  slearner = SLearner(base_estimator = baselearner)
  slearner.fit(X,W,Y)
  
  cate_hat_S = slearner.predict_CATE(X)
  #print("- Les dimensions du CATE = {}.".format(cate_hat_S.shape))
  ate_hat_S = slearner.predict_ATE()
  #print("- L'estimation de la valeur de l'ATE = {}.".format(ate_hat_S))
  return ate_hat_S

In [10]:
run_slearner(X, W, Y, GradientBoostingRegressor())

-89.31109774343271

#### T-Learner

In [11]:
def run_tlearner(X, W, Y, baselearner0, baselearner1):
  tlearner = TLearner(base_estimator0 = baselearner0, 
                      base_estimator1 = baselearner1)
  tlearner.fit(X,W,Y)

  cate_hat_T = tlearner.predict_CATE(X)
  #print("- Les dimensions du CATE = {}.".format(cate_hat_S.shape))
  ate_hat_T = tlearner.predict_ATE()
  #print("- L'estimation de la valeur de l'ATE = {}.".format(ate_hat_T))
  return ate_hat_T

In [12]:
run_tlearner(X, W, Y, GradientBoostingRegressor(), RandomForestRegressor())

-89.87874762856057

#### X-Learner

In [13]:
def run_xlearner(X, W, Y, outcome_learner0, outcome_learner1, effect_learner0, effect_learner1):
  xlearner = XLearner(outcome_learner0, outcome_learner1, 
                      effect_learner0, effect_learner1)
  xlearner.fit(X,W,Y)
  cate_hat_X = xlearner.predict_CATE(X, W)
  ate_hat_X = xlearner.predict_ATE()
  return ate_hat_X

In [14]:
run_xlearner(X, W, Y, LinearRegression(), LinearRegression(),
             LinearRegression(), LinearRegression())

-89.96880186743039

In [15]:
"""
# classifier to estimate the propensity score
cls = LogisticRegression()
# calibration of the classifier
cls = CalibratedClassifierCV(cls)
# training of the classifier
cls.fit(X, W)
# predicton of the classifier
propensity = cls.predict_proba(X)[:,1]

#plt.hist(propensity)

xlearner = XLearner()
xlearner.fit(X,W,Y)

cate_hat_X = xlearner.predict_CATE(X, propensity)

ate_hat_X = xlearner.predict_ATE()
print("- L'estimation de la valeur de l'ATE = {}.".format(ate_hat_X))
"""

'\n# classifier to estimate the propensity score\ncls = LogisticRegression()\n# calibration of the classifier\ncls = CalibratedClassifierCV(cls)\n# training of the classifier\ncls.fit(X, W)\n# predicton of the classifier\npropensity = cls.predict_proba(X)[:,1]\n\n#plt.hist(propensity)\n\nxlearner = XLearner()\nxlearner.fit(X,W,Y)\n\ncate_hat_X = xlearner.predict_CATE(X, propensity)\n\nate_hat_X = xlearner.predict_ATE()\nprint("- L\'estimation de la valeur de l\'ATE = {}.".format(ate_hat_X))\n'

#### DR-Learner

In [16]:
def run_drlearner(X, W, Y, model_regression, model_propensity, model_final):
  drlearner = DRLearner(model_regression, model_propensity, model_final)
  drlearner.fit(X,W,Y)

  cate_hat_dr = drlearner.predict_CATE(X)
  #print("- Les dimensions du CATE = {}.".format(cate_hat_dr.shape))
  ate_hat_dr = drlearner.predict_ATE()
  #print("- L'estimation de la valeur de l'ATE = {}.".format(ate_hat_dr))
  return ate_hat_dr

In [17]:
run_drlearner(X, W, Y, LinearRegression(), LogisticRegression(), LinearRegression())

-89.96896957620633

## Calcul Paradis

#### Calcul MSE

In [18]:
def MSE(y,y_pred):
    return 1/2*(y-y_pred)**2

#### Calcul de l'ATE

In [19]:
def ATE_paradis(beta, bias, f=lambda i:i, g=lambda i:i):
    p = beta.shape[1]
    ate = 0
    if p==1:
        ate = integrate.quad(lambda x: (g(beta[1]*x + bias[1]) -f(beta[0]*x + bias[0]))*sp.stats.norm.pdf(x,0,1),-1000 , 1000)
    if p==2:
        ate=integrate.dblquad(lambda x, y: (g(beta[1,0]*x + beta[1,1]*y + bias[1]) -f(beta[0,0]*x + beta[0,1]*y + bias[0])
                                           )*sp.stats.norm.pdf(x,0,1)*sp.stats.norm.pdf(y,0,1),-1000 , 1000, lambda y :-1000,lambda y : 1000)
    if p>2:
        return "dimension above 2"
    return  ate

In [20]:
def monte_carlo(Nobs, dim, beta, bias, f, g):
    '''
    Input :
    
    Nobs : Nombre de lignes da la matrice X i.e. nombre de personnes.
    dim : Nombre de colonnes de la matrice X i.e. nombres de caractéristiques (features).
    beta : Vecteur de dimension (2, dim), note dim doit être < 10
    bias : Vecteur de dimension (1, 2).
    W : Vecteur de dimension (1, Nobs) contenant des 0 ou 1 pour désigner l'affectation du traitement.
    f et g sont des fonctions.
    
    Output:
    
    ATE : ATE calculé par la méthode de Monte Carlo
    '''
    moy = np.zeros(dim)
    var = np.eye(dim)
    X = np.random.multivariate_normal(moy, var, Nobs)
    ATE = np.mean(g(X.dot(beta[1])+ bias[1]) - f(X.dot(beta[0])+ bias[0])) 
            
    return ATE

In [21]:
print('ATE calculé par intégration: {}'.format(ATE_paradis(beta, bias, f=f, g=g)))
print('ATE calculé par Monte Carlo: {}'.format(monte_carlo(10**6, d, beta, bias, f, g)))

ATE calculé par intégration: (-89.99999999775461, 1.4270833668839065e-08)
ATE calculé par Monte Carlo: -90.0


## Génération de tableau



In [22]:
N = 1000
f = lambda x:np.sin(x)
g = lambda x:x

dim = []
bases = []
score_prop = []

base_learners = {"Linear Regression" : LinearRegression(),
                 "Random Forest" : RandomForestRegressor(),
                 "XGboost" : GradientBoostingRegressor()}

res = {"Score propension": score_prop, 
       "Base Learner" : bases,
       "Dimension" : dim,
       "S-Learner": [],  "T-Learner": [],
       "X-Learner": [], "Doubly Robust Learning": []}

In [24]:

for b in list(base_learners.keys()):
  bl = base_learners[b]
  
  for d in [5]:
    beta0 = np.random.uniform(1, 30, (1, d))
    beta = np.vstack((beta0, beta0))                      
    bias = np.array([100, 10])   

    # Real Value ATE
    #print("La valeur réelle de l'ATE en d={} est {}.".format(d, monte_carlo(10**6, d, beta, bias, f, g)))

    for p in [0.1, 0.5, 0.9, None]:
      dim.append(d)
      if p == None:
        score_prop.append("confounding")
      else:
        score_prop.append(p)
      bases.append(b)

      slearner = []
      tlearner = []
      xlearner = []
      drlearner = []

      for _ in range(25):
        
        X, W, Y = causal_generation(N, d, beta, bias, f, g, p)

        # S-Learner
        ate_S = run_slearner(X, W, Y, bl)
        slearner.append(round(ate_S, 3))

        # T-Learner
        ate_T = run_tlearner(X, W, Y, bl, RandomForestRegressor())
        tlearner.append(round(ate_T, 3))

        # X-Learner
        ate_hat_X = run_xlearner(X, W, Y, bl, LinearRegression(),
                                 LinearRegression(), LinearRegression())
        xlearner.append(round(ate_hat_X, 3))

        # Doubly Robust Learning
        ate_dr = run_drlearner(X, W, Y, bl, LogisticRegression(), LinearRegression())
        drlearner.append(round(ate_dr, 3))


      # Results
      s_mean_value = round(np.mean(slearner), 3)
      s_std_value = round(np.std(slearner), 3)
      res["S-Learner"].append(str(s_mean_value) + " ± " + str(s_std_value))

      t_mean_value = round(np.mean(tlearner), 3)
      t_std_value = round(np.std(tlearner), 3)
      res["T-Learner"].append(str(t_mean_value) + " ± " + str(t_std_value))

      x_mean_value = round(np.mean(xlearner), 3)
      x_std_value = round(np.std(xlearner), 3)
      res["X-Learner"].append(str(x_mean_value) + " ± " + str(x_std_value))

      dr_mean_value = round(np.mean(drlearner), 3)
      dr_std_value = round(np.std(drlearner), 3)
      res["Doubly Robust Learning"].append(str(dr_mean_value) + " ± " + str(dr_std_value))



res["Dimension"] = dim
res["Base Learner"] =  bases
res["Score propension"] = score_prop

df = pd.DataFrame(res, columns = list(res.keys()))
df = df.set_index(["Base Learner", "Dimension", "Score propension"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,S-Learner,T-Learner,X-Learner,Doubly Robust Learning
Base Learner,Dimension,Score propension,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Linear Regression,5,0.1,9.822 ± 3.08,9.687 ± 1.734,9.799 ± 0.848,10.029 ± 3.112
Linear Regression,5,0.5,9.831 ± 1.246,9.742 ± 1.267,9.814 ± 1.235,9.827 ± 1.237
Linear Regression,5,0.9,10.07 ± 3.094,9.759 ± 1.045,9.788 ± 1.086,10.206 ± 2.761
Linear Regression,5,confounding,9.65 ± 1.174,13.526 ± 1.169,9.584 ± 1.206,9.49 ± 1.666
Random Forest,5,0.1,9.964 ± 1.537,10.034 ± 1.532,10.044 ± 0.808,9.923 ± 2.548
Random Forest,5,0.5,10.018 ± 0.95,9.975 ± 0.926,10.026 ± 0.841,10.001 ± 0.848
Random Forest,5,0.9,10.522 ± 1.69,10.254 ± 1.193,10.28 ± 1.174,9.604 ± 3.946
Random Forest,5,confounding,13.031 ± 1.251,13.206 ± 1.205,10.085 ± 1.217,10.396 ± 1.76
XGboost,5,0.1,9.796 ± 1.434,9.763 ± 1.922,9.676 ± 0.938,10.058 ± 2.687
XGboost,5,0.5,10.212 ± 1.06,10.309 ± 1.13,10.222 ± 1.113,10.202 ± 1.128
