In [57]:
from pprint import pprint

import pandas as pd
import matplotlib.pyplot as plt
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_curve
from sklift.viz import plot_qini_curve
import numpy as np
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

from causalml.inference.meta import BaseXRegressor, BaseTClassifier, BaseSClassifier, BaseRClassifier
from causalml.dataset import *
from causalml.metrics import *

from classifierNN import *
from lightgbm import LGBMClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [11]:
import torch
import random
seed = 42

# MetaLearners based on neural network

***Classifier_NN*** is class that trains neural network that consists of fully connected layers (k-1 Linear layes with BatchNorm and LeakyRElu(0.05) and last Lenear layer with Sigmoid). Number of layers (k) is defined by length of list ***hid_size***. Also, we can define number of ***epoch*** and learning rate (***lr***). 

In [105]:
def MetaLearners_NN(X_train,treat_train,y_train,X_val,treat_val,y_val,hid_size,epoch = 5, lr=1e-3):
    torch.manual_seed(seed)
    
    learner_t = BaseTClassifier(learner=Classifier_NN(X_train.shape[1],hid_size,epoch,lr))
    learner_t.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_t = np.squeeze(learner_t.predict(X_val))
    score_t = uplift_at_k(y_true=y_val, uplift=cate_t, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_s = BaseSClassifier(learner=Classifier_NN(X_train.shape[1]+1,hid_size,epoch,lr))
    learner_s.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_s = np.squeeze(learner_s.predict(X_val))
    score_s = uplift_at_k(y_true=y_val, uplift=cate_s, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_x = BaseXRegressor(Classifier_NN(X_train.shape[1],hid_size,epoch,lr),Classifier_NN(X_train.shape[1],hid_size,epoch,lr))
    learner_x.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_x = np.squeeze(learner_x.predict(X_val))
    score_x = uplift_at_k(y_true=y_val, uplift=cate_x, treatment=treat_val, strategy='by_group', k=0.3)
    
    return score_t, score_s, score_x

# MetaLearners based on LGBMClassifier
Light GBM Classifier (***LGBMClassifier***) is a fast, distributed, high-performance gradient boosting framework based on decision tree algorithm.

In [4]:
def MetaLearners(X_train,treat_train,y_train,X_val,treat_val,y_val):
    learner_t = BaseTClassifier(learner=LGBMClassifier())
    learner_t.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_t = np.squeeze(learner_t.predict(X_val))
    score_t = uplift_at_k(y_true=y_val, uplift=cate_t, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_s = BaseSClassifier(learner=LGBMClassifier())
    learner_s.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_s = np.squeeze(learner_s.predict(X_val))
    score_s = uplift_at_k(y_true=y_val, uplift=cate_s, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_x = BaseXRegressor(LGBMClassifier(),LGBMClassifier())
    learner_x.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_x = np.squeeze(learner_x.predict(X_val))
    score_x = uplift_at_k(y_true=y_val, uplift=cate_x, treatment=treat_val, strategy='by_group', k=0.3)
 
    return score_t, score_s, score_x

In [5]:
met = np.array(['T','S','X'])[:,None]

# X5 Retail Hero dataset

In [120]:
df_clients = pd.read_csv('clients.csv', index_col='client_id')
df_train = pd.read_csv('uplift_train.csv', index_col='client_id')
df_test = pd.read_csv('uplift_test.csv', index_col='client_id')

df_features = df_clients.copy()
df_features['first_issue_time'] = \
    (pd.to_datetime(df_features['first_issue_date'])
     - pd.to_datetime(df_features['first_issue_date']).min()) / pd.Timedelta('365d')

df_features['first_redeem_time'] = \
    (pd.to_datetime(df_features['first_redeem_date'])
     - pd.to_datetime(df_features['first_redeem_date']).min()) / pd.Timedelta('365d')

df_features['issue_redeem_delay'] = df_features['first_redeem_time'] \
    - df_features['first_issue_time']

df_features = df_features.join(pd.get_dummies(df_features['gender']))
df_features['first_redeem_time'] = df_features['first_redeem_time'].fillna(df_features['first_redeem_time'].mean())
df_features['issue_redeem_delay'] = df_features['issue_redeem_delay'].fillna(df_features['issue_redeem_delay'].mean())

df_features = df_features.drop(['first_issue_date', 'first_redeem_date', 'gender'], axis=1)

indices_train = df_train.index
indices_test = df_test.index
indices_learn, indices_valid = train_test_split(df_train.index, test_size=0.3, random_state=123)

X_train = df_features.loc[indices_learn, :]
y_train = df_train.loc[indices_learn, 'target']
treat_train = df_train.loc[indices_learn, 'treatment_flg']

X_val = df_features.loc[indices_valid, :]
y_val = df_train.loc[indices_valid, 'target']
treat_val =  df_train.loc[indices_valid, 'treatment_flg']

X_train_full = df_features.loc[indices_train, :]
y_train_full = df_train.loc[:, 'target']
treat_train_full = df_train.loc[:, 'treatment_flg']

X_test = df_features.loc[indices_test, :]

In [121]:
res_x5 = np.array(['X5 Retail Hero']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val))[:,None]

In [123]:
torch.manual_seed(seed)
res_nn = []
sum_res_nn = []
for epoch in [4,5,6]:
    res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val,[5,3],epoch))
    sum_res_nn.append(sum(res_nn[-1]))
    print('Epoch:', epoch, ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_x5 = np.concatenate((res_x5,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 4 , result (0.028327895102571055, 0.03497839813891668, 0.038607032438039424)
Epoch: 5 , result (0.05034391774172431, 0.03815699366546821, 0.03838610431332268)
Epoch: 6 , result (0.03204590773135141, 0.03957068189371282, 0.0381620290643222)


In [124]:
pd.DataFrame(res_x5, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,X5 Retail Hero,T,0.053,0.05
1,X5 Retail Hero,S,0.04,0.038
2,X5 Retail Hero,X,0.038,0.038


# Hillstrom dataset

In [115]:
df = pd.read_csv('Hillstrom.csv')
df.drop(['history_segment', "conversion", "spend"], axis=1, inplace=True)

cat_cols = ['zip_code', 'channel']
df_ohe = pd.get_dummies(df, columns=cat_cols)
df_ohe.segment = df_ohe.segment.map({'Womens E-Mail': 1, 'Mens E-Mail': 1, 'No E-Mail': 0})

X = df_ohe.drop('visit', axis=1)
y = df_ohe['visit'].astype('int')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

treat_train = X_train['segment']
treat_test = X_test['segment']

X_train.drop(['segment'], axis=1, inplace=True)
X_test.drop(['segment'], axis=1, inplace=True)

In [118]:
torch.manual_seed(seed)
res_hill = np.array(['Hillstorm']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test))[:,None]

res_nn = []
sum_res_nn = []
for epoch in [4,5,6]:
    res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test,[8,4],epoch))
    sum_res_nn.append(sum(res_nn[-1]))
    print('Epoch:', epoch, ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_hill = np.concatenate((res_hill,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 4 , result (0.06931773931007518, 0.06227740714827032, 0.0728053796358784)
Epoch: 5 , result (0.0743347102005954, 0.07435218504302307, 0.0728053796358784)
Epoch: 6 , result (0.059285601383736924, 0.06892551365326349, 0.0728053796358784)


In [119]:
pd.DataFrame(res_hill, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Hillstorm,T,0.061,0.074
1,Hillstorm,S,0.067,0.074
2,Hillstorm,X,0.073,0.073


# Kuusito dataset

In [109]:
df = pd.read_csv('Kuusito.csv')
df.drop(['customer_type'], axis=1, inplace=True)

df = df.replace(r'Value', '', regex=True)
df['target_control'] = df['target_control'].map({'control': 1, 'target': 0})
df['outcome'] = df['outcome'].map({'negative': 0, 'positive': 1})

df = pd.get_dummies(df,drop_first=True)

X = df.drop('outcome', axis=1).astype('int64')
y = df['outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

treat_train = X_train['target_control']
treat_test = X_test['target_control']

X_train.drop(['target_control'], axis=1, inplace=True)
X_test.drop(['target_control'], axis=1, inplace=True)
X_train.drop(['customer_id'], axis=1, inplace=True)
X_test.drop(['customer_id'], axis=1, inplace=True)

In [112]:
res_kuusito = np.array(['Kuusito']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test))[:,None]

res_nn = []
sum_res_nn = []
for epoch in [19,20,21,22]:
    res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test,[40,30,20,10],epoch,5e-3))
    sum_res_nn.append(sum(res_nn[-1]))
    print('Epoch:', epoch, ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_kuusito = np.concatenate((res_kuusito,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 19 , result (0.15655214723926386, 0.18486707566462168, 0.07351738241308797)
Epoch: 20 , result (0.17886707566462168, 0.1864171779141104, 0.09801226993865025)
Epoch: 21 , result (0.12223721881390598, 0.1643721881390593, 0.07183231083844582)
Epoch: 22 , result (0.12214723926380372, 0.1441022494887525, 0.0656523517382413)


In [113]:
pd.DataFrame(res_kuusito, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Kuusito,T,0.279,0.179
1,Kuusito,S,0.31,0.186
2,Kuusito,X,0.239,0.098


# Synthetic

In [89]:
y, X, treatment, tau, b, e = synthetic_data(mode=2, n=10000, p=8, sigma=1.0)
y = (y > np.median(y)).astype(int)
X_train, X_test, y_train, y_test, treat_train, treat_test= train_test_split(X, y, treatment, test_size=0.33, random_state=0)

In [106]:
res_syn = np.array(['Synthetic']*3)[:,None]
res = np.array(MetaLearners(X_train,treat_train,y_train,X_test,treat_test,y_test))[:,None]
res_nn = []
sum_res_nn = []
for epoch in [10,11,12,13,14]:
    res_nn.append(MetaLearners_NN(X_train,treat_train,y_train,X_test,treat_test,y_test,[6,4,2],epoch))
    sum_res_nn.append(sum(res_nn[-1]))
    print('Epoch:', epoch, ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_syn = np.concatenate((res_syn,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 10 , result (0.4319376227897839, 0.45512033398821217, 0.18695153896529138)
Epoch: 11 , result (0.4253315324165029, 0.42739030779305826, 0.23219138834315656)
Epoch: 12 , result (0.43110674525212833, 0.4884127373935822, 0.3528405370006549)
Epoch: 13 , result (0.42301080550098236, 0.40693762278978396, 0.2571913883431566)
Epoch: 14 , result (0.41896283562540926, 0.4401154223968566, 0.18695153896529138)


In [107]:
pd.DataFrame(res_syn, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Synthetic,T,0.42,0.431
1,Synthetic,S,0.458,0.488
2,Synthetic,X,0.361,0.353


# ALL

In [125]:
res_all = np.concatenate((res_x5,res_hill,res_kuusito,res_syn),axis=0)
pd.DataFrame(res_all, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,X5 Retail Hero,T,0.053,0.05
1,X5 Retail Hero,S,0.04,0.038
2,X5 Retail Hero,X,0.038,0.038
3,Hillstorm,T,0.061,0.074
4,Hillstorm,S,0.067,0.074
5,Hillstorm,X,0.073,0.073
6,Kuusito,T,0.279,0.179
7,Kuusito,S,0.31,0.186
8,Kuusito,X,0.239,0.098
9,Synthetic,T,0.42,0.431
