In [1]:
!pip install scikit-uplift
!pip install catboost



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklift.models import SoloModel
from catboost import CatBoostClassifier
from sklift.metrics import uplift_at_k
from sklift.models import ClassTransformation
from sklift.models import TwoModels

In [3]:
# Load the raw campaign data from the CSV next to the notebook and preview it.
df = pd.read_csv(filepath_or_buffer='./data.csv')
df.head(n=3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0


In [4]:
# Use the generic uplift-modelling names: outcome -> 'target', offer -> 'treatment'.
df = df.rename(columns={'conversion':'target', 'offer': 'treatment'})

In [5]:
# Confirm the renamed columns are in place.
df.head(n=3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment,target
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0


In [6]:
# List the distinct offer labels before they are encoded as a binary flag.
df['treatment'].unique()

array(['Buy One Get One', 'No Offer', 'Discount'], dtype=object)

In [7]:
# Binarise the offer: either promotion counts as treated (1), 'No Offer' is control (0).
# One exact-match mapping replaces the three regex passes, which treated each label as a
# pattern (substring match) and left the column with mixed str/int values between passes.
# `replace` leaves values outside the mapping untouched, so re-running the cell on the
# already-encoded column is harmless; the final cast guarantees an integer column and
# fails loudly if an unexpected label is still present.
treatment_codes = {'Buy One Get One': 1, 'Discount': 1, 'No Offer': 0}
df['treatment'] = df['treatment'].replace(treatment_codes).astype(int)

In [8]:
# Check that 'treatment' now holds the 0/1 flag.
df.head(n=3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment,target
0,10,142.44,1,0,Surburban,0,Phone,1,0
1,6,329.08,1,1,Rural,1,Web,0,0
2,7,180.65,0,1,Surburban,1,Web,1,0


In [9]:
# Accumulator for the comparison table: one list per column, one entry per approach.
# Each key gets its own independent empty list.
models_results = {
    column: [] for column in ('approach', 'uplift@10%', 'uplift@20%')
}

In [10]:
# Column names shared by every model cell below.
treatment = 'treatment'
target = 'target'
feats = [
    'recency',
    'history',
    'used_discount',
    'used_bogo',
    'zip_code',
    'is_referral',
    'channel',
]

# Split the row labels 70/30 into train and test; the fixed seed keeps the split reproducible.
indices_train, indices_test = train_test_split(
    df.index,
    random_state=42,
    test_size=0.3,
)

In [11]:
# Slice features, outcome and treatment flag (in that order) for each side of the split.
X_train, y_train, treat_train = (
    df.loc[indices_train, columns] for columns in (feats, target, treatment)
)
X_test, y_test, treat_test = (
    df.loc[indices_test, columns] for columns in (feats, target, treatment)
)

Модель S-learner

In [12]:
# String-valued feature columns that CatBoost must treat as categorical.
# This list is reused by the model cells further down.
cat_features = ['zip_code', 'channel']

# S-learner: a single CatBoost classifier wrapped in sklift's SoloModel.
sm = SoloModel(
    CatBoostClassifier(
        n_estimators=100,
        max_depth=3,
        cat_features=cat_features,
        random_state=0,
        silent=True,
    )
).fit(X_train, y_train, treat_train)

# Predicted uplift for every test row.
uplift_sm = sm.predict(X_test)
uplift_sm

array([0.04453953, 0.07533368, 0.07495534, ..., 0.0621168 , 0.03428698,
       0.06376261])

In [13]:
# uplift@10% for the S-learner on the held-out rows.
sm_score_10 = uplift_at_k(
    y_true=y_test,
    uplift=uplift_sm,
    treatment=treat_test,
    strategy='by_group',
    k=0.1,
)
sm_score_10

0.09718186852460378

In [14]:
# uplift@20% for the S-learner on the held-out rows.
sm_score_20 = uplift_at_k(
    y_true=y_test,
    uplift=uplift_sm,
    treatment=treat_test,
    strategy='by_group',
    k=0.2,
)
sm_score_20

0.08233792113690588

In [15]:
# Add the S-learner row to the comparison table.
# Note: this appends, so re-running the cell adds a duplicate row.
for column, value in (
    ('approach', 'SoloModel'),
    ('uplift@10%', sm_score_10),
    ('uplift@20%', sm_score_20),
):
    models_results[column].append(value)
models_results

{'approach': ['SoloModel'],
 'uplift@10%': [0.09718186852460378],
 'uplift@20%': [0.08233792113690588]}

Трансформация таргета

In [16]:
# Target-transformation approach: one CatBoost classifier wrapped in sklift's
# ClassTransformation, with the same hyper-parameters as the other models.
# NOTE(review): sklift recommends this approach for treatment-balanced samples. Here two
# of the three offer labels were mapped to treatment=1, so the groups are presumably
# unbalanced -- confirm before reading the predicted values as absolute uplift.
ct = ClassTransformation(
    CatBoostClassifier(
        n_estimators=100,
        max_depth=3,
        cat_features=cat_features,
        random_state=0,
        silent=True,
    )
).fit(X_train, y_train, treat_train)

# Predicted uplift for every test row.
uplift_ct = ct.predict(X_test)
uplift_ct

array([-0.30608194, -0.10610503, -0.13777319, ..., -0.21110331,
       -0.19906819, -0.17463761])

In [17]:
# Score the class-transformation predictions at the 10% and 20% cut-offs.
ct_score_10, ct_score_20 = (
    uplift_at_k(y_true=y_test, uplift=uplift_ct, treatment=treat_test, strategy='by_group', k=share)
    for share in (0.1, 0.2)
)

# Add the row to the comparison table (appends, so a re-run adds a duplicate row).
for column, value in (
    ('approach', 'ClassTransformation'),
    ('uplift@10%', ct_score_10),
    ('uplift@20%', ct_score_20),
):
    models_results[column].append(value)
models_results

{'approach': ['SoloModel', 'ClassTransformation'],
 'uplift@10%': [0.09718186852460378, 0.12378115569737289],
 'uplift@20%': [0.08233792113690588, 0.10660381670318017]}

Две независимые модели

In [18]:
# Two independent models: sklift's TwoModels takes the treatment-group estimator first
# and the control-group estimator second; both use identical CatBoost settings here.
tm = TwoModels(
    CatBoostClassifier(
        n_estimators=100,
        max_depth=3,
        cat_features=cat_features,
        random_state=0,
        silent=True,
    ),
    CatBoostClassifier(
        n_estimators=100,
        max_depth=3,
        cat_features=cat_features,
        random_state=0,
        silent=True,
    ),
    method='vanilla',
).fit(X_train, y_train, treat_train)

# Predicted uplift for every test row.
uplift_tm = tm.predict(X_test)
uplift_tm

array([0.03731897, 0.0697111 , 0.06794176, ..., 0.05576166, 0.05130654,
       0.0806872 ])

In [19]:
# Score the two-model predictions at the 10% and 20% cut-offs.
tm_score_10, tm_score_20 = (
    uplift_at_k(y_true=y_test, uplift=uplift_tm, treatment=treat_test, strategy='by_group', k=share)
    for share in (0.1, 0.2)
)

# Add the row to the comparison table (appends, so a re-run adds a duplicate row).
for column, value in (
    ('approach', 'TwoModels'),
    ('uplift@10%', tm_score_10),
    ('uplift@20%', tm_score_20),
):
    models_results[column].append(value)
models_results

{'approach': ['SoloModel', 'ClassTransformation', 'TwoModels'],
 'uplift@10%': [0.09718186852460378, 0.12378115569737289, 0.09237235830668161],
 'uplift@20%': [0.08233792113690588, 0.10660381670318017, 0.08301375122261867]}

In [20]:
# Final comparison: one row per approach, best uplift@10% first.
pd.DataFrame(models_results).sort_values(by='uplift@10%', ascending=False)

Unnamed: 0,approach,uplift@10%,uplift@20%
1,ClassTransformation,0.123781,0.106604
0,SoloModel,0.097182,0.082338
2,TwoModels,0.092372,0.083014
