In [157]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import GradientBoostingClassifier
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_preds
from sklift.models import SoloModel, ClassTransformation, TwoModels

In [65]:
df = pd.read_csv('data.csv')
df.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0


In [68]:
df['conversion'].unique()

array([0, 1])

In [143]:
class FeatureTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X.rename(columns={'offer': 'treatment'}, inplace=True)
        
        X.loc[X['treatment'] == 'No Offer', 'treatment'] = 0
        X.loc[X['treatment'] != 0, 'treatment'] = 1
        
        X = pd.get_dummies(X, columns=['zip_code', 'channel'])
        
        return X

In [144]:
X = df.drop('conversion', axis=1)
y = df['conversion']
X.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One
1,6,329.08,1,1,Rural,1,Web,No Offer
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One


In [145]:
pipeline = Pipeline([
    ('features', FeatureTransformer())
])

In [146]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=21)

In [147]:
pipeline.fit(X_train, y_train)
X_train = pipeline.transform(X_train)
X_test = pipeline.transform(X_test)
X_train.head(3)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,treatment,zip_code_Rural,zip_code_Surburban,zip_code_Urban,channel_Multichannel,channel_Phone,channel_Web
56949,6,167.21,1,0,0,1,0,1,0,0,0,1
16849,11,58.09,0,1,1,0,0,1,0,0,0,1
5808,1,323.76,1,0,0,1,0,1,0,0,0,1


In [148]:
treat_train = X_train['treatment']
treat_test = X_test['treatment']
X_train.drop('treatment', axis=1, inplace=True)
X_test.drop('treatment', axis=1, inplace=True)

In [149]:
# !pip3 install scikit-uplift==0.2.0

In [172]:
def measure_quality(model, name):
    uplift = model.predict(X_test)
    score_10 = uplift_at_k(y_true=y_test, uplift=uplift, treatment=treat_test, strategy='by_group', k=0.1)
    score_20 = uplift_at_k(y_true=y_test, uplift=uplift, treatment=treat_test, strategy='by_group', k=0.2)
    metrics = pd.DataFrame()
    metrics['Name']       = pd.Series([name])
    metrics['uplift@10%'] = pd.Series([score_10])
    metrics['uplift@20%'] = pd.Series([score_20])
    return metrics

In [161]:
sm = SoloModel(
    GradientBoostingClassifier(random_state=42)
).fit(X_train, y_train, treat_train)

In [162]:
ct = ClassTransformation(
    GradientBoostingClassifier(random_state=42)
).fit(X_train, y_train, treat_train)

  ct = ClassTransformation(


In [163]:
tm = TwoModels(
    estimator_trmnt=GradientBoostingClassifier(random_state=42), 
    estimator_ctrl=GradientBoostingClassifier(random_state=42), 
    method='vanilla'
).fit(X_train, y_train, treat_train)


In [173]:
pd.concat([
    measure_quality(sm, 'S-learner'),
    measure_quality(ct, 'Class transform'),
    measure_quality(tm, 'T-learner')
], ignore_index=True)

Unnamed: 0,Name,uplift@10%,uplift@20%
0,S-learner,0.077472,0.071771
1,Class transform,0.095105,0.081343
2,T-learner,0.096781,0.070602


In [None]:
sm_uplift = sm.predict(X_test)

In [191]:
X_test_uplift = X_test.copy()
X_test_uplift['uplift'] = sm_uplift
X_test_uplift['conversion'] = y_test
X_test_uplift = X_test_uplift.sort_values(by='uplift')
X_test_uplift.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,zip_code_Rural,zip_code_Surburban,zip_code_Urban,channel_Multichannel,channel_Phone,channel_Web,uplift,conversion
35838,2,1672.21,1,0,1,1,0,0,0,0,1,-0.031479,0
8010,1,1589.14,1,0,1,1,0,0,0,0,1,-0.030869,0
37261,2,412.21,1,0,0,1,0,0,1,0,0,-0.028171,1


In [200]:
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
X_test_uplift['decile'] = pd.qcut(X_test_uplift['uplift'], q=10, labels=labels)
X_test_uplift.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,zip_code_Rural,zip_code_Surburban,zip_code_Urban,channel_Multichannel,channel_Phone,channel_Web,uplift,conversion,decile
35838,2,1672.21,1,0,1,1,0,0,0,0,1,-0.031479,0,0
8010,1,1589.14,1,0,1,1,0,0,0,0,1,-0.030869,0,0
37261,2,412.21,1,0,0,1,0,0,1,0,0,-0.028171,1,0


In [203]:
X_test_uplift.groupby(['decile']).mean()['conversion']

decile
0    0.117427
1    0.109238
2    0.117721
3    0.134207
4    0.124142
5    0.150908
6    0.155376
7    0.150943
8    0.181989
9    0.235000
Name: conversion, dtype: float64