In [1]:
from pprint import pprint

import pandas as pd
import matplotlib.pyplot as plt
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_curve
from sklift.viz import plot_qini_curve
import numpy as np
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

from causalml.inference.meta import BaseXRegressor, BaseTClassifier, BaseSClassifier, BaseRClassifier
from causalml.dataset import *
from causalml.metrics import *

from classifierNN import *
from lightgbm import LGBMClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [2]:
import torch
import random
seed = 42

# MetaLearners based on neural network

***Classifier_NN*** is class that trains neural network that consists of fully connected layers (k Linear layes with BatchNorm and LeakyRElu(0.05) and output Linear layer with Sigmoid). Number of layers (k) is defined by length of list ***hid_size***. Also, we can define number of ***epoch*** and learning rate (***lr***). 

In [3]:
def MetaLearners_NN(X_train,treat_train,y_train,X_val,treat_val,y_val,hid_size,epoch = 5, lr=1e-3):
    torch.manual_seed(seed)
    
    learner_t = BaseTClassifier(learner=Classifier_NN(X_train.shape[1],hid_size,epoch,lr))
    learner_t.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_t = np.squeeze(learner_t.predict(X_val))
    score_t = uplift_at_k(y_true=y_val, uplift=cate_t, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_s = BaseSClassifier(learner=Classifier_NN(X_train.shape[1]+1,hid_size,epoch,lr))
    learner_s.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_s = np.squeeze(learner_s.predict(X_val))
    score_s = uplift_at_k(y_true=y_val, uplift=cate_s, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_x = BaseXRegressor(Classifier_NN(X_train.shape[1],hid_size,epoch,lr),Classifier_NN(X_train.shape[1],hid_size,epoch,lr))
    learner_x.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_x = np.squeeze(learner_x.predict(X_val))
    score_x = uplift_at_k(y_true=y_val, uplift=cate_x, treatment=treat_val, strategy='by_group', k=0.3)
    
    return score_t, score_s, score_x

# MetaLearners based on LGBMClassifier
Light GBM Classifier (***LGBMClassifier***) is a fast, distributed, high-performance gradient boosting framework based on decision tree algorithm.

In [4]:
def MetaLearners(X_train,treat_train,y_train,X_val,treat_val,y_val):
    learner_t = BaseTClassifier(learner=LGBMClassifier())
    learner_t.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_t = np.squeeze(learner_t.predict(X_val))
    score_t = uplift_at_k(y_true=y_val, uplift=cate_t, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_s = BaseSClassifier(learner=LGBMClassifier())
    learner_s.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_s = np.squeeze(learner_s.predict(X_val))
    score_s = uplift_at_k(y_true=y_val, uplift=cate_s, treatment=treat_val, strategy='by_group', k=0.3)
    
    learner_x = BaseXRegressor(LGBMClassifier(),LGBMClassifier())
    learner_x.fit(X=X_train, treatment=treat_train, y=y_train)
    cate_x = np.squeeze(learner_x.predict(X_val))
    score_x = uplift_at_k(y_true=y_val, uplift=cate_x, treatment=treat_val, strategy='by_group', k=0.3)
 
    return score_t, score_s, score_x

In [5]:
met = np.array(['T','S','X'])[:,None]

# X5 Retail Hero dataset

In [6]:
df_clients = pd.read_csv('data/clients.csv', index_col='client_id')
df_train = pd.read_csv('data/uplift_train.csv', index_col='client_id')
df_test = pd.read_csv('data/uplift_test.csv', index_col='client_id')

df_features = df_clients.copy()
df_features['first_issue_time'] = \
    (pd.to_datetime(df_features['first_issue_date'])
     - pd.to_datetime(df_features['first_issue_date']).min()) / pd.Timedelta('365d')

df_features['first_redeem_time'] = \
    (pd.to_datetime(df_features['first_redeem_date'])
     - pd.to_datetime(df_features['first_redeem_date']).min()) / pd.Timedelta('365d')

df_features['issue_redeem_delay'] = df_features['first_redeem_time'] \
    - df_features['first_issue_time']

df_features = df_features.join(pd.get_dummies(df_features['gender']))
df_features['first_redeem_time'] = df_features['first_redeem_time'].fillna(df_features['first_redeem_time'].mean())
df_features['issue_redeem_delay'] = df_features['issue_redeem_delay'].fillna(df_features['issue_redeem_delay'].mean())

df_features = df_features.drop(['first_issue_date', 'first_redeem_date', 'gender'], axis=1)

indices_train = df_train.index
indices_test = df_test.index
indices_learn, indices_valid = train_test_split(df_train.index, test_size=0.3, random_state=123)

X_train = df_features.loc[indices_learn, :]
y_train = df_train.loc[indices_learn, 'target']
treat_train = df_train.loc[indices_learn, 'treatment_flg']

X_val = df_features.loc[indices_valid, :]
y_val = df_train.loc[indices_valid, 'target']
treat_val =  df_train.loc[indices_valid, 'treatment_flg']

X_train_full = df_features.loc[indices_train, :]
y_train_full = df_train.loc[:, 'target']
treat_train_full = df_train.loc[:, 'treatment_flg']

X_test = df_features.loc[indices_test, :]

In [7]:
res_x5 = np.array(['X5 Retail Hero']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val))[:,None]

In [None]:
# мб надо подавать каждому металернеру сетку со своими параметрами, а не с одинаковыми

In [11]:
torch.manual_seed(seed)
res_nn = []
sum_res_nn = []
for epoch in [4,5,6]:
    for lr in [5e-3,1e-3,5e-4]:
        res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val,[5,3],epoch,lr))
        sum_res_nn.append(sum(res_nn[-1]))
        print('Epoch:', epoch,', lr:', lr, ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_x5 = np.concatenate((res_x5,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 4 , lr: 0.005 , result (0.0376893309969083, 0.04399490921175875, 0.038384845463609096)
Epoch: 4 , lr: 0.001 , result (0.028327895102571055, 0.03497839813891668, 0.038607032438039424)
Epoch: 4 , lr: 0.0005 , result (0.03698815170649661, 0.03832505010221865, 0.03794110093960534)
Epoch: 5 , lr: 0.005 , result (0.03609436840992175, 0.027263537669818816, 0.03838610431332268)
Epoch: 5 , lr: 0.001 , result (0.05034391774172431, 0.03815699366546821, 0.03838610431332268)
Epoch: 5 , lr: 0.0005 , result (0.03836785099247708, 0.03889467959757098, 0.038384845463609096)
Epoch: 6 , lr: 0.005 , result (0.04576674018348992, 0.03806069166238657, 0.03838610431332268)
Epoch: 6 , lr: 0.001 , result (0.03204590773135141, 0.03957068189371282, 0.0381620290643222)
Epoch: 6 , lr: 0.0005 , result (0.04581017049860514, 0.034770058511334634, 0.038384845463609096)


In [12]:
pd.DataFrame(res_x5, columns=['Dataset', 'Learner', 'LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,X5 Retail Hero,T,0.053,0.05
1,X5 Retail Hero,S,0.04,0.038
2,X5 Retail Hero,X,0.038,0.038


In [13]:
t_x5 = %timeit -o -q -r 1 MetaLearners(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val)
t_nn_x5 = %timeit -o -q -r 1 MetaLearners_NN(X_train.values,treat_train,y_train,X_val.values,treat_val,y_val,[5,3],5)
print('Time with LGBMClassifier:', t_x5)
print('Time with MLP:', t_nn_x5)

Time with LGBMClassifier: 4min 37s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time with MLP: 7min 32s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Hillstrom dataset (MineThatData)

In [14]:
df = pd.read_csv('data/Hillstrom.csv')
df.drop(['history_segment', "conversion", "spend"], axis=1, inplace=True)

cat_cols = ['zip_code', 'channel']
df_ohe = pd.get_dummies(df, columns=cat_cols)
df_ohe.segment = df_ohe.segment.map({'Womens E-Mail': 1, 'Mens E-Mail': 1, 'No E-Mail': 0})

X = df_ohe.drop('visit', axis=1)
y = df_ohe['visit'].astype('int')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

treat_train = X_train['segment']
treat_test = X_test['segment']

X_train.drop(['segment'], axis=1, inplace=True)
X_test.drop(['segment'], axis=1, inplace=True)

In [15]:
torch.manual_seed(seed)
res_hill = np.array(['Hillstorm']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test))[:,None]

res_nn = []
sum_res_nn = []
for epoch in [4,5,6]:
    for lr in [1e-3,5e-4]:
        for s1 in [9,8]:
            for s2 in [6,5,4]:
                res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,
                                              X_test.values,treat_test,y_test,[s1,s2],epoch,lr))
                sum_res_nn.append(sum(res_nn[-1]))
                print('Epoch:', epoch, ', Lr:', lr, ', size:', [s1,s2], ', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_hill = np.concatenate((res_hill,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 4 , Lr: 0.001 , size: [9, 6] , result (0.057172723822855154, 0.04992539933397175, 0.0728053796358784)
Epoch: 4 , Lr: 0.001 , size: [9, 5] , result (0.05603133476003208, 0.05468813945239931, 0.0728053796358784)
Epoch: 4 , Lr: 0.001 , size: [9, 4] , result (0.061697806084266615, 0.06715638315407152, 0.0728053796358784)
Epoch: 4 , Lr: 0.001 , size: [8, 6] , result (0.06604780169864488, 0.060152240827229725, 0.0728053796358784)
Epoch: 4 , Lr: 0.001 , size: [8, 5] , result (0.06538296849996178, 0.07024108743576868, 0.0728053796358784)
Epoch: 4 , Lr: 0.001 , size: [8, 4] , result (0.06931773931007518, 0.06227740714827032, 0.0728053796358784)
Epoch: 4 , Lr: 0.0005 , size: [9, 6] , result (0.06700305550438368, 0.05082913053977872, 0.0728053796358784)
Epoch: 4 , Lr: 0.0005 , size: [9, 5] , result (0.05692954166081354, 0.055485781453532584, 0.0728053796358784)
Epoch: 4 , Lr: 0.0005 , size: [9, 4] , result (0.06456356750148312, 0.06627565109571773, 0.0728053796358784)
Epoch: 4 , Lr: 0.0005

In [16]:
pd.DataFrame(res_hill, columns=['Dataset', 'Learner', 'LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Hillstorm,T,0.061,0.074
1,Hillstorm,S,0.067,0.074
2,Hillstorm,X,0.073,0.073


In [17]:
t_hill = %timeit -o -q -r 1 MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test)
t_nn_hill = %timeit -o -q -r 1 MetaLearners_NN(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test,[8,4],5)
print('Time with LGBMClassifier:', t_hill)
print('Time with MLP:', t_nn_hill)

Time with LGBMClassifier: 3min 35s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time with MLP: 3min 23s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Kuusito dataset

In [53]:
df = pd.read_csv('data/Kuusito.csv')
df.drop(['customer_type'], axis=1, inplace=True)

df = df.replace(r'Value', '', regex=True)
df['target_control'] = df['target_control'].map({'control': 1, 'target': 0})
df['outcome'] = df['outcome'].map({'negative': 0, 'positive': 1})

df = pd.get_dummies(df,drop_first=True)

X = df.drop('outcome', axis=1).astype('int64')
y = df['outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

treat_train = X_train['target_control']
treat_test = X_test['target_control']

X_train.drop(['target_control'], axis=1, inplace=True)
X_test.drop(['target_control'], axis=1, inplace=True)
X_train.drop(['customer_id'], axis=1, inplace=True)
X_test.drop(['customer_id'], axis=1, inplace=True)

In [55]:
res_kuusito = np.array(['Kuusito']*3)[:,None]
res = np.array(MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test))[:,None]

res_nn = []
sum_res_nn = []
for epoch in [18,20,22,24,26]:
    for lr in [5e-2,1e-2]:
        for s1 in [45,40,35]:
            for s2 in [30,25,20]:
                for s3 in [15,10,5]:
                    res_nn.append(MetaLearners_NN(X_train.values,treat_train,y_train,X_test.values,
                                                  treat_test,y_test,[s1,s2,s3],epoch,lr))
                    sum_res_nn.append(sum(res_nn[-1]))
                    print('Epoch:', epoch, ', lr:', lr, ', size', [s1,s2,s3],', result', res_nn[-1])
    
res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_kuusito = np.concatenate((res_kuusito,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 18 , lr: 0.05 , size [45, 30, 15] , result (0.1926421267893661, 0.22486707566462172, 0.04142740286298574)
Epoch: 18 , lr: 0.05 , size [45, 30, 10] , result (0.1968670756646217, 0.15028220858895708, 0.04142740286298574)
Epoch: 18 , lr: 0.05 , size [45, 30, 5] , result (0.1736973415132924, 0.17187730061349693, 0.08383231083844583)
Epoch: 18 , lr: 0.05 , size [45, 25, 15] , result (0.1926421267893661, 0.14378732106339465, 0.11823721881390598)
Epoch: 18 , lr: 0.05 , size [45, 25, 10] , result (0.18273210633946824, 0.18223721881390598, 0.11405725971370145)
Epoch: 18 , lr: 0.05 , size [45, 25, 5] , result (0.24446216768916157, 0.1459222903885481, 0.0477423312883436)
Epoch: 18 , lr: 0.05 , size [45, 20, 15] , result (0.2184621676891616, 0.17046216768916156, 0.12414723926380372)
Epoch: 18 , lr: 0.05 , size [45, 20, 10] , result (0.2189120654396728, 0.13691206543967277, 0.03360736196319014)
Epoch: 18 , lr: 0.05 , size [45, 20, 5] , result (0.2175419222903885, 0.19228220858895706, 0.06974

Epoch: 20 , lr: 0.05 , size [35, 30, 10] , result (0.24109202453987733, 0.17623721881390597, 0.09187730061349692)
Epoch: 20 , lr: 0.05 , size [35, 30, 5] , result (0.21459713701431493, 0.19814723926380373, 0.06183231083844587)
Epoch: 20 , lr: 0.05 , size [35, 25, 15] , result (0.22482208588957053, 0.18686707566462168, 0.12032719836400813)
Epoch: 20 , lr: 0.05 , size [35, 25, 10] , result (0.2006421267893661, 0.2353169734151329, 0.10778732106339467)
Epoch: 20 , lr: 0.05 , size [35, 25, 5] , result (0.20059713701431492, 0.13151738241308797, 0.15396728016359917)
Epoch: 20 , lr: 0.05 , size [35, 20, 15] , result (0.18210224948875253, 0.17432719836400812, 0.04538241308793456)
Epoch: 20 , lr: 0.05 , size [35, 20, 10] , result (0.1494723926380368, 0.08778732106339465, 0.06747239263803678)
Epoch: 20 , lr: 0.05 , size [35, 20, 5] , result (0.17601226993865027, 0.17405725971370145, 0.07378732106339464)
Epoch: 20 , lr: 0.01 , size [45, 30, 15] , result (0.15223721881390595, 0.22522699386503064, 0

Epoch: 22 , lr: 0.01 , size [40, 30, 5] , result (0.17646216768916156, 0.19859713701431492, 0.16259713701431494)
Epoch: 22 , lr: 0.01 , size [40, 25, 15] , result (0.15677709611451945, 0.17473210633946823, 0.12214723926380372)
Epoch: 22 , lr: 0.01 , size [40, 25, 10] , result (0.0964621676891616, 0.1845971370143149, 0.11228220858895704)
Epoch: 22 , lr: 0.01 , size [40, 25, 5] , result (0.18300204498977507, 0.1886421267893661, 0.09183231083844584)
Epoch: 22 , lr: 0.01 , size [40, 20, 15] , result (0.1666871165644172, 0.1339672801635992, 0.1689120654396728)
Epoch: 22 , lr: 0.01 , size [40, 20, 10] , result (0.1548670756646217, 0.15237218813905934, 0.1257423312883436)
Epoch: 22 , lr: 0.01 , size [40, 20, 5] , result (0.19877709611451944, 0.19904703476482621, 0.14637218813905933)
Epoch: 22 , lr: 0.01 , size [35, 30, 15] , result (0.1643721881390593, 0.16850715746421263, 0.13210224948875254)
Epoch: 22 , lr: 0.01 , size [35, 30, 10] , result (0.1521472392638037, 0.1521022494887525, 0.0574274

Epoch: 26 , lr: 0.05 , size [45, 25, 15] , result (0.22922699386503065, 0.2226871165644172, 0.04756237218813908)
Epoch: 26 , lr: 0.05 , size [45, 25, 10] , result (0.1763721881390593, 0.16255214723926387, 0.07765235173824131)
Epoch: 26 , lr: 0.05 , size [45, 25, 5] , result (0.22895705521472398, 0.15756237218813907, 0.06778732106339463)
Epoch: 26 , lr: 0.05 , size [45, 20, 15] , result (0.16778732106339467, 0.15655214723926386, 0.13237218813905932)
Epoch: 26 , lr: 0.05 , size [45, 20, 10] , result (0.17655214723926382, 0.19037218813905932, 0.0636523517382413)
Epoch: 26 , lr: 0.05 , size [45, 20, 5] , result (0.21273210633946826, 0.17628220858895705, 0.07974233128834357)
Epoch: 26 , lr: 0.05 , size [40, 30, 15] , result (0.14601226993865024, 0.1341922290388548, 0.13010224948875254)
Epoch: 26 , lr: 0.05 , size [40, 30, 10] , result (0.15841717791411036, 0.20077709611451944, 0.06356237218813904)
Epoch: 26 , lr: 0.05 , size [40, 30, 5] , result (0.1986421267893661, 0.20450715746421266, 0.0

In [56]:
pd.DataFrame(res_kuusito, columns=['Dataset', 'Learner', 'LGBMClassifier', 'MLP'])


Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Kuusito,T,0.279,0.247
1,Kuusito,S,0.31,0.208
2,Kuusito,X,0.239,0.132


In [59]:
t_kuusito = %timeit -o -q -r 1 MetaLearners(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test)
t_nn_kuusito = %timeit -o -q -r 1 MetaLearners_NN(X_train.values,treat_train,y_train,X_test.values,treat_test,y_test,[35, 25, 5],18,0.05)
print('Time with LGBMClassifier:', t_kuusito)
print('Time with MLP:', t_nn_kuusito)

Time with LGBMClassifier: 42.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time with MLP: 1min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Kuusito dataset 
### one more

# Synthetic

In [22]:
y, X, treatment, tau, b, e = synthetic_data(mode=2, n=10000, p=8, sigma=1.0)
y = (y > np.median(y)).astype(int)
X_train, X_test, y_train, y_test, treat_train, treat_test= train_test_split(X, y, treatment, test_size=0.33, random_state=0)

In [23]:
res_syn = np.array(['Synthetic']*3)[:,None]
res = np.array(MetaLearners(X_train,treat_train,y_train,X_test,treat_test,y_test))[:,None]
res_nn = []
sum_res_nn = []
for epoch in [11,12,13]:
    for lr in [5e-3,1e-3]:
        for s1 in [7,6]:
            for s2 in [5,4]:
                for s3 in [3,2]:
                    res_nn.append(MetaLearners_NN(X_train,treat_train,y_train,X_test,
                                                  treat_test,y_test,[s1,s2,s3],epoch,lr))
                    sum_res_nn.append(sum(res_nn[-1]))
                    print('Epoch:', epoch,', Lr:', lr, ', size:', [s1,s2,s3], ', result', res_nn[-1])

res_NN = np.array(res_nn[np.array(sum_res_nn).argmax()])[:,None]
res_syn = np.concatenate((res_syn,met,np.round(res,3),np.round(res_NN,3)),axis=1)

Epoch: 11 , Lr: 0.005 , size: [7, 5, 3] , result (0.4748444662737394, 0.4627005566470203, 0.35119106090373287)
Epoch: 11 , Lr: 0.005 , size: [7, 5, 2] , result (0.47811476751800913, 0.4512156188605108, 0.2547519646365422)
Epoch: 11 , Lr: 0.005 , size: [7, 4, 3] , result (0.4575843156516045, 0.47865504256712504, 0.4686476751800917)
Epoch: 11 , Lr: 0.005 , size: [7, 4, 2] , result (0.4787614603798297, 0.49091764898493784, 0.18695153896529138)
Epoch: 11 , Lr: 0.005 , size: [6, 5, 3] , result (0.43585461689587424, 0.47645301244269805, 0.41839800261951543)
Epoch: 11 , Lr: 0.005 , size: [6, 5, 2] , result (0.4459684020956123, 0.4696627373935822, 0.2686763261296661)
Epoch: 11 , Lr: 0.005 , size: [6, 4, 3] , result (0.4527586771447282, 0.4554354944335298, 0.3960748199083169)
Epoch: 11 , Lr: 0.005 , size: [6, 4, 2] , result (0.4338899803536346, 0.45145301244269803, 0.4535895546823837)
Epoch: 11 , Lr: 0.001 , size: [7, 5, 3] , result (0.45906188605108056, 0.39140471512770136, 0.24885805500982322

In [24]:
pd.DataFrame(res_syn, columns=['Dataset', 'Learner', 'LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,Synthetic,T,0.42,0.458
1,Synthetic,S,0.458,0.479
2,Synthetic,X,0.361,0.469


In [25]:
t_syn = %timeit -o -q -r 1 MetaLearners(X_train,treat_train,y_train,X_test,treat_test,y_test)
t_nn_syn = %timeit -o -q -r 1 MetaLearners_NN(X_train,treat_train,y_train,X_test,treat_test,y_test,[7,4,3],11,5e-3)
print('Time with LGBMClassifier:', t_syn)
print('Time with MLP:', t_nn_syn)

Time with LGBMClassifier: 2.16 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time with MLP: 16.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# ALL

In [57]:
res_all = np.concatenate((res_x5,res_hill,res_kuusito,res_syn),axis=0)
pd.DataFrame(res_all, columns=['Dataset','Learner','LGBMClassifier', 'MLP'])

Unnamed: 0,Dataset,Learner,LGBMClassifier,MLP
0,X5 Retail Hero,T,0.053,0.05
1,X5 Retail Hero,S,0.04,0.038
2,X5 Retail Hero,X,0.038,0.038
3,Hillstorm,T,0.061,0.074
4,Hillstorm,S,0.067,0.074
5,Hillstorm,X,0.073,0.073
6,Kuusito,T,0.279,0.247
7,Kuusito,S,0.31,0.208
8,Kuusito,X,0.239,0.132
9,Synthetic,T,0.42,0.458


In [60]:
t_a_x5 = np.array([t_x5.average,t_nn_x5.average])[:,None]
t_a_hill = np.array([t_hill.average,t_nn_hill.average])[:,None]
t_a_kuusito = np.array([t_kuusito.average,t_nn_kuusito.average])[:,None]
t_a_syn = np.array([t_syn.average,t_nn_syn.average])[:,None]

t_all = np.concatenate((t_a_x5,t_a_hill,t_a_kuusito,t_a_syn),axis=1).T
t_all = np.round(t_all,1)
d = np.array(['RetailHero', 'Hillstrom', 'Kuusito', 'Synthetic'])[:,None]
pd.DataFrame(np.concatenate((d,t_all),axis=1),columns=['Dataset','Time with LGBM','Time with MLP'])

Unnamed: 0,Dataset,Time with LGBM,Time with MLP
0,RetailHero,277.2,452.3
1,Hillstrom,215.4,203.8
2,Kuusito,42.1,94.1
3,Synthetic,2.2,16.9


# Making Multi Index

In [62]:
idx = pd.MultiIndex.from_product([['RetailHero', 'Hillstrom', 'Kuusito', 'Synthetic'],
                                  ['T', 'S', 'X']])

df = pd.DataFrame(res_all[:, 2:],
                  columns=['LGBMClassifier', 'MLP'],
                  index=idx)

df.index.names = ['Dataset', 'Learner']
df.columns.name = 'Base Model'
display(df)

with open("LearnersByBaseModels.txt", "w") as text_file:
    text_file.write(df.to_latex())

Unnamed: 0_level_0,Base Model,LGBMClassifier,MLP
Dataset,Learner,Unnamed: 2_level_1,Unnamed: 3_level_1
RetailHero,T,0.053,0.05
RetailHero,S,0.04,0.038
RetailHero,X,0.038,0.038
Hillstrom,T,0.061,0.074
Hillstrom,S,0.067,0.074
Hillstrom,X,0.073,0.073
Kuusito,T,0.279,0.247
Kuusito,S,0.31,0.208
Kuusito,X,0.239,0.132
Synthetic,T,0.42,0.458


In [63]:
df = pd.DataFrame(t_all,
                  columns=['LGBMClassifier', 'MLP'],
                  index=['RetailHero', 'Hillstrom', 'Kuusito', 'Synthetic'])

df.columns.name = 'Dataset'
display(df)

with open("TimeLearnersByBaseModels.txt", "w") as text_file:
    text_file.write(df.to_latex())

Dataset,LGBMClassifier,MLP
RetailHero,277.2,452.3
Hillstrom,215.4,203.8
Kuusito,42.1,94.1
Synthetic,2.2,16.9
