In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import KFold
from sklearn import linear_model, metrics, preprocessing
from sklearn.model_selection import KFold, train_test_split
import xgboost

import azureml.core
from azureml.core import Dataset
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Experiment, Workspace

import sys
sys.path.append('../')
sys.path.append('../scripts/')

In [2]:
# Log In to Azure ML Workspace
interactive_auth = InteractiveLoginAuthentication(tenant_id="76f90eb1-fb9a-4446-9875-4d323d6455ad")

# Initialise workspace
ws = Workspace.from_config(auth=interactive_auth)

# Data import 
aml_dataset = Dataset.get_by_name(ws, 'train_ds', version='latest')
data = aml_dataset.to_pandas_dataframe()
data.set_index('CardHolder', inplace=True)

# Save it to the further opeartions
original_columns = data.columns
data_origin = data.copy()



In [20]:
data = data_origin.copy()

In [21]:
from importlib import reload
import features.functions
reload(features.functions)

#data = features.functions.preprocessing(data)
data_w_features = data.copy()

In [22]:
index_columns = ['gender', 'main_format', 'children']
data = data_w_features.copy()

# Let's fill NaN in "index_columns"
data.loc[:, 'group'].replace({'test' : 1, 'control' : 0}, inplace=True)
data.loc[:,'gender'].replace({'М' : 2, 'Ж' : 1, 'Не определен' : 0, None : 0}, inplace=True)
data.loc[:,'children'].replace({None : -1}, inplace=True)

In [23]:
from scripts.metrics import custom_metric
import features.functions
reload(features.functions)

data = data.reset_index().drop('CardHolder', axis=1)
data.loc[:, 'group'].replace({1 : 'test', 0 : 'control'}, inplace=True)

data['class'] = 0
data.loc[(data['group'] == 'control') & (data['response_att'] == 1), 'class'] = 1
data.loc[(data['group'] == 'test') & (data['response_att'] == 0), 'class'] = 2
data.loc[(data['group'] == 'test') & (data['response_att'] == 1), 'class'] = 3

train, test = train_test_split(data)
train, test = features.functions.feature_generation(train, test, 
                                mean_columns=['group', 'response_att', 'stdev_days_between_visits_15d'],
                                mean_index_columns=['gender', 'age', 'children'])

x_train = train.drop(['class', 'group', 'response_att'], axis=1)
y_train = train['class']

x_test = test.drop(['class', 'group', 'response_att'], axis=1)
y_test = test['class']

params = {'n_estimators': 20, 'eval_metric': ['merror', 'mlogloss'], 'verbose': True, 'n_jobs': -1}
model = xgboost.XGBClassifier(**params)

model.fit(x_train, y_train, verbose=True, eval_set=[(x_test, y_test)])

pred = model.predict_proba(x_test)
final = pred[:,3] + pred[:,0] - pred[:,1] - pred[:,2]
test['uplift'] = final

custom_metric(test)

[0]	validation_0-merror:0.328241	validation_0-mlogloss:1.31566
[1]	validation_0-merror:0.325161	validation_0-mlogloss:1.2564
[2]	validation_0-merror:0.326273	validation_0-mlogloss:1.20611
[3]	validation_0-merror:0.323618	validation_0-mlogloss:1.16266
[4]	validation_0-merror:0.323432	validation_0-mlogloss:1.12546
[5]	validation_0-merror:0.323548	validation_0-mlogloss:1.09322
[6]	validation_0-merror:0.323507	validation_0-mlogloss:1.06506
[7]	validation_0-merror:0.323082	validation_0-mlogloss:1.04047
[8]	validation_0-merror:0.323112	validation_0-mlogloss:1.01883
[9]	validation_0-merror:0.323199	validation_0-mlogloss:0.999825
[10]	validation_0-merror:0.32243	validation_0-mlogloss:0.982849
[11]	validation_0-merror:0.322401	validation_0-mlogloss:0.96804
[12]	validation_0-merror:0.322436	validation_0-mlogloss:0.954908
[13]	validation_0-merror:0.322308	validation_0-mlogloss:0.943214
[14]	validation_0-merror:0.322116	validation_0-mlogloss:0.932821
[15]	validation_0-merror:0.322058	validation_0-

5.806842773362791