In [None]:
# Load required libraries
import numpy as np
import pandas as pd

# Custom utils from kesh-utils (Check source code: https://github.com/KeshavShetty/kesh-utils)
from KUtils.common import utils
from KUtils.eda import chartil
from KUtils.eda import data_preparation as dp
from KUtils.classifier import generic_classifier_utils as gcu

from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, make_scorer, recall_score, precision_score


In [None]:
# Render every column when a DataFrame is displayed (e.g. by head())
# instead of truncating wide frames with "...".
pd.set_option('display.max_columns', None)

In [None]:
# Silence DeprecationWarning messages for the rest of the session so they
# do not clutter cell output. Other warning categories are still shown.
import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [None]:
# Load the pre-built training and test sets from the working directory.
# Per the original note these CSVs are generated by ../Sampleing.ipyb
# (name kept as given; presumably the sampling notebook - TODO confirm path/spelling).
train_df = pd.read_csv('wns_train_df.csv')

test_df = pd.read_csv('wns_test_df.csv')


In [None]:
# (rows, columns) of the training frame, then of the test frame, one per line
print(train_df.shape, test_df.shape, sep='\n')


In [None]:
# Preview the first rows of the training data
train_df.head()

In [None]:
# Preview the first rows of the test data
test_df.head()

# 1. Model 1 on inner (With Bayesian Optimization)

In [None]:
# Feature matrix: every training column except the row identifier and the target
X = train_df.drop(columns=['impression_id', 'is_click'])

# Response vector: the `is_click` column
y = train_df['is_click']

# No hold-out split is made here. The "test" names refer to the very same
# objects as the training data, so any metric computed on them is in-sample.
X_train = X_test = X
y_train = y_test = y

In [None]:
import xgboost as xgb
from bayes_opt import BayesianOptimization

# Native-API training matrix shared by every cross-validation run below
dtrain = xgb.DMatrix(X_train, label=y_train)


def bo_tune_xgb(max_depth, gamma, n_estimators, learning_rate, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: negated 5-fold CV RMSE.

    The optimiser supplies every argument as a float, so the two
    integer-valued ones are truncated with ``int()`` before use.

    Parameters
    ----------
    max_depth : float
        Maximum tree depth (truncated to int).
    gamma : float
        Minimum loss reduction required to make a split.
    n_estimators : float
        Number of boosting rounds (truncated to int). The native ``xgb.cv``
        API ignores an ``n_estimators`` entry in ``params``; the round count
        must be given as ``num_boost_round``, otherwise this dimension of the
        search has no effect on the score.
    learning_rate : float
        Shrinkage applied to each boosting step.
    subsample : float
        Fraction of rows sampled per tree.
    colsample_bytree : float
        Fraction of columns sampled per tree.

    Returns
    -------
    float
        ``-1 *`` the mean test RMSE after the last boosting round, so that
        maximising this value minimises the error.
    """
    params = {
        # Tune the same loss the final XGBClassifier optimises; without this
        # the native API falls back to squared-error regression.
        'objective': 'binary:logistic',
        'max_depth': int(max_depth),
        'gamma': gamma,
        'learning_rate': learning_rate,
        'subsample': subsample,
        'colsample_bytree': colsample_bytree,
        'tree_method': 'gpu_hist',
        'eval_metric': 'rmse',
    }
    # 5-fold cross-validation with the proposed number of boosting rounds
    cv_result = xgb.cv(params, dtrain, num_boost_round=int(n_estimators), nfold=5)
    # Return the negative RMSE of the final round
    return -1.0 * cv_result['test-rmse-mean'].iloc[-1]


# Search space. Lower bounds are kept strictly positive: a learning rate of 0
# learns nothing, and XGBoost documents subsample / colsample_bytree as (0, 1].
xgb_bo = BayesianOptimization(bo_tune_xgb, {
    'max_depth': (15, 20),
    'gamma': (0, 1),
    'learning_rate': (0.01, 1),
    'n_estimators': (450, 500),
    'subsample': (0.1, 1),
    'colsample_bytree': (0.1, 1),
}, random_state=42)  # fixed seed so Restart & Run All probes the same points

# 8 random exploration points followed by 7 guided iterations
xgb_bo.maximize(n_iter=7, init_points=8)

In [None]:
from xgboost import XGBClassifier

# Best hyper-parameter set found by the optimiser (values still raw floats)
params = xgb_bo.max['params']
print(params)

# Cast the integer-valued settings from float to int and select the
# GPU histogram tree builder for the final fit.
params.update({
    'max_depth': int(params['max_depth']),
    'n_estimators': int(params['n_estimators']),
    'tree_method': 'gpu_hist',
})
print(params)

# Build the classifier from the tuned settings and train it
final_clf = XGBClassifier(**params)
final_clf.fit(X_train, y_train)

# Hard class predictions on X_test (the original comment notes this is the training set)
y_pred_default = final_clf.predict(X_test)

In [None]:
# Keep a second reference to the fitted classifier under the name used when
# scoring the external test set (same object, no copy).
inner_model = final_clf

In [None]:
# Evaluate the fitted classifier's predictions against y_test.
# Per-class precision / recall / f1 summary
print(classification_report(y_test, y_pred_default))

local_confusion_matrix = metrics.confusion_matrix(y_test, y_pred_default)

# Confusion matrix (rows = actual class, columns = predicted class)
print('Confusion Matrix:')
print(local_confusion_matrix)

# Accuracy, precision, recall and f1 score from the hard class predictions
print('\nScores:')
accuracy = metrics.accuracy_score(y_test, y_pred_default)
precision = metrics.precision_score(y_test, y_pred_default)
recall = metrics.recall_score(y_test, y_pred_default)
f1_score = metrics.f1_score(y_test, y_pred_default)

# ROC AUC is a ranking metric: it must be computed from continuous scores.
# Feeding it hard 0/1 predictions collapses the curve to a single threshold,
# so use the positive-class probability (column 1 of predict_proba) instead.
y_proba_default = final_clf.predict_proba(X_test)[:, 1]
roc_auc = metrics.roc_auc_score(y_test, y_proba_default)

sensitivity = recall
# Specificity = TN / (TN + FP), i.e. the first row of the confusion matrix
specificity = local_confusion_matrix[0, 0] / (local_confusion_matrix[0, 0] + local_confusion_matrix[0, 1])
print(" Accuracy {0:.3f}, \n Sensitivity {1:.3f}, \n Specificity {2:.3f}, \n Precision {3:.3f}, \n Recall {4:.3f}, \n f1_score {5:.3f}, \n roc_auc {6:.3f}".format(
    accuracy, sensitivity, specificity, precision, recall, f1_score, roc_auc))

In [None]:
# Feature importance
importances = final_clf.feature_importances_
unique_dict = dict(zip(X_train.columns, importances))
chartil.core_barchart_from_series(
    pd.Series(unique_dict), 
    optional_settings={'sort_by_value':True, 'decimal_precision':2}) 

In [None]:
# Print the fitted estimator's repr for the record
print(inner_model)


# Apply the final model to the test set

In [None]:
# Preview the test data again before scoring it
test_df.head()

In [None]:
# Class probabilities for the external test set.
# Drop the identifier and, if present, a previously written `is_click`
# prediction column. A later cell stores predictions back into test_df, so
# without errors='ignore' + 'is_click' a re-run of this cell would feed the
# model an extra column it was never trained on.
ext_pred_proba = inner_model.predict_proba(
    test_df.drop(columns=['impression_id', 'is_click'], errors='ignore'))

In [None]:
# Store column 1 of the predict_proba output as the predicted `is_click` value.
# NOTE: this adds the column to test_df in place.
test_df['is_click'] = ext_pred_proba[:,1]


In [None]:
# Keep only the identifier and the predicted value
inner_tst = test_df.loc[:,['impression_id', 'is_click' ]]

In [None]:
# Preview the first rows of the two-column prediction frame
inner_tst.head()

In [None]:
# Alias the prediction frame under the name used for export (same object, no copy)
submission_csv = inner_tst

In [None]:
# Check the (rows, columns) of the submission frame before writing it
submission_csv.shape

In [None]:
# Write the submission file to the working directory, omitting the DataFrame index
submission_csv.to_csv("WNS_inn_out_xgb_submission.csv", index=False)
