# HR Analytics - Hyper Parameter Tuning (HPT)

Objective: Create a Logistic Reg Model to predict the attrition of employees by using the features available from the datasets given

Created By: Stephen Cole

Creation Date: 19/07/2022

##### Last Updated

In [1]:
from datetime import datetime, timedelta

print(f"Last Updated: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

Last Updated: 22/08/2022 15:18:12


### Import Packages

In [2]:
import os
import logging
import pandas as pd
import numpy as np

from datetime import timedelta, datetime
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold ,StratifiedKFold

import xgboost as xgb
import lightgbm as lgb

import warnings
warnings.filterwarnings('ignore')

# Import DataScience Modelling Functions from file created (ensure the file is in same dir as this notebook)
from functions.DS_Model_Functions import *

#### Open configuration

In [3]:
with open (os.path.join(os.getcwd(),'config/attrition_config.yaml'),'r') as stream:
    attrition_propensity_config = yaml.safe_load(stream)

In [4]:
path_to_data = attrition_propensity_config['general_config']['path_to_data']

In [5]:
# Setting up log file for moodelling
# Note: Here I am practising logging - Only useful for automated scripts or omitting big outputs
# Please see the log folder for the resulting logs created

log_folder_name = attrition_propensity_config['log_config']['path_to_folder']

os.makedirs(log_folder_name, exist_ok=True)

run_date = datetime.today().strftime('%Y-%m-%d')

log_filename = run_date+'-attrition-modelling.log'
path_to_log_file = os.path.join(log_folder_name,log_filename)

logging.basicConfig(filename = path_to_log_file,
                    filemode="w",format = '%(asctime)s %(name)s %(levelname)s %(message)s',
                    level=logging.INFO,
                    datefmt="%Y-%m-%d")
LOGGER = logging.getLogger(__name__)

In [6]:
pd.set_option("display.max_columns", None, "display.max_rows", 500)

### Load Objects

In [7]:
start_time = time.time()
print('start_time', time.asctime( time.localtime(time.time()) ))

X = load_obj('Modelling_dset', path_to_data)
y = load_obj('Target', path_to_data)
features = load_obj('features', path_to_data)

end_time = time.time()
total_time = end_time - start_time
print('total_time to read data', str(timedelta(seconds=total_time)))

LOGGER.info("Data and Model loaded successfully")
LOGGER.info("Features to be modelled: {}".format(features))
LOGGER.info("Time to load data: {}.".format(str(timedelta(seconds=total_time))))

start_time Mon Aug 22 15:18:17 2022
total_time to read data 0:00:00.005985


In [8]:
X = X[features]

In [9]:
X.shape

(4410, 27)

## XGBoost - HPT

### Train-Test Split

In [10]:
# Split dataset into train and test datasets for modelling and evaluation
# Set the random_state for the same split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [11]:
# Use XGBoost DMatrix for memory efficiency and model training speed

dmat_train = xgb.DMatrix(X_train, label=y_train)

In [12]:
# Define base parameters

params = {
    # Tree Parameters
    'max_depth': 3,
    'min_child_weight': 20,
    'scale_pos_weight':1,
    'max_delta_step': 2,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    # Regularisation Parameters
    'eta': 0.1,
    'gamma': 0,
    'alpha': 0,
    'lambda': 1,
    # Other Parameters
    'verbosity': 0,
    'nthread': os.cpu_count(),
    'eval_metrics': 'logloss',
    'objective': 'binary:logistic',
    'tree_method': 'hist'
}

LOGGER.info("Base Parameters set to: '\n' {}".format(params))

In [13]:
changed_params_grid = [[{'max_depth': max_depth,
                        'min_child_weight': min_child_weight,
                        'scale_pos_weight': scale_pos_weight}
                        for max_depth in [1, 3, 5, 10, 15, 20]
                        for min_child_weight in [1, 5, 10, 20, 50]
                        for scale_pos_weight in [1, 5, 10, 15, 25, 50, 100]],
                      [{'max_delta_step': max_delta_step,
                        'subsample': subsample,
                        'colsample_bytree': colsample_bytree,}
                        for max_delta_step in [0, 0.5, 1, 2, 4, 6, 8, 10]
                        for subsample in [0.1, 0.25, 0.5, 0.75, 1]
                        for colsample_bytree in [0.5, 0.75, 0.9, 1]],
                      [{'eta': eta,
                        'gamma': gamma,
                        'alpha': alpha,
                        'lambda': lambd}
                        for eta in [0.1, 0.5, 0.9]
                        for gamma in [0, 0.1, 0.5, 1,5, 10]
                        for alpha in [0, 0.5, 1]
                        for lambd in [0.01, 0.1, 1, 1.5]]]

In [14]:
# HPT using XGBoost
best_params = xgb_hpt_tune(LOGGER=LOGGER,
                           base_params=params,
                           grid=changed_params_grid,
                           dtrain=dmat_train,
                           num_boost_round=100,
                           nfold=5, 
                           stratified=True,
                           metrics={'auc'}, 
                           early_stopping_rounds=20, 
                           seed=5)

XGBoost HPT start_time Mon Aug 22 15:18:17 2022
Hyperparameter space grid has 210 points.

start_time Mon Aug 22 15:18:17 2022


100%|████████████████████████████████████████████████████████████████████████████████| 210/210 [04:10<00:00,  1.19s/it]


Parameters Changed ----------------------------------------------------------- 
max_depth : 3 -> 20
min_child_weight : 20 -> 1
scale_pos_weight : 1 -> 15

Hyperparameter space grid has 160 points.

start_time Mon Aug 22 15:22:28 2022


100%|████████████████████████████████████████████████████████████████████████████████| 160/160 [03:50<00:00,  1.44s/it]


Parameters Changed ----------------------------------------------------------- 
max_delta_step : 2 -> 0.5
subsample : 0.9 -> 0.75

Hyperparameter space grid has 216 points.

start_time Mon Aug 22 15:26:18 2022


100%|████████████████████████████████████████████████████████████████████████████████| 216/216 [04:14<00:00,  1.18s/it]

Parameters Changed ----------------------------------------------------------- 
eta : 0.1 -> 0.9
alpha : 0 -> 0.5
lambda : 1 -> 0.1


New set of parameters: 


{'max_depth': 20, 'min_child_weight': 1, 'scale_pos_weight': 15, 'max_delta_step': 0.5, 'subsample': 0.75, 'colsample_bytree': 0.9, 'eta': 0.9, 'gamma': 0, 'alpha': 0.5, 'lambda': 0.1, 'verbosity': 0, 'nthread': 8, 'eval_metrics': 'logloss', 'objective': 'binary:logistic', 'tree_method': 'hist'} 

XGBoost HPT total time 0:12:16.203589


XGBoost HPT end_time Mon Aug 22 15:30:33 2022





In [15]:
best_params

{'max_depth': 20,
 'min_child_weight': 1,
 'scale_pos_weight': 15,
 'max_delta_step': 0.5,
 'subsample': 0.75,
 'colsample_bytree': 0.9,
 'eta': 0.9,
 'gamma': 0,
 'alpha': 0.5,
 'lambda': 0.1,
 'verbosity': 0,
 'nthread': 8,
 'eval_metrics': 'logloss',
 'objective': 'binary:logistic',
 'tree_method': 'hist'}

In [16]:
# Determine optimal number of boosters

xgb_cv_scores = []
for train_index, val_index in StratifiedKFold(n_splits = 5, shuffle = True).split(X_train, y_train.to_numpy()):
        
    X_train_skf, X_val_skf = X.iloc[train_index], X.iloc[val_index]
    y_train_skf, y_val_skf = y.iloc[train_index], y.iloc[val_index]
        
    dtrain = xgb.DMatrix(X_train_skf, label = y_train_skf.to_numpy())
    dval = xgb.DMatrix(X_val_skf, label = y_val_skf.to_numpy())
    evallist = [(dtrain, 'train'), (dval, 'validation')]
        
    bst = xgb.train(best_params,dtrain, 9999, evallist, early_stopping_rounds = 100)

    xgb_cv_scores.append(bst.best_iteration)
    
xgb_num_boost_round = round(np.mean(xgb_cv_scores))
LOGGER.info("Optimal number of boosters found for XGBoost Model: {}".format(xgb_num_boost_round))

[0]	train-logloss:0.81020	validation-logloss:0.81801
[1]	train-logloss:0.80456	validation-logloss:0.82534
[2]	train-logloss:0.71887	validation-logloss:0.76037
[3]	train-logloss:0.61048	validation-logloss:0.66692
[4]	train-logloss:0.51172	validation-logloss:0.57902
[5]	train-logloss:0.41470	validation-logloss:0.50239
[6]	train-logloss:0.33496	validation-logloss:0.42808
[7]	train-logloss:0.26534	validation-logloss:0.36319
[8]	train-logloss:0.20738	validation-logloss:0.31293
[9]	train-logloss:0.16625	validation-logloss:0.27502
[10]	train-logloss:0.13456	validation-logloss:0.24232
[11]	train-logloss:0.10727	validation-logloss:0.20951
[12]	train-logloss:0.08662	validation-logloss:0.18508
[13]	train-logloss:0.07093	validation-logloss:0.16032
[14]	train-logloss:0.05866	validation-logloss:0.14969
[15]	train-logloss:0.04956	validation-logloss:0.14013
[16]	train-logloss:0.04264	validation-logloss:0.12911
[17]	train-logloss:0.03725	validation-logloss:0.12068
[18]	train-logloss:0.03272	validation-

[151]	train-logloss:0.00489	validation-logloss:0.10789
[152]	train-logloss:0.00500	validation-logloss:0.10833
[153]	train-logloss:0.00491	validation-logloss:0.10818
[154]	train-logloss:0.00482	validation-logloss:0.10877
[155]	train-logloss:0.00490	validation-logloss:0.10850
[156]	train-logloss:0.00486	validation-logloss:0.10820
[157]	train-logloss:0.00486	validation-logloss:0.10820
[158]	train-logloss:0.00483	validation-logloss:0.10753
[159]	train-logloss:0.00470	validation-logloss:0.10755
[160]	train-logloss:0.00473	validation-logloss:0.10784
[161]	train-logloss:0.00482	validation-logloss:0.10790
[162]	train-logloss:0.00480	validation-logloss:0.10823
[163]	train-logloss:0.00480	validation-logloss:0.10823
[164]	train-logloss:0.00474	validation-logloss:0.10843
[165]	train-logloss:0.00477	validation-logloss:0.10829
[166]	train-logloss:0.00483	validation-logloss:0.10834
[167]	train-logloss:0.00469	validation-logloss:0.10839
[0]	train-logloss:0.76883	validation-logloss:0.77286
[1]	train-lo

[134]	train-logloss:0.00526	validation-logloss:0.13277
[135]	train-logloss:0.00528	validation-logloss:0.13273
[136]	train-logloss:0.00525	validation-logloss:0.13289
[137]	train-logloss:0.00523	validation-logloss:0.13219
[138]	train-logloss:0.00513	validation-logloss:0.13308
[139]	train-logloss:0.00525	validation-logloss:0.13337
[140]	train-logloss:0.00518	validation-logloss:0.13324
[141]	train-logloss:0.00505	validation-logloss:0.13424
[142]	train-logloss:0.00513	validation-logloss:0.13409
[143]	train-logloss:0.00516	validation-logloss:0.13358
[144]	train-logloss:0.00509	validation-logloss:0.13367
[145]	train-logloss:0.00515	validation-logloss:0.13329
[146]	train-logloss:0.00517	validation-logloss:0.13327
[147]	train-logloss:0.00517	validation-logloss:0.13327
[148]	train-logloss:0.00517	validation-logloss:0.13327
[149]	train-logloss:0.00509	validation-logloss:0.13315
[150]	train-logloss:0.00509	validation-logloss:0.13332
[151]	train-logloss:0.00504	validation-logloss:0.13323
[152]	trai

[127]	train-logloss:0.00526	validation-logloss:0.14513
[128]	train-logloss:0.00509	validation-logloss:0.14489
[129]	train-logloss:0.00516	validation-logloss:0.14483
[130]	train-logloss:0.00523	validation-logloss:0.14573
[131]	train-logloss:0.00521	validation-logloss:0.14369
[132]	train-logloss:0.00525	validation-logloss:0.14411
[133]	train-logloss:0.00522	validation-logloss:0.14407
[134]	train-logloss:0.00517	validation-logloss:0.14354
[135]	train-logloss:0.00513	validation-logloss:0.14327
[136]	train-logloss:0.00518	validation-logloss:0.14340
[137]	train-logloss:0.00506	validation-logloss:0.14263
[138]	train-logloss:0.00506	validation-logloss:0.14263
[0]	train-logloss:0.77127	validation-logloss:0.77258
[1]	train-logloss:0.75425	validation-logloss:0.78035
[2]	train-logloss:0.68150	validation-logloss:0.73786
[3]	train-logloss:0.58398	validation-logloss:0.66212
[4]	train-logloss:0.49278	validation-logloss:0.58947
[5]	train-logloss:0.40780	validation-logloss:0.51421
[6]	train-logloss:0.32

[139]	train-logloss:0.00520	validation-logloss:0.17693
[140]	train-logloss:0.00508	validation-logloss:0.17611
[141]	train-logloss:0.00499	validation-logloss:0.17595
[142]	train-logloss:0.00512	validation-logloss:0.17618
[143]	train-logloss:0.00512	validation-logloss:0.17618
[144]	train-logloss:0.00491	validation-logloss:0.17503
[145]	train-logloss:0.00506	validation-logloss:0.17531
[146]	train-logloss:0.00512	validation-logloss:0.17599
[147]	train-logloss:0.00494	validation-logloss:0.17448
[148]	train-logloss:0.00494	validation-logloss:0.17448
[149]	train-logloss:0.00498	validation-logloss:0.17438
[150]	train-logloss:0.00495	validation-logloss:0.17408
[151]	train-logloss:0.00504	validation-logloss:0.17475
[152]	train-logloss:0.00498	validation-logloss:0.17535
[153]	train-logloss:0.00498	validation-logloss:0.17535
[0]	train-logloss:0.79386	validation-logloss:0.79519
[1]	train-logloss:0.76101	validation-logloss:0.77360
[2]	train-logloss:0.68887	validation-logloss:0.71763
[3]	train-loglos

[136]	train-logloss:0.00509	validation-logloss:0.11409
[137]	train-logloss:0.00509	validation-logloss:0.11409
[138]	train-logloss:0.00501	validation-logloss:0.11403
[139]	train-logloss:0.00505	validation-logloss:0.11372
[140]	train-logloss:0.00497	validation-logloss:0.11379
[141]	train-logloss:0.00501	validation-logloss:0.11355
[142]	train-logloss:0.00500	validation-logloss:0.11338
[143]	train-logloss:0.00490	validation-logloss:0.11413
[144]	train-logloss:0.00490	validation-logloss:0.11451
[145]	train-logloss:0.00505	validation-logloss:0.11534
[146]	train-logloss:0.00494	validation-logloss:0.11536
[147]	train-logloss:0.00494	validation-logloss:0.11536
[148]	train-logloss:0.00510	validation-logloss:0.11533
[149]	train-logloss:0.00500	validation-logloss:0.11507
[150]	train-logloss:0.00490	validation-logloss:0.11470
[151]	train-logloss:0.00489	validation-logloss:0.11419
[152]	train-logloss:0.00482	validation-logloss:0.11404
[153]	train-logloss:0.00490	validation-logloss:0.11414
[154]	trai

In [17]:
dtrain = xgb.DMatrix(X_train, label = y_train)
dtest = xgb.DMatrix(X_test, label = y_test)
evallist = [(dtrain, 'train'),(dtest, 'test')]

In [18]:
xgb_model = xgb.train(best_params, dtrain = dmat_train, num_boost_round=xgb_num_boost_round, evals=evallist)
LOGGER.info("XGBoost Model trained with {} num_boost_round".format(xgb_num_boost_round))

[0]	train-logloss:0.77372	test-logloss:0.77182
[1]	train-logloss:0.71549	test-logloss:0.73038
[2]	train-logloss:0.64065	test-logloss:0.67572
[3]	train-logloss:0.54424	test-logloss:0.58045
[4]	train-logloss:0.44119	test-logloss:0.48580
[5]	train-logloss:0.36313	test-logloss:0.41430
[6]	train-logloss:0.28787	test-logloss:0.34185
[7]	train-logloss:0.23207	test-logloss:0.29357
[8]	train-logloss:0.18126	test-logloss:0.24507
[9]	train-logloss:0.14401	test-logloss:0.20932
[10]	train-logloss:0.11340	test-logloss:0.18340
[11]	train-logloss:0.09419	test-logloss:0.16880
[12]	train-logloss:0.07638	test-logloss:0.15240
[13]	train-logloss:0.06232	test-logloss:0.14090
[14]	train-logloss:0.05244	test-logloss:0.13140
[15]	train-logloss:0.04448	test-logloss:0.12188
[16]	train-logloss:0.03931	test-logloss:0.11807
[17]	train-logloss:0.03416	test-logloss:0.11160
[18]	train-logloss:0.03018	test-logloss:0.10704
[19]	train-logloss:0.02664	test-logloss:0.10323
[20]	train-logloss:0.02366	test-logloss:0.10007
[2

In [19]:
# Save the model for evaluation

save_obj(obj=xgb_model, 
         name='HR_Attrition_XGB_Model_202208', 
         file_path=path_to_data)
LOGGER.info("XGBoost Model HR_Attrition_XGB_Model_202208 saved to {}".format(path_to_data))

### LightGBM - HPT

In [20]:
# Define base parameters (default values)

light_params = {
    # Core Parameters
    'task': 'train',
    'objective': 'binary',
    'learning_rate': 0.1,               
    'num_threads': os.cpu_count(),
    'num_leaves': 31,                   
    'seed': 42,
    
    # Learning Control Parameters
    'min_data_in_leaf': 20,             
    'min_sum_hessian_in_leaf': 0.001,   
    'bagging_fraction': 1.0,            
    'feature_fraction': 1.0,            
    'max_delta_step': 2,
    'xgboost_dart_mode': True,
    'extra_trees': True,
    'max_depth': -1,                    
    'path_smooth': 0.0,                 
    'verbosity': -1,
    
    # Dataset Parameters
    'max_bins': 255,                     
    
    # Objective Parameters
    'is_unbalance': True,
    'sigmoid': 1.0                      
}

In [23]:
# Setup grid for HPT function

changed_params_grid = [[
    {
    'min_data_in_leaf': min_data_in_leaf,                 
    'min_sum_hessian_in_leaf': min_sum_hessian_in_leaf, 
    'learning_rate': learning_rate,       
    }           
    for min_data_in_leaf in [1, 20, 100, 250, 500, 1000]                 # min_data_in_leaf >= 0.0
    for min_sum_hessian_in_leaf in [0.001, 0.10, 1.0, 5.0, 10.0]      # min_sum_hessian_in_leaf >= 0.0
    for learning_rate in [0.001, 0.01, 0.1, 0.5, 1.5, 5.0]            # Use small learning_rate with large num_iterations  
], 
    [{
    'bagging_fraction': bagging_fraction,                 
    'feature_fraction': feature_fraction,                 
    'path_smooth': path_smooth,                           
    }    
    for bagging_fraction in [0.001, 0.01, 0.1, 0.25, 0.5, 1.0]        # 0.0 < bagging_fraction <= 1.0
    for feature_fraction in [0.001, 0.01, 0.1, 0.25, 0.5, 1.0]        # 0.0 < feature_fraction <= 1.0
    for path_smooth in [0.01, 10, 100, 500, 1000]                     # path_smooth >= 0.0
],
    [{
                    
    'num_leaves': num_leaves,                             
    'max_bin': max_bin,                                   
    'sigmoid': sigmoid                                    
    }
    for num_leaves in [2, 250, 500, 1000, 2000, 10000]                      # 1 < num_leaves <= 131072    
    for max_bin in [2, 100, 255, 750, 1500, 5000]                        # max_bin > 1.0     
    for sigmoid in [0.001, 0.01, 0.1, 1, 10, 50]                           # sigmoid > 0.0     
]]

In [24]:
# HPT using LightGBM
best_params = lgb_hpt_tune(LOGGER=LOGGER,
                           base_params=light_params,
                           grid=changed_params_grid, 
                           X_train=X_train,
                           y_train=y_train,
                           nfold=5,
                           stratified=True, 
                           metrics='auc')

LightGBM HPT start_time Mon Aug 22 15:57:44 2022
Hyperparameter space grid has 180 points.

start_time Mon Aug 22 15:57:44 2022


100%|████████████████████████████████████████████████████████████████████████████████| 180/180 [05:08<00:00,  1.71s/it]


Parameters Changed ----------------------------------------------------------- 
learning_rate : 0.1 -> 0.5

Hyperparameter space grid has 180 points.

start_time Mon Aug 22 16:02:53 2022


100%|████████████████████████████████████████████████████████████████████████████████| 180/180 [09:04<00:00,  3.03s/it]


Parameters Changed ----------------------------------------------------------- 
bagging_fraction : 1.0 -> 0.001
path_smooth : 0.0 -> 10

Hyperparameter space grid has 216 points.

start_time Mon Aug 22 16:11:58 2022


100%|████████████████████████████████████████████████████████████████████████████████| 216/216 [11:46<00:00,  3.27s/it]

Parameters Changed ----------------------------------------------------------- 
num_leaves : 31 -> 250
sigmoid : 1.0 -> 10

New Parameters: 

{'task': 'train', 'objective': 'binary', 'learning_rate': 0.5, 'num_threads': 8, 'num_leaves': 250, 'seed': 42, 'min_data_in_leaf': 20, 'min_sum_hessian_in_leaf': 0.001, 'bagging_fraction': 0.001, 'feature_fraction': 1.0, 'max_delta_step': 2, 'xgboost_dart_mode': True, 'extra_trees': True, 'max_depth': -1, 'path_smooth': 10, 'verbosity': -1, 'max_bins': 255, 'is_unbalance': True, 'sigmoid': 10, 'max_bin': 100} 

LightGBM HPT total time 0:26:00.015201


LightGBM HPT end_time Mon Aug 22 16:23:44 2022





In [25]:
best_params

{'task': 'train',
 'objective': 'binary',
 'learning_rate': 0.5,
 'num_threads': 8,
 'num_leaves': 250,
 'seed': 42,
 'min_data_in_leaf': 20,
 'min_sum_hessian_in_leaf': 0.001,
 'bagging_fraction': 0.001,
 'feature_fraction': 1.0,
 'max_delta_step': 2,
 'xgboost_dart_mode': True,
 'extra_trees': True,
 'max_depth': -1,
 'path_smooth': 10,
 'verbosity': -1,
 'max_bins': 255,
 'is_unbalance': True,
 'sigmoid': 10,
 'max_bin': 100}

In [26]:
# Determine optimal number of boosters

lgb_cv_scores = []
for train_index, val_index in StratifiedKFold(n_splits = 5, shuffle = True).split(X_train, y_train.to_numpy()):
        
    X_train_skf, X_val_skf = X.iloc[train_index], X.iloc[val_index]
    y_train_skf, y_val_skf = y.iloc[train_index], y.iloc[val_index]
        
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test)
        
    bst = lgb.train(
                        params=best_params,
                        train_set=train_data,
                        num_boost_round=9999,
                        early_stopping_rounds=100,
                        valid_sets=[valid_data],
                        verbose_eval=1
) 
    lgb_cv_scores.append(bst.best_iteration)
    
lgb_num_boost_round = round(np.mean(lgb_cv_scores))
LOGGER.info("Optimal number of boosters found for LightGBM model: {}".format(lgb_num_boost_round))

[1]	valid_0's binary_logloss: 0.442086
Training until validation scores don't improve for 100 rounds
[2]	valid_0's binary_logloss: 0.418963
[3]	valid_0's binary_logloss: 0.393852
[4]	valid_0's binary_logloss: 0.356793
[5]	valid_0's binary_logloss: 0.324693
[6]	valid_0's binary_logloss: 0.296429
[7]	valid_0's binary_logloss: 0.264725
[8]	valid_0's binary_logloss: 0.235505
[9]	valid_0's binary_logloss: 0.215564
[10]	valid_0's binary_logloss: 0.196786
[11]	valid_0's binary_logloss: 0.182898
[12]	valid_0's binary_logloss: 0.173958
[13]	valid_0's binary_logloss: 0.161633
[14]	valid_0's binary_logloss: 0.152731
[15]	valid_0's binary_logloss: 0.138071
[16]	valid_0's binary_logloss: 0.133925
[17]	valid_0's binary_logloss: 0.126341
[18]	valid_0's binary_logloss: 0.117654
[19]	valid_0's binary_logloss: 0.109439
[20]	valid_0's binary_logloss: 0.104054
[21]	valid_0's binary_logloss: 0.100733
[22]	valid_0's binary_logloss: 0.0984949
[23]	valid_0's binary_logloss: 0.0971355
[24]	valid_0's binary_log

[88]	valid_0's binary_logloss: 0.17147
[89]	valid_0's binary_logloss: 0.175795
[90]	valid_0's binary_logloss: 0.177139
[91]	valid_0's binary_logloss: 0.174347
[92]	valid_0's binary_logloss: 0.172176
[93]	valid_0's binary_logloss: 0.174087
[94]	valid_0's binary_logloss: 0.174379
[95]	valid_0's binary_logloss: 0.176162
[96]	valid_0's binary_logloss: 0.179159
[97]	valid_0's binary_logloss: 0.180259
[98]	valid_0's binary_logloss: 0.180904
[99]	valid_0's binary_logloss: 0.182511
[100]	valid_0's binary_logloss: 0.182315
[101]	valid_0's binary_logloss: 0.184905
[102]	valid_0's binary_logloss: 0.186397
[103]	valid_0's binary_logloss: 0.188499
[104]	valid_0's binary_logloss: 0.18927
[105]	valid_0's binary_logloss: 0.190962
[106]	valid_0's binary_logloss: 0.193475
[107]	valid_0's binary_logloss: 0.194183
[108]	valid_0's binary_logloss: 0.194037
[109]	valid_0's binary_logloss: 0.195913
[110]	valid_0's binary_logloss: 0.195569
[111]	valid_0's binary_logloss: 0.199809
[112]	valid_0's binary_logloss

[65]	valid_0's binary_logloss: 0.131127
[66]	valid_0's binary_logloss: 0.134065
[67]	valid_0's binary_logloss: 0.134756
[68]	valid_0's binary_logloss: 0.136086
[69]	valid_0's binary_logloss: 0.137244
[70]	valid_0's binary_logloss: 0.139325
[71]	valid_0's binary_logloss: 0.139774
[72]	valid_0's binary_logloss: 0.143227
[73]	valid_0's binary_logloss: 0.144855
[74]	valid_0's binary_logloss: 0.14632
[75]	valid_0's binary_logloss: 0.147564
[76]	valid_0's binary_logloss: 0.149533
[77]	valid_0's binary_logloss: 0.149068
[78]	valid_0's binary_logloss: 0.151358
[79]	valid_0's binary_logloss: 0.151447
[80]	valid_0's binary_logloss: 0.15367
[81]	valid_0's binary_logloss: 0.156927
[82]	valid_0's binary_logloss: 0.160047
[83]	valid_0's binary_logloss: 0.16215
[84]	valid_0's binary_logloss: 0.163558
[85]	valid_0's binary_logloss: 0.166209
[86]	valid_0's binary_logloss: 0.169365
[87]	valid_0's binary_logloss: 0.170521
[88]	valid_0's binary_logloss: 0.17147
[89]	valid_0's binary_logloss: 0.175795
[90]

In [27]:
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test)

In [28]:
light_model = lgb.train(
                        params=best_params,
                        train_set=train_data,
                        num_boost_round=lgb_num_boost_round,
                        valid_sets=[valid_data],
                        verbose_eval=1
)
LOGGER.info("LightGBM Model trained with {} boosting rounds".format(lgb_num_boost_round))

[1]	valid_0's binary_logloss: 0.442086
[2]	valid_0's binary_logloss: 0.418963
[3]	valid_0's binary_logloss: 0.393852
[4]	valid_0's binary_logloss: 0.356793
[5]	valid_0's binary_logloss: 0.324693
[6]	valid_0's binary_logloss: 0.296429
[7]	valid_0's binary_logloss: 0.264725
[8]	valid_0's binary_logloss: 0.235505
[9]	valid_0's binary_logloss: 0.215564
[10]	valid_0's binary_logloss: 0.196786
[11]	valid_0's binary_logloss: 0.182898
[12]	valid_0's binary_logloss: 0.173958
[13]	valid_0's binary_logloss: 0.161633
[14]	valid_0's binary_logloss: 0.152731
[15]	valid_0's binary_logloss: 0.138071
[16]	valid_0's binary_logloss: 0.133925
[17]	valid_0's binary_logloss: 0.126341
[18]	valid_0's binary_logloss: 0.117654
[19]	valid_0's binary_logloss: 0.109439
[20]	valid_0's binary_logloss: 0.104054
[21]	valid_0's binary_logloss: 0.100733
[22]	valid_0's binary_logloss: 0.0984949
[23]	valid_0's binary_logloss: 0.0971355
[24]	valid_0's binary_logloss: 0.0931256
[25]	valid_0's binary_logloss: 0.0930927
[26]	

In [30]:
# Save the model for evaluation
model_name = 'HR_Attrition_LIGHTGBM_Model_{}'.format(datetime.today().strftime('%Y%m'))
save_obj(obj=light_model, 
         name=model_name, 
         file_path=path_to_data)
LOGGER.info("LightGBM Model {} saved to {}".format(model_name, path_to_data))

In [31]:
LOGGER.info("Hyper Parameter Tuning Process completed {}".format( time.asctime( time.localtime(time.time()) ) ))