# TODO:
https://www.kaggle.com/competitions/playground-series-s3e2/discussion/378795
https://www.kaggle.com/competitions/playground-series-s3e2/discussion/378780

These show that we should incorporate original data but when validating using K fold methods, we should only validate based on data in the competition dataset and not on original dataset. So implement this technique for this competition

### Also, bagging resulted in a lot better score with simple mean in the last competition, thought it didn't score much better on public, it absolutely ranked up to 60th position in the final private LB. So,
## Remember to trust your cvs over pbl

# Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from pathlib import Path
import xgboost as xgb
import lightgbm as lgbm
import catboost
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score
from IPython.display import display
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
import optuna
from sklearn.preprocessing import StandardScaler

In [2]:
import warnings
warnings.filterwarnings('ignore')

# Loading Data

In [3]:
BASE_PATH = Path('../input/playground-series-s3e3')

# id is not going to be an informative feature, so we're dropping it for train
# but since we'll need test set's ids to make the submission file, so we'll save those in  a separate varible before dropping
train = pd.read_csv(BASE_PATH / "train.csv").drop(columns="id")
test = pd.read_csv(BASE_PATH / "test.csv")
test_idx = test.id
test = test.drop(columns="id")

# It's been shown that incorporating original data, improves scores - at least on the public leaderboard. So let's do that!
original = pd.read_csv('../input/ibm-hr-analytics-attrition-dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv')

train.head()

Unnamed: 0,Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EnvironmentSatisfaction,Gender,...,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,Attrition
0,36,Travel_Frequently,599,Research & Development,24,3,Medical,1,4,Male,...,80,1,10,2,3,10,0,7,8,0
1,35,Travel_Rarely,921,Sales,8,3,Other,1,1,Male,...,80,1,4,3,3,4,2,0,3,0
2,32,Travel_Rarely,718,Sales,26,3,Marketing,1,3,Male,...,80,2,4,3,3,3,2,1,2,0
3,38,Travel_Rarely,1488,Research & Development,2,3,Medical,1,3,Female,...,80,0,15,1,1,6,0,0,2,0
4,50,Travel_Rarely,1017,Research & Development,5,4,Medical,1,2,Female,...,80,0,31,0,3,31,14,4,10,1


In [4]:
original.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


# Pre-Pre-Processing

### Let's make the feature names and order consistent b/w our competition dataset and original dataset, before we concatenate

In [5]:
original['Attrition'] = (original['Attrition'] == 'Yes').astype(np.int64)

# in original data, id is termed as "EmployeeNumber", so let's drop it
original.drop(columns="EmployeeNumber", inplace=True)

In [6]:
# now reordering the features in original dataset
original = original[list(train.columns)]

### Someone in the first compeition showed that adding a source feature i.e. a feature that indicates whether a given record is from original dataset or synthetic improves performance, so let's' do that!

In [7]:
original["is_original"] = 1
train["is_original"] = 0
test["is_original"] = 0

### Let's finally concatenate

In [8]:
train_extended = pd.concat([train, original]).reset_index(drop=True)
len(train_extended)

3147

### checking for null values

In [9]:
pd.concat([train_extended.isnull().sum().rename("Missing in Train"),
           test.isnull().sum().rename("Missing in Test")], axis=1).sort_values(by="Missing in Train")

Unnamed: 0,Missing in Train,Missing in Test
Age,0,0.0
Over18,0,0.0
OverTime,0,0.0
PercentSalaryHike,0,0.0
PerformanceRating,0,0.0
RelationshipSatisfaction,0,0.0
StandardHours,0,0.0
NumCompaniesWorked,0,0.0
StockOptionLevel,0,0.0
TrainingTimesLastYear,0,0.0


#### Insights: No missing values! Something to celebrate! :p

## Let's also concatenate test data to train

In [10]:
y = train_extended.Attrition
y

0       0
1       0
2       0
3       0
4       1
       ..
3142    0
3143    0
3144    0
3145    0
3146    0
Name: Attrition, Length: 3147, dtype: int64

In [11]:
df = pd.concat([train_extended.drop(columns="Attrition"), test])

# Preprocessing

### Identifying Categorical Features

In [12]:
df.dtypes.sort_values()

Age                          int64
YearsSinceLastPromotion      int64
YearsInCurrentRole           int64
YearsAtCompany               int64
WorkLifeBalance              int64
TrainingTimesLastYear        int64
TotalWorkingYears            int64
StockOptionLevel             int64
StandardHours                int64
RelationshipSatisfaction     int64
PerformanceRating            int64
PercentSalaryHike            int64
NumCompaniesWorked           int64
MonthlyRate                  int64
YearsWithCurrManager         int64
MonthlyIncome                int64
JobSatisfaction              int64
DailyRate                    int64
DistanceFromHome             int64
Education                    int64
EmployeeCount                int64
HourlyRate                   int64
EnvironmentSatisfaction      int64
JobLevel                     int64
JobInvolvement               int64
is_original                  int64
Gender                      object
MaritalStatus               object
OverTime            

### Remember, being of type int, doesn't mean that the feature cannot be categorial.
#### Let's check for unique values in each column

In [13]:
df.nunique().sort_values()

StandardHours                  1
EmployeeCount                  1
Over18                         1
is_original                    2
PerformanceRating              2
OverTime                       2
Gender                         2
BusinessTravel                 3
Department                     3
MaritalStatus                  3
RelationshipSatisfaction       4
JobSatisfaction                4
WorkLifeBalance                4
StockOptionLevel               5
JobInvolvement                 5
EnvironmentSatisfaction        5
Education                      6
JobLevel                       6
EducationField                 6
TrainingTimesLastYear          7
JobRole                        9
NumCompaniesWorked            11
PercentSalaryHike             15
YearsSinceLastPromotion       16
YearsWithCurrManager          18
YearsInCurrentRole            19
DistanceFromHome              29
YearsAtCompany                38
TotalWorkingYears             41
Age                           43
HourlyRate

#### INSIGHTS: Taking a quick look at number of unique values in features reveals that we should be safe setting the threshold for to 20 unique values for what consitutes as a categorical feature
#### We'll drop columns with only one value as they bring nothing to the table

#### But feel free to use your own intuition and test & trial to figure our what's works best in terms of threshold and features

In [14]:
feats_to_drop = [col for col in df.columns if df[col].nunique()==1]
cat_features = [col for col in df.columns if df[col].nunique() <= 20 and df[col].nunique() > 1]

In [15]:
df.drop(columns=feats_to_drop, inplace=True)

#### We won't use one hot encoder here, because we already have a large ratio of features to rows and one hotting would increase that ratio by a large margin even further which will result in severe overfitting
#### Rather we'll use ordinal/label encoder (they're basically the same thing)

In [16]:
ord_enc = OrdinalEncoder()

ord_enc.fit(df[cat_features])

df[cat_features] = ord_enc.transform(df[cat_features])
df.head()

Unnamed: 0,Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,HourlyRate,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,is_original
0,36,1.0,599,1.0,24,2.0,3.0,4.0,1.0,42,...,1.0,1.0,10,2.0,2.0,10,0.0,7.0,8.0,0.0
1,35,2.0,921,2.0,8,2.0,4.0,1.0,1.0,46,...,3.0,1.0,4,3.0,2.0,4,2.0,0.0,3.0,0.0
2,32,2.0,718,2.0,26,2.0,2.0,3.0,1.0,80,...,3.0,2.0,4,3.0,2.0,3,2.0,1.0,2.0,0.0
3,38,2.0,1488,1.0,2,2.0,3.0,3.0,0.0,40,...,2.0,0.0,15,1.0,0.0,6,0.0,0.0,2.0,0.0
4,50,2.0,1017,1.0,5,3.0,3.0,2.0,0.0,37,...,2.0,0.0,31,0.0,2.0,31,14.0,4.0,10.0,0.0


## Always a good idea to scale the features

In [17]:
# we don't want to scale the source column
sc = StandardScaler()
df_scaled = df.copy()
df_scaled.iloc[:, :-1] = pd.DataFrame(sc.fit_transform(df.iloc[:, :-1]), columns=df.columns[:-1])
df_scaled.head()

Unnamed: 0,Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,HourlyRate,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,is_original
0,-0.042187,-1.006941,-0.676304,-0.504308,1.914406,0.070936,0.631507,1.151976,0.786359,-1.276573,...,-0.680038,0.299291,-0.12089,-0.638312,0.351622,0.512223,-1.167739,1.553534,1.052484,0.0
1,-0.155733,0.556236,0.160523,1.478528,-0.108063,0.070936,1.402986,-1.590395,0.786359,-1.074371,...,1.18259,0.299291,-0.927119,0.188354,0.351622,-0.489294,-0.609548,-0.655171,-0.331151,0.0
2,-0.49637,0.556236,-0.367042,1.478528,2.167215,0.070936,-0.139973,0.237852,0.786359,0.644348,...,1.18259,1.526962,-0.927119,0.188354,0.351622,-0.656214,-0.609548,-0.339641,-0.607878,0.0
3,0.184905,0.556236,1.634065,-0.504308,-0.866489,0.070936,0.631507,0.237852,-1.271683,-1.377674,...,0.251276,-0.928379,0.550967,-1.464978,-2.630526,-0.155455,-1.167739,-0.655171,-0.607878,0.0
4,1.547454,0.556236,0.410011,-0.504308,-0.487276,1.059865,0.631507,-0.676271,-1.271683,-1.529326,...,0.251276,-0.928379,2.70091,-2.291644,0.351622,4.017536,2.739597,0.606946,1.605938,0.0


### Let's seprate test and train sets

In [18]:
X_train = df_scaled.iloc[:-len(test), :]
X_test = df_scaled.iloc[-len(test): , :]

# Modelling

### But first, let's setup cross validation

In [34]:
# for i, (x, y) in enumerate(zip([1,2,3], [4,5,6])):
#     print(f"{'*'*10} {i}")
#     print(f"X: {x}")
#     print(f"Y: {y}")    

********** 0
X: 1
Y: 4
********** 1
X: 2
Y: 5
********** 2
X: 3
Y: 6


In [36]:
# a = np.array([1,2,3])
# b = np.array([4,5,6])

# np.append(a, b)

array([1, 2, 3, 4, 5, 6])

In [19]:
# we're gonna train on the combined dataset but, we'll only calculate the validation score only on comp data

N_FOLDS = 10

def cross_validate(X, y, model):
    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1337)
    all_scores = []
    
    X_comp = X[X.is_original==0]
    y_comp = y[X_comp.index]
    X_original = X[X.is_original==1]
    y_original = y[X_original.index]
    
    for fold_id, ((train_comp_idx, val_comp_idx), (train_org_idx, val_org_idx)) \
                in enumerate(zip(skf.split(X_comp, y_comp), skf.split(X_original, y_original))):
        
        # for training we'll use data from both datasets
        train_idx = np.append(train_comp_idx, train_org_idx)
        # but for validation we'll use only competition data
        val_idx = val_comp_idx
        
        
        X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_tr, y_val = y[train_idx], y[val_idx]
               
        model.fit(X_tr, y_tr)
        
        y_pred = model.predict_proba(X_val)[:, 1]
        
        auc = roc_auc_score(y_val, y_pred)
        
        print(f"Fold {fold_id} \t auc: {auc}")
        
        all_scores.append(auc)
    
    avg_auc = np.mean(all_scores)
    
    print(f"Avg AUC: {avg_auc}")

## INSIGHTS:
So one thing I've noticed is that using cross_validate this way gives us the score that's almost exactly the same that we get on public leaderboard. So let's use this method of cross validation to
* Tune all our models
* Select top k
* Take their predictions average
* submit

# Hyperparameters Tuning

## XGBoost

In [31]:
def objective_xgb(trial, X, y):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 2, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_loguniform('gamma', 0.00001, 0.5),
        'subsample': trial.suggest_loguniform('subsample', 0.2, 1.0),
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.2, 1.0),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.00001, 1.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.00001, 1.0),
        'early_stopping_rounds': trial.suggest_int("early_stoppig_rounds", 10, 100)
    }
    

    # we're gonna train on the combined dataset but, we'll only calculate the validation score only on comp data

    N_FOLDS = 10

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1337)
    all_scores = []
    
    X_comp = X[X.is_original==0]
    y_comp = y[X_comp.index]
    X_original = X[X.is_original==1]
    y_original = y[X_original.index]
    
    for fold_id, ((train_comp_idx, val_comp_idx), (train_org_idx, val_org_idx)) \
                in enumerate(zip(skf.split(X_comp, y_comp), skf.split(X_original, y_original))):
        
        # for training we'll use data from both datasets
        train_idx = np.append(train_comp_idx, train_org_idx)
        # but for validation we'll use only competition data
        val_idx = val_comp_idx
        
        
        X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_tr, y_val = y[train_idx], y[val_idx]
        
        model = xgb.XGBClassifier(**params)
        model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
        
        y_pred = model.predict_proba(X_val)[:, 1]
        
        auc = roc_auc_score(y_val, y_pred)
        
#         print(f"Fold {fold_id} \t auc: {auc}")
        
        all_scores.append(auc)
    
    avg_auc = np.mean(all_scores)
    
    print(f"Avg AUC: {avg_auc}")
    
    return avg_auc

In [32]:
study_xgb = optuna.create_study(study_name="xgboost_tuning", direction="maximize")
func = lambda trial: objective_xgb(trial, X_train, y)
study_xgb.optimize(func, n_trials=100)

[32m[I 2023-01-17 15:39:02,893][0m A new study created in memory with name: xgboost_tuning[0m
[32m[I 2023-01-17 15:39:11,102][0m Trial 0 finished with value: 0.9459114726971869 and parameters: {'n_estimators': 128, 'max_depth': 4, 'learning_rate': 0.1377132655000192, 'min_child_weight': 4, 'gamma': 0.006910429856181178, 'subsample': 0.43557898641630843, 'colsample_bytree': 0.38295488364827396, 'reg_alpha': 0.06508849850885894, 'reg_lambda': 0.05075093144690056, 'early_stoppig_rounds': 45}. Best is trial 0 with value: 0.9459114726971869.[0m


Avg AUC: 0.9459114726971869


[32m[I 2023-01-17 15:39:23,071][0m Trial 1 finished with value: 0.9302619966905681 and parameters: {'n_estimators': 329, 'max_depth': 10, 'learning_rate': 0.17550539771537726, 'min_child_weight': 5, 'gamma': 0.45046264297483407, 'subsample': 0.2641198447719257, 'colsample_bytree': 0.38624445898097776, 'reg_alpha': 0.00487058094662315, 'reg_lambda': 0.0002809496155313422, 'early_stoppig_rounds': 50}. Best is trial 0 with value: 0.9459114726971869.[0m


Avg AUC: 0.9302619966905681


[32m[I 2023-01-17 15:39:31,754][0m Trial 2 finished with value: 0.9341756297113439 and parameters: {'n_estimators': 112, 'max_depth': 7, 'learning_rate': 0.022300404842103327, 'min_child_weight': 2, 'gamma': 0.07288652089204267, 'subsample': 0.2966874919771711, 'colsample_bytree': 0.6795157247885405, 'reg_alpha': 8.021502125464333e-05, 'reg_lambda': 0.0003981328004288705, 'early_stoppig_rounds': 66}. Best is trial 0 with value: 0.9459114726971869.[0m


Avg AUC: 0.9341756297113439


[32m[I 2023-01-17 15:39:37,997][0m Trial 3 finished with value: 0.9453065820922963 and parameters: {'n_estimators': 56, 'max_depth': 10, 'learning_rate': 0.0515729715947752, 'min_child_weight': 3, 'gamma': 6.759000722621744e-05, 'subsample': 0.48215040511084994, 'colsample_bytree': 0.725431126280524, 'reg_alpha': 0.0005110077416308302, 'reg_lambda': 1.9089812915422165e-05, 'early_stoppig_rounds': 69}. Best is trial 0 with value: 0.9459114726971869.[0m


Avg AUC: 0.9453065820922963


[32m[I 2023-01-17 15:39:51,651][0m Trial 4 finished with value: 0.846024315131458 and parameters: {'n_estimators': 265, 'max_depth': 2, 'learning_rate': 0.013011294885848521, 'min_child_weight': 5, 'gamma': 3.515611604590609e-05, 'subsample': 0.4854106901036844, 'colsample_bytree': 0.21840822796704684, 'reg_alpha': 0.1253644512568437, 'reg_lambda': 0.00020116294755919895, 'early_stoppig_rounds': 57}. Best is trial 0 with value: 0.9459114726971869.[0m


Avg AUC: 0.846024315131458


[32m[I 2023-01-17 15:40:15,753][0m Trial 5 finished with value: 0.9481391340319911 and parameters: {'n_estimators': 423, 'max_depth': 4, 'learning_rate': 0.02424101594417981, 'min_child_weight': 1, 'gamma': 3.761770201772561e-05, 'subsample': 0.2552962176023468, 'colsample_bytree': 0.3430225169762055, 'reg_alpha': 0.021064314147037248, 'reg_lambda': 0.00048098621395449716, 'early_stoppig_rounds': 77}. Best is trial 5 with value: 0.9481391340319911.[0m


Avg AUC: 0.9481391340319911


[32m[I 2023-01-17 15:40:49,030][0m Trial 6 finished with value: 0.9697478856407429 and parameters: {'n_estimators': 454, 'max_depth': 8, 'learning_rate': 0.032738621025353416, 'min_child_weight': 4, 'gamma': 0.00276975418540302, 'subsample': 0.5486924960047377, 'colsample_bytree': 0.6082878702417236, 'reg_alpha': 0.8639042602544933, 'reg_lambda': 0.0011233300848227739, 'early_stoppig_rounds': 90}. Best is trial 6 with value: 0.9697478856407429.[0m


Avg AUC: 0.9697478856407429


[32m[I 2023-01-17 15:41:09,668][0m Trial 7 finished with value: 0.9688901912116197 and parameters: {'n_estimators': 270, 'max_depth': 10, 'learning_rate': 0.036862342938907895, 'min_child_weight': 5, 'gamma': 0.0026344010433498218, 'subsample': 0.7579682011785649, 'colsample_bytree': 0.42490503712451605, 'reg_alpha': 0.013294711634245164, 'reg_lambda': 0.37884121145228256, 'early_stoppig_rounds': 36}. Best is trial 6 with value: 0.9697478856407429.[0m


Avg AUC: 0.9688901912116197


[32m[I 2023-01-17 15:41:33,539][0m Trial 8 finished with value: 0.9241487405773119 and parameters: {'n_estimators': 410, 'max_depth': 4, 'learning_rate': 0.014305488416101835, 'min_child_weight': 1, 'gamma': 0.1719786043916331, 'subsample': 0.46948115222515674, 'colsample_bytree': 0.31765752891823923, 'reg_alpha': 0.9296775918077036, 'reg_lambda': 0.002093730482193919, 'early_stoppig_rounds': 60}. Best is trial 6 with value: 0.9697478856407429.[0m


Avg AUC: 0.9241487405773119


[32m[I 2023-01-17 15:42:11,143][0m Trial 9 finished with value: 0.9758930869645155 and parameters: {'n_estimators': 413, 'max_depth': 9, 'learning_rate': 0.024633088864316647, 'min_child_weight': 3, 'gamma': 0.0026262344084730127, 'subsample': 0.8487276476170488, 'colsample_bytree': 0.8343199046865795, 'reg_alpha': 0.04772952243282579, 'reg_lambda': 0.005578896845419992, 'early_stoppig_rounds': 42}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9758930869645155


[32m[I 2023-01-17 15:42:34,474][0m Trial 10 finished with value: 0.9729515995587426 and parameters: {'n_estimators': 489, 'max_depth': 8, 'learning_rate': 0.09306701089728422, 'min_child_weight': 8, 'gamma': 0.00032065147459137843, 'subsample': 0.9771199469114519, 'colsample_bytree': 0.9854418560478411, 'reg_alpha': 1.1739181664043347e-05, 'reg_lambda': 0.01998296800679995, 'early_stoppig_rounds': 11}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9729515995587426


[32m[I 2023-01-17 15:42:58,622][0m Trial 11 finished with value: 0.9745440338297483 and parameters: {'n_estimators': 494, 'max_depth': 8, 'learning_rate': 0.09289282128728464, 'min_child_weight': 8, 'gamma': 0.0003526402904189943, 'subsample': 0.9854007045971114, 'colsample_bytree': 0.9980309714727897, 'reg_alpha': 2.171627237336837e-05, 'reg_lambda': 0.019219086281804798, 'early_stoppig_rounds': 12}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9745440338297483


[32m[I 2023-01-17 15:43:22,805][0m Trial 12 finished with value: 0.9722405313476742 and parameters: {'n_estimators': 355, 'max_depth': 8, 'learning_rate': 0.0705314501097472, 'min_child_weight': 9, 'gamma': 0.00039521594687981265, 'subsample': 0.9462641303020243, 'colsample_bytree': 0.9976712105779164, 'reg_alpha': 0.000518125374003189, 'reg_lambda': 0.014864510896231593, 'early_stoppig_rounds': 13}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9722405313476742


[32m[I 2023-01-17 15:43:36,661][0m Trial 13 finished with value: 0.9711332505975362 and parameters: {'n_estimators': 498, 'max_depth': 6, 'learning_rate': 0.2846420995810087, 'min_child_weight': 7, 'gamma': 0.019137723900188135, 'subsample': 0.7152608697524372, 'colsample_bytree': 0.7993868262236163, 'reg_alpha': 0.0007740381319687579, 'reg_lambda': 0.2399690120101134, 'early_stoppig_rounds': 28}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9711332505975362


[32m[I 2023-01-17 15:43:59,359][0m Trial 14 finished with value: 0.9616655175583746 and parameters: {'n_estimators': 366, 'max_depth': 6, 'learning_rate': 0.09265501280855139, 'min_child_weight': 10, 'gamma': 0.0004113629046161343, 'subsample': 0.7101330531792729, 'colsample_bytree': 0.5446250560467615, 'reg_alpha': 1.77290692352208e-05, 'reg_lambda': 0.0082330090517567, 'early_stoppig_rounds': 26}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9616655175583746


[32m[I 2023-01-17 15:44:18,264][0m Trial 15 finished with value: 0.9670718422504138 and parameters: {'n_estimators': 213, 'max_depth': 9, 'learning_rate': 0.04723992330027088, 'min_child_weight': 7, 'gamma': 1.0580097441684882e-05, 'subsample': 0.8079608348503934, 'colsample_bytree': 0.8420118523753592, 'reg_alpha': 0.00011706570595147636, 'reg_lambda': 0.08065835271834752, 'early_stoppig_rounds': 22}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9670718422504138


[32m[I 2023-01-17 15:44:45,618][0m Trial 16 finished with value: 0.9374367990439418 and parameters: {'n_estimators': 409, 'max_depth': 7, 'learning_rate': 0.019500204681589, 'min_child_weight': 7, 'gamma': 0.000833773144502572, 'subsample': 0.6082052789426536, 'colsample_bytree': 0.5039885778240716, 'reg_alpha': 0.13898587594951645, 'reg_lambda': 0.9726966162875015, 'early_stoppig_rounds': 38}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.9374367990439418


[32m[I 2023-01-17 15:45:17,074][0m Trial 17 finished with value: 0.8858287369001655 and parameters: {'n_estimators': 456, 'max_depth': 9, 'learning_rate': 0.010639510637838544, 'min_child_weight': 10, 'gamma': 0.00887544951045194, 'subsample': 0.3482987981614962, 'colsample_bytree': 0.8689810771778436, 'reg_alpha': 0.0022747538229066218, 'reg_lambda': 0.004795225572474295, 'early_stoppig_rounds': 96}. Best is trial 9 with value: 0.9758930869645155.[0m


Avg AUC: 0.8858287369001655


[32m[I 2023-01-17 15:45:34,073][0m Trial 18 finished with value: 0.980439189189189 and parameters: {'n_estimators': 316, 'max_depth': 7, 'learning_rate': 0.1609093369912539, 'min_child_weight': 3, 'gamma': 0.00010446792570597246, 'subsample': 0.8714334826218815, 'colsample_bytree': 0.6407165624060571, 'reg_alpha': 8.853350172478267e-05, 'reg_lambda': 2.0670289562442702e-05, 'early_stoppig_rounds': 39}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.980439189189189


[32m[I 2023-01-17 15:45:46,784][0m Trial 19 finished with value: 0.9773899154256297 and parameters: {'n_estimators': 310, 'max_depth': 5, 'learning_rate': 0.2825417674021756, 'min_child_weight': 3, 'gamma': 9.741759319342314e-05, 'subsample': 0.5953985506675994, 'colsample_bytree': 0.6050164312750497, 'reg_alpha': 9.302502201027427e-05, 'reg_lambda': 1.660932396491284e-05, 'early_stoppig_rounds': 41}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9773899154256297


[32m[I 2023-01-17 15:45:58,133][0m Trial 20 finished with value: 0.9693673009744437 and parameters: {'n_estimators': 217, 'max_depth': 5, 'learning_rate': 0.28129418951610485, 'min_child_weight': 3, 'gamma': 0.00010753289165696589, 'subsample': 0.6308512873385232, 'colsample_bytree': 0.5270137323076868, 'reg_alpha': 9.815314498209028e-05, 'reg_lambda': 1.168244104147375e-05, 'early_stoppig_rounds': 32}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9693673009744437


[32m[I 2023-01-17 15:46:14,616][0m Trial 21 finished with value: 0.9760075381503952 and parameters: {'n_estimators': 316, 'max_depth': 5, 'learning_rate': 0.1820071179127609, 'min_child_weight': 3, 'gamma': 1.2357551275766053e-05, 'subsample': 0.8111317747906351, 'colsample_bytree': 0.6362504074093812, 'reg_alpha': 0.00017453880881978265, 'reg_lambda': 5.6711134969414334e-05, 'early_stoppig_rounds': 45}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9760075381503952


[32m[I 2023-01-17 15:46:31,069][0m Trial 22 finished with value: 0.9693705184776615 and parameters: {'n_estimators': 313, 'max_depth': 5, 'learning_rate': 0.18804677612237397, 'min_child_weight': 2, 'gamma': 1.136081496600086e-05, 'subsample': 0.6308596268243472, 'colsample_bytree': 0.6286191586855593, 'reg_alpha': 4.8087322685698354e-05, 'reg_lambda': 4.761099803593075e-05, 'early_stoppig_rounds': 50}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9693705184776615


[32m[I 2023-01-17 15:46:42,285][0m Trial 23 finished with value: 0.9433386192314763 and parameters: {'n_estimators': 224, 'max_depth': 2, 'learning_rate': 0.20073918871986693, 'min_child_weight': 2, 'gamma': 0.00012032855630446726, 'subsample': 0.8419568816696472, 'colsample_bytree': 0.4801535619606303, 'reg_alpha': 0.00021177939140034342, 'reg_lambda': 7.352637828273397e-05, 'early_stoppig_rounds': 47}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9433386192314763


[32m[I 2023-01-17 15:46:59,610][0m Trial 24 finished with value: 0.9678943739658026 and parameters: {'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.14308839204624024, 'min_child_weight': 4, 'gamma': 4.472974900772944e-05, 'subsample': 0.6891703378435916, 'colsample_bytree': 0.5908855912528873, 'reg_alpha': 0.0015670269873444957, 'reg_lambda': 5.8366282436935584e-05, 'early_stoppig_rounds': 22}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9678943739658026


[32m[I 2023-01-17 15:47:16,564][0m Trial 25 finished with value: 0.9718597168597167 and parameters: {'n_estimators': 363, 'max_depth': 3, 'learning_rate': 0.23291476285152915, 'min_child_weight': 3, 'gamma': 1.7495693944391392e-05, 'subsample': 0.5601485029571911, 'colsample_bytree': 0.7037845576638525, 'reg_alpha': 0.00026727126844240434, 'reg_lambda': 2.7108864214928165e-05, 'early_stoppig_rounds': 37}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9718597168597167


[32m[I 2023-01-17 15:47:30,471][0m Trial 26 finished with value: 0.9699384077955505 and parameters: {'n_estimators': 246, 'max_depth': 7, 'learning_rate': 0.13503581424743757, 'min_child_weight': 2, 'gamma': 0.00013646147638728565, 'subsample': 0.2012297981056669, 'colsample_bytree': 0.4528906340704824, 'reg_alpha': 4.061377557684918e-05, 'reg_lambda': 0.00013154253299926737, 'early_stoppig_rounds': 57}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9699384077955505


[32m[I 2023-01-17 15:47:41,552][0m Trial 27 finished with value: 0.9598271741128883 and parameters: {'n_estimators': 159, 'max_depth': 6, 'learning_rate': 0.12288402531562236, 'min_child_weight': 4, 'gamma': 0.0009204630051967816, 'subsample': 0.3818473101404186, 'colsample_bytree': 0.5688324836564663, 'reg_alpha': 0.00021650370117459364, 'reg_lambda': 1.1576816189315178e-05, 'early_stoppig_rounds': 66}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9598271741128883


[32m[I 2023-01-17 15:47:57,622][0m Trial 28 finished with value: 0.9725418275418274 and parameters: {'n_estimators': 294, 'max_depth': 5, 'learning_rate': 0.2348915152518588, 'min_child_weight': 6, 'gamma': 2.5759771684671776e-05, 'subsample': 0.8777572960734009, 'colsample_bytree': 0.6571229096341485, 'reg_alpha': 3.4514319627060464e-05, 'reg_lambda': 3.089988722457067e-05, 'early_stoppig_rounds': 42}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9725418275418274


[32m[I 2023-01-17 15:48:16,716][0m Trial 29 finished with value: 0.9729276981062697 and parameters: {'n_estimators': 344, 'max_depth': 3, 'learning_rate': 0.15396462080006426, 'min_child_weight': 1, 'gamma': 0.0001378951050584556, 'subsample': 0.5758105591722247, 'colsample_bytree': 0.26091128287341453, 'reg_alpha': 0.0009775252075979464, 'reg_lambda': 0.00011572777884256162, 'early_stoppig_rounds': 46}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9729276981062697


[32m[I 2023-01-17 15:48:26,374][0m Trial 30 finished with value: 0.9187699944842802 and parameters: {'n_estimators': 172, 'max_depth': 3, 'learning_rate': 0.11579180348095146, 'min_child_weight': 6, 'gamma': 5.729798069565094e-05, 'subsample': 0.4124062983856407, 'colsample_bytree': 0.7343591212849289, 'reg_alpha': 0.004928228001257092, 'reg_lambda': 1.0573253572914707e-05, 'early_stoppig_rounds': 52}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9187699944842802


[32m[I 2023-01-17 15:48:53,226][0m Trial 31 finished with value: 0.9776519121161978 and parameters: {'n_estimators': 390, 'max_depth': 9, 'learning_rate': 0.06989512245010746, 'min_child_weight': 3, 'gamma': 0.006770930429181321, 'subsample': 0.8105666581486954, 'colsample_bytree': 0.8063420926569271, 'reg_alpha': 0.035198999794081504, 'reg_lambda': 0.0008926679792727595, 'early_stoppig_rounds': 40}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9776519121161978


[32m[I 2023-01-17 15:49:17,063][0m Trial 32 finished with value: 0.9751468560397131 and parameters: {'n_estimators': 325, 'max_depth': 7, 'learning_rate': 0.06948899842368107, 'min_child_weight': 3, 'gamma': 0.011215855060762392, 'subsample': 0.7795987595276247, 'colsample_bytree': 0.7657106475137162, 'reg_alpha': 0.008010499000001566, 'reg_lambda': 0.000666360434428271, 'early_stoppig_rounds': 32}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9751468560397131


[32m[I 2023-01-17 15:49:34,599][0m Trial 33 finished with value: 0.9735999264570692 and parameters: {'n_estimators': 380, 'max_depth': 4, 'learning_rate': 0.22320670049121122, 'min_child_weight': 4, 'gamma': 0.024783738598950163, 'subsample': 0.6737951234945221, 'colsample_bytree': 0.6579241023847427, 'reg_alpha': 0.3643397120460628, 'reg_lambda': 0.00022092268210575022, 'early_stoppig_rounds': 41}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9735999264570692


[32m[I 2023-01-17 15:49:47,435][0m Trial 34 finished with value: 0.9772214561500275 and parameters: {'n_estimators': 333, 'max_depth': 6, 'learning_rate': 0.17783572967055833, 'min_child_weight': 2, 'gamma': 0.0010084812922661025, 'subsample': 0.8903289523129757, 'colsample_bytree': 0.8738329210035025, 'reg_alpha': 6.497972040824082e-05, 'reg_lambda': 3.2442546863209e-05, 'early_stoppig_rounds': 33}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9772214561500275


[32m[I 2023-01-17 15:49:55,139][0m Trial 35 finished with value: 0.9713175675675675 and parameters: {'n_estimators': 389, 'max_depth': 6, 'learning_rate': 0.2944094557638636, 'min_child_weight': 2, 'gamma': 0.001123674402220413, 'subsample': 0.8994602627153454, 'colsample_bytree': 0.8872580263117622, 'reg_alpha': 6.916224145279172e-05, 'reg_lambda': 2.224956746038093e-05, 'early_stoppig_rounds': 19}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9713175675675675


[32m[I 2023-01-17 15:50:19,456][0m Trial 36 finished with value: 0.9788154991726421 and parameters: {'n_estimators': 337, 'max_depth': 7, 'learning_rate': 0.06766511150133776, 'min_child_weight': 2, 'gamma': 0.004516420828170786, 'subsample': 0.5264416476644186, 'colsample_bytree': 0.7589702183154302, 'reg_alpha': 0.02889212597839019, 'reg_lambda': 0.0017491954785500447, 'early_stoppig_rounds': 31}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9788154991726421


[32m[I 2023-01-17 15:50:39,370][0m Trial 37 finished with value: 0.9687288564074278 and parameters: {'n_estimators': 284, 'max_depth': 7, 'learning_rate': 0.06479144243503338, 'min_child_weight': 5, 'gamma': 0.04679793770955634, 'subsample': 0.549988484718217, 'colsample_bytree': 0.7291145489171107, 'reg_alpha': 0.034043483230157344, 'reg_lambda': 0.0021718630735335057, 'early_stoppig_rounds': 28}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9687288564074278


[32m[I 2023-01-17 15:50:58,917][0m Trial 38 finished with value: 0.9773110865968008 and parameters: {'n_estimators': 251, 'max_depth': 9, 'learning_rate': 0.04284663259850494, 'min_child_weight': 1, 'gamma': 0.005466936211844331, 'subsample': 0.4371352205126278, 'colsample_bytree': 0.41496690292384375, 'reg_alpha': 0.016052393339126732, 'reg_lambda': 0.0008668901927241444, 'early_stoppig_rounds': 77}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9773110865968008


[32m[I 2023-01-17 15:51:05,172][0m Trial 39 finished with value: 0.9324846019488877 and parameters: {'n_estimators': 84, 'max_depth': 10, 'learning_rate': 0.0543870127463922, 'min_child_weight': 5, 'gamma': 0.004881866034139004, 'subsample': 0.5195261661571989, 'colsample_bytree': 0.3654405611505073, 'reg_alpha': 0.07047142448504622, 'reg_lambda': 0.0003695568456938824, 'early_stoppig_rounds': 61}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9324846019488877


[32m[I 2023-01-17 15:51:27,028][0m Trial 40 finished with value: 0.9758036863394006 and parameters: {'n_estimators': 441, 'max_depth': 7, 'learning_rate': 0.10529835743113833, 'min_child_weight': 4, 'gamma': 0.4044630489721483, 'subsample': 0.5041951700384434, 'colsample_bytree': 0.7767830681224517, 'reg_alpha': 0.12423445850745743, 'reg_lambda': 0.00161093027165994, 'early_stoppig_rounds': 18}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9758036863394006


[32m[I 2023-01-17 15:51:47,426][0m Trial 41 finished with value: 0.9781200588343445 and parameters: {'n_estimators': 254, 'max_depth': 9, 'learning_rate': 0.04201328021524606, 'min_child_weight': 1, 'gamma': 0.003638200777606468, 'subsample': 0.4494426550729542, 'colsample_bytree': 0.41982419953848127, 'reg_alpha': 0.018962155941644174, 'reg_lambda': 0.0006926864556177296, 'early_stoppig_rounds': 87}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9781200588343445


[32m[I 2023-01-17 15:52:04,864][0m Trial 42 finished with value: 0.9777596984739843 and parameters: {'n_estimators': 250, 'max_depth': 9, 'learning_rate': 0.03829814230388908, 'min_child_weight': 1, 'gamma': 0.0035412199727192823, 'subsample': 0.33357031489903494, 'colsample_bytree': 0.3094212610721479, 'reg_alpha': 0.009105133258606294, 'reg_lambda': 0.0031976234518815588, 'early_stoppig_rounds': 89}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9777596984739843


[32m[I 2023-01-17 15:52:19,832][0m Trial 43 finished with value: 0.9689306398234969 and parameters: {'n_estimators': 196, 'max_depth': 9, 'learning_rate': 0.03289971627010361, 'min_child_weight': 1, 'gamma': 0.004587442386708208, 'subsample': 0.294120989080185, 'colsample_bytree': 0.30010048566533676, 'reg_alpha': 0.008092737223344124, 'reg_lambda': 0.0033180519656639874, 'early_stoppig_rounds': 85}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9689306398234969


[32m[I 2023-01-17 15:52:37,935][0m Trial 44 finished with value: 0.9731522338665195 and parameters: {'n_estimators': 260, 'max_depth': 10, 'learning_rate': 0.02941972387474527, 'min_child_weight': 1, 'gamma': 0.0016939929631944759, 'subsample': 0.33927738737416935, 'colsample_bytree': 0.20212987572077662, 'reg_alpha': 0.024285640293147837, 'reg_lambda': 0.0014653215139032987, 'early_stoppig_rounds': 99}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9731522338665195


[32m[I 2023-01-17 15:52:57,374][0m Trial 45 finished with value: 0.9735960194888765 and parameters: {'n_estimators': 275, 'max_depth': 8, 'learning_rate': 0.042963498995465595, 'min_child_weight': 2, 'gamma': 0.014069089788578893, 'subsample': 0.40215266335579813, 'colsample_bytree': 0.30868836833133295, 'reg_alpha': 0.00901599663276421, 'reg_lambda': 0.0005562333432487226, 'early_stoppig_rounds': 86}. Best is trial 18 with value: 0.980439189189189.[0m


Avg AUC: 0.9735960194888765


[32m[I 2023-01-17 15:53:23,344][0m Trial 46 finished with value: 0.9810438499724216 and parameters: {'n_estimators': 394, 'max_depth': 8, 'learning_rate': 0.060964837555996326, 'min_child_weight': 1, 'gamma': 0.0031694603056515933, 'subsample': 0.4559715609566049, 'colsample_bytree': 0.23967865997253113, 'reg_alpha': 0.003420475291716086, 'reg_lambda': 0.009667717445021552, 'early_stoppig_rounds': 75}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9810438499724216


[32m[I 2023-01-17 15:53:41,002][0m Trial 47 finished with value: 0.9735656830299689 and parameters: {'n_estimators': 244, 'max_depth': 8, 'learning_rate': 0.019715573293424802, 'min_child_weight': 1, 'gamma': 0.0034336743851272607, 'subsample': 0.46365565875392095, 'colsample_bytree': 0.2559447506336328, 'reg_alpha': 0.004533175550665214, 'reg_lambda': 0.0428045813585813, 'early_stoppig_rounds': 75}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9735656830299689


[32m[I 2023-01-17 15:54:03,403][0m Trial 48 finished with value: 0.9798315407243978 and parameters: {'n_estimators': 338, 'max_depth': 8, 'learning_rate': 0.08289014157505277, 'min_child_weight': 1, 'gamma': 0.0017814454643783572, 'subsample': 0.3384050935613869, 'colsample_bytree': 0.25257264630515397, 'reg_alpha': 0.0028145805171228312, 'reg_lambda': 0.010639193864184012, 'early_stoppig_rounds': 92}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9798315407243978


[32m[I 2023-01-17 15:54:25,008][0m Trial 49 finished with value: 0.975936063614635 and parameters: {'n_estimators': 340, 'max_depth': 8, 'learning_rate': 0.08067090700474852, 'min_child_weight': 2, 'gamma': 0.001827586016003052, 'subsample': 0.3696207385102222, 'colsample_bytree': 0.2292477442780862, 'reg_alpha': 0.00276964664999117, 'reg_lambda': 0.01036861059035581, 'early_stoppig_rounds': 80}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.975936063614635


[32m[I 2023-01-17 15:54:47,691][0m Trial 50 finished with value: 0.9723809523809523 and parameters: {'n_estimators': 373, 'max_depth': 8, 'learning_rate': 0.06323267401764646, 'min_child_weight': 1, 'gamma': 0.034568618348886526, 'subsample': 0.25100571304770136, 'colsample_bytree': 0.24461478240601953, 'reg_alpha': 0.07737244304770749, 'reg_lambda': 0.02965526817399547, 'early_stoppig_rounds': 91}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9723809523809523


[32m[I 2023-01-17 15:55:11,917][0m Trial 51 finished with value: 0.9755279003493289 and parameters: {'n_estimators': 349, 'max_depth': 9, 'learning_rate': 0.04696487709042312, 'min_child_weight': 1, 'gamma': 0.0032866667465852955, 'subsample': 0.313472608364795, 'colsample_bytree': 0.2827201275674628, 'reg_alpha': 0.015727984232488678, 'reg_lambda': 0.005375245605405896, 'early_stoppig_rounds': 93}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9755279003493289


[32m[I 2023-01-17 15:55:31,016][0m Trial 52 finished with value: 0.9668776429490714 and parameters: {'n_estimators': 287, 'max_depth': 7, 'learning_rate': 0.03873613592202052, 'min_child_weight': 2, 'gamma': 0.00022497029060509422, 'subsample': 0.29098546800590924, 'colsample_bytree': 0.3336201445558448, 'reg_alpha': 0.004992684600142973, 'reg_lambda': 0.003050598029157409, 'early_stoppig_rounds': 85}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9668776429490714


[32m[I 2023-01-17 15:55:45,695][0m Trial 53 finished with value: 0.9706297113439971 and parameters: {'n_estimators': 227, 'max_depth': 8, 'learning_rate': 0.05823683272652086, 'min_child_weight': 1, 'gamma': 0.0018717879851886056, 'subsample': 0.26666680927575465, 'colsample_bytree': 0.28671145898385814, 'reg_alpha': 0.011015725905316322, 'reg_lambda': 0.08994476326332251, 'early_stoppig_rounds': 70}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9706297113439971


[32m[I 2023-01-17 15:56:12,850][0m Trial 54 finished with value: 0.9803366887295459 and parameters: {'n_estimators': 431, 'max_depth': 10, 'learning_rate': 0.08293508011611567, 'min_child_weight': 1, 'gamma': 0.0006809388448469386, 'subsample': 0.4141437072329266, 'colsample_bytree': 0.37635974793540494, 'reg_alpha': 0.0016846160674570702, 'reg_lambda': 0.00931163753195361, 'early_stoppig_rounds': 81}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9803366887295459


[32m[I 2023-01-17 15:56:39,505][0m Trial 55 finished with value: 0.9739639639639639 and parameters: {'n_estimators': 438, 'max_depth': 10, 'learning_rate': 0.09379463549791943, 'min_child_weight': 2, 'gamma': 0.008379707018373513, 'subsample': 0.40457164551307856, 'colsample_bytree': 0.3890487024526686, 'reg_alpha': 0.0016191478240288014, 'reg_lambda': 0.010693752724153759, 'early_stoppig_rounds': 79}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9739639639639639


[32m[I 2023-01-17 15:57:07,121][0m Trial 56 finished with value: 0.9789129435558006 and parameters: {'n_estimators': 474, 'max_depth': 10, 'learning_rate': 0.07480243948433411, 'min_child_weight': 1, 'gamma': 0.00047080628525778163, 'subsample': 0.4483221312660876, 'colsample_bytree': 0.34930810483351726, 'reg_alpha': 0.0003757075949528819, 'reg_lambda': 0.017593145295416463, 'early_stoppig_rounds': 72}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9789129435558006


[32m[I 2023-01-17 15:57:32,280][0m Trial 57 finished with value: 0.980731522338665 and parameters: {'n_estimators': 475, 'max_depth': 10, 'learning_rate': 0.08117012747803037, 'min_child_weight': 1, 'gamma': 0.0005357502314274344, 'subsample': 0.4914356698287651, 'colsample_bytree': 0.35582079231905345, 'reg_alpha': 0.0006240737204692321, 'reg_lambda': 0.022304285308429622, 'early_stoppig_rounds': 72}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.980731522338665


[32m[I 2023-01-17 15:57:59,339][0m Trial 58 finished with value: 0.9785027118955691 and parameters: {'n_estimators': 469, 'max_depth': 10, 'learning_rate': 0.07965528009356089, 'min_child_weight': 1, 'gamma': 0.0006809791259480869, 'subsample': 0.36567507781201397, 'colsample_bytree': 0.3482541769210403, 'reg_alpha': 0.00045706948892025476, 'reg_lambda': 0.0226205605265393, 'early_stoppig_rounds': 73}. Best is trial 46 with value: 0.9810438499724216.[0m


Avg AUC: 0.9785027118955691


[32m[I 2023-01-17 15:58:24,669][0m Trial 59 finished with value: 0.9815487681559109 and parameters: {'n_estimators': 476, 'max_depth': 10, 'learning_rate': 0.08472543890298774, 'min_child_weight': 1, 'gamma': 0.0005555972080472754, 'subsample': 0.49169558700648885, 'colsample_bytree': 0.38364258575245874, 'reg_alpha': 0.0007769883944296489, 'reg_lambda': 0.014998859836740474, 'early_stoppig_rounds': 82}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9815487681559109


[32m[I 2023-01-17 15:58:50,841][0m Trial 60 finished with value: 0.9768149016363303 and parameters: {'n_estimators': 421, 'max_depth': 10, 'learning_rate': 0.10312298748541042, 'min_child_weight': 2, 'gamma': 0.00022898405703969114, 'subsample': 0.49588258453351486, 'colsample_bytree': 0.39439041438346406, 'reg_alpha': 0.0010290517501890645, 'reg_lambda': 0.07046624125654943, 'early_stoppig_rounds': 82}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9768149016363303


[32m[I 2023-01-17 15:59:16,677][0m Trial 61 finished with value: 0.9754493013421586 and parameters: {'n_estimators': 477, 'max_depth': 10, 'learning_rate': 0.08172294741902375, 'min_child_weight': 1, 'gamma': 0.0005004546525989469, 'subsample': 0.47024836544198706, 'colsample_bytree': 0.3682153758817197, 'reg_alpha': 0.000389213007226417, 'reg_lambda': 0.007602718141938221, 'early_stoppig_rounds': 73}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9754493013421586


[32m[I 2023-01-17 15:59:44,972][0m Trial 62 finished with value: 0.9786709413495128 and parameters: {'n_estimators': 468, 'max_depth': 10, 'learning_rate': 0.07803827323446931, 'min_child_weight': 1, 'gamma': 0.000590933538972624, 'subsample': 0.42800029940871703, 'colsample_bytree': 0.4589114242426984, 'reg_alpha': 0.0006502810459972044, 'reg_lambda': 0.03918760573238252, 'early_stoppig_rounds': 66}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9786709413495128


[32m[I 2023-01-17 16:00:16,534][0m Trial 63 finished with value: 0.9771125206839493 and parameters: {'n_estimators': 441, 'max_depth': 10, 'learning_rate': 0.057827351837060725, 'min_child_weight': 2, 'gamma': 0.0002386701060795042, 'subsample': 0.43353335318907776, 'colsample_bytree': 0.33370845450464937, 'reg_alpha': 0.0015668229921865313, 'reg_lambda': 0.015017322846658207, 'early_stoppig_rounds': 83}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9771125206839493


[32m[I 2023-01-17 16:00:44,441][0m Trial 64 finished with value: 0.9791112796469941 and parameters: {'n_estimators': 488, 'max_depth': 10, 'learning_rate': 0.08823163600200443, 'min_child_weight': 1, 'gamma': 0.0014660342016495887, 'subsample': 0.39338564972071743, 'colsample_bytree': 0.22008437408998646, 'reg_alpha': 0.0003088598315654422, 'reg_lambda': 0.014921790947128833, 'early_stoppig_rounds': 62}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9791112796469941


[32m[I 2023-01-17 16:01:11,348][0m Trial 65 finished with value: 0.9804003493289208 and parameters: {'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.08857699595201608, 'min_child_weight': 1, 'gamma': 0.001654113669653529, 'subsample': 0.3824930257166049, 'colsample_bytree': 0.22484292044172158, 'reg_alpha': 0.0010663191666673763, 'reg_lambda': 0.16806719532859582, 'early_stoppig_rounds': 64}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9804003493289208


[32m[I 2023-01-17 16:01:34,953][0m Trial 66 finished with value: 0.972571934179077 and parameters: {'n_estimators': 402, 'max_depth': 9, 'learning_rate': 0.1196747579741344, 'min_child_weight': 3, 'gamma': 0.002404095115918021, 'subsample': 0.3642777678955022, 'colsample_bytree': 0.20708634876461685, 'reg_alpha': 0.002125822498407447, 'reg_lambda': 0.1765715122639243, 'early_stoppig_rounds': 65}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.972571934179077


[32m[I 2023-01-17 16:02:01,676][0m Trial 67 finished with value: 0.9728532358889501 and parameters: {'n_estimators': 496, 'max_depth': 8, 'learning_rate': 0.14265211868043456, 'min_child_weight': 2, 'gamma': 0.0012392207948997108, 'subsample': 0.31154290342846713, 'colsample_bytree': 0.231052015829467, 'reg_alpha': 0.000975876957274569, 'reg_lambda': 0.6215394066210692, 'early_stoppig_rounds': 69}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9728532358889501


[32m[I 2023-01-17 16:02:25,442][0m Trial 68 finished with value: 0.9770493197278911 and parameters: {'n_estimators': 454, 'max_depth': 9, 'learning_rate': 0.10709357447387151, 'min_child_weight': 1, 'gamma': 0.00033161513161330153, 'subsample': 0.3486174641463502, 'colsample_bytree': 0.25693148652039727, 'reg_alpha': 0.0001351783753722234, 'reg_lambda': 0.12087416263215328, 'early_stoppig_rounds': 56}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9770493197278911


[32m[I 2023-01-17 16:02:56,061][0m Trial 69 finished with value: 0.9773864681007538 and parameters: {'n_estimators': 457, 'max_depth': 9, 'learning_rate': 0.05185413673121143, 'min_child_weight': 2, 'gamma': 0.0008917044678178648, 'subsample': 0.4186738816626984, 'colsample_bytree': 0.2846912821726607, 'reg_alpha': 0.0037004597757030796, 'reg_lambda': 0.025303554509893963, 'early_stoppig_rounds': 93}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9773864681007538


[32m[I 2023-01-17 16:03:19,823][0m Trial 70 finished with value: 0.9753477201691488 and parameters: {'n_estimators': 431, 'max_depth': 8, 'learning_rate': 0.15582982471810747, 'min_child_weight': 3, 'gamma': 7.426177338582387e-05, 'subsample': 0.38636901919924477, 'colsample_bytree': 0.4931226987812139, 'reg_alpha': 0.0006421785430133886, 'reg_lambda': 0.3106861203810668, 'early_stoppig_rounds': 76}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9753477201691488


[32m[I 2023-01-17 16:03:44,620][0m Trial 71 finished with value: 0.9783025372311087 and parameters: {'n_estimators': 485, 'max_depth': 10, 'learning_rate': 0.08953128926789024, 'min_child_weight': 1, 'gamma': 0.001574312596780999, 'subsample': 0.39633737511684647, 'colsample_bytree': 0.21796876679260824, 'reg_alpha': 0.0013007198015364781, 'reg_lambda': 0.007766612270054598, 'early_stoppig_rounds': 59}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9783025372311087


[32m[I 2023-01-17 16:04:07,355][0m Trial 72 finished with value: 0.9803024453024454 and parameters: {'n_estimators': 486, 'max_depth': 10, 'learning_rate': 0.09935522975109756, 'min_child_weight': 1, 'gamma': 0.0013024225517687302, 'subsample': 0.4807386733595715, 'colsample_bytree': 0.23869252403310703, 'reg_alpha': 0.0003468521834181518, 'reg_lambda': 0.059468816193275036, 'early_stoppig_rounds': 63}. Best is trial 59 with value: 0.9815487681559109.[0m


Avg AUC: 0.9803024453024454


[32m[I 2023-01-17 16:04:28,697][0m Trial 73 finished with value: 0.9819256756756756 and parameters: {'n_estimators': 458, 'max_depth': 9, 'learning_rate': 0.10065464248125155, 'min_child_weight': 1, 'gamma': 0.0006408049333290192, 'subsample': 0.4799928881077963, 'colsample_bytree': 0.24310265653526805, 'reg_alpha': 0.0020318315296524053, 'reg_lambda': 0.06500334815062654, 'early_stoppig_rounds': 53}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9819256756756756


[32m[I 2023-01-17 16:04:54,504][0m Trial 74 finished with value: 0.9774609303180732 and parameters: {'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.09957222346386702, 'min_child_weight': 2, 'gamma': 0.00016307158197870778, 'subsample': 0.4778267491054088, 'colsample_bytree': 0.2328494297603591, 'reg_alpha': 0.0006884214979339891, 'reg_lambda': 0.06897419462374445, 'early_stoppig_rounds': 54}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9774609303180732


[32m[I 2023-01-17 16:05:17,827][0m Trial 75 finished with value: 0.9789772936201506 and parameters: {'n_estimators': 456, 'max_depth': 9, 'learning_rate': 0.1160678468122851, 'min_child_weight': 1, 'gamma': 0.0007427771298615461, 'subsample': 0.5315021281419526, 'colsample_bytree': 0.241887431163967, 'reg_alpha': 0.002131717743991787, 'reg_lambda': 0.1469865440783895, 'early_stoppig_rounds': 63}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9789772936201506


[32m[I 2023-01-17 16:05:45,820][0m Trial 76 finished with value: 0.9775985934914507 and parameters: {'n_estimators': 415, 'max_depth': 10, 'learning_rate': 0.060605417246845726, 'min_child_weight': 2, 'gamma': 0.0005583591663568078, 'subsample': 0.5762644477292815, 'colsample_bytree': 0.2733111158459092, 'reg_alpha': 0.00015926145096660795, 'reg_lambda': 0.05095959615521841, 'early_stoppig_rounds': 48}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9775985934914507


[32m[I 2023-01-17 16:06:07,871][0m Trial 77 finished with value: 0.9559296286082001 and parameters: {'n_estimators': 399, 'max_depth': 10, 'learning_rate': 0.12849532841341035, 'min_child_weight': 8, 'gamma': 0.00030369996435348535, 'subsample': 0.49647433645915084, 'colsample_bytree': 0.21450866444215563, 'reg_alpha': 0.0002439622143350251, 'reg_lambda': 0.20328004801841262, 'early_stoppig_rounds': 69}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9559296286082001


[32m[I 2023-01-17 16:06:23,488][0m Trial 78 finished with value: 0.9741041551755838 and parameters: {'n_estimators': 428, 'max_depth': 9, 'learning_rate': 0.16372798252225435, 'min_child_weight': 1, 'gamma': 0.002599011441934071, 'subsample': 0.4540635294492747, 'colsample_bytree': 0.270354657509878, 'reg_alpha': 0.0008794437271033802, 'reg_lambda': 0.03297041031251653, 'early_stoppig_rounds': 59}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9741041551755838


[32m[I 2023-01-17 16:06:54,159][0m Trial 79 finished with value: 0.9755862750505608 and parameters: {'n_estimators': 481, 'max_depth': 10, 'learning_rate': 0.07096672454600522, 'min_child_weight': 2, 'gamma': 0.0011161809907725999, 'subsample': 0.6484298083858206, 'colsample_bytree': 0.4344586660355124, 'reg_alpha': 0.0014312058620596752, 'reg_lambda': 0.05622041083698199, 'early_stoppig_rounds': 79}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9755862750505608


[32m[I 2023-01-17 16:07:15,720][0m Trial 80 finished with value: 0.9808455138812281 and parameters: {'n_estimators': 466, 'max_depth': 10, 'learning_rate': 0.11215630582467628, 'min_child_weight': 1, 'gamma': 0.00040583187185046205, 'subsample': 0.5126523502856611, 'colsample_bytree': 0.2405756413347166, 'reg_alpha': 2.4420378905643925e-05, 'reg_lambda': 0.09580458467519536, 'early_stoppig_rounds': 71}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9808455138812281


[32m[I 2023-01-17 16:07:35,536][0m Trial 81 finished with value: 0.9809944383158669 and parameters: {'n_estimators': 449, 'max_depth': 10, 'learning_rate': 0.1306525573362489, 'min_child_weight': 1, 'gamma': 0.0003999980778333924, 'subsample': 0.5178182231462405, 'colsample_bytree': 0.24263681759747013, 'reg_alpha': 2.3735372808323226e-05, 'reg_lambda': 0.12990395951549244, 'early_stoppig_rounds': 71}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9809944383158669


[32m[I 2023-01-17 16:07:55,710][0m Trial 82 finished with value: 0.9770814947600662 and parameters: {'n_estimators': 447, 'max_depth': 10, 'learning_rate': 0.13379670928846604, 'min_child_weight': 1, 'gamma': 0.00019381100938303903, 'subsample': 0.5155684529714466, 'colsample_bytree': 0.22409605300866267, 'reg_alpha': 1.2845438363354556e-05, 'reg_lambda': 0.10041327214325681, 'early_stoppig_rounds': 70}. Best is trial 73 with value: 0.9819256756756756.[0m


Avg AUC: 0.9770814947600662


[32m[I 2023-01-17 16:08:10,210][0m Trial 83 finished with value: 0.98196083838941 and parameters: {'n_estimators': 464, 'max_depth': 9, 'learning_rate': 0.20681743049444995, 'min_child_weight': 1, 'gamma': 0.00040446917493805213, 'subsample': 0.6005668660176993, 'colsample_bytree': 0.3997513124843916, 'reg_alpha': 2.6175431867334218e-05, 'reg_lambda': 0.3556952133171837, 'early_stoppig_rounds': 75}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.98196083838941


[32m[I 2023-01-17 16:08:24,074][0m Trial 84 finished with value: 0.9760535024820738 and parameters: {'n_estimators': 464, 'max_depth': 9, 'learning_rate': 0.2208834153443906, 'min_child_weight': 1, 'gamma': 0.0003892608271756738, 'subsample': 0.538631379434221, 'colsample_bytree': 0.405982442683254, 'reg_alpha': 2.2390032170549503e-05, 'reg_lambda': 0.5314096586559265, 'early_stoppig_rounds': 66}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.9760535024820738


[32m[I 2023-01-17 16:08:38,190][0m Trial 85 finished with value: 0.9735672917815774 and parameters: {'n_estimators': 464, 'max_depth': 9, 'learning_rate': 0.2508309093069284, 'min_child_weight': 1, 'gamma': 9.829499488634055e-05, 'subsample': 0.5976538744434851, 'colsample_bytree': 0.2484396398114058, 'reg_alpha': 3.0190213177953125e-05, 'reg_lambda': 0.32731059522214845, 'early_stoppig_rounds': 77}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.9735672917815774


[32m[I 2023-01-17 16:08:56,788][0m Trial 86 finished with value: 0.9769318808604524 and parameters: {'n_estimators': 452, 'max_depth': 9, 'learning_rate': 0.19254281170390544, 'min_child_weight': 2, 'gamma': 0.00029604071975189206, 'subsample': 0.7396045172258645, 'colsample_bytree': 0.20987422858648597, 'reg_alpha': 5.3104110560376885e-05, 'reg_lambda': 0.271790566374086, 'early_stoppig_rounds': 74}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.9769318808604524


[32m[I 2023-01-17 16:09:23,162][0m Trial 87 finished with value: 0.9768358154072441 and parameters: {'n_estimators': 480, 'max_depth': 9, 'learning_rate': 0.11259246468896843, 'min_child_weight': 2, 'gamma': 0.00042293809820318125, 'subsample': 0.5708415615298192, 'colsample_bytree': 0.5380931164331297, 'reg_alpha': 3.020531277726164e-05, 'reg_lambda': 0.49078070253556655, 'early_stoppig_rounds': 71}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.9768358154072441


[32m[I 2023-01-17 16:09:40,422][0m Trial 88 finished with value: 0.9762021970950544 and parameters: {'n_estimators': 498, 'max_depth': 8, 'learning_rate': 0.17201234146881134, 'min_child_weight': 1, 'gamma': 3.55486337698646e-05, 'subsample': 0.6158501151323951, 'colsample_bytree': 0.2724796841002367, 'reg_alpha': 1.4468903190896825e-05, 'reg_lambda': 0.9210838727132865, 'early_stoppig_rounds': 67}. Best is trial 83 with value: 0.98196083838941.[0m


Avg AUC: 0.9762021970950544


[32m[I 2023-01-17 16:09:53,921][0m Trial 89 finished with value: 0.9827056903842617 and parameters: {'n_estimators': 441, 'max_depth': 9, 'learning_rate': 0.20300021345847943, 'min_child_weight': 1, 'gamma': 0.0008115467781576025, 'subsample': 0.5050862735638452, 'colsample_bytree': 0.43919480348226175, 'reg_alpha': 1.9990500613860418e-05, 'reg_lambda': 0.1194571930207276, 'early_stoppig_rounds': 44}. Best is trial 89 with value: 0.9827056903842617.[0m


Avg AUC: 0.9827056903842617


[32m[I 2023-01-17 16:10:09,244][0m Trial 90 finished with value: 0.9767445762088618 and parameters: {'n_estimators': 416, 'max_depth': 7, 'learning_rate': 0.2062956382408375, 'min_child_weight': 3, 'gamma': 0.0001586692565563557, 'subsample': 0.5488349441505361, 'colsample_bytree': 0.4483305794597446, 'reg_alpha': 1.9807239922473692e-05, 'reg_lambda': 0.12773012961743418, 'early_stoppig_rounds': 35}. Best is trial 89 with value: 0.9827056903842617.[0m


Avg AUC: 0.9767445762088618


[32m[I 2023-01-17 16:10:25,231][0m Trial 91 finished with value: 0.9829345927560211 and parameters: {'n_estimators': 447, 'max_depth': 9, 'learning_rate': 0.15261034541616603, 'min_child_weight': 1, 'gamma': 0.0008612511495453191, 'subsample': 0.5104323093724731, 'colsample_bytree': 0.4697863696481417, 'reg_alpha': 2.4627692128919065e-05, 'reg_lambda': 0.2171339908189592, 'early_stoppig_rounds': 50}. Best is trial 91 with value: 0.9829345927560211.[0m


Avg AUC: 0.9829345927560211


[32m[I 2023-01-17 16:10:40,933][0m Trial 92 finished with value: 0.9807984004412577 and parameters: {'n_estimators': 440, 'max_depth': 9, 'learning_rate': 0.1453415351612024, 'min_child_weight': 1, 'gamma': 0.0007923031714807115, 'subsample': 0.4944133149582801, 'colsample_bytree': 0.4318444423379814, 'reg_alpha': 1.1224943651522311e-05, 'reg_lambda': 0.0952209634270257, 'early_stoppig_rounds': 51}. Best is trial 91 with value: 0.9829345927560211.[0m


Avg AUC: 0.9807984004412577


[32m[I 2023-01-17 16:10:59,558][0m Trial 93 finished with value: 0.9788483636697922 and parameters: {'n_estimators': 441, 'max_depth': 9, 'learning_rate': 0.12822968602194668, 'min_child_weight': 1, 'gamma': 0.0008527258433392262, 'subsample': 0.5080755310072881, 'colsample_bytree': 0.46674350953359284, 'reg_alpha': 1.0466154193638061e-05, 'reg_lambda': 0.11386153044705294, 'early_stoppig_rounds': 51}. Best is trial 91 with value: 0.9829345927560211.[0m


Avg AUC: 0.9788483636697922


[32m[I 2023-01-17 16:11:14,606][0m Trial 94 finished with value: 0.9793893638536495 and parameters: {'n_estimators': 403, 'max_depth': 10, 'learning_rate': 0.147487331147299, 'min_child_weight': 1, 'gamma': 0.0005728143443870471, 'subsample': 0.4813687462047201, 'colsample_bytree': 0.5010460000843753, 'reg_alpha': 1.613943231418735e-05, 'reg_lambda': 0.08349119708466672, 'early_stoppig_rounds': 54}. Best is trial 91 with value: 0.9829345927560211.[0m


Avg AUC: 0.9793893638536495


[32m[I 2023-01-17 16:11:27,805][0m Trial 95 finished with value: 0.9840308420665563 and parameters: {'n_estimators': 462, 'max_depth': 9, 'learning_rate': 0.24754839357747555, 'min_child_weight': 1, 'gamma': 0.0002768382878439927, 'subsample': 0.4962635986679422, 'colsample_bytree': 0.43112487341982364, 'reg_alpha': 2.4326609397287665e-05, 'reg_lambda': 0.21844284657376126, 'early_stoppig_rounds': 45}. Best is trial 95 with value: 0.9840308420665563.[0m


Avg AUC: 0.9840308420665563


[32m[I 2023-01-17 16:11:39,918][0m Trial 96 finished with value: 0.9793089262732121 and parameters: {'n_estimators': 434, 'max_depth': 9, 'learning_rate': 0.2511502099659057, 'min_child_weight': 1, 'gamma': 0.0002832943244253122, 'subsample': 0.45842491254816203, 'colsample_bytree': 0.42864564060456684, 'reg_alpha': 3.748510254698418e-05, 'reg_lambda': 0.3949468715093616, 'early_stoppig_rounds': 43}. Best is trial 95 with value: 0.9840308420665563.[0m


Avg AUC: 0.9793089262732121


[32m[I 2023-01-17 16:12:00,540][0m Trial 97 finished with value: 0.9637550560764847 and parameters: {'n_estimators': 462, 'max_depth': 8, 'learning_rate': 0.2623155273322966, 'min_child_weight': 9, 'gamma': 0.000388534886437654, 'subsample': 0.5184164306674669, 'colsample_bytree': 0.47876713311233976, 'reg_alpha': 2.384974666269081e-05, 'reg_lambda': 0.2141928296459153, 'early_stoppig_rounds': 49}. Best is trial 95 with value: 0.9840308420665563.[0m


Avg AUC: 0.9637550560764847


[32m[I 2023-01-17 16:12:13,233][0m Trial 98 finished with value: 0.9814772936201507 and parameters: {'n_estimators': 448, 'max_depth': 9, 'learning_rate': 0.22290807602502186, 'min_child_weight': 1, 'gamma': 0.0007243578280907595, 'subsample': 0.5616947500145373, 'colsample_bytree': 0.39715330581607244, 'reg_alpha': 2.6525220599178655e-05, 'reg_lambda': 0.40054912804554493, 'early_stoppig_rounds': 44}. Best is trial 95 with value: 0.9840308420665563.[0m


Avg AUC: 0.9814772936201507


[32m[I 2023-01-17 16:12:27,103][0m Trial 99 finished with value: 0.9719725592939877 and parameters: {'n_estimators': 450, 'max_depth': 9, 'learning_rate': 0.20768139160423302, 'min_child_weight': 2, 'gamma': 0.0009948889624499533, 'subsample': 0.5654274458108787, 'colsample_bytree': 0.5178585262143724, 'reg_alpha': 4.493459399335371e-05, 'reg_lambda': 0.27279634347114556, 'early_stoppig_rounds': 46}. Best is trial 95 with value: 0.9840308420665563.[0m


Avg AUC: 0.9719725592939877


In [33]:
study_xgb.best_value

0.9840308420665563

In [34]:
study_xgb.best_params

{'n_estimators': 462,
 'max_depth': 9,
 'learning_rate': 0.24754839357747555,
 'min_child_weight': 1,
 'gamma': 0.0002768382878439927,
 'subsample': 0.4962635986679422,
 'colsample_bytree': 0.43112487341982364,
 'reg_alpha': 2.4326609397287665e-05,
 'reg_lambda': 0.21844284657376126,
 'early_stoppig_rounds': 45}

In [35]:
## INSIGHTS: Okay okay, the score looks like it's overfitting but there's only one way to find out. i.e. Submit.
# And honestly, we should test this before going on with tuning more models to see if this method is even working or not!

In [36]:
xgb_params = {'n_estimators': 462,
                 'max_depth': 9,
                 'learning_rate': 0.24754839357747555,
                 'min_child_weight': 1,
                 'gamma': 0.0002768382878439927,
                 'subsample': 0.4962635986679422,
                 'colsample_bytree': 0.43112487341982364,
                 'reg_alpha': 2.4326609397287665e-05,
                 'reg_lambda': 0.21844284657376126,
                 'early_stoppig_rounds': 45}

In [37]:
X_train_fr, X_val, y_train_fr, y_val = train_test_split(X_train, y, test_size=0.1, shuffle=True, random_state=1337,
                                                        stratify=y)

In [40]:
xgb_tuned_clf = xgb.XGBClassifier(**xgb_params)
xgb_tuned_clf.fit(X_train_fr, y_train_fr, eval_set=[(X_val, y_val)], verbose=False)

Parameters: { "early_stoppig_rounds" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1,
              colsample_bytree=0.43112487341982364, early_stoppig_rounds=45,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0.0002768382878439927, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.24754839357747555,
              max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=9,
              max_leaves=0, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=462, n_jobs=0,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=2.4326609397287665e-05, ...)

In [41]:
xgb_tuned_preds = xgb_tuned_clf.predict_proba(X_test)[:, 1]

In [42]:
submission = pd.DataFrame({"id": test_idx, "Attrition": xgb_tuned_preds})
submission.head()

Unnamed: 0,id,Attrition
0,1677,0.021201
1,1678,0.001126
2,1679,1.7e-05
3,1680,0.000405
4,1681,0.947035


In [43]:
submission.to_csv("submission.csv", index=False)