## Import Libraries

In [52]:
# Data analysis library
import numpy as np
import pandas as pd
import joblib
import seaborn as sns
%matplotlib inline
# Machine Learning library
import sklearn
from sklearn.metrics import roc_curve, auc, accuracy_score, plot_confusion_matrix, plot_roc_curve
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from lightgbm import LGBMClassifier
from lightgbm import plot_importance, plot_metric
from sklearn.metrics import f1_score
# Hyperparameter tunning library
import optuna

# Model experimentation library
import mlflow
import mlflow.lightgbm
from mlflow.tracking import MlflowClient

import pickle
# Plotting library
import matplotlib.pyplot as plt
# Prevent figures from displaying by turning interactive mode off using the function
plt.ioff()
import warnings
warnings.filterwarnings("ignore")

In [53]:
print(f'Numpy version is {np.__version__}')
print(f'Pandas version is {pd.__version__}')
print(f'sklearn version is {sklearn.__version__}')
print(f'joblib version is {joblib.__version__}')
print(f'optuna version is {optuna.__version__}')
print(f'mlflow version is {mlflow.__version__}')

Numpy version is 1.20.2
Pandas version is 1.2.4
sklearn version is 0.24.1
joblib version is 1.0.1
optuna version is 2.7.0
mlflow version is 1.15.0


## Load Dataset


In [54]:
## Files
data_file = 'dataset/train.csv'

# to display all the columns of the dataframe in the notebook
pd.set_option('display.max_columns', None)
# Load train loan dataset 
try:
    df = pd.read_csv(data_file)
    print("The dataset has {} samples with {} features.".format(*df.shape))
except:
    print("The dataset could not be loaded. Is the dataset missing?")

The dataset has 532428 samples with 45 features.


In [55]:
df.head()

Unnamed: 0,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,batch_enrolled,int_rate,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,pymnt_plan,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,collections_12_mths_ex_med,mths_since_last_major_derog,application_type,verification_status_joint,last_week_pay,acc_now_delinq,tot_coll_amt,tot_cur_bal,total_rev_hi_lim,loan_status
0,58189336,14350,14350,14350.0,36 months,,19.19,E,E3,clerk,9 years,OWN,28700.0,Source Verified,n,,debt_consolidation,Debt consolidation,349xx,FL,33.88,0.0,1.0,50.0,75.0,14.0,1.0,22515.0,73.1,28.0,f,1173.84,0.0,0.0,0.0,0.0,74.0,INDIVIDUAL,,26th week,0.0,0.0,28699.0,30800.0,0
1,70011223,4800,4800,4800.0,36 months,BAT1586599,10.99,B,B4,Human Resources Specialist,< 1 year,MORTGAGE,65000.0,Source Verified,n,,home_improvement,Home improvement,209xx,MD,3.64,0.0,1.0,,,6.0,0.0,7624.0,23.2,13.0,w,83.95,0.0,0.0,0.0,0.0,,INDIVIDUAL,,9th week,0.0,0.0,9974.0,32900.0,0
2,70255675,10000,10000,10000.0,36 months,BAT1586599,7.26,A,A4,Driver,2 years,OWN,45000.0,Not Verified,n,,debt_consolidation,Debt consolidation,447xx,OH,18.42,0.0,0.0,,,5.0,0.0,10877.0,31.2,19.0,w,56.47,0.0,0.0,0.0,0.0,,INDIVIDUAL,,9th week,0.0,65.0,38295.0,34900.0,0
3,1893936,15000,15000,15000.0,36 months,BAT4808022,19.72,D,D5,Us office of Personnel Management,10+ years,RENT,105000.0,Not Verified,n,> My goal is to obtain a loan to pay off my hi...,debt_consolidation,Debt consolidation,221xx,VA,14.97,0.0,2.0,46.0,,10.0,0.0,13712.0,55.5,21.0,f,4858.62,0.0,0.0,0.0,0.0,,INDIVIDUAL,,135th week,0.0,0.0,55564.0,24700.0,0
4,7652106,16000,16000,16000.0,36 months,BAT2833642,10.64,B,B2,LAUSD-HOLLYWOOD HIGH SCHOOL,10+ years,RENT,52000.0,Verified,n,,credit_card,refi,900xx,CA,20.16,0.0,0.0,,,11.0,0.0,35835.0,76.2,27.0,w,2296.41,0.0,0.0,0.0,0.0,,INDIVIDUAL,,96th week,0.0,0.0,47159.0,47033.0,0


## Tracking Experiments

### Tracking  Metadata 

MLflow supports two types of backend stores: *file store* and *database-backed* store.

- Local file path (specified as file:/my/local/dir), where data is just directly stored locally. Defaults to `mlruns/`
- Database encoded as <dialect>+<driver>://<username>:<password>@<host>:<port>/<database>. Mlflow supports the dialects mysql, mssql, sqlite, and postgresql. For more details, see SQLAlchemy database uri.
- HTTP server (specified as https://my-server:5000), which is a server hosting an MLFlow tracking server.
 
**For this Demo we are usking Sqlite as a Database to store metadata.**

### Artifact stores
- Amazon S3
- Azure Blob Storage
- Google Cloud Storage
- FTP server
- SFTP Server
- NFS
- HDFS
    
**We are using local mlrun folder to store all required artifacts.**

Base Syntax to start mlflow as a service

```
mlflow server \
    --backend-store-uri /mnt/persistent-disk \
    --default-artifact-root s3://my-mlflow-bucket/ \
    --host 0.0.0.0
    --port 5000
```
![alt text](images/localhost_sqlite.png "Title")
**Note:** To start the service for this experiment copy below command and execute it in Anaconda prompt
```bash
mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root mlruns/ --host 127.0.0.1 --port 5000
```

In [56]:
experiment_name = "Loan deaulter predictionv1"
artifact_repository = './mlflow'

# Provide uri and connect to your tracking server
mlflow.set_tracking_uri('http://127.0.0.1:5000/')

# Initialize client
client = MlflowClient()

# If experiment doesn't exist then it will create new
# else it will take the experiment id and will use to to run the experiments
try:
    # Create experiment 
    experiment_id = client.create_experiment(experiment_name, artifact_location=artifact_repository)
except:
    # Get the experiment id if it already exists
    experiment_id = client.get_experiment_by_name(experiment_name).experiment_id

## Separate dataset into train and test

In [57]:
# Let's separate into train and test set
# Remember to seet the seed (random_state for this sklearn function)

X_train, X_test, y_train, y_test = train_test_split(df, df.loan_status,
                                                    test_size=0.1,
                                                    random_state=42) # we are setting the seed here
X_train.shape, X_test.shape

((479185, 45), (53243, 45))


**Remember: the aim of this demo and this particular project is to show you how use mlflow for tracking and put models in production. Surely there are additional things you can do on this dataset, to extract additional value from the features.**

**In order to capitalise on the deployment aspect of things, we deliberately kept the engineering side simple, yet include many of the traditional engineering steps, so you get a full flavour of building and deploying a machine learning model.**

## Handling Missing Value as NaN

In [58]:
# converting blank value to NaN value.
X_train = X_train.replace(' ', np.nan)
X_test = X_test.replace(' ', np.nan)

#ratio of null values
X_train.isnull().sum()/X_train.shape[0] *100

member_id                       0.000000
loan_amnt                       0.000000
funded_amnt                     0.000000
funded_amnt_inv                 0.000000
term                            0.000000
batch_enrolled                 35.907635
int_rate                        0.000000
grade                           0.000000
sub_grade                       0.000000
emp_title                       5.787952
emp_length                      5.044816
home_ownership                  0.000000
annual_inc                      0.000626
verification_status             0.000000
pymnt_plan                      0.000000
desc                           85.810073
purpose                         0.000000
title                           0.017321
zip_code                        0.000000
addr_state                      0.000000
dti                             0.000000
delinq_2yrs                     0.002713
inq_last_6mths                  0.002713
mths_since_last_delinq         51.149139
mths_since_last_

In [71]:
# to create a file as pickle
def picklesave(filename,file):
    with open(filename, 'wb') as handle:
        pickle.dump(file, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Converting into numerical feature and New feature

In [60]:
emp_len_dict = {'< 1 year':0,'1 year':1,'2 years':2,'3 years':3,'4 years':4,'5 years':5,'6 years':6,'7 years':7,'8 years':8,'9 years':9,'10+ years':10}
X_train['emp_length'] = X_train['emp_length'].map(emp_len_dict)
X_test['emp_length'] = X_test['emp_length'].map(emp_len_dict)

picklesave('artifacts/emp_len_dict.pickle', emp_len_dict)

# create the temporal var
X_train['Long_emp_length'] = X_train['emp_length'].apply(lambda x : 'Yes' if x==10 else 'No')
X_test['Long_emp_length'] = X_test['emp_length'].apply(lambda x : 'Yes' if x==10 else 'No')



## Handling Missing categorical values

For categorical variables, we will fill missing information by adding an additional category: "missing"

In [61]:
# make a list of the categorical variables that contain missing values
vars_with_na = [var for var in X_train.columns if X_train[var].dtypes=='O']

# print the variable name and the percentage of missing values
for var in vars_with_na:
    print(var, np.round(X_train[var].isnull().mean(), 3),  ' % missing values')

term 0.0  % missing values
batch_enrolled 0.359  % missing values
grade 0.0  % missing values
sub_grade 0.0  % missing values
emp_title 0.058  % missing values
home_ownership 0.0  % missing values
verification_status 0.0  % missing values
pymnt_plan 0.0  % missing values
desc 0.858  % missing values
purpose 0.0  % missing values
title 0.0  % missing values
zip_code 0.0  % missing values
addr_state 0.0  % missing values
initial_list_status 0.0  % missing values
application_type 0.0  % missing values
verification_status_joint 0.999  % missing values
last_week_pay 0.0  % missing values
Long_emp_length 0.0  % missing values


In [62]:
# function to replace NA in categorical variables
for col in ["emp_title","title"]:
    X_train[col].fillna('Missing',inplace=True)
    X_test[col].fillna('Missing',inplace=True)
    
X_train[vars_with_na].isnull().sum()

## Handling Missing Numerical values

In [64]:
mean_var_dict = {}
impute_col = ['emp_length','annual_inc','delinq_2yrs','inq_last_6mths','open_acc','pub_rec','revol_util','total_acc','collections_12_mths_ex_med','acc_now_delinq','tot_coll_amt','tot_cur_bal','total_rev_hi_lim']
# Imputing with Mean
#impute_col = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
for col in impute_col:
    # calculate the mean
    mean_val = X_train[col].mean()
    
    X_train[col].fillna(mean_val, inplace=True)
    X_test[col].fillna(mean_val, inplace=True)
    # we persist the mean in the dictionary
    mean_var_dict[col] = mean_val
# we save the dictionary for later
picklesave('artifacts/mean_var_dict.pickle', mean_var_dict)
#print(mean_var_dict)
X_train[impute_col].isnull().sum()

emp_length                    0
annual_inc                    0
delinq_2yrs                   0
inq_last_6mths                0
open_acc                      0
pub_rec                       0
revol_util                    0
total_acc                     0
collections_12_mths_ex_med    0
acc_now_delinq                0
tot_coll_amt                  0
tot_cur_bal                   0
total_rev_hi_lim              0
dtype: int64

## Handling Rare values 

In [65]:
# define your threshold here

threshlold = 0.0005
frequent_labels_dict = {}

categorical_col = ["zip_code",'last_week_pay','emp_title']

for variable in categorical_col:
    # locate all the categories that are not rare.
    counts = X_train.groupby([variable])[variable].count() / len(X_train)
    frequent_labels = [x for x in counts.loc[counts>threshlold].index.values]
    frequent_labels_dict[variable] = frequent_labels

    # change the rare category names with the word rare, and thus encoding it.
    X_train[variable] = np.where(X_train[variable].isin(frequent_labels), X_train[variable], 'Rare')
    X_test[variable] = np.where(X_test[variable].isin(frequent_labels), X_test[variable], 'Rare')

# now we save the dictionary
picklesave('artifacts/FrequentLabels.pickle', frequent_labels_dict)

## Feature Selection

In [66]:
exclude_feature = ['loan_amnt','funded_amnt','title','batch_enrolled','member_id','loan_status','desc','mths_since_last_delinq','mths_since_last_record','mths_since_last_major_derog','verification_status_joint']
# Define Target columns
#target = data['loan_status']

# Define numeric and categorical features
numeric_columns = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_columns = X_train.select_dtypes(include=['object']).columns.tolist()
numeric_features = [col for col in numeric_columns if col not in exclude_feature]
categorical_features = [col for col in categorical_columns if col not in exclude_feature]

# Define final feature list for training and validation
features = numeric_features + categorical_features
# Final data for training and validation
X_train = X_train[features]
X_train = X_train.fillna(0)
# now we save the selected list of features
pd.Series(features).to_csv('artifacts/selected_features.csv', index=False)

## Categorical feature Encoding

In [67]:
# Split data in train and validation
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

label_dict = {}


# Perform label encoding for categorical variable
for feature in categorical_features:
    le = LabelEncoder()
    X_train[feature]=  X_train[feature].astype('str')
    #test_data[feature]=  test_data[feature].astype('str')
    X_valid[feature]=  X_valid[feature].astype('str')
    X_test[feature]=  X_test[feature].astype('str')
    le.fit(X_train.loc[:, feature])
    X_train.loc[:, feature] = le.transform(X_train.loc[:, feature])
    X_valid.loc[:, feature] = le.transform(X_valid.loc[:, feature])
    X_test.loc[:, feature] = le.transform(X_test.loc[:, feature])
    label_dict[feature] = le

picklesave("artifacts/labelEncoder.pickle",label_dict)

![alt text](images/MLflow_Model_experimentation.png "Title")

**source: vivek kumar [YouTube]**

### Model Training and Tracking using MLflow

In [68]:
def model_training_tracking(params):
    with mlflow.start_run(experiment_id=experiment_id, run_name='Lightgbm_model1') as run:
        # Get run id 
        run_id = run.info.run_uuid

        # Set the notes for the run
        MlflowClient().set_tag(run_id,
                               "mlflow.note.content",
                               "This is experiment for hyperparameter optimzation for lightgbm models for the Load Defaulter prediction")

        # Define customer tag
        tags = {"Application": "Loan Defaulter Monitoring",
                "release.version": "1.0.0"}

        # Set Tag
        mlflow.set_tags(tags)

        # Log python environment details
        mlflow.log_artifact('requirements.txt')
        # Log selected feature and feature enginering data 
        mlflow.log_artifact('artifacts/selected_features.csv')      
        mlflow.log_artifact('artifacts/FrequentLabels.pickle')      
        mlflow.log_artifact('artifacts/mean_var_dict.pickle')      
        mlflow.log_artifact('artifacts/emp_len_dict.pickle')       
        mlflow.log_artifact("artifacts/labelEncoder.pickle")

        # logging params
        mlflow.log_params(params)
        auc_list = []
        mlflow.lightgbm.autolog()
        lgb_clf = LGBMClassifier(**params)
        lgb_clf.fit(X_train, y_train, 
                    eval_set = [(X_train, y_train), (X_valid, y_valid)], 
                    early_stopping_rounds=100,
                    verbose=20)
        # Log model artifacts
        mlflow.sklearn.log_model(lgb_clf, "model")
        
        # Perform model evaluation 
        lgb_valid_prediction = lgb_clf.predict_proba(X_valid)[:, 1]
        fpr, tpr, thresholds = roc_curve(y_valid, lgb_valid_prediction)
        roc_auc = auc(fpr, tpr) # compute area under the curve
        print("=====================================")
        print("Validation AUC:{}".format(roc_auc))
        print("=====================================")   

        score_lgb_valid_prediction = lgb_clf.predict(X_valid)
        score = f1_score(y_valid, score_lgb_valid_prediction, average='weighted')# compute f1 score
        score = 100* score
        print("=====================================")
        print("Validation f1 score:{}".format(score))
        print("=====================================")  
        
        # log metrics
        mlflow.log_metrics({"Validation_AUC": roc_auc,"Validation_f1": score})
        
        # Plot and save feature importance details
        ax = plot_importance(lgb_clf, height=0.4)
        filename = './images/lgb_validation_feature_importance.png'
        plt.savefig(filename)
        # log model artifacts
        mlflow.log_artifact(filename)

        ax = plot_metric(lgb_clf.evals_result_)
        filename = './images/lgb_validation_metrics_comparision.png'
        plt.savefig(filename)
        # log model artifacts
        mlflow.log_artifact(filename)

        # Plot and save metrics details    
        plot_confusion_matrix(lgb_clf, X_valid, y_valid, 
                              display_labels=['Placed', 'Not Placed'],
                              cmap='magma')
        plt.title('Confusion Matrix')
        filename = './images/lgb_validation_confusion_matrix.png'
        plt.savefig(filename)
        # log model artifacts
        mlflow.log_artifact(filename)

        # Plot and save AUC details  
        plot_roc_curve(lgb_clf, X_valid, y_valid, name='Validation')
        plt.title('ROC AUC Curve')
        filename = './images/lgb_validation_roc_curve.png'
        plt.savefig(filename)
        # log model artifacts
        mlflow.log_artifact(filename)
        return roc_auc

In [69]:
def objective(trial):

    param = {
        "objective": "binary",
        "metric": "auc",
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
        "subsample": trial.suggest_float("subsample", 0.4, 1.0),
        "random_state": 42,
        "boosting_type": "gbdt",
        #"max_depth": 10
    }
    
    score = model_training_tracking(param)
    return score

In [70]:
# Create a study object and optimize the objective function.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=40)
trial = study.best_trial
print('score: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2021-04-18 14:25:43,841][0m A new study created in memory with name: no-name-85b0bee1-836e-4847-8de8-b270e7de7f3d[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.81228	valid_1's auc: 0.810512
[40]	training's auc: 0.810725	valid_1's auc: 0.809361
[60]	training's auc: 0.814203	valid_1's auc: 0.812824
[80]	training's auc: 0.817833	valid_1's auc: 0.816397
[100]	training's auc: 0.820986	valid_1's auc: 0.819753
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.820986	valid_1's auc: 0.819753
Validation AUC:0.8197533707276448
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:26:16,443][0m Trial 0 finished with value: 0.8197533707276448 and parameters: {'learning_rate': 0.0020422511327989144, 'colsample_bytree': 0.7848430373096997, 'subsample': 0.410500457288687}. Best is trial 0 with value: 0.8197533707276448.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.823706	valid_1's auc: 0.821301
[40]	training's auc: 0.829392	valid_1's auc: 0.827781
[60]	training's auc: 0.830937	valid_1's auc: 0.829384
[80]	training's auc: 0.836588	valid_1's auc: 0.835101
[100]	training's auc: 0.844873	valid_1's auc: 0.843553
Did not meet early stopping. Best iteration is:
[99]	training's auc: 0.844974	valid_1's auc: 0.843671
Validation AUC:0.8436710040011889
Validation f1 score:78.70018350525483


[32m[I 2021-04-18 14:27:00,084][0m Trial 1 finished with value: 0.8436710040011889 and parameters: {'learning_rate': 0.010839306644327647, 'colsample_bytree': 0.584899480775861, 'subsample': 0.511910344666861}. Best is trial 1 with value: 0.8436710040011889.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.839889	valid_1's auc: 0.838474
[40]	training's auc: 0.844333	valid_1's auc: 0.842773
[60]	training's auc: 0.840935	valid_1's auc: 0.839152
[80]	training's auc: 0.84098	valid_1's auc: 0.839095
[100]	training's auc: 0.844037	valid_1's auc: 0.842058
Did not meet early stopping. Best iteration is:
[41]	training's auc: 0.845636	valid_1's auc: 0.844097
Validation AUC:0.8440968655380945
Validation f1 score:68.04029794103043


[32m[I 2021-04-18 14:27:34,024][0m Trial 2 finished with value: 0.8440968655380945 and parameters: {'learning_rate': 0.011430245506954158, 'colsample_bytree': 0.3405540850390676, 'subsample': 0.9931644372579348}. Best is trial 2 with value: 0.8440968655380945.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.83067	valid_1's auc: 0.828688
[40]	training's auc: 0.839293	valid_1's auc: 0.838043
[60]	training's auc: 0.846266	valid_1's auc: 0.844821
[80]	training's auc: 0.85343	valid_1's auc: 0.851889
[100]	training's auc: 0.861798	valid_1's auc: 0.860122
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.861798	valid_1's auc: 0.860122
Validation AUC:0.8601218281666481
Validation f1 score:81.19379135626819


[32m[I 2021-04-18 14:28:25,423][0m Trial 3 finished with value: 0.8601218281666481 and parameters: {'learning_rate': 0.022658423013662367, 'colsample_bytree': 0.785767690191687, 'subsample': 0.4550845652256418}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.829829	valid_1's auc: 0.827645
[40]	training's auc: 0.837867	valid_1's auc: 0.836245
[60]	training's auc: 0.831597	valid_1's auc: 0.830185
[80]	training's auc: 0.830869	valid_1's auc: 0.829228
[100]	training's auc: 0.830992	valid_1's auc: 0.829319
Did not meet early stopping. Best iteration is:
[41]	training's auc: 0.838873	valid_1's auc: 0.837261
Validation AUC:0.8372614964556686
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:28:58,006][0m Trial 4 finished with value: 0.8372614964556686 and parameters: {'learning_rate': 0.0027201322481540382, 'colsample_bytree': 0.45151164246621617, 'subsample': 0.41718042496220165}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.834594	valid_1's auc: 0.832514
[40]	training's auc: 0.843649	valid_1's auc: 0.842034
[60]	training's auc: 0.842054	valid_1's auc: 0.840392
[80]	training's auc: 0.842956	valid_1's auc: 0.84104
[100]	training's auc: 0.847916	valid_1's auc: 0.845932
Did not meet early stopping. Best iteration is:
[99]	training's auc: 0.847958	valid_1's auc: 0.845965
Validation AUC:0.8459646788250761
Validation f1 score:78.79438057143179


[32m[I 2021-04-18 14:29:35,577][0m Trial 5 finished with value: 0.8459646788250761 and parameters: {'learning_rate': 0.013837853400512318, 'colsample_bytree': 0.48061575972194615, 'subsample': 0.4686765073765141}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.821517	valid_1's auc: 0.81897
[40]	training's auc: 0.825002	valid_1's auc: 0.823631
[60]	training's auc: 0.824351	valid_1's auc: 0.82284
[80]	training's auc: 0.827023	valid_1's auc: 0.825575
[100]	training's auc: 0.831271	valid_1's auc: 0.829875
Did not meet early stopping. Best iteration is:
[99]	training's auc: 0.831426	valid_1's auc: 0.830056
Validation AUC:0.8300562279588453
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:30:13,450][0m Trial 6 finished with value: 0.8300562279588453 and parameters: {'learning_rate': 0.004051758441224662, 'colsample_bytree': 0.5858246418623649, 'subsample': 0.8114607018680617}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.818744	valid_1's auc: 0.817049
[40]	training's auc: 0.82412	valid_1's auc: 0.823019
[60]	training's auc: 0.824441	valid_1's auc: 0.823229
[80]	training's auc: 0.83026	valid_1's auc: 0.829165
[100]	training's auc: 0.835013	valid_1's auc: 0.834068
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.835013	valid_1's auc: 0.834068
Validation AUC:0.8340680083507753
Validation f1 score:76.785504037567


[32m[I 2021-04-18 14:30:44,799][0m Trial 7 finished with value: 0.8340680083507753 and parameters: {'learning_rate': 0.0055961043575050474, 'colsample_bytree': 0.7297255490670804, 'subsample': 0.8398560917771687}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.803456	valid_1's auc: 0.801909
[40]	training's auc: 0.810095	valid_1's auc: 0.808727
[60]	training's auc: 0.815776	valid_1's auc: 0.814585
[80]	training's auc: 0.822003	valid_1's auc: 0.820998
[100]	training's auc: 0.824897	valid_1's auc: 0.82411
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.824897	valid_1's auc: 0.82411
Validation AUC:0.8241098570732841
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:31:13,120][0m Trial 8 finished with value: 0.8241098570732841 and parameters: {'learning_rate': 0.004450339885564292, 'colsample_bytree': 0.85072415435969, 'subsample': 0.9075906367278835}. Best is trial 3 with value: 0.8601218281666481.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.838555	valid_1's auc: 0.837638
[40]	training's auc: 0.861326	valid_1's auc: 0.859773
[60]	training's auc: 0.881958	valid_1's auc: 0.880333
[80]	training's auc: 0.895033	valid_1's auc: 0.893473
[100]	training's auc: 0.9019	valid_1's auc: 0.89989
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.9019	valid_1's auc: 0.89989
Validation AUC:0.8998897442850574
Validation f1 score:85.89366078549551


[32m[I 2021-04-18 14:31:43,848][0m Trial 9 finished with value: 0.8998897442850574 and parameters: {'learning_rate': 0.05464328975812618, 'colsample_bytree': 0.9372422486848113, 'subsample': 0.6833068503134679}. Best is trial 9 with value: 0.8998897442850574.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.843955	valid_1's auc: 0.842763
[40]	training's auc: 0.882624	valid_1's auc: 0.881251
[60]	training's auc: 0.898669	valid_1's auc: 0.897053
[80]	training's auc: 0.907026	valid_1's auc: 0.904693
[100]	training's auc: 0.914016	valid_1's auc: 0.910909
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.914016	valid_1's auc: 0.910909
Validation AUC:0.9109090511340305
Validation f1 score:87.19214208168086


[32m[I 2021-04-18 14:32:17,117][0m Trial 10 finished with value: 0.9109090511340305 and parameters: {'learning_rate': 0.08238742883412471, 'colsample_bytree': 0.9836040873255602, 'subsample': 0.6295705153244137}. Best is trial 10 with value: 0.9109090511340305.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.844429	valid_1's auc: 0.843621
[40]	training's auc: 0.877121	valid_1's auc: 0.876149
[60]	training's auc: 0.895817	valid_1's auc: 0.89461
[80]	training's auc: 0.904666	valid_1's auc: 0.902647
[100]	training's auc: 0.912134	valid_1's auc: 0.909442
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.912134	valid_1's auc: 0.909442
Validation AUC:0.9094421246323612
Validation f1 score:86.93227909585397


[32m[I 2021-04-18 14:32:46,945][0m Trial 11 finished with value: 0.9094421246323612 and parameters: {'learning_rate': 0.07711467232260501, 'colsample_bytree': 0.9932107983185507, 'subsample': 0.6229008853782586}. Best is trial 10 with value: 0.9109090511340305.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.850197	valid_1's auc: 0.849196
[40]	training's auc: 0.88508	valid_1's auc: 0.883665
[60]	training's auc: 0.900381	valid_1's auc: 0.898486
[80]	training's auc: 0.910614	valid_1's auc: 0.90798
[100]	training's auc: 0.917541	valid_1's auc: 0.914046
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.917541	valid_1's auc: 0.914046
Validation AUC:0.9140463465852074
Validation f1 score:87.65106147541059


[32m[I 2021-04-18 14:33:15,042][0m Trial 12 finished with value: 0.9140463465852074 and parameters: {'learning_rate': 0.08966994027538293, 'colsample_bytree': 0.9796143618325008, 'subsample': 0.6126984124726019}. Best is trial 12 with value: 0.9140463465852074.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.829352	valid_1's auc: 0.828631
[40]	training's auc: 0.847458	valid_1's auc: 0.846181
[60]	training's auc: 0.866071	valid_1's auc: 0.864591
[80]	training's auc: 0.881153	valid_1's auc: 0.879608
[100]	training's auc: 0.891194	valid_1's auc: 0.889625
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.891194	valid_1's auc: 0.889625
Validation AUC:0.8896254249935519
Validation f1 score:84.82292082268773


[32m[I 2021-04-18 14:33:45,442][0m Trial 13 finished with value: 0.8896254249935519 and parameters: {'learning_rate': 0.04101702306505094, 'colsample_bytree': 0.9979031198940949, 'subsample': 0.5827438819907678}. Best is trial 12 with value: 0.9140463465852074.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.85291	valid_1's auc: 0.851725
[40]	training's auc: 0.883252	valid_1's auc: 0.881949
[60]	training's auc: 0.899714	valid_1's auc: 0.898053
[80]	training's auc: 0.9097	valid_1's auc: 0.907126
[100]	training's auc: 0.917344	valid_1's auc: 0.914511
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.917344	valid_1's auc: 0.914511
Validation AUC:0.9145109201692941
Validation f1 score:87.49614094039487


[32m[I 2021-04-18 14:34:17,408][0m Trial 14 finished with value: 0.9145109201692941 and parameters: {'learning_rate': 0.09007799284234508, 'colsample_bytree': 0.8988050942982481, 'subsample': 0.7162703009298693}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.827544	valid_1's auc: 0.826681
[40]	training's auc: 0.841097	valid_1's auc: 0.840121
[60]	training's auc: 0.85369	valid_1's auc: 0.852429
[80]	training's auc: 0.863518	valid_1's auc: 0.86197
[100]	training's auc: 0.87491	valid_1's auc: 0.873379
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.87491	valid_1's auc: 0.873379
Validation AUC:0.8733789971055222
Validation f1 score:83.13491240689078


[32m[I 2021-04-18 14:34:51,865][0m Trial 15 finished with value: 0.8733789971055222 and parameters: {'learning_rate': 0.02968863415646677, 'colsample_bytree': 0.8823723255293743, 'subsample': 0.7534811998784636}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.856943	valid_1's auc: 0.855448
[40]	training's auc: 0.885251	valid_1's auc: 0.883617
[60]	training's auc: 0.899461	valid_1's auc: 0.897584
[80]	training's auc: 0.909808	valid_1's auc: 0.907082
[100]	training's auc: 0.917133	valid_1's auc: 0.914069
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.917133	valid_1's auc: 0.914069
Validation AUC:0.9140690762100223
Validation f1 score:87.15287837724112


[32m[I 2021-04-18 14:35:24,480][0m Trial 16 finished with value: 0.9140690762100223 and parameters: {'learning_rate': 0.08624471219693579, 'colsample_bytree': 0.679608155804861, 'subsample': 0.7206145515603901}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.834834	valid_1's auc: 0.833076
[40]	training's auc: 0.843144	valid_1's auc: 0.841884
[60]	training's auc: 0.849946	valid_1's auc: 0.848763
[80]	training's auc: 0.85605	valid_1's auc: 0.85451
[100]	training's auc: 0.862778	valid_1's auc: 0.861226
Did not meet early stopping. Best iteration is:
[99]	training's auc: 0.862784	valid_1's auc: 0.861227
Validation AUC:0.8612267825182177
Validation f1 score:80.84866330519687


[32m[I 2021-04-18 14:36:11,922][0m Trial 17 finished with value: 0.8612267825182177 and parameters: {'learning_rate': 0.02181692490669437, 'colsample_bytree': 0.6991335285988934, 'subsample': 0.7308787091959236}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.823003	valid_1's auc: 0.821064
[40]	training's auc: 0.823755	valid_1's auc: 0.822504
[60]	training's auc: 0.820469	valid_1's auc: 0.819205
[80]	training's auc: 0.823254	valid_1's auc: 0.821984
[100]	training's auc: 0.824945	valid_1's auc: 0.823808
Did not meet early stopping. Best iteration is:
[23]	training's auc: 0.828319	valid_1's auc: 0.826827
Validation AUC:0.826827263304218
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:36:43,332][0m Trial 18 finished with value: 0.826827263304218 and parameters: {'learning_rate': 0.0010050871156577655, 'colsample_bytree': 0.6434399507056876, 'subsample': 0.8052702352544749}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.841859	valid_1's auc: 0.840099
[40]	training's auc: 0.863432	valid_1's auc: 0.861502
[60]	training's auc: 0.872091	valid_1's auc: 0.869869
[80]	training's auc: 0.882032	valid_1's auc: 0.879235
[100]	training's auc: 0.89363	valid_1's auc: 0.890945
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.89363	valid_1's auc: 0.890945
Validation AUC:0.8909452226090759
Validation f1 score:83.97904758688418


[32m[I 2021-04-18 14:37:15,503][0m Trial 19 finished with value: 0.8909452226090759 and parameters: {'learning_rate': 0.04835474724925842, 'colsample_bytree': 0.48579442759410346, 'subsample': 0.9055597511476832}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.826001	valid_1's auc: 0.823456
[40]	training's auc: 0.843601	valid_1's auc: 0.840548
[60]	training's auc: 0.864669	valid_1's auc: 0.861394
[80]	training's auc: 0.880627	valid_1's auc: 0.877294
[100]	training's auc: 0.887491	valid_1's auc: 0.883513
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.887491	valid_1's auc: 0.883513
Validation AUC:0.8835126105756509
Validation f1 score:83.38742358254197


[32m[I 2021-04-18 14:37:55,534][0m Trial 20 finished with value: 0.8835126105756509 and parameters: {'learning_rate': 0.09872087091061325, 'colsample_bytree': 0.21847999618801583, 'subsample': 0.6911663593084817}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.855054	valid_1's auc: 0.853768
[40]	training's auc: 0.886889	valid_1's auc: 0.885543
[60]	training's auc: 0.901492	valid_1's auc: 0.899184
[80]	training's auc: 0.910247	valid_1's auc: 0.907584
[100]	training's auc: 0.917148	valid_1's auc: 0.913915
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.917148	valid_1's auc: 0.913915
Validation AUC:0.913914604179447
Validation f1 score:87.59614199454975


[32m[I 2021-04-18 14:39:01,688][0m Trial 21 finished with value: 0.913914604179447 and parameters: {'learning_rate': 0.08914901833251698, 'colsample_bytree': 0.8917995387023382, 'subsample': 0.5409769925146161}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.846059	valid_1's auc: 0.845443
[40]	training's auc: 0.863745	valid_1's auc: 0.862248
[60]	training's auc: 0.886145	valid_1's auc: 0.884332
[80]	training's auc: 0.898842	valid_1's auc: 0.896975
[100]	training's auc: 0.906429	valid_1's auc: 0.904197
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.906429	valid_1's auc: 0.904197
Validation AUC:0.9041974601146214
Validation f1 score:86.24631280480659


[32m[I 2021-04-18 14:39:48,839][0m Trial 22 finished with value: 0.9041974601146214 and parameters: {'learning_rate': 0.06050795486299561, 'colsample_bytree': 0.8261195425434218, 'subsample': 0.6391973767498468}. Best is trial 14 with value: 0.9145109201692941.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.859576	valid_1's auc: 0.858419
[40]	training's auc: 0.889776	valid_1's auc: 0.888189
[60]	training's auc: 0.904298	valid_1's auc: 0.901876
[80]	training's auc: 0.914034	valid_1's auc: 0.910679
[100]	training's auc: 0.920144	valid_1's auc: 0.916437
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920144	valid_1's auc: 0.916437
Validation AUC:0.9164365758321581
Validation f1 score:87.95398589828419


[32m[I 2021-04-18 14:40:23,980][0m Trial 23 finished with value: 0.9164365758321581 and parameters: {'learning_rate': 0.09932232379257026, 'colsample_bytree': 0.9242832082547825, 'subsample': 0.7667178763425719}. Best is trial 23 with value: 0.9164365758321581.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.839922	valid_1's auc: 0.838332
[40]	training's auc: 0.851944	valid_1's auc: 0.850638
[60]	training's auc: 0.862549	valid_1's auc: 0.860698
[80]	training's auc: 0.876543	valid_1's auc: 0.874654
[100]	training's auc: 0.886998	valid_1's auc: 0.885148
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.886998	valid_1's auc: 0.885148
Validation AUC:0.885148249380665
Validation f1 score:83.8084668118542


[32m[I 2021-04-18 14:41:10,531][0m Trial 24 finished with value: 0.885148249380665 and parameters: {'learning_rate': 0.03735019030806094, 'colsample_bytree': 0.7272975784022456, 'subsample': 0.7643826172710312}. Best is trial 23 with value: 0.9164365758321581.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.84186	valid_1's auc: 0.841151
[40]	training's auc: 0.868666	valid_1's auc: 0.867361
[60]	training's auc: 0.887538	valid_1's auc: 0.886351
[80]	training's auc: 0.899187	valid_1's auc: 0.897666
[100]	training's auc: 0.90571	valid_1's auc: 0.903771
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.90571	valid_1's auc: 0.903771
Validation AUC:0.9037711289173991
Validation f1 score:86.47825127861746


[32m[I 2021-04-18 14:41:54,389][0m Trial 25 finished with value: 0.9037711289173991 and parameters: {'learning_rate': 0.06379261150552702, 'colsample_bytree': 0.9147026576732995, 'subsample': 0.8615172361853989}. Best is trial 23 with value: 0.9164365758321581.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.833915	valid_1's auc: 0.832209
[40]	training's auc: 0.848609	valid_1's auc: 0.847321
[60]	training's auc: 0.856776	valid_1's auc: 0.85529
[80]	training's auc: 0.864887	valid_1's auc: 0.863064
[100]	training's auc: 0.872921	valid_1's auc: 0.870785
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.872921	valid_1's auc: 0.870785
Validation AUC:0.8707853963434129
Validation f1 score:81.91506588137354


[32m[I 2021-04-18 14:42:59,117][0m Trial 26 finished with value: 0.8707853963434129 and parameters: {'learning_rate': 0.028644869161465074, 'colsample_bytree': 0.6393722028427833, 'subsample': 0.7067940268031445}. Best is trial 23 with value: 0.9164365758321581.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.857101	valid_1's auc: 0.85543
[40]	training's auc: 0.888778	valid_1's auc: 0.887339
[60]	training's auc: 0.905178	valid_1's auc: 0.903276
[80]	training's auc: 0.914046	valid_1's auc: 0.911473
[100]	training's auc: 0.920555	valid_1's auc: 0.917147
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920555	valid_1's auc: 0.917147
Validation AUC:0.9171468688157753
Validation f1 score:87.89681137890868


[32m[I 2021-04-18 14:44:15,106][0m Trial 27 finished with value: 0.9171468688157753 and parameters: {'learning_rate': 0.09789100804571241, 'colsample_bytree': 0.8204468127501213, 'subsample': 0.7816910577829784}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.825129	valid_1's auc: 0.823788
[40]	training's auc: 0.833065	valid_1's auc: 0.832056
[60]	training's auc: 0.838578	valid_1's auc: 0.837603
[80]	training's auc: 0.845458	valid_1's auc: 0.844219
[100]	training's auc: 0.851269	valid_1's auc: 0.84989
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.851269	valid_1's auc: 0.84989
Validation AUC:0.8498896961905513
Validation f1 score:80.11450580397037


[32m[I 2021-04-18 14:45:21,375][0m Trial 28 finished with value: 0.8498896961905513 and parameters: {'learning_rate': 0.016018300963265283, 'colsample_bytree': 0.8090242984051945, 'subsample': 0.7872047294377177}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.817589	valid_1's auc: 0.816066
[40]	training's auc: 0.814012	valid_1's auc: 0.812718
[60]	training's auc: 0.812299	valid_1's auc: 0.810835
[80]	training's auc: 0.814582	valid_1's auc: 0.813186
[100]	training's auc: 0.81934	valid_1's auc: 0.818103
Did not meet early stopping. Best iteration is:
[23]	training's auc: 0.821385	valid_1's auc: 0.820354
Validation AUC:0.8203544252354271
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:46:11,878][0m Trial 29 finished with value: 0.8203544252354271 and parameters: {'learning_rate': 0.0011820093925574019, 'colsample_bytree': 0.7681870159882112, 'subsample': 0.8920941317547995}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.842488	valid_1's auc: 0.841719
[40]	training's auc: 0.869265	valid_1's auc: 0.867991
[60]	training's auc: 0.889602	valid_1's auc: 0.888513
[80]	training's auc: 0.898874	valid_1's auc: 0.8972
[100]	training's auc: 0.906576	valid_1's auc: 0.904183
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.906576	valid_1's auc: 0.904183
Validation AUC:0.9041831059196701
Validation f1 score:86.27892558511476


[32m[I 2021-04-18 14:46:41,766][0m Trial 30 finished with value: 0.9041831059196701 and parameters: {'learning_rate': 0.06366279082364887, 'colsample_bytree': 0.9360905164789527, 'subsample': 0.9454685164172257}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.85344	valid_1's auc: 0.852351
[40]	training's auc: 0.889372	valid_1's auc: 0.887632
[60]	training's auc: 0.901803	valid_1's auc: 0.899586
[80]	training's auc: 0.912701	valid_1's auc: 0.909961
[100]	training's auc: 0.920133	valid_1's auc: 0.916621
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920133	valid_1's auc: 0.916621
Validation AUC:0.9166210749556316
Validation f1 score:87.7279282465797


[32m[I 2021-04-18 14:47:14,604][0m Trial 31 finished with value: 0.9166210749556316 and parameters: {'learning_rate': 0.09634752607773946, 'colsample_bytree': 0.8669431725406348, 'subsample': 0.6670408965693327}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.855356	valid_1's auc: 0.854496
[40]	training's auc: 0.888616	valid_1's auc: 0.887062
[60]	training's auc: 0.903936	valid_1's auc: 0.9016
[80]	training's auc: 0.913545	valid_1's auc: 0.910686
[100]	training's auc: 0.920439	valid_1's auc: 0.916961
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920439	valid_1's auc: 0.916961
Validation AUC:0.9169606802048456
Validation f1 score:87.87599225439253


[32m[I 2021-04-18 14:47:51,626][0m Trial 32 finished with value: 0.9169606802048456 and parameters: {'learning_rate': 0.0984052617227734, 'colsample_bytree': 0.8606086466611862, 'subsample': 0.6563684928689014}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.832809	valid_1's auc: 0.832544
[40]	training's auc: 0.848058	valid_1's auc: 0.847734
[60]	training's auc: 0.866572	valid_1's auc: 0.865811
[80]	training's auc: 0.881826	valid_1's auc: 0.880788
[100]	training's auc: 0.893259	valid_1's auc: 0.892173
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.893259	valid_1's auc: 0.892173
Validation AUC:0.8921728362218544
Validation f1 score:84.7278593726696


[32m[I 2021-04-18 14:48:20,576][0m Trial 33 finished with value: 0.8921728362218544 and parameters: {'learning_rate': 0.04137784591544719, 'colsample_bytree': 0.8464091393779001, 'subsample': 0.656194669733061}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.856979	valid_1's auc: 0.855391
[40]	training's auc: 0.891683	valid_1's auc: 0.889706
[60]	training's auc: 0.904329	valid_1's auc: 0.901573
[80]	training's auc: 0.913383	valid_1's auc: 0.910105
[100]	training's auc: 0.920239	valid_1's auc: 0.916583
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920239	valid_1's auc: 0.916583
Validation AUC:0.9165834741870955
Validation f1 score:87.75746433690318


[32m[I 2021-04-18 14:49:05,995][0m Trial 34 finished with value: 0.9165834741870955 and parameters: {'learning_rate': 0.09831015180910678, 'colsample_bytree': 0.7658477939815294, 'subsample': 0.5651558948000437}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.823925	valid_1's auc: 0.822355
[40]	training's auc: 0.82731	valid_1's auc: 0.826027
[60]	training's auc: 0.830195	valid_1's auc: 0.828914
[80]	training's auc: 0.834172	valid_1's auc: 0.832908
[100]	training's auc: 0.839313	valid_1's auc: 0.838236
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.839313	valid_1's auc: 0.838236
Validation AUC:0.8382358578333062
Validation f1 score:78.47959630131344


[32m[I 2021-04-18 14:49:42,586][0m Trial 35 finished with value: 0.8382358578333062 and parameters: {'learning_rate': 0.007911826717862768, 'colsample_bytree': 0.75122993907536, 'subsample': 0.5757973644013197}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.847483	valid_1's auc: 0.845989
[40]	training's auc: 0.872405	valid_1's auc: 0.870646
[60]	training's auc: 0.892327	valid_1's auc: 0.890294
[80]	training's auc: 0.901932	valid_1's auc: 0.899347
[100]	training's auc: 0.910059	valid_1's auc: 0.907187
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.910059	valid_1's auc: 0.907187
Validation AUC:0.9071868469575477
Validation f1 score:86.49631006196294


[32m[I 2021-04-18 14:50:12,580][0m Trial 36 finished with value: 0.9071868469575477 and parameters: {'learning_rate': 0.06914826854701721, 'colsample_bytree': 0.7964480054110606, 'subsample': 0.5631482062042256}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.839863	valid_1's auc: 0.839179
[40]	training's auc: 0.859326	valid_1's auc: 0.85831
[60]	training's auc: 0.877826	valid_1's auc: 0.876824
[80]	training's auc: 0.891619	valid_1's auc: 0.89027
[100]	training's auc: 0.900644	valid_1's auc: 0.898972
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.900644	valid_1's auc: 0.898972
Validation AUC:0.8989723525963975
Validation f1 score:85.67782116452123


[32m[I 2021-04-18 14:51:13,137][0m Trial 37 finished with value: 0.8989723525963975 and parameters: {'learning_rate': 0.051852492624258374, 'colsample_bytree': 0.8438167587880953, 'subsample': 0.5195868295653016}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.856932	valid_1's auc: 0.855546
[40]	training's auc: 0.890737	valid_1's auc: 0.888823
[60]	training's auc: 0.904689	valid_1's auc: 0.901762
[80]	training's auc: 0.913132	valid_1's auc: 0.909635
[100]	training's auc: 0.920239	valid_1's auc: 0.916397
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.920239	valid_1's auc: 0.916397
Validation AUC:0.916396611715711
Validation f1 score:87.74703083007044


[32m[I 2021-04-18 14:51:53,746][0m Trial 38 finished with value: 0.916396611715711 and parameters: {'learning_rate': 0.0990172452413256, 'colsample_bytree': 0.7734126989852521, 'subsample': 0.676245554113416}. Best is trial 27 with value: 0.9171468688157753.[0m


Training until validation scores don't improve for 100 rounds
[20]	training's auc: 0.818636	valid_1's auc: 0.816786
[40]	training's auc: 0.819014	valid_1's auc: 0.817724
[60]	training's auc: 0.818987	valid_1's auc: 0.817784
[80]	training's auc: 0.823302	valid_1's auc: 0.822252
[100]	training's auc: 0.825835	valid_1's auc: 0.824951
Did not meet early stopping. Best iteration is:
[99]	training's auc: 0.825931	valid_1's auc: 0.825076
Validation AUC:0.8250757199636066
Validation f1 score:66.23195524719897


[32m[I 2021-04-18 14:52:23,940][0m Trial 39 finished with value: 0.8250757199636066 and parameters: {'learning_rate': 0.002048925984772501, 'colsample_bytree': 0.7025974840379042, 'subsample': 0.46060154599361697}. Best is trial 27 with value: 0.9171468688157753.[0m


score: 0.9171468688157753
Best hyperparameters: {'learning_rate': 0.09789100804571241, 'colsample_bytree': 0.8204468127501213, 'subsample': 0.7816910577829784}


## Model Validation on test data

In [263]:
# Load best model
lgb_best_model = mlflow.sklearn.load_model("./mlflow-run/5ae87c7ebe0346daac0d0b362d1327c7/artifacts/model")
X_test = X_test[features]

# Make prediction aganist Validation data
lgb_best_val_prediction = lgb_best_model.predict(X_test)

fpr, tpr, thresholds = roc_curve(y_test, lgb_best_val_prediction)
roc_auc = auc(fpr, tpr) # compute area under the curve
print("=====================================")
print("Test AUC:{}".format(roc_auc))
print("=====================================")   

Validation AUC:0.6707697944535749
