# <font color=#6026B2>**Projet 7 : Implémenter un modèle de scoring**</font>

## Import des librairies

In [1]:
import numpy as np
import pandas as pd
import gc
import time
import re
from contextlib import contextmanager
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.dummy import DummyClassifier
from lightgbm import LGBMClassifier, early_stopping

import mlflow
#import mlflow.sklearn
from mlflow.models import infer_signature

from bayes_opt import BayesianOptimization

#import warnings
#warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# URL MLFlow
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

## EDA

## Feature Extraction

### Data train

In [3]:
num_rows = None
nan_as_category = False

# Read data and merge
train_df = pd.read_csv('Input_data/application_train.csv', nrows= num_rows)
test_df = pd.read_csv('Input_data/application_test.csv', nrows= num_rows)
print("Train samples: {}, test samples: {}".format(len(train_df), len(test_df)))
df = pd.concat([train_df, test_df]).reset_index(drop=True)
# Optional: Remove 4 applications with XNA CODE_GENDER (train set)
df = df[df['CODE_GENDER'] != 'XNA']

# Categorical features with Binary encode (0 or 1; two categories)
for bin_feature in ['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY']:
    df[bin_feature], uniques = pd.factorize(df[bin_feature])

# Categorical features with One-Hot encode
original_columns = list(df.columns)
categorical_columns = [col for col in df.columns if df[col].dtype == 'object']

# instancie objet OHE
onehot_encoder = OneHotEncoder(sparse_output=False)

# Utilise OHE
df_new_cols = onehot_encoder.fit_transform(df[categorical_columns])

# Créez un DataFrame avec les nouvelles colonnes encodées
encoded_df = pd.DataFrame(df_new_cols, columns=onehot_encoder.get_feature_names_out(categorical_columns))

# Supprimer les colonnes catégorielles d'origine du DataFrame initial
df = df.drop(categorical_columns, axis=1)

# Concaténez le DataFrame d'origine avec le DataFrame contenant les nouvelles colonnes encodées
df = pd.concat([df, encoded_df], axis=1)

# NaN values for DAYS_EMPLOYED: 365.243 -> nan
df['DAYS_EMPLOYED'].replace(365243, np.nan, inplace= True)
# Some simple new features (percentages)
df['DAYS_EMPLOYED_PERC'] = df['DAYS_EMPLOYED'] / df['DAYS_BIRTH']
df['INCOME_CREDIT_PERC'] = df['AMT_INCOME_TOTAL'] / df['AMT_CREDIT']
df['INCOME_PER_PERSON'] = df['AMT_INCOME_TOTAL'] / df['CNT_FAM_MEMBERS']
df['ANNUITY_INCOME_PERC'] = df['AMT_ANNUITY'] / df['AMT_INCOME_TOTAL']
df['PAYMENT_RATE'] = df['AMT_ANNUITY'] / df['AMT_CREDIT']

Train samples: 307511, test samples: 48744


In [4]:
df.dtypes.value_counts()

float64    253
Name: count, dtype: int64

## Modelisation

### Baseline

In [5]:
# Initialiser expérience MLflow
mlflow.set_experiment("Baseline")  # Nommer l'expérience

# Divide in training/validation and test data
train_df = df[df['TARGET'].notnull()]
test_df = df[df['TARGET'].isnull()]

print (train_df.shape)
print (test_df.shape)

# Séparation de la variable cible et des features
y = train_df["TARGET"]
X = train_df.drop("TARGET", axis=1)

# Créer et entraîner un modèle de baseline avec DummyClassifier
dummy_clf = DummyClassifier(strategy="most_frequent")
dummy_clf.fit(X, y)

# Faire des prédictions sur les données d'entraînement
y_pred = dummy_clf.predict(X)
y_proba = dummy_clf.predict_proba(X)[:, 1]

# Calculer les métriques
accuracy = accuracy_score(y, y_pred)
recall = recall_score(y, y_pred)
precision = precision_score(y, y_pred)
f1 = f1_score(y, y_pred)
auc = roc_auc_score(y, y_proba)
conf_matrix = confusion_matrix(y, y_pred)

# Démarrer un nouveau run dans MLflow
with mlflow.start_run():
    
    # Enregistrer le modèle DummyClassifier
    mlflow.sklearn.log_model(dummy_clf, "dummy_classifier")

    # Enregistrer les métriques dans MLflow
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("F1 Score", f1)
    mlflow.log_metric("AUC", auc)

    # Enregistrer la matrice de confusion comme artefact
    with open("confusion_matrix.txt", "w") as f:
        f.write(str(conf_matrix))
    mlflow.log_artifact("confusion_matrix.txt")

    # Enregistrer la stratégie utilisée pour DummyClassifier comme paramètre
    mlflow.log_param("strategy", "most_frequent")

(307507, 253)
(48748, 253)


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:02:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run thundering-wolf-787 at: http://127.0.0.1:8080/#/experiments/926137080585531510/runs/a03151ed88c74ca59dbdff8747299566.
2024/09/19 19:02:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/926137080585531510.


### LGBMclassifier

In [6]:
# Initialiser expérience MLflow
mlflow.set_experiment("LGBMClassifier")  # Nommer l'expérience

x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.15, stratify=y, random_state=88)
x_test = test_df
print('Train shape:', x_train.shape)
print('Test shape:', x_test.shape)
print('Valid shape:', x_valid.shape)

# remove caractères spéciaux
x_train = x_train.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
x_test = x_test.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
x_valid = x_valid.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))

# On initialise le dictionnaire des hyperparamètres du modèle avec des valeurs arbitraires
param_V0 = {
        'n_estimators': 300,
        'num_leaves': 15,
        'colsample_bytree': 0.8,
        'subsample': 0.8,
        'max_depth': 7,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'min_split_gain': 0.01
        }

# Initialisation du modèle
clf = LGBMClassifier(**param_V0)

#Entraînement du modèle
clf.fit(x_train,y_train, eval_metric='auc')

# Prédictions sur le jeu de validation
y_pred = clf.predict(x_valid)
y_proba = clf.predict_proba(x_valid)[:, 1]

# Calculer les métriques sur le jeu de validation
accuracy = accuracy_score(y_valid, y_pred)
recall = recall_score(y_valid, y_pred)
precision = precision_score(y_valid, y_pred)
f1 = f1_score(y_valid, y_pred)
auc = roc_auc_score(y_valid, y_proba)
conf_matrix = confusion_matrix(y_valid, y_pred)

# Calculer l'AUROC avec validation croisée
auroc_cv = cross_val_score(clf, x_train, y_train, cv=5, scoring='roc_auc')
print(f"AUROC moyen en validation croisée: {auroc_cv.mean()}")

# Démarrer un nouveau run dans MLflow
with mlflow.start_run():
    
    # Enregistrer le modèle LGBMClassifier
    mlflow.sklearn.log_model(clf, "LGBMClassifier", registered_model_name="V0")

    # Enregistrer les métriques dans MLflow
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("F1 Score", f1)
    mlflow.log_metric("AUC", auc)
    mlflow.log_metric("AUROC_CV", auroc_cv.mean())  # AUROC avec validation croisée

    # Enregistrer la matrice de confusion comme artefact
    with open("confusion_matrix.txt", "w") as f:
        f.write(str(conf_matrix))
    mlflow.log_artifact("confusion_matrix.txt")

    # Enregistrer les hyperparamètres dans MLflow
    mlflow.log_params(param_V0)

Train shape: (261380, 252)
Test shape: (48748, 253)
Valid shape: (46127, 252)
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
[LightGBM] [Info] Number of positive: 16881, number of negative: 192223
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12975
[LightGBM] [Info] Number of

Registered model 'V0' already exists. Creating a new version of this model...
2024/09/19 19:03:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: V0, version 6
Created version '6' of model 'V0'.
2024/09/19 19:03:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run fortunate-bird-558 at: http://127.0.0.1:8080/#/experiments/900250469276927826/runs/9a7d691f0e674c78a75bf726ddce4640.
2024/09/19 19:03:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/900250469276927826.


In [7]:
# Initialiser expérience MLflow
mlflow.set_experiment("LGBMClassifier_CustomLoss")  # Nommer l'expérience

# Fonction de perte personnalisée (pondérer FN 10 fois plus que FP)
def custom_loss(y_true, y_pred):
    # Convertir les probabilités en prédictions binaires
    y_pred_binary = np.where(y_pred > 0.5, 1, 0)

    # Calcul de la matrice de confusion
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred_binary).ravel()

    # Calcul du coût pondéré
    cost = 10 * fn + fp  # Faux négatif = 10 fois plus coûteux que faux positif
    
    # Retourner le coût
    return 'custom_cost', cost, False

# Initialisation du modèle avec une fonction de perte personnalisée
clf = LGBMClassifier(**param_V0)

# Entraînement du modèle en utilisant la fonction de perte personnalisée
clf.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], eval_metric=custom_loss, callbacks=[early_stopping(stopping_rounds=10)]) 

# Prédictions sur le jeu de validation
y_pred = clf.predict(x_valid)
y_proba = clf.predict_proba(x_valid)[:, 1]

# Calculer les métriques sur le jeu de validation
accuracy = accuracy_score(y_valid, y_pred)
recall = recall_score(y_valid, y_pred)
precision = precision_score(y_valid, y_pred)
f1 = f1_score(y_valid, y_pred)
auc = roc_auc_score(y_valid, y_proba)
conf_matrix = confusion_matrix(y_valid, y_pred)

# Calculer l'AUROC avec validation croisée
auroc_cv = cross_val_score(clf, x_train, y_train, cv=5, scoring='roc_auc')
print(f"AUROC moyen en validation croisée: {auroc_cv.mean()}")

# Démarrer un nouveau run dans MLflow
with mlflow.start_run():
    
    # Enregistrer le modèle LGBMClassifier
    mlflow.sklearn.log_model(clf, "LGBMClassifier_CustomLoss", registered_model_name="V0_CustomLoss")

    # Enregistrer les métriques dans MLflow
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("F1 Score", f1)
    mlflow.log_metric("AUC", auc)
    mlflow.log_metric("AUROC_CV", auroc_cv.mean())  # AUROC avec validation croisée

    # Enregistrer la matrice de confusion comme artefact
    with open("confusion_matrix.txt", "w") as f:
        f.write(str(conf_matrix))
    mlflow.log_artifact("confusion_matrix.txt")

    # Enregistrer les hyperparamètres dans MLflow
    mlflow.log_params(param_V0)


[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.172053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.27581	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Number of positive: 16881, number of negative: 192223
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.033837 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12975
[LightGBM] [Info] Number of data points in the train set: 209104, number of used features: 241
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467
[LightGBM] [Info] Number of positive: 16881, number of negative: 192223
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12978
[LightGBM] [Info] Number of data points in the train set: 209104, number of used features: 242
[LightGBM]

Successfully registered model 'V0_CustomLoss'.
2024/09/19 19:03:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: V0_CustomLoss, version 1
Created version '1' of model 'V0_CustomLoss'.
2024/09/19 19:03:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run redolent-horse-507 at: http://127.0.0.1:8080/#/experiments/499045072515100634/runs/6b27cd0f637845f2a3f57d94f3305818.
2024/09/19 19:03:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/499045072515100634.


### Optimisation bayésienne

In [8]:
# Créer une liste pour stocker les résultats intermédiaires
intermediate_results = []

# Initialiser MLflow
mlflow.set_experiment("LightGBM-Model-Optimization")  # Nommer l'expérience

# Démarrer une nouvelle exécution
with mlflow.start_run(run_name="LightGBM BayesOpt - Hyperparameter Tuning") as run:

    # Fonction d'évaluation à optimiser
    def lgb_evaluate(n_estimators, num_leaves, colsample_bytree, subsample, max_depth, reg_alpha, reg_lambda, min_split_gain):
        # Définir le modèle avec les hyperparamètres à optimiser
        clf = LGBMClassifier(
            n_estimators=int(n_estimators),          
            num_leaves=int(num_leaves),              
            colsample_bytree=max(min(colsample_bytree, 1), 0), 
            subsample=max(min(subsample, 1), 0),     
            max_depth=int(max_depth),                
            reg_alpha=max(reg_alpha, 0),            
            reg_lambda=max(reg_lambda, 0),           
            min_split_gain=min_split_gain           
        )

        # Entraîner le modèle sur l'ensemble d'entraînement
        clf.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], eval_metric=custom_loss, callbacks=[early_stopping(stopping_rounds=10)])

        # Prédire les probabilités sur le jeu de validation
        y_proba_val = clf.predict_proba(x_valid)[:, 1]
        y_pred = clf.predict(x_valid)

        # Calculer les taux de FP et FN
        recall = recall_score(y_valid, y_pred)
        precision = precision_score(y_valid, y_pred)
        f1 = f1_score(y_valid, y_pred)
        
        # Calculer l'AUC sur le jeu de validation
        auc = roc_auc_score(y_valid, y_proba_val)

        with mlflow.start_run(nested=True):
            # Loguer les métriques dans MLflow
            mlflow.log_metric("AUC", auc)
            mlflow.log_metric("Recall", recall)
            mlflow.log_metric("Precision", precision)
            mlflow.log_metric("F1 Score", f1)

            # Loguer les hyperparamètres dans MLflow
            mlflow.log_param("n_estimators", int(n_estimators))
            mlflow.log_param("num_leaves", int(num_leaves))
            mlflow.log_param("colsample_bytree", colsample_bytree)
            mlflow.log_param("subsample", subsample)
            mlflow.log_param("max_depth", int(max_depth))
            mlflow.log_param("reg_alpha", reg_alpha)
            mlflow.log_param("reg_lambda", reg_lambda)
            mlflow.log_param("min_split_gain", min_split_gain)
        
        return auc

    # Définir les limites de recherche pour chaque hyperparamètre
    param_V1 = {
                'n_estimators': (100, 1000),              # Nombre d'estimations entre 100 et 1000
                'num_leaves': (10, 100),                  # Nombre de feuilles entre 10 et 100
                'colsample_bytree': (0.5, 1),             # Pourcentage d'échantillons par arbre
                'subsample': (0.5, 1),                    # Pourcentage d'échantillons dans chaque arbre
                'max_depth': (5, 20),                     # Profondeur maximale de l'arbre
                'reg_alpha': (0, 1),                      # Régularisation L1
                'reg_lambda': (0, 1),                     # Régularisation L2
                'min_split_gain': (0, 0.1)                # Gain minimal pour diviser un nœud
                }

    # Instancier l'optimiseur bayésien
    optimizer = BayesianOptimization(f=lgb_evaluate, pbounds=param_V1, random_state=88)

    # Lancer l'optimisation bayésienne
    optimizer.maximize(init_points=5, n_iter=25)
    
    # Enregistrer les meilleurs hyperparamètres obtenus dans MLflow
    best_params = optimizer.max['params']
    mlflow.log_params(best_params)
    
    # Enregistrer la meilleure performance (AUC) obtenue dans MLflow
    mlflow.log_metric("Best AUC", optimizer.max['target'])

|   iter    |  target   | colsam... | max_depth | min_sp... | n_esti... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041206 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274822	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run shivering-stork-605 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/4ea755ff31774e12908d7a122137acd0.
2024/09/19 19:08:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m1        [0m | [0m0.7167   [0m | [0m0.8238   [0m | [0m12.61    [0m | [0m0.05283  [0m | [0m906.7    [0m | [0m73.0     [0m | [0m0.7143   [0m | [0m0.7173   [0m | [0m0.6114   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.129263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.276683	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run unique-gnat-226 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/d7a92e32058b44e094b1a6531c8f8c7b.
2024/09/19 19:08:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m2        [0m | [0m0.6844   [0m | [0m0.5876   [0m | [0m11.85    [0m | [0m0.09287  [0m | [0m108.9    [0m | [0m18.09    [0m | [0m0.8502   [0m | [0m0.4856   [0m | [0m0.9384   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047646 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.276076	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run melodic-carp-670 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/834b7b7983884fccb9b888d7a38377fc.
2024/09/19 19:08:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m3        [0m | [0m0.6923   [0m | [0m0.6537   [0m | [0m10.82    [0m | [0m0.05814  [0m | [0m202.3    [0m | [0m79.11    [0m | [0m0.3126   [0m | [0m0.646    [0m | [0m0.5163   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062967 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.276018	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run fearless-conch-990 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/6d97b14f28b540a28b303494e30a564f.
2024/09/19 19:08:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m4        [0m | [0m0.6912   [0m | [0m0.5832   [0m | [0m11.45    [0m | [0m0.02278  [0m | [0m969.7    [0m | [0m91.44    [0m | [0m0.8657   [0m | [0m0.03216  [0m | [0m0.8997   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051428 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274707	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run honorable-yak-714 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/07a79f5dbc4b408688c4183128c9bae2.
2024/09/19 19:08:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m5        [0m | [0m0.7159   [0m | [0m0.9531   [0m | [0m15.56    [0m | [0m0.02297  [0m | [0m470.3    [0m | [0m98.08    [0m | [0m0.329    [0m | [0m0.5955   [0m | [0m0.7679   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041412 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274635	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run respected-boar-694 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/3d57e703c5de4b7e969bc92aa98b5cf7.
2024/09/19 19:08:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [95m6        [0m | [95m0.7178   [0m | [95m0.8016   [0m | [95m15.22    [0m | [95m0.0949   [0m | [95m468.2    [0m | [95m96.61    [0m | [95m0.1101   [0m | [95m0.9599   [0m | [95m0.7319   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.180217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274707	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run clean-snake-506 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/77a77ba5cec747e384d66d307eac471c.
2024/09/19 19:08:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m7        [0m | [0m0.7169   [0m | [0m0.8209   [0m | [0m13.63    [0m | [0m0.001519 [0m | [0m439.3    [0m | [0m86.88    [0m | [0m0.8864   [0m | [0m0.4162   [0m | [0m0.6807   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274881	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:08:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run loud-horse-201 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/95a23c52f5834ec1b45bb157b4211fdf.
2024/09/19 19:08:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m8        [0m | [0m0.7148   [0m | [0m0.7113   [0m | [0m9.334    [0m | [0m0.0408   [0m | [0m466.3    [0m | [0m60.66    [0m | [0m0.04462  [0m | [0m0.5043   [0m | [0m0.7964   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067390 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274961	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run unequaled-skink-162 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/e23b97515d2b446b91c7dcd381b7734d.
2024/09/19 19:09:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m9        [0m | [0m0.7137   [0m | [0m0.9619   [0m | [0m16.32    [0m | [0m0.08567  [0m | [0m865.1    [0m | [0m61.56    [0m | [0m0.2863   [0m | [0m0.9691   [0m | [0m0.6945   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.046902 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275393	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run spiffy-rook-478 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/b49d189132894e4ebd4e51e439642f1b.
2024/09/19 19:09:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m10       [0m | [0m0.7059   [0m | [0m0.8837   [0m | [0m6.675    [0m | [0m0.02004  [0m | [0m899.8    [0m | [0m26.33    [0m | [0m0.5089   [0m | [0m0.2457   [0m | [0m0.8501   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047827 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275395	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run unique-penguin-340 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/48a646abea3346df8b349eb359a459f4.
2024/09/19 19:09:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m11       [0m | [0m0.7089   [0m | [0m0.887    [0m | [0m5.311    [0m | [0m0.007111 [0m | [0m885.6    [0m | [0m99.68    [0m | [0m0.6429   [0m | [0m0.4088   [0m | [0m0.5021   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.289215 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.27831	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run rare-owl-35 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/311af99e369444d6902ee390735f9b86.
2024/09/19 19:09:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m12       [0m | [0m0.6363   [0m | [0m0.5135   [0m | [0m14.63    [0m | [0m0.02852  [0m | [0m810.2    [0m | [0m61.63    [0m | [0m0.2155   [0m | [0m0.9324   [0m | [0m0.7381   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.036130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278689	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run silent-grouse-622 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/cb3347662861415ea603131534341351.
2024/09/19 19:09:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m13       [0m | [0m0.6237   [0m | [0m0.5256   [0m | [0m19.95    [0m | [0m0.02003  [0m | [0m401.5    [0m | [0m15.81    [0m | [0m0.9745   [0m | [0m0.3998   [0m | [0m0.5038   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.035570 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278922	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run tasteful-bug-154 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/5216fa11bee94e13879617246f6a739d.
2024/09/19 19:09:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m14       [0m | [0m0.6249   [0m | [0m0.5      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m551.7    [0m | [0m29.35    [0m | [0m0.0      [0m | [0m1.0      [0m | [0m0.5      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275962	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run colorful-gull-875 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/c3b2f99e88e643f58fa66cd2bf89255d.
2024/09/19 19:09:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m15       [0m | [0m0.6931   [0m | [0m0.7584   [0m | [0m19.48    [0m | [0m0.001115 [0m | [0m998.2    [0m | [0m12.3     [0m | [0m0.5706   [0m | [0m0.9847   [0m | [0m0.7855   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.061651 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278922	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run unleashed-snail-976 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/2c99e4896f2f496f91916618ae98720e.
2024/09/19 19:09:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m16       [0m | [0m0.6245   [0m | [0m0.5      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m291.9    [0m | [0m100.0    [0m | [0m0.0      [0m | [0m1.0      [0m | [0m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.124520 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278922	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run bemused-stork-445 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/2cad5f8699864415a4fb46f44d77227f.
2024/09/19 19:09:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m17       [0m | [0m0.6245   [0m | [0m0.5      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m100.0    [0m | [0m100.0    [0m | [0m0.0      [0m | [0m1.0      [0m | [0m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039910 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.276085	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run able-cow-21 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/488f9743dfa14144a21b507c72b80775.
2024/09/19 19:09:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m18       [0m | [0m0.6926   [0m | [0m1.0      [0m | [0m20.0     [0m | [0m0.0      [0m | [0m198.1    [0m | [0m10.0     [0m | [0m1.0      [0m | [0m1.0      [0m | [0m0.5      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.046922 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[71]	valid_0's binary_logloss: 0.245727	valid_0's custom_cost: 36434


2024/09/19 19:09:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run sedate-shrew-568 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/acc0495beb02462085d8b7f7d6756bc7.
2024/09/19 19:09:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [95m19       [0m | [95m0.7587   [0m | [95m1.0      [0m | [95m15.7     [0m | [95m0.005677 [0m | [95m669.9    [0m | [95m100.0    [0m | [95m0.0      [0m | [95m0.0      [0m | [95m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054947 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.27497	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run grandiose-auk-513 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/1091f1eaac8f41ac9c997fbb1e1857d4.
2024/09/19 19:09:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m20       [0m | [0m0.7137   [0m | [0m0.9828   [0m | [0m20.0     [0m | [0m0.0      [0m | [0m678.7    [0m | [0m58.53    [0m | [0m0.6726   [0m | [0m0.0      [0m | [0m0.9935   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.034738 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278922	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:09:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run kindly-bird-267 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/2d6c9b449e434f72a99dd6c763948d79.
2024/09/19 19:09:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m21       [0m | [0m0.6245   [0m | [0m0.5      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m633.6    [0m | [0m100.0    [0m | [0m0.0      [0m | [0m1.0      [0m | [0m0.5      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079671 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[51]	valid_0's binary_logloss: 0.245853	valid_0's custom_cost: 36636


2024/09/19 19:10:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run zealous-cub-33 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/b0bd4382daab478694eb92b5ae91ba9d.
2024/09/19 19:10:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [95m22       [0m | [95m0.7593   [0m | [95m1.0      [0m | [95m20.0     [0m | [95m0.0      [0m | [95m692.0    [0m | [95m99.96    [0m | [95m0.06498  [0m | [95m0.0      [0m | [95m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275402	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run enthused-ant-331 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/47fb3154fbfc4f51b687008bec5dab3c.
2024/09/19 19:10:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m23       [0m | [0m0.7089   [0m | [0m0.9393   [0m | [0m5.655    [0m | [0m0.06252  [0m | [0m684.8    [0m | [0m82.98    [0m | [0m0.5077   [0m | [0m0.5914   [0m | [0m0.6239   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.179140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.274677	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run auspicious-pig-424 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/bfc3de74ba524b749dd9f51aeda57727.
2024/09/19 19:10:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m24       [0m | [0m0.7172   [0m | [0m0.6921   [0m | [0m16.95    [0m | [0m0.03689  [0m | [0m726.6    [0m | [0m98.26    [0m | [0m0.3986   [0m | [0m0.6616   [0m | [0m0.9722   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.033466 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.279354	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run skillful-crane-759 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/0031fa91d68e4d31865d402e18e125ce.
2024/09/19 19:10:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m25       [0m | [0m0.5975   [0m | [0m0.5      [0m | [0m20.0     [0m | [0m0.0      [0m | [0m694.1    [0m | [0m10.0     [0m | [0m0.0      [0m | [0m0.0      [0m | [0m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043870 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275241	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run rebellious-steed-644 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/7aa6f9b2e78046db8eb8d823d6fa4261.
2024/09/19 19:10:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m26       [0m | [0m0.7107   [0m | [0m0.9312   [0m | [0m16.73    [0m | [0m0.02083  [0m | [0m945.9    [0m | [0m39.75    [0m | [0m0.6323   [0m | [0m0.1979   [0m | [0m0.5715   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038618 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.278921	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run clean-croc-372 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/acd06fc02c784c2b8abe7691cb916eb6.
2024/09/19 19:10:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m27       [0m | [0m0.6245   [0m | [0m0.5      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m392.6    [0m | [0m100.0    [0m | [0m0.0      [0m | [0m0.6099   [0m | [0m0.7121   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083602 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.276071	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run clean-shark-600 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/b19b2b2b386f4e958bbf43a88b1fe48d.
2024/09/19 19:10:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m28       [0m | [0m0.6926   [0m | [0m1.0      [0m | [0m20.0     [0m | [0m0.0      [0m | [0m262.8    [0m | [0m10.0     [0m | [0m0.0      [0m | [0m0.0      [0m | [0m1.0      [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275959	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run clumsy-cat-499 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/8e0de41360c84af99ffe5ef6bdc84cd9.
2024/09/19 19:10:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m29       [0m | [0m0.6931   [0m | [0m0.7672   [0m | [0m16.61    [0m | [0m0.07404  [0m | [0m861.0    [0m | [0m12.83    [0m | [0m0.8428   [0m | [0m0.6179   [0m | [0m0.5994   [0m |
[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047373 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.275398	valid_0's custom_cost: 37240


  _warn_prf(average, modifier, msg_start, len(result))
2024/09/19 19:10:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-hound-936 at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/460d6cefef484c99973a6283b72515ce.
2024/09/19 19:10:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.
2024/09/19 19:10:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run LightGBM BayesOpt - Hyperparameter Tuning at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/8f34d48226de4c7babfcb7e223d49a27.
2024/09/19 19:10:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.


| [0m30       [0m | [0m0.709    [0m | [0m1.0      [0m | [0m5.0      [0m | [0m0.1      [0m | [0m1e+03    [0m | [0m56.65    [0m | [0m0.0      [0m | [0m1.0      [0m | [0m1.0      [0m |


In [10]:
# Hyperparamètres optimisés
param_V2 = {
            'n_estimators': int(optimizer.max['params']['n_estimators']),
            'num_leaves': int(optimizer.max['params']['num_leaves']),
            'colsample_bytree': optimizer.max['params']['colsample_bytree'],
            'subsample': optimizer.max['params']['subsample'],
            'max_depth': int(optimizer.max['params']['max_depth']),
            'reg_alpha': optimizer.max['params']['reg_alpha'],
            'reg_lambda': optimizer.max['params']['reg_lambda'],
            'min_split_gain': optimizer.max['params']['min_split_gain']
            }

# Démarrer une session MLflow
with mlflow.start_run(run_name="LightGBM BayesOpt - Hyperparameter Tuning") as run:    
    
    # Instanciation du modèle avec les hyperparamètres optimisés
    clf_V2 = LGBMClassifier(**param_V2)

    # Entraîner le modèle avec les données d'entraînement
    clf_V2.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], eval_metric=custom_loss, callbacks=[early_stopping(stopping_rounds=10)])

    # Prédire sur le jeu de validation et calculer l'AUC
    y_proba_val = clf_V2.predict_proba(x_valid)[:, 1]
    auc_score = roc_auc_score(y_valid, y_proba_val)

    print(f'AUC sur le jeu de validation: {auc_score}')
    
    # Prédire les classes sur le jeu de validation
    y_pred = clf_V2.predict(x_valid)

    # Obtenir la matrice de confusion
    tn, fp, fn, tp = confusion_matrix(y_valid, y_pred).ravel()

    # Calculer les taux de FP et FN
    taux_fp = fp / (fp + tn)
    taux_fn = fn / (fn + tp)
    recall = recall_score(y_valid, y_pred)
    precision = precision_score(y_valid, y_pred)
    specificity = tn / (tn + fp)
    f1 = f1_score(y_valid, y_pred)
    cm = confusion_matrix(y_valid, y_pred)

    print(f"Taux de Faux Positifs (FP): {taux_fp}")
    print(f"Taux de Faux Négatifs (FN): {taux_fn}")
    print(f"Recall (Rappel): {recall}")
    print(f"Precision (Précision): {precision}")
    print(f"Specificity (Spécificité): {specificity}")
    print(f"F1 Score: {f1}")
    
    # Enregistrer la matrice de confusion dans un fichier
    np.savetxt("confusion_matrix.txt", cm, fmt='%d')
    
    # Enregistrer les métriques dans MLflow
    mlflow.log_metric("AUC", auc_score)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("F1 Score", f1)

    # Enregistrer la matrice de confusion comme artefact
    mlflow.log_artifact("confusion_matrix.txt")

    # Enregistrer les hyperparamètres dans MLflow
    mlflow.log_param("n_estimators", int(optimizer.max['params']['n_estimators']))
    mlflow.log_param("num_leaves", int(optimizer.max['params']['num_leaves']))
    mlflow.log_param("colsample_bytree", optimizer.max['params']['colsample_bytree'])
    mlflow.log_param("subsample", optimizer.max['params']['subsample'])
    mlflow.log_param("max_depth", int(optimizer.max['params']['max_depth']))
    mlflow.log_param("reg_alpha", optimizer.max['params']['reg_alpha'])
    mlflow.log_param("reg_lambda", optimizer.max['params']['reg_lambda'])
    mlflow.log_param("min_split_gain", optimizer.max['params']['min_split_gain'])

    # Enregistrer le modèle LightGBM dans MLflow
    mlflow.lightgbm.log_model(clf_V2, "lightgbm_model")

[LightGBM] [Info] Number of positive: 21101, number of negative: 240279
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048571 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12979
[LightGBM] [Info] Number of data points in the train set: 261380, number of used features: 243
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432480
[LightGBM] [Info] Start training from score -2.432480
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[51]	valid_0's binary_logloss: 0.245853	valid_0's custom_cost: 36636
AUC sur le jeu de validation: 0.7593065254158269
Taux de Faux Positifs (FP): 0.001556493644317619
Taux de Faux Négatifs (FN): 0.9820085929108485
Recall (Rappel): 0.01799140708915145
Precision (Précision): 0.5037593984962406
Specificity (Spécificité): 0.9984435063556824
F1 

2024/09/19 19:11:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run LightGBM BayesOpt - Hyperparameter Tuning at: http://127.0.0.1:8080/#/experiments/681732145855945694/runs/90c435f414a944bf82e83c685b56ac45.
2024/09/19 19:11:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/681732145855945694.
