# Imports

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, roc_curve, auc
import joblib
import optuna
import mlflow
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from tqdm import tqdm

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


# Settings

In [24]:
# --- Export locations --------------------------------------------------------
# Root folder; every other export path below is derived from it.
LOCAL_EXPORT_FOLDER_PATH = '/content/exports'
# Folder passed as `base_path` to the file-preview and training routines.
LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/manual_check_patch'
# Folder passed as `output_folder` to the training routine.
LOCAL_EXPORT_MODELIZATION_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/modelization'
LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/modelization_evaluation'

# --- Modelling ---------------------------------------------------------------
# Columns to predict; one model search is run per entry.
TARGET_COLUMNS = ['TARGET']
MLFLOW_EXPERIMENT_NAME = 'generic_model_experiment'

# --- General settings --------------------------------------------------------
TESTING_MODE = True
TESTING_MODE_MAX_LINES = 1000
TESTING_MODE_SUB_FOLDER_NAME = 'testing_data'
# Number of CSV rows read per chunk by the training routine.
GENERAL_CHUNK_SIZE = 100000


In [3]:
def display_head_of_files(base_path, file_extension='csv', chunk_size=1000):
    """
    Walk a directory tree and print the first rows of every matching file.

    Args:
        base_path (str): Root directory that is searched recursively.
        file_extension (str): Suffix a file name must end with to be read (default 'csv').
        chunk_size (int): Maximum number of rows parsed from each file before taking its head.

    Returns:
        None
    """
    for root, _dirs, files in os.walk(base_path):
        for file in files:
            if not file.endswith(file_extension):
                continue

            file_path = os.path.join(root, file)
            print(f"Processing file: {file_path}")

            # Read at most `chunk_size` rows in a single bounded call. Unlike iterating a
            # chunked reader and abandoning it after the first chunk, this leaves no open
            # file handle behind.
            preview = pd.read_csv(file_path, nrows=chunk_size)
            print(preview.head())
            print("\n" + "="*80 + "\n")

# Usage example

In [None]:
# Preview every CSV found under the manual-check export folder.
display_head_of_files(LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH)

# Models

In [50]:
# Candidate classifiers and their hyperparameter grids.
# Keys are display names; every entry holds the estimator under 'model' and,
# under 'params', the candidate values for each hyperparameter to tune.
models = {}

models['Logistic Regression'] = dict(
    model=LogisticRegression(),
    params={
        'C': np.logspace(-3, 3, 7),
        'solver': ['newton-cg', 'lbfgs', 'liblinear'],
    },
)

models['Random Forest'] = dict(
    model=RandomForestClassifier(),
    params={
        'n_estimators': [100, 200, 300],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    },
)

models['Gradient Boosting'] = dict(
    model=GradientBoostingClassifier(),
    params={
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 4, 5],
        'subsample': [0.8, 0.9, 1.0],
    },
)

models['XGBoost'] = dict(
    model=xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    params={
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 4, 5],
        'colsample_bytree': [0.3, 0.7],
    },
)

# Disabled candidates, kept for reference. Uncomment an entry to add it to the search.
# models['LightGBM'] = dict(
#     model=lgb.LGBMClassifier(),
#     params={
#         'n_estimators': [100, 200, 300],
#         'learning_rate': [0.01, 0.05, 0.1],
#         'num_leaves': [31, 62, 127],
#         'boosting_type': ['gbdt', 'dart'],
#     },
# )
# models['CatBoost'] = dict(
#     model=cb.CatBoostClassifier(verbose=0),
#     params={
#         'iterations': [100, 200, 300],
#         'learning_rate': [0.01, 0.05, 0.1],
#         'depth': [3, 4, 5],
#         'l2_leaf_reg': [3, 5, 7],
#     },
# )
# models['SVM'] = dict(
#     model=SVC(probability=True),
#     params={
#         'C': np.logspace(-3, 3, 7),
#         'kernel': ['linear', 'rbf', 'poly'],
#         'degree': [3, 4, 5],
#     },
# )
# models['KNN'] = dict(
#     model=KNeighborsClassifier(),
#     params={
#         'n_neighbors': [5, 10, 20],
#         'weights': ['uniform', 'distance'],
#         'metric': ['euclidean', 'manhattan'],
#     },
# )
# models['Neural Network'] = dict(
#     model=MLPClassifier(max_iter=500),
#     params={
#         'hidden_layer_sizes': ['50,50', '100', '100,50'],
#         'activation': ['tanh', 'relu'],
#         'alpha': [0.0001, 0.001, 0.01],
#     },
# )

# Training

## Hyperparameter optimization method

In [6]:
# Optuna objective used for the hyperparameter search
def objective(trial, X_train, y_train):
    """
    Sample one classifier with hyperparameters and score it by cross-validation.

    The classifier is drawn from the keys of the module-level `models` dict and
    each of its hyperparameters is sampled from the grid stored under 'params'.

    NOTE: this notebook defines `objective` again in two later cells; the
    definition executed last is the one in effect.

    Args:
        trial: Optuna trial providing the `suggest_*` sampling methods.
        X_train: Feature matrix handed to `cross_val_score`.
        y_train: Labels handed to `cross_val_score`.

    Returns:
        Mean accuracy across the 5 cross-validation folds.
    """
    # Imported locally because the notebook's import cell does not bring `clone` into scope.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Layer sizes are listed as comma-separated strings; convert the choice to a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            # Integer grids only provide bounds: any integer in [min, max] may be sampled.
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # Float grids only provide bounds: any value in [min, max] may be sampled.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure a fresh copy so the shared estimator stored in `models` is never mutated by a trial.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    # error_score='raise' surfaces fitting errors instead of scoring the fold as NaN.
    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

## Optuna hyperparameter optimization

In [7]:
# Optuna objective used for the hyperparameter search
def objective(trial, X_train, y_train):
    """
    Sample one classifier with hyperparameters and score it by cross-validation.

    The classifier is drawn from the keys of the module-level `models` dict and
    each of its hyperparameters is sampled from the grid stored under 'params'.

    NOTE: this cell repeats the definition of `objective` from the previous
    cell, and a later cell defines it once more; the definition executed last
    is the one in effect.

    Args:
        trial: Optuna trial providing the `suggest_*` sampling methods.
        X_train: Feature matrix handed to `cross_val_score`.
        y_train: Labels handed to `cross_val_score`.

    Returns:
        Mean accuracy across the 5 cross-validation folds.
    """
    # Imported locally because the notebook's import cell does not bring `clone` into scope.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Layer sizes are listed as comma-separated strings; convert the choice to a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            # Integer grids only provide bounds: any integer in [min, max] may be sampled.
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # Float grids only provide bounds: any value in [min, max] may be sampled.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure a fresh copy so the shared estimator stored in `models` is never mutated by a trial.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    # error_score='raise' surfaces fitting errors instead of scoring the fold as NaN.
    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

# Retraining until the score stops improving

In [8]:
# Refit a pipeline repeatedly until its test accuracy stops improving
def retrain_model(best_pipeline, X_train, y_train, X_test, y_test, threshold=0.01, max_iter=10):
    """
    Fit `best_pipeline` repeatedly while its accuracy on the test data keeps improving.

    After every fit the accuracy on (X_test, y_test) is compared with the score
    of the previous iteration; the loop stops as soon as the gain is below
    `threshold` or after `max_iter` fits.

    NOTE: a later cell of this notebook redefines `retrain_model` without the
    X_test / y_test parameters; once that cell has run, this version is shadowed.

    Args:
        best_pipeline: Estimator exposing `fit` and `predict`.
        X_train: Training features.
        y_train: Training labels.
        X_test: Features used to score each fit.
        y_test: Labels used to score each fit.
        threshold (float): Minimum accuracy gain required to continue iterating.
        max_iter (int): Maximum number of fits.

    Returns:
        tuple: (best_pipeline, accuracy of the last fit performed). The score
        is 0 when no fit was performed (max_iter <= 0).
    """
    previous_score = 0
    # Bound before the loop so the return below is valid even if the loop body never runs.
    current_score = previous_score
    for iteration in range(max_iter):
        best_pipeline.fit(X_train, y_train)
        y_pred = best_pipeline.predict(X_test)
        current_score = accuracy_score(y_test, y_pred)
        improvement = current_score - previous_score
        if improvement < threshold:
            break
        previous_score = current_score
        print(f"Iteration {iteration + 1}, Accuracy: {current_score}, Improvement: {improvement}")
    return best_pipeline, current_score

## Main model-training routine

In [57]:
# Optuna objective used for the hyperparameter search
def objective(trial, X_train, y_train):
    """
    Sample one classifier with hyperparameters and score it by cross-validation.

    The classifier is drawn from the keys of the module-level `models` dict and
    each of its hyperparameters is sampled from the grid stored under 'params'.

    NOTE: `objective` is also defined, with the same logic, in two earlier
    cells of this notebook; the definition executed last is the one in effect.

    Args:
        trial: Optuna trial providing the `suggest_*` sampling methods.
        X_train: Feature matrix handed to `cross_val_score`.
        y_train: Labels handed to `cross_val_score`.

    Returns:
        Mean accuracy across the 5 cross-validation folds.
    """
    # Imported locally because the notebook's import cell does not bring `clone` into scope.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Layer sizes are listed as comma-separated strings; convert the choice to a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            # Integer grids only provide bounds: any integer in [min, max] may be sampled.
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # Float grids only provide bounds: any value in [min, max] may be sampled.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure a fresh copy so the shared estimator stored in `models` is never mutated by a trial.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    # error_score='raise' surfaces fitting errors instead of scoring the fold as NaN.
    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

# Refit a pipeline repeatedly until its training accuracy stops improving
def retrain_model(best_pipeline, X_train, y_train, threshold=0.01, max_iter=10):
    """
    Fit `best_pipeline` repeatedly while its accuracy on the training data keeps improving.

    After every fit the accuracy is measured on the same (X_train, y_train)
    used for fitting and compared with the score of the previous iteration; the
    loop stops as soon as the gain is below `threshold` or after `max_iter` fits.

    NOTE: this replaces the earlier `retrain_model` of this notebook, which
    additionally took X_test / y_test and scored on them.

    Args:
        best_pipeline: Estimator exposing `fit` and `predict`.
        X_train: Training features, also used for scoring.
        y_train: Training labels, also used for scoring.
        threshold (float): Minimum accuracy gain required to continue iterating.
        max_iter (int): Maximum number of fits.

    Returns:
        tuple: (best_pipeline, accuracy of the last fit performed). The score
        is 0 when no fit was performed (max_iter <= 0).
    """
    previous_score = 0
    # Bound before the loop so the return below is valid even if the loop body never runs.
    current_score = previous_score
    for iteration in range(max_iter):
        best_pipeline.fit(X_train, y_train)
        y_pred = best_pipeline.predict(X_train)
        current_score = accuracy_score(y_train, y_pred)
        improvement = current_score - previous_score
        if improvement < threshold:
            break
        previous_score = current_score
        print(f"Iteration {iteration + 1}, Accuracy: {current_score}, Improvement: {improvement}")
    return best_pipeline, current_score

def _select_top_features(frame, target_column, max_features):
    """Return up to `max_features` columns of `frame` ranked by absolute correlation with the target."""
    # Restrict to numeric/boolean columns explicitly: recent pandas versions raise in `corr()`
    # when text columns are present instead of silently ignoring them.
    numeric = frame.select_dtypes(include=['number', 'bool'])
    if target_column not in numeric.columns:
        return []
    correlations = numeric.corr()[target_column].abs()
    # Drop the target by name (not by position) and ignore undefined correlations,
    # e.g. those of constant columns.
    correlations = correlations.drop(labels=[target_column]).dropna()
    return correlations.sort_values(ascending=False).index[:max_features].tolist()


def train_and_evaluate_models(base_path, output_folder, target_columns, max_features=5, testing=False, chunk_size=1000, testing_sub_path_name='test', n_trials=100):
    """
    Search, train, save and evaluate the best classifier for every training file under `base_path`.

    For each file whose name ends with 'application_train.csv', the sibling
    'application_test.csv' is loaded and, for every chunk of the training file
    and every target column, the function:
      1. selects the features most correlated with the target,
      2. runs an Optuna study over the module-level `models` via `objective`,
      3. refits the best configuration with `retrain_model`,
      4. saves the pipeline and the test features under `output_folder`,
      5. if the test file contains the target, prints metrics and logs the run to MLflow.
    Finally the cross-validation scores of all finished trials are plotted per classifier.

    NOTE(review): every chunk of a file writes to the same output paths, so with
    several chunks only the model of the last chunk is kept on disk.

    Args:
        base_path (str): Directory searched recursively for training files.
        output_folder (str): Root directory for saved models, test data and the score plot.
        target_columns (list[str]): Names of the columns to predict.
        max_features (int): Maximum number of features kept per model.
        testing (bool): Accepted for interface compatibility; not used by this function.
        chunk_size (int): Number of training rows read per chunk.
        testing_sub_path_name (str): Accepted for interface compatibility; not used by this function.
        n_trials (int): Number of Optuna trials per study.

    Returns:
        None
    """
    # Imported locally because the notebook's import cell does not bring `clone` into scope.
    from sklearn.base import clone

    train_suffix = 'application_train.csv'
    test_name = 'application_test.csv'

    # Maps classifier name -> cross-validation scores of all its finished trials.
    all_scores = {}

    # Collect the training files first so the progress bar total is the number of
    # files actually processed (not every file of every matching directory).
    train_files = [
        (root, file)
        for root, _dirs, files in os.walk(base_path)
        for file in files
        if file.endswith(train_suffix)
    ]

    # The context manager closes the progress bar even if a file raises.
    with tqdm(total=len(train_files), desc="Processing files") as pbar:
        for root, file in train_files:
            file_path = os.path.join(root, file)
            # Substitute in the file name only, so directory names are never rewritten.
            test_file_path = os.path.join(root, file.replace(train_suffix, test_name))

            print(f"Processing file: {file_path}")

            # The test set does not depend on the chunk: read it once per file.
            test_data = pd.read_csv(test_file_path)

            # Read the training CSV chunk by chunk
            for chunk in pd.read_csv(file_path, chunksize=chunk_size):
                for target_column in target_columns:
                    print(f"Using target column: {target_column}")

                    if target_column not in chunk.columns:
                        print(f"Skipping {target_column} as it is missing from the training data.")
                        continue

                    y_train = chunk[target_column]

                    # A classifier cannot be cross-validated on a single class
                    if y_train.nunique() < 2:
                        print(f"Skipping optimization for {target_column} as it contains only one class in the training data.")
                        continue

                    # Only columns that also exist in the test data can be used as features,
                    # because the same columns are selected from `test_data` below.
                    shared_columns = [
                        column for column in chunk.columns
                        if column == target_column or column in test_data.columns
                    ]
                    top_features = _select_top_features(chunk[shared_columns], target_column, max_features)
                    if not top_features:
                        print(f"Skipping {target_column} as no usable numeric feature was found.")
                        continue

                    # Split features and target
                    X_train = chunk[top_features]
                    X_test = test_data[top_features]
                    y_test = test_data[target_column] if target_column in test_data.columns else None

                    # Hyperparameter optimization with Optuna
                    study = optuna.create_study(direction='maximize')
                    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)

                    # Keep the score of every finished trial for the final comparison plot
                    for finished_trial in study.trials:
                        if finished_trial.value is not None and 'classifier' in finished_trial.params:
                            all_scores.setdefault(finished_trial.params['classifier'], []).append(finished_trial.value)

                    print('Number of finished trials: ', len(study.trials))
                    print('Best trial:')
                    trial = study.best_trial

                    print('  Value: ', trial.value)
                    print('  Params: ')
                    for key, value in trial.params.items():
                        print('    {}: {}'.format(key, value))

                    # Rebuild the best model
                    best_classifier_name = trial.params['classifier']
                    best_params = {k: v for k, v in trial.params.items() if k != 'classifier'}
                    if 'hidden_layer_sizes' in best_params:
                        # The trial stores layer sizes as a comma-separated string, as in `objective`
                        best_params['hidden_layer_sizes'] = tuple(map(int, best_params['hidden_layer_sizes'].split(',')))

                    # Work on a copy so the estimator stored in `models` stays unfitted and
                    # unmodified for the next file.
                    best_classifier = clone(models[best_classifier_name]['model'])
                    best_classifier.set_params(**best_params)

                    best_pipeline = Pipeline(steps=[
                        ('classifier', best_classifier)
                    ])

                    # Refit the model on the whole training chunk
                    best_pipeline, _ = retrain_model(best_pipeline, X_train, y_train)

                    # Mirror the input directory layout below the output folder
                    relative_path = os.path.relpath(root, base_path)
                    output_dir = os.path.join(output_folder, relative_path, target_column)
                    os.makedirs(output_dir, exist_ok=True)

                    model_path = os.path.join(output_dir, f'best_{best_classifier_name}_model.pkl')
                    joblib.dump(best_pipeline, model_path)

                    # Save the test features the model expects
                    test_data_path = os.path.join(output_dir, 'test_data.csv')
                    test_data[top_features].to_csv(test_data_path, index=False)

                    if y_test is not None:
                        # Model evaluation
                        y_pred = best_pipeline.predict(X_test)
                        y_pred_proba = best_pipeline.predict_proba(X_test)[:, 1]  # Only use the probability for the positive class
                        accuracy = accuracy_score(y_test, y_pred)
                        roc_auc = roc_auc_score(y_test, y_pred_proba)

                        print(f"Accuracy: {accuracy}")
                        print(f"ROC AUC: {roc_auc}")
                        print(confusion_matrix(y_test, y_pred))
                        print(classification_report(y_test, y_pred))

                        # Logging with mlflow
                        mlflow.set_experiment('credit_scoring')
                        with mlflow.start_run():
                            mlflow.log_params(trial.params)
                            mlflow.log_metric('accuracy', accuracy)
                            mlflow.log_metric('roc_auc', roc_auc)
                            mlflow.sklearn.log_model(best_pipeline, 'model')
                            mlflow.log_artifact(file_path)
                            mlflow.log_artifact(test_file_path)
                            mlflow.log_artifact(test_data_path)

                    print(f'Model saved at {model_path}')

            pbar.update(1)

    # Plot the cross-validation scores, one box per classifier
    model_scores = [score for scores in all_scores.values() for score in scores]
    model_names_repeated = [name for name, scores in all_scores.items() for _ in scores]

    if model_scores:
        # The folder may not exist yet if no model was saved directly below it
        os.makedirs(output_folder, exist_ok=True)
        plot_cross_val_scores(model_scores, model_names_repeated, output_folder)
    else:
        print("No cross-validation scores collected; skipping the comparison plot.")

# Box-plot of the cross-validation scores, one box per model
def plot_cross_val_scores(model_scores, model_names, output_dir):
    """
    Save a box-plot comparing cross-validation scores per model.

    Args:
        model_scores: Sequence of scores (plotted on the y axis).
        model_names: Sequence of model names, one per score (plotted on the x axis).
        output_dir (str): Directory in which 'cross_val_scores.png' is written;
            created if it does not exist.

    Returns:
        None. Nothing is written when `model_scores` is empty.
    """
    if len(model_scores) == 0:
        # An empty box-plot carries no information; report it instead of saving a blank figure.
        print("No cross-validation scores to plot; skipping figure.")
        return

    os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.boxplot(x=model_names, y=model_scores, ax=ax)
    ax.set_xlabel('Model')
    ax.set_ylabel('Cross-Validation Score')
    ax.set_title('Model Comparison - Cross-Validation Scores')
    ax.tick_params(axis='x', labelrotation=45)
    fig.savefig(os.path.join(output_dir, 'cross_val_scores.png'))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

## Running the modelling pipeline

In [58]:
# Run the full search / train / evaluate pipeline with the notebook settings.
train_and_evaluate_models(
    LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH,
    LOCAL_EXPORT_MODELIZATION_FOLDER_PATH,
    TARGET_COLUMNS,
    testing=TESTING_MODE,
    chunk_size=GENERAL_CHUNK_SIZE,
    testing_sub_path_name=TESTING_MODE_SUB_FOLDER_NAME,
)


Processing files:   0%|          | 0/270 [00:00<?, ?it/s][A[I 2024-07-08 15:30:13,393] A new study created in memory with name: no-name-a8041cc1-816c-4480-b8a2-ddcea49b3218


Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:30:14,673] Trial 0 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 552.4138310166662, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:15,252] Trial 1 finished with value: 0.9136363636363635 and parameters: {'classifier': 'XGBoost', 'n_estimators': 147, 'learning_rate': 0.09283774331258096, 'max_depth': 4, 'colsample_bytree': 0.44408751279305125}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:15,858] Trial 2 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 668.7020372706857, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:16,319] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 162.92946738209884, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:16,590] Trial 4 finish

Number of finished trials:  100
Best trial:
  Value:  0.9227272727272726
  Params: 
    classifier: Logistic Regression
    C: 1.0328132125486604
    solver: newton-cg
Iteration 1, Accuracy: 0.9227272727272727, Improvement: 0.9227272727272727
Model saved at /content/exports/modelization/testing_data/mean/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:30:27,144] Trial 0 finished with value: 0.909090909090909 and parameters: {'classifier': 'Random Forest', 'n_estimators': 167, 'max_depth': 25, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.909090909090909.
[I 2024-07-08 15:30:27,170] Trial 1 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 937.6027098298962, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 15:30:27,237] Trial 2 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 141.35471434589877, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 15:30:27,436] Trial 3 finished with value: 0.9 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 188, 'learning_rate': 0.01286048793154563, 'max_depth': 3, 'subsample': 0.9552643815886475}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 15:30:27,70

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727274
  Params: 
    classifier: Logistic Regression
    C: 19.430483442269065
    solver: newton-cg
Iteration 1, Accuracy: 0.9272727272727272, Improvement: 0.9272727272727272
Model saved at /content/exports/modelization/testing_data/mean/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:30:36,650] Trial 1 finished with value: 0.9045454545454545 and parameters: {'classifier': 'XGBoost', 'n_estimators': 247, 'learning_rate': 0.06740201908146248, 'max_depth': 3, 'colsample_bytree': 0.6906721520846547}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:36,899] Trial 2 finished with value: 0.9136363636363637 and parameters: {'classifier': 'Random Forest', 'n_estimators': 205, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:36,947] Trial 3 finished with value: 0.909090909090909 and parameters: {'classifier': 'XGBoost', 'n_estimators': 157, 'learning_rate': 0.03309541879296128, 'max_depth': 4, 'colsample_bytree': 0.42597310093107676}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:30:36,963] Trial 4 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 649.7018201659434, 'solver': 'liblinear'}. 

Number of finished trials:  100
Best trial:
  Value:  0.9181818181818182
  Params: 
    classifier: Logistic Regression
    C: 786.6728040476513
    solver: lbfgs
Iteration 1, Accuracy: 0.9363636363636364, Improvement: 0.9363636363636364
Model saved at /content/exports/modelization/testing_data/mean/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in th

[I 2024-07-08 15:30:46,274] A new study created in memory with name: no-name-9094ae53-01c9-4953-a981-971b9fd0f0a2
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:30:46,302] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 715.8974153434767, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:30:46,350] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 131, 'learning_rate': 0.040979146145242756, 'max_depth': 4, 'colsample_bytree': 0.6443575416547911}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.


Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/MAD/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:30:46,488] Trial 2 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 204, 'learning_rate': 0.05936803732353771, 'max_depth': 3, 'subsample': 0.9833554306553527}. Best is trial 2 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:30:46,526] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 115, 'learning_rate': 0.01763535925993645, 'max_depth': 5, 'colsample_bytree': 0.6008520768114232}. Best is trial 2 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:30:46,576] Trial 4 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 225, 'learning_rate': 0.056709882150631954, 'max_depth': 3, 'colsample_bytree': 0.6519782959049467}. Best is trial 2 with value: 0.7333

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 294
    max_depth: 30
    min_samples_split: 7
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667



Processing files:   4%|▎         | 10/270 [00:58<22:14,  5.13s/it][A[I 2024-07-08 15:31:11,715] A new study created in memory with name: no-name-bd1aaf5b-e4d2-4a3e-808d-2829708f0b38
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:11,863] Trial 0 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 199, 'learning_rate': 0.07263178748752286, 'max_depth': 4, 'subsample': 0.9647279227631784}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:11,879] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 717.2703569342881, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7333333333333333.


Model saved at /content/exports/modelization/testing_data/mean/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:12,117] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 217, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:12,266] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 128, 'max_depth': 26, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:12,484] Trial 4 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 188, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 7.786858654882167
    solver: lbfgs
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
Model saved at /content/exports/modelization/testing_data/mean/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:19,690] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 219, 'max_depth': 17, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:19,729] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 182, 'learning_rate': 0.03379904296913321, 'max_depth': 5, 'colsample_bytree': 0.5064892456462735}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:31:20,039] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 236, 'max_depth': 27, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 169
    learning_rate: 0.04500906535232826
    max_depth: 3
    subsample: 0.9983084587871486
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333
Model saved at /content/exports/modelization/testing_data/mean/DBSCAN/onehot/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/label/application_train.csv


[I 2024-07-08 15:31:32,120] A new study created in memory with name: no-name-c561a985-ac55-4a27-9d26-daa98eb8ba1b


Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:31:32,309] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 149, 'max_depth': 16, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:32,659] Trial 1 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 287, 'max_depth': 19, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:32,929] Trial 2 finished with value: 0.9466165413533835 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 285, 'learning_rate': 0.02770291392913151, 'max_depth': 4, 'subsample': 0.8728289544921772}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:33,178] Trial 3 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 204, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: Random Forest
    n_estimators: 149
    max_depth: 16
    min_samples_split: 6
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.9537366548042705, Improvement: 0.9537366548042705



Processing files:   6%|▌         | 16/270 [01:34<22:44,  5.37s/it][A[I 2024-07-08 15:31:47,878] A new study created in memory with name: no-name-f9b9c273-b5c1-45a2-959d-ba704d3f8143


Model saved at /content/exports/modelization/testing_data/mean/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:31:48,148] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 223, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:48,489] Trial 1 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 280, 'max_depth': 24, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:48,769] Trial 2 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 241, 'max_depth': 29, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:31:48,937] Trial 3 finished with value: 0.9501879699248119 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 155, 'learning_rate': 0.012836254460349016, 'max_depth': 5, 'subsample': 0.9532678

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: Random Forest
    n_estimators: 223
    max_depth: 13
    min_samples_split: 3
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9679715302491103, Improvement: 0.9679715302491103



Processing files:   6%|▋         | 17/270 [01:55<33:18,  7.90s/it][A[I 2024-07-08 15:32:09,333] A new study created in memory with name: no-name-8e2033b7-5a81-45a1-8087-52456e3cb594
[I 2024-07-08 15:32:09,382] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'XGBoost', 'n_estimators': 215, 'learning_rate': 0.01972990393237734, 'max_depth': 3, 'colsample_bytree': 0.3493511118147881}. Best is trial 0 with value: 0.9537593984962406.


Model saved at /content/exports/modelization/testing_data/mean/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:32:09,520] Trial 1 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 110, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:32:09,535] Trial 2 finished with value: 0.9501879699248119 and parameters: {'classifier': 'Logistic Regression', 'C': 105.69160770415752, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:32:09,763] Trial 3 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 187, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 15:32:10,002] Trial 4 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 196, 'max_depth': 24, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: XGBoost
    n_estimators: 215
    learning_rate: 0.01972990393237734
    max_depth: 3
    colsample_bytree: 0.3493511118147881


Parameters: { "use_label_encoder" } are not used.


Processing files:   7%|▋         | 18/270 [02:10<38:03,  9.06s/it][A[I 2024-07-08 15:32:23,531] A new study created in memory with name: no-name-220dd1fe-653c-4dcd-b603-a910493bc5b0


Iteration 1, Accuracy: 0.9537366548042705, Improvement: 0.9537366548042705
Model saved at /content/exports/modelization/testing_data/mean/Isolation Forest/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:32:23,821] Trial 0 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 253, 'max_depth': 27, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.952.
[I 2024-07-08 15:32:23,971] Trial 1 finished with value: 0.944 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 144, 'learning_rate': 0.03960985645604117, 'max_depth': 3, 'subsample': 0.8138089465900725}. Best is trial 0 with value: 0.952.
[I 2024-07-08 15:32:24,029] Trial 2 finished with value: 0.944 and parameters: {'classifier': 'XGBoost', 'n_estimators': 298, 'learning_rate': 0.043326012783391724, 'max_depth': 4, 'colsample_bytree': 0.5564604764389565}. Best is trial 0 with value: 0.952.
[I 2024-07-08 15:32:24,258] Trial 3 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 198, 'max_depth': 24, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.952.
[I 2024-07-08 15:32

Number of finished trials:  100
Best trial:
  Value:  0.952
  Params: 
    classifier: Random Forest
    n_estimators: 253
    max_depth: 27
    min_samples_split: 10
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.952, Improvement: 0.952



Processing files:   7%|▋         | 19/270 [02:29<46:48, 11.19s/it][A[I 2024-07-08 15:32:42,847] A new study created in memory with name: no-name-e802c8aa-bd07-4327-9ec7-e3c944276500
[I 2024-07-08 15:32:42,896] Trial 0 finished with value: 0.944 and parameters: {'classifier': 'XGBoost', 'n_estimators': 252, 'learning_rate': 0.04761013263357003, 'max_depth': 4, 'colsample_bytree': 0.556026138380035}. Best is trial 0 with value: 0.944.


Model saved at /content/exports/modelization/testing_data/bfill/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:32:43,074] Trial 1 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 157, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.952.
[I 2024-07-08 15:32:43,221] Trial 2 finished with value: 0.944 and parameters: {'classifier': 'Random Forest', 'n_estimators': 122, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.952.
[I 2024-07-08 15:32:43,349] Trial 3 finished with value: 0.9280000000000002 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 171, 'learning_rate': 0.07355668653998422, 'max_depth': 3, 'subsample': 0.9994585940347551}. Best is trial 1 with value: 0.952.
[I 2024-07-08 15:32:43,527] Trial 4 finished with value: 0.952 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 248, 'learning_rate': 0.07642067762055137, 'max_depth': 3, 'subsample': 0.8450203193991734}. Best is trial 1 with value: 0.952.
[I 20

Number of finished trials:  100
Best trial:
  Value:  0.952
  Params: 
    classifier: Random Forest
    n_estimators: 157
    max_depth: 27
    min_samples_split: 8
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.952, Improvement: 0.952



Processing files:   7%|▋         | 20/270 [02:50<56:20, 13.52s/it][A[I 2024-07-08 15:33:04,233] A new study created in memory with name: no-name-e2337335-c4d5-4c27-a335-9affd9592f48
[I 2024-07-08 15:33:04,250] Trial 0 finished with value: 0.952 and parameters: {'classifier': 'Logistic Regression', 'C': 636.9230080241356, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952.


Model saved at /content/exports/modelization/testing_data/bfill/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:33:04,540] Trial 1 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 246, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952.
[I 2024-07-08 15:33:04,860] Trial 2 finished with value: 0.96 and parameters: {'classifier': 'Random Forest', 'n_estimators': 267, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.96.
[I 2024-07-08 15:33:04,978] Trial 3 finished with value: 0.944 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 139, 'learning_rate': 0.05158236319005092, 'max_depth': 4, 'subsample': 0.8641627419589359}. Best is trial 2 with value: 0.96.
[I 2024-07-08 15:33:05,288] Trial 4 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 260, 'max_depth': 21, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.96.
[I 2024-07-08 15:33:05,357] Trial 5 finished with

Number of finished trials:  100
Best trial:
  Value:  0.968
  Params: 
    classifier: XGBoost
    n_estimators: 244
    learning_rate: 0.05337590637797074
    max_depth: 5
    colsample_bytree: 0.625261745513221


Parameters: { "use_label_encoder" } are not used.


Processing files:   8%|▊         | 21/270 [03:01<53:33, 12.91s/it][A

Iteration 1, Accuracy: 0.968, Improvement: 0.968
Model saved at /content/exports/modelization/testing_data/bfill/LOF/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Z-Score/label/application_train.csv
Using target column: TARGET



Processing files:   9%|▉         | 24/270 [03:02<25:50,  6.30s/it][A[I 2024-07-08 15:33:15,381] A new study created in memory with name: no-name-5f861983-fdce-440b-9f26-7ee99851d868
The least populated class in y has only 4 members, which is less than n_splits=5.


Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:33:15,711] Trial 0 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 280, 'max_depth': 16, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:16,003] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 245, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:16,162] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 234, 'learning_rate': 0.0366135879185904, 'max_depth': 4, 'subsample': 0.9757139030784872}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has o

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 280
    max_depth: 16
    min_samples_split: 7
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333



Processing files:   9%|▉         | 25/270 [03:28<41:44, 10.22s/it][A[I 2024-07-08 15:33:41,633] A new study created in memory with name: no-name-09da8216-0868-4dec-b12d-5941e30a597c
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:41,731] Trial 0 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 133, 'learning_rate': 0.07040207408619574, 'max_depth': 4, 'subsample': 0.9065707194216558}. Best is trial 0 with value: 0.7999999999999999.


Model saved at /content/exports/modelization/testing_data/bfill/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:41,870] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 226, 'learning_rate': 0.05703568514459128, 'max_depth': 5, 'subsample': 0.9775887809444279}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:42,211] Trial 2 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 287, 'max_depth': 24, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:33:42,532] Trial 3 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 282, 'max_depth': 16, 'min_samples_split': 7, 'min_samples_leaf': 2}. B

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 287
    max_depth: 24
    min_samples_split: 3
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333



Processing files:  10%|▉         | 26/270 [03:57<58:15, 14.32s/it][A[I 2024-07-08 15:34:10,572] A new study created in memory with name: no-name-36cc5b5d-23d1-4b32-b5dd-5c6797d2dd31
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:34:10,631] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 262, 'learning_rate': 0.09563744220694938, 'max_depth': 3, 'colsample_bytree': 0.31886436211064295}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:34:10,656] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 271.30635928590806, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7333333333333332.


Model saved at /content/exports/modelization/testing_data/bfill/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:34:10,976] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 282, 'max_depth': 16, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:34:11,025] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 193, 'learning_rate': 0.08004719246400577, 'max_depth': 5, 'colsample_bytree': 0.3885231900103576}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:34:11,366] Trial 4 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 282, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best

Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Random Forest
    n_estimators: 266
    max_depth: 30
    min_samples_split: 7
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333



Processing files:  10%|█         | 27/270 [04:23<1:09:23, 17.13s/it][A[I 2024-07-08 15:34:36,730] A new study created in memory with name: no-name-da39e3d8-7fb9-4a8c-85b1-e11592181b5e
[I 2024-07-08 15:34:36,757] Trial 0 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 369.82213080915346, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9547619047619047.


Model saved at /content/exports/modelization/testing_data/bfill/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:34:37,039] Trial 1 finished with value: 0.9421768707482994 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 217, 'learning_rate': 0.016953208121057686, 'max_depth': 5, 'subsample': 0.8962944919899601}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:34:37,088] Trial 2 finished with value: 0.9547619047619047 and parameters: {'classifier': 'XGBoost', 'n_estimators': 137, 'learning_rate': 0.028063174252535003, 'max_depth': 4, 'colsample_bytree': 0.4612482988404226}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:34:37,148] Trial 3 finished with value: 0.9547619047619047 and parameters: {'classifier': 'XGBoost', 'n_estimators': 212, 'learning_rate': 0.01018137144744312, 'max_depth': 4, 'colsample_bytree': 0.5919428967247304}. Best is trial 0 with value: 0.9547619047619047.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not use

Number of finished trials:  100
Best trial:
  Value:  0.9547619047619047
  Params: 
    classifier: Logistic Regression
    C: 369.82213080915346
    solver: lbfgs
Iteration 1, Accuracy: 0.9588477366255144, Improvement: 0.9588477366255144
Model saved at /content/exports/modelization/testing_data/bfill/Isolation Forest/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:34:46,176] Trial 0 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 125, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:34:46,435] Trial 1 finished with value: 0.9421768707482994 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 211, 'learning_rate': 0.026845329381430354, 'max_depth': 5, 'subsample': 0.9787113735811421}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:34:46,734] Trial 2 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 248, 'max_depth': 20, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:34:46,773] Trial 3 finished with value: 0.9547619047619047 and parameters: {'classifier': 'XGBoost', 'n_estimators': 103, 'learning_rate': 0.01847071661184053, 'max_depth': 4, 'col

Number of finished trials:  100
Best trial:
  Value:  0.9629251700680272
  Params: 
    classifier: Random Forest
    n_estimators: 136
    max_depth: 11
    min_samples_split: 5
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9711934156378601, Improvement: 0.9711934156378601



Processing files:  12%|█▏        | 32/270 [04:53<42:26, 10.70s/it][A[I 2024-07-08 15:35:06,937] A new study created in memory with name: no-name-e9e9fd76-d8dc-4bfc-bccb-c2bc86772289
[I 2024-07-08 15:35:06,978] Trial 0 finished with value: 0.9547619047619047 and parameters: {'classifier': 'XGBoost', 'n_estimators': 117, 'learning_rate': 0.05798125392071487, 'max_depth': 5, 'colsample_bytree': 0.6024874584754095}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,027] Trial 1 finished with value: 0.9547619047619047 and parameters: {'classifier': 'XGBoost', 'n_estimators': 162, 'learning_rate': 0.09744636651938421, 'max_depth': 3, 'colsample_bytree': 0.36157065754967654}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,056] Trial 2 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 678.5375387096151, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,08

Model saved at /content/exports/modelization/testing_data/bfill/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:35:07,386] Trial 4 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 236, 'max_depth': 20, 'min_samples_split': 3, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,412] Trial 5 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 716.5773427598732, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,438] Trial 6 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 334.8504476410194, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 15:35:07,647] Trial 7 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 269, 'learning_rate': 0.014686795082193716, 'max_depth': 3, 'subsample': 0.9629087425179657}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-0

Number of finished trials:  100
Best trial:
  Value:  0.9547619047619047
  Params: 
    classifier: XGBoost
    n_estimators: 117
    learning_rate: 0.05798125392071487
    max_depth: 5
    colsample_bytree: 0.6024874584754095


Parameters: { "use_label_encoder" } are not used.


Processing files:  12%|█▏        | 33/270 [05:04<42:28, 10.76s/it][A[I 2024-07-08 15:35:17,896] A new study created in memory with name: no-name-ba028730-fe6b-488e-a7e3-5072b5ff9b3a


Iteration 1, Accuracy: 0.9547325102880658, Improvement: 0.9547325102880658
Model saved at /content/exports/modelization/testing_data/bfill/Isolation Forest/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:35:17,913] Trial 0 finished with value: 0.9200000000000002 and parameters: {'classifier': 'Logistic Regression', 'C': 520.4510040983149, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9200000000000002.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameter

Number of finished trials:  100
Best trial:
  Value:  0.9200000000000002
  Params: 
    classifier: Logistic Regression
    C: 520.4510040983149
    solver: liblinear
Iteration 1, Accuracy: 0.91, Improvement: 0.91
Model saved at /content/exports/modelization/testing_data/knn/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:35:25,189] Trial 0 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 156, 'max_depth': 30, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 15:35:25,521] Trial 1 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 251, 'max_depth': 29, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 15:35:25,659] Trial 2 finished with value: 0.8400000000000001 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 139, 'learning_rate': 0.08960487656879736, 'max_depth': 4, 'subsample': 0.9824284002051088}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 15:35:25,717] Trial 3 finished with value: 0.9 and parameters: {'classifier': 'XGBoost', 'n_estimators': 255, 'learning_rate': 0.04185167213688409, 'max_depth': 3, 'colsample_bytree': 

Number of finished trials:  100
Best trial:
  Value:  0.9200000000000002
  Params: 
    classifier: Logistic Regression
    C: 786.5809806846437
    solver: liblinear
Iteration 1, Accuracy: 0.91, Improvement: 0.91
Model saved at /content/exports/modelization/testing_data/knn/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:35:33,177] Trial 1 finished with value: 0.8800000000000001 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 259, 'learning_rate': 0.017378341394859727, 'max_depth': 3, 'subsample': 0.929555654455614}. Best is trial 0 with value: 0.8800000000000001.
[I 2024-07-08 15:35:33,235] Trial 2 finished with value: 0.8800000000000001 and parameters: {'classifier': 'XGBoost', 'n_estimators': 294, 'learning_rate': 0.06658682541999673, 'max_depth': 4, 'colsample_bytree': 0.603636972036109}. Best is trial 0 with value: 0.8800000000000001.
[I 2024-07-08 15:35:33,384] Trial 3 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 115, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 3 with value: 0.9099999999999999.
[I 2024-07-08 15:35:33,409] Trial 4 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 975.5304231675715, 'solver': 'lbfgs'}. Be

Number of finished trials:  100
Best trial:
  Value:  0.9099999999999999
  Params: 
    classifier: Random Forest
    n_estimators: 115
    max_depth: 13
    min_samples_split: 9
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.91, Improvement: 0.91



Processing files:  13%|█▎        | 36/270 [05:30<38:30,  9.87s/it][A
Processing files:  14%|█▍        | 39/270 [05:30<18:18,  4.75s/it][A

Model saved at /content/exports/modelization/testing_data/knn/LOF/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/MAD/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing fil

[I 2024-07-08 15:35:44,151] A new study created in memory with name: no-name-44339764-e880-47fe-bf14-1dbda2e470eb
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:44,168] Trial 0 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 637.254518226626, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:44,215] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 252, 'learning_rate': 0.02169949811679061, 'max_depth': 3, 'colsample_bytree': 0.5772682031881282}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:44,241] Trial 2 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C'

Using target column: TARGET


[I 2024-07-08 15:35:44,440] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 158, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:44,639] Trial 4 finished with value: 0.6 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 295, 'learning_rate': 0.013077090331888704, 'max_depth': 3, 'subsample': 0.969819082183263}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:44,929] Trial 5 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 250, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members,

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 637.254518226626
    solver: liblinear
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
Model saved at /content/exports/modelization/testing_data/knn/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:50,617] Trial 6 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 384.9344828845487, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:50,634] Trial 7 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 691.420575997594, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:50,651] Trial 8 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 997.6765314043286, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_split

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 971.310487690818
    solver: lbfgs
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
Model saved at /content/exports/modelization/testing_data/knn/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/DBSCAN/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:35:57,171] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 191, 'max_depth': 22, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:35:57,220] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 274, 'learning_rate': 0.05604023799958421, 'max_depth': 5, 'colsample_bytree': 0.5427962927225447}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are n

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Random Forest
    n_estimators: 243
    max_depth: 29
    min_samples_split: 2
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.8, Improvement: 0.8



Processing files:  17%|█▋        | 45/270 [06:09<28:26,  7.58s/it][A[I 2024-07-08 15:36:22,664] A new study created in memory with name: no-name-963d7d88-892d-408a-a26f-e33f4f9c074a
[I 2024-07-08 15:36:22,713] Trial 0 finished with value: 0.952627450980392 and parameters: {'classifier': 'XGBoost', 'n_estimators': 140, 'learning_rate': 0.03415251857687873, 'max_depth': 5, 'colsample_bytree': 0.36762626739450593}. Best is trial 0 with value: 0.952627450980392.


Model saved at /content/exports/modelization/testing_data/knn/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:36:22,893] Trial 1 finished with value: 0.9527058823529412 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 112, 'learning_rate': 0.06297565240957106, 'max_depth': 5, 'subsample': 0.8681529021487011}. Best is trial 1 with value: 0.9527058823529412.
[I 2024-07-08 15:36:22,971] Trial 2 finished with value: 0.9527058823529412 and parameters: {'classifier': 'XGBoost', 'n_estimators': 238, 'learning_rate': 0.03928133833824024, 'max_depth': 5, 'colsample_bytree': 0.6349113778738946}. Best is trial 1 with value: 0.9527058823529412.
[I 2024-07-08 15:36:23,261] Trial 3 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 233, 'max_depth': 14, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9527058823529412.
[I 2024-07-08 15:36:23,510] Trial 4 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 189, 'max_depth': 14, 'min_samples_sp

Number of finished trials:  100
Best trial:
  Value:  0.9605490196078431
  Params: 
    classifier: Random Forest
    n_estimators: 243
    max_depth: 19
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9841897233201581, Improvement: 0.9841897233201581



Processing files:  18%|█▊        | 49/270 [06:35<26:03,  7.07s/it][A[I 2024-07-08 15:36:48,896] A new study created in memory with name: no-name-6edff4ad-e205-480e-a9c1-95b3b72bec63
[I 2024-07-08 15:36:48,913] Trial 0 finished with value: 0.952627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 230.93347555020216, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952627450980392.


Model saved at /content/exports/modelization/testing_data/knn/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:36:49,173] Trial 1 finished with value: 0.9328627450980391 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 300, 'learning_rate': 0.0706888568843353, 'max_depth': 3, 'subsample': 0.9016105714259319}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 15:36:49,504] Trial 2 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 258, 'max_depth': 26, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 15:36:49,530] Trial 3 finished with value: 0.948627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 627.9451645662973, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 15:36:49,699] Trial 4 finished with value: 0.9565490196078432 and parameters: {'classifier': 'Random Forest', 'n_estimators': 127, 'max_depth': 19, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 4 with value: 0.9

Number of finished trials:  100
Best trial:
  Value:  0.9605490196078431
  Params: 
    classifier: Random Forest
    n_estimators: 215
    max_depth: 16
    min_samples_split: 5
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9802371541501976, Improvement: 0.9802371541501976



Processing files:  19%|█▊        | 50/270 [07:02<37:16, 10.16s/it][A[I 2024-07-08 15:37:16,268] A new study created in memory with name: no-name-81860574-0e22-4ae4-b961-38f87ab62547


Model saved at /content/exports/modelization/testing_data/knn/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:37:16,447] Trial 0 finished with value: 0.9327843137254902 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 222, 'learning_rate': 0.03608968771492973, 'max_depth': 3, 'subsample': 0.8479799631739303}. Best is trial 0 with value: 0.9327843137254902.
[I 2024-07-08 15:37:16,667] Trial 1 finished with value: 0.9327843137254902 and parameters: {'classifier': 'Random Forest', 'n_estimators': 194, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9327843137254902.
[I 2024-07-08 15:37:16,865] Trial 2 finished with value: 0.9328627450980391 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 221, 'learning_rate': 0.09497078015181301, 'max_depth': 5, 'subsample': 0.8855118384887678}. Best is trial 2 with value: 0.9328627450980391.
[I 2024-07-08 15:37:16,881] Trial 3 finished with value: 0.9565490196078432 and parameters: {'classifier': 'Logistic Regression', 'C': 843.3773319677815, 'solver': 'libline

Number of finished trials:  100
Best trial:
  Value:  0.9565490196078432
  Params: 
    classifier: Logistic Regression
    C: 843.3773319677815
    solver: liblinear
Iteration 1, Accuracy: 0.9604743083003953, Improvement: 0.9604743083003953
Model saved at /content/exports/modelization/testing_data/knn/Isolation Forest/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:37:24,107] Trial 1 finished with value: 0.8703030303030304 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 276, 'learning_rate': 0.08107135144558873, 'max_depth': 3, 'subsample': 0.885345258920653}. Best is trial 0 with value: 0.914949494949495.
[I 2024-07-08 15:37:24,155] Trial 2 finished with value: 0.9193939393939393 and parameters: {'classifier': 'XGBoost', 'n_estimators': 191, 'learning_rate': 0.02332219623147431, 'max_depth': 3, 'colsample_bytree': 0.4918791179996754}. Best is trial 2 with value: 0.9193939393939393.
[I 2024-07-08 15:37:24,374] Trial 3 finished with value: 0.9105050505050505 and parameters: {'classifier': 'Random Forest', 'n_estimators': 171, 'max_depth': 19, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.9193939393939393.
[I 2024-07-08 15:37:24,389] Trial 4 finished with value: 0.915050505050505 and parameters: {'classifier': 'Logistic Regression', 'C': 715.5767852421643, 'solver': 'lbfgs'}. Bes

Number of finished trials:  100
Best trial:
  Value:  0.9238383838383838
  Params: 
    classifier: Random Forest
    n_estimators: 116
    max_depth: 16
    min_samples_split: 7
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.9237668161434978, Improvement: 0.9237668161434978



Processing files:  19%|█▉        | 52/270 [07:28<41:41, 11.47s/it][A[I 2024-07-08 15:37:42,245] A new study created in memory with name: no-name-b0809c48-9e7b-40cf-8df3-ba2ce9ca32ee
[I 2024-07-08 15:37:42,261] Trial 0 finished with value: 0.914949494949495 and parameters: {'classifier': 'Logistic Regression', 'C': 200.37197719245788, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.914949494949495.
[I 2024-07-08 15:37:42,320] Trial 1 finished with value: 0.9104040404040402 and parameters: {'classifier': 'XGBoost', 'n_estimators': 254, 'learning_rate': 0.07543781664897561, 'max_depth': 4, 'colsample_bytree': 0.35659011814932645}. Best is trial 0 with value: 0.914949494949495.


Model saved at /content/exports/modelization/testing_data/median/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:37:42,499] Trial 2 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Random Forest', 'n_estimators': 148, 'max_depth': 24, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.9193939393939393.
[I 2024-07-08 15:37:42,860] Trial 3 finished with value: 0.8927272727272728 and parameters: {'classifier': 'Random Forest', 'n_estimators': 281, 'max_depth': 12, 'min_samples_split': 7, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9193939393939393.
[I 2024-07-08 15:37:43,171] Trial 4 finished with value: 0.8970707070707071 and parameters: {'classifier': 'Random Forest', 'n_estimators': 222, 'max_depth': 13, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9193939393939393.
[I 2024-07-08 15:37:43,471] Trial 5 finished with value: 0.8748484848484848 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 296, 'learning_rate': 0.05235562638687687, 'max_depth': 4, 'subsample': 0.82676575

Number of finished trials:  100
Best trial:
  Value:  0.9238383838383838
  Params: 
    classifier: Random Forest
    n_estimators: 105
    max_depth: 29
    min_samples_split: 2
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9237668161434978, Improvement: 0.9237668161434978



Processing files:  20%|█▉        | 53/270 [07:46<46:11, 12.77s/it][A[I 2024-07-08 15:37:59,515] A new study created in memory with name: no-name-152e079a-7b68-4e6d-a30b-57c6e75cd418


Model saved at /content/exports/modelization/testing_data/median/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:37:59,807] Trial 0 finished with value: 0.8835353535353535 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 261, 'learning_rate': 0.057863804433210715, 'max_depth': 4, 'subsample': 0.8068147092722596}. Best is trial 0 with value: 0.8835353535353535.
[I 2024-07-08 15:38:00,108] Trial 1 finished with value: 0.8790909090909091 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 287, 'learning_rate': 0.02013427519014157, 'max_depth': 4, 'subsample': 0.8138999391708414}. Best is trial 0 with value: 0.8835353535353535.
[I 2024-07-08 15:38:00,448] Trial 2 finished with value: 0.9238383838383838 and parameters: {'classifier': 'Random Forest', 'n_estimators': 216, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.9238383838383838.
[I 2024-07-08 15:38:00,672] Trial 3 finished with value: 0.8835353535353535 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 181, 'learning_rate': 0.058

Number of finished trials:  100
Best trial:
  Value:  0.9282828282828282
  Params: 
    classifier: XGBoost
    n_estimators: 250
    learning_rate: 0.020520624118240016
    max_depth: 4
    colsample_bytree: 0.4221239034408971


Parameters: { "use_label_encoder" } are not used.


Processing files:  20%|██        | 54/270 [07:57<44:42, 12.42s/it][A

Iteration 1, Accuracy: 0.9372197309417041, Improvement: 0.9372197309417041
Model saved at /content/exports/modelization/testing_data/median/LOF/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/label/application_train.csv
Using target column: TARGET



Processing files:  21%|██        | 57/270 [07:57<21:40,  6.11s/it][A[I 2024-07-08 15:38:10,995] A new study created in memory with name: no-name-26784de2-cdb2-4f28-97b5-747fb6865070


Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/MAD/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/MAD/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/MAD/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/m

The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:11,174] Trial 0 finished with value: 0.5333333333333333 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 279, 'learning_rate': 0.029192271022455682, 'max_depth': 3, 'subsample': 0.9742469156421971}. Best is trial 0 with value: 0.5333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:11,190] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 745.2005501313878, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:11,308] Trial 2 finished with value: 0.5999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 168, 'learning_rate': 0.018692718284553433, 'max_depth': 4, 'subsample': 0.8363126967790291}. Best

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 296
    max_depth: 16
    min_samples_split: 6
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667



Processing files:  23%|██▎       | 61/270 [08:19<20:15,  5.81s/it][A[I 2024-07-08 15:38:33,017] A new study created in memory with name: no-name-c31299ad-ff7d-4a53-883d-0a9e49302e90
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,044] Trial 0 finished with value: 0.5999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 235.56434865914161, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,093] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 154, 'learning_rate': 0.048977211004179906, 'max_depth': 5, 'colsample_bytree': 0.42250174627153114}. Best is trial 1 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,119] Trial 2 finished with value: 0.6666

Model saved at /content/exports/modelization/testing_data/median/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,234] Trial 5 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 130, 'learning_rate': 0.05979690485683153, 'max_depth': 5, 'colsample_bytree': 0.38202132417269774}. Best is trial 1 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,413] Trial 6 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 280, 'learning_rate': 0.01281141028218051, 'max_depth': 5, 'subsample': 0.9508207871630462}. Best is trial 1 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:33,621] Trial 7 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 187, 'max_depth': 23, 'min_samples_split': 3, 

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 228
    max_depth: 29
    min_samples_split: 6
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667



Processing files:  23%|██▎       | 62/270 [08:39<27:13,  7.86s/it][A[I 2024-07-08 15:38:52,462] A new study created in memory with name: no-name-f82674e9-f001-4ad2-a40e-da19a7232db6
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,610] Trial 0 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 213, 'learning_rate': 0.08909563286371633, 'max_depth': 4, 'subsample': 0.8341348963224683}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,628] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 35.9554226872167, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,643] Trial 2 finished with value: 0.73333

Model saved at /content/exports/modelization/testing_data/median/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,659] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 412.3389374542954, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,717] Trial 4 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 293, 'learning_rate': 0.09338448615482986, 'max_depth': 4, 'colsample_bytree': 0.5879807526834177}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:38:52,743] Trial 5 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 72.44853752727789, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.8666666666666666.
The least populated clas

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 213
    learning_rate: 0.08909563286371633
    max_depth: 4
    subsample: 0.8341348963224683
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333



Processing files:  23%|██▎       | 63/270 [08:52<30:30,  8.84s/it][A[I 2024-07-08 15:39:05,960] A new study created in memory with name: no-name-9e6839d0-16b8-4907-aa30-77e659bee326
[I 2024-07-08 15:39:06,009] Trial 0 finished with value: 0.955959595959596 and parameters: {'classifier': 'XGBoost', 'n_estimators': 167, 'learning_rate': 0.07016227470992042, 'max_depth': 3, 'colsample_bytree': 0.3033674957827324}. Best is trial 0 with value: 0.955959595959596.


Model saved at /content/exports/modelization/testing_data/median/DBSCAN/onehot/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:39:06,358] Trial 1 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 298, 'max_depth': 24, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:06,384] Trial 2 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 732.1922534373178, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:06,694] Trial 3 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 265, 'max_depth': 21, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:06,752] Trial 4 finished with value: 0.9412121212121212 and parameters: {'classifier': 'XGBoost', 'n_estimators': 282, 'learning_rate': 0.0441318875248344, 'max_depth': 3, 'colsample_bytree': 0.6273808408130119}. Best is trial 0 with value: 0.9559

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: XGBoost
    n_estimators: 167
    learning_rate: 0.07016227470992042
    max_depth: 3
    colsample_bytree: 0.3033674957827324
Iteration 1, Accuracy: 0.9558823529411765, Improvement: 0.9558823529411765
Model saved at /content/exports/modelization/testing_data/median/Isolation Forest/ordinal/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:39:22,807] Trial 1 finished with value: 0.955959595959596 and parameters: {'classifier': 'Logistic Regression', 'C': 196.62728138489834, 'solver': 'liblinear'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:22,845] Trial 2 finished with value: 0.9523232323232322 and parameters: {'classifier': 'XGBoost', 'n_estimators': 125, 'learning_rate': 0.08585352990284371, 'max_depth': 3, 'colsample_bytree': 0.5926271102634243}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:22,893] Trial 3 finished with value: 0.9523232323232322 and parameters: {'classifier': 'XGBoost', 'n_estimators': 228, 'learning_rate': 0.07425415815577595, 'max_depth': 3, 'colsample_bytree': 0.3428751523137276}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:22,969] Trial 4 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 142.01464763369725, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9559

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: Logistic Regression
    C: 19.58464988074235
    solver: lbfgs
Iteration 1, Accuracy: 0.9558823529411765, Improvement: 0.9558823529411765
Model saved at /content/exports/modelization/testing_data/median/Isolation Forest/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:39:37,749] Trial 0 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 103, 'max_depth': 30, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:37,765] Trial 1 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Logistic Regression', 'C': 65.2458257454656, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:37,913] Trial 2 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 198, 'learning_rate': 0.07342861368580711, 'max_depth': 3, 'subsample': 0.9812559498936702}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 15:39:37,939] Trial 3 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Logistic Regression', 'C': 855.1577484918358, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: Random Forest
    n_estimators: 103
    max_depth: 30
    min_samples_split: 8
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9595588235294118, Improvement: 0.9595588235294118



Processing files:  26%|██▌       | 69/270 [09:43<32:52,  9.81s/it][A[I 2024-07-08 15:39:56,751] A new study created in memory with name: no-name-95c49d0d-d86d-4ca7-b2b3-222c23fd5de8


Model saved at /content/exports/modelization/testing_data/median/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:39:56,941] Trial 0 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 155, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:39:57,192] Trial 1 finished with value: 0.927741935483871 and parameters: {'classifier': 'Random Forest', 'n_estimators': 202, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.927741935483871.
[I 2024-07-08 15:39:57,352] Trial 2 finished with value: 0.895483870967742 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 179, 'learning_rate': 0.06883338048730088, 'max_depth': 3, 'subsample': 0.8438186198142359}. Best is trial 1 with value: 0.927741935483871.
[I 2024-07-08 15:39:57,449] Trial 3 finished with value: 0.9210752688172044 and parameters: {'classifier': 'Logistic Regression', 'C': 318.7291773265112, 'solver': 'newton-cg'}. Best is trial 1 with valu

Number of finished trials:  100
Best trial:
  Value:  0.927741935483871
  Params: 
    classifier: Random Forest
    n_estimators: 202
    max_depth: 19
    min_samples_split: 9
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9276315789473685, Improvement: 0.9276315789473685



Processing files:  26%|██▌       | 70/270 [10:09<43:47, 13.14s/it][A[I 2024-07-08 15:40:22,954] A new study created in memory with name: no-name-1c5f6ba0-291b-4575-a839-03cd085926d4


Model saved at /content/exports/modelization/testing_data/iterative/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:40:23,254] Trial 0 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 235, 'max_depth': 15, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:23,303] Trial 1 finished with value: 0.9019354838709678 and parameters: {'classifier': 'XGBoost', 'n_estimators': 205, 'learning_rate': 0.08003571929459909, 'max_depth': 3, 'colsample_bytree': 0.5696446222357688}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:23,480] Trial 2 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 128, 'max_depth': 17, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:23,750] Trial 3 finished with value: 0.8890322580645161 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 242, 'learning_rate': 0.06711594396531807, 'max_depth': 5,

Number of finished trials:  100
Best trial:
  Value:  0.927741935483871
  Params: 
    classifier: Random Forest
    n_estimators: 127
    max_depth: 28
    min_samples_split: 9
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9276315789473685, Improvement: 0.9276315789473685



Processing files:  26%|██▋       | 71/270 [10:34<52:38, 15.87s/it][A[I 2024-07-08 15:40:48,259] A new study created in memory with name: no-name-cdafbafa-3e5f-4649-bcf6-dbd1f6153f1d
[I 2024-07-08 15:40:48,286] Trial 0 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Logistic Regression', 'C': 971.5834544118927, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9212903225806451.


Model saved at /content/exports/modelization/testing_data/iterative/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:40:48,433] Trial 1 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 216, 'learning_rate': 0.04178977340717766, 'max_depth': 5, 'subsample': 0.9837105812589909}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:48,571] Trial 2 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 188, 'learning_rate': 0.02220644764032231, 'max_depth': 3, 'subsample': 0.8985992775861484}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:48,739] Trial 3 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 257, 'learning_rate': 0.09295766483917654, 'max_depth': 5, 'subsample': 0.8616652820844893}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 15:40:48,777] Trial 4 finished with value: 0.9212903225806451 and parameters: {'classifier': 'XGBoost', 'n_estimators': 109, 'learn

Number of finished trials:  100
Best trial:
  Value:  0.9212903225806451
  Params: 
    classifier: Logistic Regression
    C: 971.5834544118927
    solver: lbfgs
Iteration 1, Accuracy: 0.9473684210526315, Improvement: 0.9473684210526315
Model saved at /content/exports/modelization/testing_data/iterative/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains 

[I 2024-07-08 15:40:58,288] A new study created in memory with name: no-name-a57a2a03-9c28-4c53-a5ef-e0e42eeed534
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:40:58,327] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 146, 'learning_rate': 0.03298801541457975, 'max_depth': 4, 'colsample_bytree': 0.3205205213698548}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.


Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:40:58,485] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 237, 'learning_rate': 0.08260933139478732, 'max_depth': 5, 'subsample': 0.9606167154357956}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:40:58,525] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 128, 'learning_rate': 0.02039916805293189, 'max_depth': 3, 'colsample_bytree': 0.5973464010085892}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:40:58,694] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 147, 'max_depth': 10, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.7999999999999999.
The least p

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 254
    learning_rate: 0.011220026149395053
    max_depth: 5
    subsample: 0.9531377510353138
Iteration 1, Accuracy: 1.0, Improvement: 1.0



Processing files:  29%|██▉       | 79/270 [11:01<18:02,  5.67s/it][A[I 2024-07-08 15:41:15,085] A new study created in memory with name: no-name-0da561b9-9dea-430a-9356-59645813f03c
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:15,204] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 106, 'max_depth': 16, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7333333333333332.


Model saved at /content/exports/modelization/testing_data/iterative/DBSCAN/ordinal/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:15,525] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 292, 'max_depth': 15, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:15,633] Trial 2 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 168, 'learning_rate': 0.07872964338139567, 'max_depth': 3, 'subsample': 0.9792003258197579}. Best is trial 2 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:15,762] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 106, 'max_depth': 24, 'min_samples_split': 3, 'min_samples_leaf': 4}. B

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 261
    learning_rate: 0.017483400829073695
    max_depth: 3
    subsample: 0.9504682529269294
Iteration 1, Accuracy: 1.0, Improvement: 1.0



Processing files:  30%|██▉       | 80/270 [11:20<24:11,  7.64s/it][A[I 2024-07-08 15:41:33,939] A new study created in memory with name: no-name-ed943e70-feea-4776-8548-0876ca6b6ff2
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:33,978] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 151, 'learning_rate': 0.07914606954489153, 'max_depth': 3, 'colsample_bytree': 0.6297194442210019}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:34,004] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 567.3008709944665, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:34,032] Trial 2 finished with value: 0.7333333

Model saved at /content/exports/modelization/testing_data/iterative/DBSCAN/label/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:34,130] Trial 3 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 126, 'learning_rate': 0.06308845188228078, 'max_depth': 3, 'subsample': 0.9449998428131664}. Best is trial 3 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:34,268] Trial 4 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 208, 'learning_rate': 0.023841112126951922, 'max_depth': 3, 'subsample': 0.9164946722642644}. Best is trial 3 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:41:34,306] Trial 5 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 124, 'learning_rate': 0.09973767120652934, 'max_d

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 128
    learning_rate: 0.04935541044131396
    max_depth: 3
    subsample: 0.9558867150389586
Iteration 1, Accuracy: 1.0, Improvement: 1.0
Model saved at /content/exports/modelization/testing_data/iterative/DBSCAN/onehot/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:41:56,676] A new study created in memory with name: no-name-04cbcc00-cc03-4e6f-8467-a5b1802819ce
[I 2024-07-08 15:41:56,702] Trial 0 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 854.5502617514104, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9578947368421054.


Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:41:56,901] Trial 1 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 159, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:41:57,140] Trial 2 finished with value: 0.9368421052631579 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 222, 'learning_rate': 0.03072988702687547, 'max_depth': 3, 'subsample': 0.9203924717161713}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:41:57,165] Trial 3 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 549.7926631112607, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:41:57,254] Trial 4 finished with value: 0.9543859649122808 and parameters: {'classifier': 'XGBoost', 'n_estimators': 276, 'learning_rate': 0.09065689102500628, 'max_depth': 3, 'colsample_bytree': 0.511145230475189

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: Logistic Regression
    C: 854.5502617514104
    solver: newton-cg
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052
Model saved at /content/exports/modelization/testing_data/iterative/Isolation Forest/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:42:12,262] Trial 0 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 247, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:42:12,354] Trial 1 finished with value: 0.9578947368421054 and parameters: {'classifier': 'XGBoost', 'n_estimators': 251, 'learning_rate': 0.028183885809358905, 'max_depth': 3, 'colsample_bytree': 0.3220442592361796}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:42:12,381] Trial 2 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 928.605534087794, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 15:42:12,672] Trial 3 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 191, 'max_depth': 18, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with val

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: Random Forest
    n_estimators: 247
    max_depth: 15
    min_samples_split: 6
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052



Processing files:  32%|███▏      | 86/270 [12:16<26:35,  8.67s/it][A[I 2024-07-08 15:42:29,661] A new study created in memory with name: no-name-b0d96f13-4031-4c31-9dc6-cd22dd126b52
[I 2024-07-08 15:42:29,688] Trial 0 finished with value: 0.9508771929824562 and parameters: {'classifier': 'Logistic Regression', 'C': 393.558493194699, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9508771929824562.
[I 2024-07-08 15:42:29,745] Trial 1 finished with value: 0.9578947368421054 and parameters: {'classifier': 'XGBoost', 'n_estimators': 292, 'learning_rate': 0.03272773946470151, 'max_depth': 4, 'colsample_bytree': 0.5375558113941165}. Best is trial 1 with value: 0.9578947368421054.
[I 2024-07-08 15:42:29,773] Trial 2 finished with value: 0.9543859649122807 and parameters: {'classifier': 'Logistic Regression', 'C': 16.027427465958297, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9578947368421054.


Model saved at /content/exports/modelization/testing_data/iterative/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:42:30,053] Trial 3 finished with value: 0.9403508771929824 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 269, 'learning_rate': 0.04147040522106298, 'max_depth': 5, 'subsample': 0.9032452193209062}. Best is trial 1 with value: 0.9578947368421054.
[I 2024-07-08 15:42:30,191] Trial 4 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 109, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9578947368421054.
[I 2024-07-08 15:42:30,309] Trial 5 finished with value: 0.9438596491228071 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 129, 'learning_rate': 0.050771195359800014, 'max_depth': 4, 'subsample': 0.9148207163030965}. Best is trial 1 with value: 0.9578947368421054.
[I 2024-07-08 15:42:30,334] Trial 6 finished with value: 0.9508771929824562 and parameters: {'classifier': 'Logistic Regression', 'C': 328.71594986703417, 'solver': 'lbfgs

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: XGBoost
    n_estimators: 292
    learning_rate: 0.03272773946470151
    max_depth: 4
    colsample_bytree: 0.5375558113941165
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052


Parameters: { "use_label_encoder" } are not used.


Processing files:  32%|███▏      | 87/270 [12:31<29:58,  9.83s/it][A[I 2024-07-08 15:42:44,795] A new study created in memory with name: no-name-95c3dbf1-6dba-431d-a896-2cda9c9d8b2b
[I 2024-07-08 15:42:44,822] Trial 0 finished with value: 0.9242524916943522 and parameters: {'classifier': 'Logistic Regression', 'C': 304.30400305381227, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 15:42:44,891] Trial 1 finished with value: 0.9054263565891473 and parameters: {'classifier': 'XGBoost', 'n_estimators': 299, 'learning_rate': 0.09527819433389635, 'max_depth': 3, 'colsample_bytree': 0.6220465711979732}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 15:42:44,948] Trial 2 finished with value: 0.9197120708748617 and parameters: {'classifier': 'XGBoost', 'n_estimators': 253, 'learning_rate': 0.07188825737659284, 'max_depth': 3, 'colsample_bytree': 0.5595075258173761}. Best is trial 0 with value

Model saved at /content/exports/modelization/testing_data/iterative/Isolation Forest/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:42:45,045] Trial 3 finished with value: 0.9006644518272425 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 109, 'learning_rate': 0.04986286701850332, 'max_depth': 3, 'subsample': 0.8711740697869608}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 15:42:45,103] Trial 4 finished with value: 0.9149501661129568 and parameters: {'classifier': 'XGBoost', 'n_estimators': 280, 'learning_rate': 0.07165309710199669, 'max_depth': 5, 'colsample_bytree': 0.5963392802005505}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 15:42:45,121] Trial 5 finished with value: 0.9194905869324475 and parameters: {'classifier': 'Logistic Regression', 'C': 633.0921859824541, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 15:42:45,189] Trial 6 finished with value: 0.9149501661129568 and parameters: {'classifier': 'XGBoost', 'n_estimators': 287, 'learning_rate': 0.02663576960888834, 'max_depth': 4, 'colsample_byt

Number of finished trials:  100
Best trial:
  Value:  0.9243632336655594
  Params: 
    classifier: XGBoost
    n_estimators: 143
    learning_rate: 0.07290685597410959
    max_depth: 4
    colsample_bytree: 0.3217829624423636
Iteration 1, Accuracy: 0.95260663507109, Improvement: 0.95260663507109


Parameters: { "use_label_encoder" } are not used.


Processing files:  33%|███▎      | 88/270 [12:44<31:56, 10.53s/it][A[I 2024-07-08 15:42:58,098] A new study created in memory with name: no-name-a9ba9728-c200-45c5-b1d8-96f43c24e6e4
[I 2024-07-08 15:42:58,157] Trial 0 finished with value: 0.9006644518272425 and parameters: {'classifier': 'XGBoost', 'n_estimators': 299, 'learning_rate': 0.0774119838841617, 'max_depth': 3, 'colsample_bytree': 0.6945198042852265}. Best is trial 0 with value: 0.9006644518272425.
[I 2024-07-08 15:42:58,175] Trial 1 finished with value: 0.9242524916943522 and parameters: {'classifier': 'Logistic Regression', 'C': 119.00427466677877, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9242524916943522.
[I 2024-07-08 15:42:58,213] Trial 2 finished with value: 0.9197120708748617 and parameters: {'classifier': 'XGBoost', 'n_estimators': 109, 'learning_rate': 0.07571972330583732, 'max_depth': 3, 'colsample_bytree': 0.5278360213220629}. Best is trial 1 with va

Model saved at /content/exports/modelization/testing_data/most_frequent/LOF/ordinal/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:42:58,357] Trial 4 finished with value: 0.8959025470653377 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 109, 'learning_rate': 0.05198148388842772, 'max_depth': 3, 'subsample': 0.9725365649868944}. Best is trial 1 with value: 0.9242524916943522.
[I 2024-07-08 15:42:58,515] Trial 5 finished with value: 0.9052048726467332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 107, 'max_depth': 23, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9242524916943522.
[I 2024-07-08 15:42:58,553] Trial 6 finished with value: 0.9148394241417497 and parameters: {'classifier': 'XGBoost', 'n_estimators': 148, 'learning_rate': 0.0478639226885634, 'max_depth': 5, 'colsample_bytree': 0.37450799045512956}. Best is trial 1 with value: 0.9242524916943522.
[I 2024-07-08 15:42:58,884] Trial 7 finished with value: 0.9147286821705427 and parameters: {'classifier': 'Random Forest', 'n_estimators': 269, 'max_depth': 13, 'min_samples_

Number of finished trials:  100
Best trial:
  Value:  0.9242524916943522
  Params: 
    classifier: Logistic Regression
    C: 119.00427466677877
    solver: liblinear
Iteration 1, Accuracy: 0.9241706161137441, Improvement: 0.9241706161137441
Model saved at /content/exports/modelization/testing_data/most_frequent/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:43:05,525] Trial 1 finished with value: 0.9055370985603544 and parameters: {'classifier': 'Random Forest', 'n_estimators': 213, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9147286821705427.
[I 2024-07-08 15:43:05,734] Trial 2 finished with value: 0.9150609080841638 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 298, 'learning_rate': 0.051304898726386036, 'max_depth': 3, 'subsample': 0.9015217713367858}. Best is trial 2 with value: 0.9150609080841638.
[I 2024-07-08 15:43:05,993] Trial 3 finished with value: 0.9102990033222591 and parameters: {'classifier': 'Random Forest', 'n_estimators': 210, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9150609080841638.
[I 2024-07-08 15:43:06,009] Trial 4 finished with value: 0.9243632336655592 and parameters: {'classifier': 'Logistic Regression', 'C': 459.5692540424181, 'solver': 'liblinear'}. Best is trial 4 with

Number of finished trials:  100
Best trial:
  Value:  0.9243632336655592
  Params: 
    classifier: Logistic Regression
    C: 459.5692540424181
    solver: liblinear
Iteration 1, Accuracy: 0.9241706161137441, Improvement: 0.9241706161137441
Model saved at /content/exports/modelization/testing_data/most_frequent/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TA

[I 2024-07-08 15:43:11,947] A new study created in memory with name: no-name-42ab09c6-3f8e-4099-958b-fd91c2c7e420
The least populated class in y has only 4 members, which is less than n_splits=5.


Using target column: TARGET


[I 2024-07-08 15:43:12,228] Trial 0 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 264, 'max_depth': 12, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:12,254] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 859.2633451623278, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:12,280] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 583.29614092893, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:12,307] Trial 3 finished w

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Logistic Regression
    C: 859.2633451623278
    solver: newton-cg
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333
Model saved at /content/exports/modelization/testing_data/most_frequent/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/DBSCAN/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:43:18,741] Trial 2 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 219, 'max_depth': 30, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:18,858] Trial 3 finished with value: 0.5999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 180, 'learning_rate': 0.09556086190432803, 'max_depth': 3, 'subsample': 0.9061074308198486}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:18,906] Trial 4 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 194, 'learning_rate': 0.019352303308675312, 'max_depth': 4, 'colsample_bytree': 0.6428841026024084}. Best is trial 0 with value: 0.7999999999999999.
The least 

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Logistic Regression
    C: 979.426836090236
    solver: newton-cg
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333
Model saved at /content/exports/modelization/testing_data/most_frequent/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:25,134] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'XGBoost', 'n_estimators': 251, 'learning_rate': 0.016790071036545225, 'max_depth': 5, 'colsample_bytree': 0.6072544182972881}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:25,335] Trial 4 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 178, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:43:25,360] Trial 5 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 267.2669086093514, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.733333333

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Random Forest
    n_estimators: 146
    max_depth: 15
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333



Processing files:  37%|███▋      | 99/270 [13:27<16:19,  5.73s/it][A[I 2024-07-08 15:43:40,821] A new study created in memory with name: no-name-272528aa-5b96-4a59-8a37-dda45252393c


Model saved at /content/exports/modelization/testing_data/most_frequent/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:43:41,151] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 255, 'max_depth': 20, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:41,167] Trial 1 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 166.36543868390532, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:41,396] Trial 2 finished with value: 0.9462987012987014 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 263, 'learning_rate': 0.038251576682950206, 'max_depth': 4, 'subsample': 0.8395407726942407}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:41,635] Trial 3 finished with value: 0.9391558441558443 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 297, 'learning_rate': 0.059395734486277704, 'max_depth': 4, 'subsample': 0.8120072216

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Random Forest
    n_estimators: 255
    max_depth: 20
    min_samples_split: 9
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147



Processing files:  38%|███▊      | 103/270 [13:44<13:57,  5.01s/it][A[I 2024-07-08 15:43:58,003] A new study created in memory with name: no-name-e5849c67-a2f5-460e-b38e-e48cf79db015


Model saved at /content/exports/modelization/testing_data/most_frequent/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:43:58,263] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 208, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:58,332] Trial 1 finished with value: 0.9462987012987014 and parameters: {'classifier': 'XGBoost', 'n_estimators': 241, 'learning_rate': 0.042118588504349706, 'max_depth': 5, 'colsample_bytree': 0.5691248239626197}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:58,572] Trial 2 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 180, 'max_depth': 18, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:43:58,588] Trial 3 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 804.0055325594543, 'solver': 'liblinear'}. Best is trial 0 with va

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Random Forest
    n_estimators: 208
    max_depth: 18
    min_samples_split: 8
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147



Processing files:  39%|███▊      | 104/270 [14:01<18:49,  6.80s/it][A[I 2024-07-08 15:44:14,817] A new study created in memory with name: no-name-eb64fa95-c2d5-4318-a455-6f7018b134bc


Model saved at /content/exports/modelization/testing_data/most_frequent/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:44:15,107] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 232, 'max_depth': 14, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:44:15,224] Trial 1 finished with value: 0.9355194805194804 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 152, 'learning_rate': 0.09199741034538694, 'max_depth': 3, 'subsample': 0.9266256512561086}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:44:15,534] Trial 2 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 274, 'max_depth': 16, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 15:44:15,604] Trial 3 finished with value: 0.9427272727272727 and parameters: {'classifier': 'XGBoost', 'n_estimators': 279, 'learning_rate': 0.06958419621122293, 'max_depth': 3, 'cols

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Random Forest
    n_estimators: 232
    max_depth: 14
    min_samples_split: 10
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147



Processing files:  39%|███▉      | 105/270 [14:18<23:50,  8.67s/it][A[I 2024-07-08 15:44:32,049] A new study created in memory with name: no-name-3f51eb7a-d4cb-406d-a8a3-9733d3d28517
[I 2024-07-08 15:44:32,218] Trial 0 finished with value: 0.9166666666666666 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 167, 'learning_rate': 0.08608344378133177, 'max_depth': 4, 'subsample': 0.9108967411811554}. Best is trial 0 with value: 0.9166666666666666.


Model saved at /content/exports/modelization/testing_data/most_frequent/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:44:32,477] Trial 1 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 216, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 15:44:32,525] Trial 2 finished with value: 0.9416666666666667 and parameters: {'classifier': 'XGBoost', 'n_estimators': 134, 'learning_rate': 0.018950079319996434, 'max_depth': 4, 'colsample_bytree': 0.6021072100035627}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 15:44:32,594] Trial 3 finished with value: 0.9333333333333333 and parameters: {'classifier': 'XGBoost', 'n_estimators': 297, 'learning_rate': 0.0889101376971093, 'max_depth': 3, 'colsample_bytree': 0.5698762388939679}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 15:44:32,644] Trial 4 finished with value: 0.9416666666666667 and parameters: {'classifier': 'XGBoost', 'n_estimators': 187, 'learning_rate': 0.011172583521021667, 

Number of finished trials:  100
Best trial:
  Value:  0.9416666666666668
  Params: 
    classifier: XGBoost
    n_estimators: 204
    learning_rate: 0.06361885506709379
    max_depth: 5
    colsample_bytree: 0.5294839111633497


Parameters: { "use_label_encoder" } are not used.


Processing files:  39%|███▉      | 106/270 [14:29<24:57,  9.13s/it][A[I 2024-07-08 15:44:42,996] A new study created in memory with name: no-name-71782822-16ac-437b-b641-6d3ad615547f
[I 2024-07-08 15:44:43,045] Trial 0 finished with value: 0.9416666666666667 and parameters: {'classifier': 'XGBoost', 'n_estimators': 186, 'learning_rate': 0.047360635997733415, 'max_depth': 3, 'colsample_bytree': 0.3370627234139965}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-07-08 15:44:43,094] Trial 1 finished with value: 0.9416666666666667 and parameters: {'classifier': 'XGBoost', 'n_estimators': 182, 'learning_rate': 0.012189268434265599, 'max_depth': 4, 'colsample_bytree': 0.4048550502375461}. Best is trial 0 with value: 0.9416666666666667.


Iteration 1, Accuracy: 0.9833333333333333, Improvement: 0.9833333333333333
Model saved at /content/exports/modelization/testing_data/ffill/LOF/ordinal/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:44:43,394] Trial 2 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 228, 'max_depth': 14, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-07-08 15:44:43,602] Trial 3 finished with value: 0.925 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 235, 'learning_rate': 0.03313981076773259, 'max_depth': 3, 'subsample': 0.9124929773924644}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-07-08 15:44:43,862] Trial 4 finished with value: 0.925 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 275, 'learning_rate': 0.08304921379600753, 'max_depth': 5, 'subsample': 0.8017837454733371}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-07-08 15:44:43,909] Trial 5 finished with value: 0.9333333333333333 and parameters: {'classifier': 'XGBoost', 'n_estimators': 207, 'learning_rate': 0.06139191865627496, 'max_depth': 4, 'col

Number of finished trials:  100
Best trial:
  Value:  0.9416666666666668
  Params: 
    classifier: XGBoost
    n_estimators: 274
    learning_rate: 0.028830450113121756
    max_depth: 3
    colsample_bytree: 0.4320192718085206
Iteration 1, Accuracy: 0.9833333333333333, Improvement: 0.9833333333333333


Parameters: { "use_label_encoder" } are not used.


Processing files:  40%|███▉      | 107/270 [14:40<26:04,  9.60s/it][A[I 2024-07-08 15:44:54,223] A new study created in memory with name: no-name-850d193d-94c8-46b6-b08c-d0c65bc231ac
[I 2024-07-08 15:44:54,240] Trial 0 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 514.5084614411268, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9333333333333332.
[I 2024-07-08 15:44:54,299] Trial 1 finished with value: 0.9416666666666668 and parameters: {'classifier': 'XGBoost', 'n_estimators': 285, 'learning_rate': 0.06519120172677845, 'max_depth': 5, 'colsample_bytree': 0.6676552111391816}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:54,325] Trial 2 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 983.245146316276, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:54,340] Trial 3

Model saved at /content/exports/modelization/testing_data/ffill/LOF/label/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:44:54,680] Trial 4 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 279, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:54,707] Trial 5 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 122.75092327730461, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:54,733] Trial 6 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 87.64646696046715, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:55,003] Trial 7 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 214, 'max_depth': 11, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9416666666666668.
[I 2024-07-08 15:44:55,344] Trial 8 fin

Number of finished trials:  100
Best trial:
  Value:  0.9416666666666668
  Params: 
    classifier: XGBoost
    n_estimators: 285
    learning_rate: 0.06519120172677845
    max_depth: 5
    colsample_bytree: 0.6676552111391816
Iteration 1, Accuracy: 0.9583333333333334, Improvement: 0.9583333333333334


Parameters: { "use_label_encoder" } are not used.


Processing files:  40%|████      | 108/270 [14:56<29:40, 10.99s/it][A
Processing files:  41%|████      | 111/270 [14:56<14:19,  5.41s/it][A[I 2024-07-08 15:45:09,673] A new study created in memory with name: no-name-3a02d418-9513-4559-8a60-f0fb40a67389


Model saved at /content/exports/modelization/testing_data/ffill/LOF/onehot/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Logistic Regression
    C: 771.1733462239188
    solver: liblinear
Iteration 1, Accuracy: 1.0, Improvement: 1.0
Model saved at /content/exports/modelization/testing_data/ffill/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/DBSCAN/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:45:16,407] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 267, 'learning_rate': 0.08235136631782412, 'max_depth': 4, 'subsample': 0.9795439846917645}. Best is trial 0 with value: 0.9333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:45:16,567] Trial 2 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 247, 'learning_rate': 0.04210898499947989, 'max_depth': 5, 'subsample': 0.9456130126902011}. Best is trial 0 with value: 0.9333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:45:16,593] Trial 3 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 427.11096288513016, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9333333333333332.
The least populated class in y has only 4 m

Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Logistic Regression
    C: 788.4628095170345
    solver: lbfgs
Iteration 1, Accuracy: 1.0, Improvement: 1.0
Model saved at /content/exports/modelization/testing_data/ffill/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/DBSCAN/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:45:22,786] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 139, 'max_depth': 19, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:45:22,812] Trial 3 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 645.3566933406136, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:45:22,972] Trial 4 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 219, 'learning_rate': 0.06041785300173938, 'max_depth': 5, 'subsample': 0.8360340591388253}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 665.4009480800258
    solver: newton-cg
Iteration 1, Accuracy: 1.0, Improvement: 1.0
Model saved at /content/exports/modelization/testing_data/ffill/DBSCAN/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing fil

[I 2024-07-08 15:45:29,034] Trial 0 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 123, 'max_depth': 26, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 15:45:29,081] Trial 1 finished with value: 0.9595102040816326 and parameters: {'classifier': 'XGBoost', 'n_estimators': 209, 'learning_rate': 0.0198457985571315, 'max_depth': 4, 'colsample_bytree': 0.37101990408727564}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 15:45:29,150] Trial 2 finished with value: 0.9595102040816326 and parameters: {'classifier': 'XGBoost', 'n_estimators': 243, 'learning_rate': 0.026512035752477972, 'max_depth': 4, 'colsample_bytree': 0.5270236043428801}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 15:45:29,348] Trial 3 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 168, 'max_depth': 17, 'min_samples_sp

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Random Forest
    n_estimators: 123
    max_depth: 26
    min_samples_split: 3
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9676113360323887, Improvement: 0.9676113360323887



Processing files:  44%|████▎     | 118/270 [15:30<11:56,  4.71s/it][A[I 2024-07-08 15:45:44,184] A new study created in memory with name: no-name-cb2dbdd6-5b76-4d9c-bacd-9788dc0d8c81
[I 2024-07-08 15:45:44,210] Trial 0 finished with value: 0.9554285714285715 and parameters: {'classifier': 'Logistic Regression', 'C': 191.00365050991033, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9554285714285715.
[I 2024-07-08 15:45:44,268] Trial 1 finished with value: 0.9554285714285715 and parameters: {'classifier': 'XGBoost', 'n_estimators': 205, 'learning_rate': 0.038200668445155905, 'max_depth': 4, 'colsample_bytree': 0.5118221265690082}. Best is trial 0 with value: 0.9554285714285715.


Model saved at /content/exports/modelization/testing_data/ffill/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:45:44,527] Trial 2 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 164, 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:45:44,797] Trial 3 finished with value: 0.9432653061224489 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 198, 'learning_rate': 0.07487742700838805, 'max_depth': 5, 'subsample': 0.858819898068635}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:45:44,934] Trial 4 finished with value: 0.9433469387755103 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 101, 'learning_rate': 0.05753040663908062, 'max_depth': 5, 'subsample': 0.8489905914606432}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:45:45,223] Trial 5 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 242, 'max_depth': 15, 'min_sample

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Random Forest
    n_estimators: 164
    max_depth: 15
    min_samples_split: 5
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9595141700404858, Improvement: 0.9595141700404858



Processing files:  44%|████▍     | 119/270 [15:53<19:23,  7.70s/it][A[I 2024-07-08 15:46:06,451] A new study created in memory with name: no-name-d7f5e9e0-d3fc-48f2-a021-e7b4b2b25b6f
[I 2024-07-08 15:46:06,467] Trial 0 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Logistic Regression', 'C': 999.0470179663406, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9514285714285714.
[I 2024-07-08 15:46:06,493] Trial 1 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Logistic Regression', 'C': 884.1623197693258, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9514285714285714.


Model saved at /content/exports/modelization/testing_data/ffill/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:46:06,853] Trial 2 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 293, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:46:07,052] Trial 3 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 150, 'max_depth': 15, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:46:07,220] Trial 4 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 229, 'learning_rate': 0.05976264536607535, 'max_depth': 3, 'subsample': 0.8224978136745373}. Best is trial 2 with value: 0.9595102040816326.
[I 2024-07-08 15:46:07,236] Trial 5 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Logistic Regression', 'C': 65.24724472225176, 'solver': 'liblinear'}. Best is trial 2 with 

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Random Forest
    n_estimators: 293
    max_depth: 17
    min_samples_split: 5
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9595141700404858, Improvement: 0.9595141700404858



Processing files:  44%|████▍     | 120/270 [16:11<24:42,  9.88s/it][A[I 2024-07-08 15:46:25,247] A new study created in memory with name: no-name-716c67c2-2820-406f-9ece-f580c737b6fe
[I 2024-07-08 15:46:25,274] Trial 0 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 469.6825216803614, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 15:46:25,289] Trial 1 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 926.7693169445687, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9227272727272726.


Model saved at /content/exports/modelization/testing_data/ffill/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:46:25,518] Trial 2 finished with value: 0.9 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 256, 'learning_rate': 0.02927949770457166, 'max_depth': 3, 'subsample': 0.8556123820466277}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 15:46:25,767] Trial 3 finished with value: 0.8954545454545455 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 255, 'learning_rate': 0.08539632657868353, 'max_depth': 4, 'subsample': 0.9894805971266928}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 15:46:26,015] Trial 4 finished with value: 0.9045454545454545 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 220, 'learning_rate': 0.04599033037328984, 'max_depth': 5, 'subsample': 0.8699933884946235}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 15:46:26,041] Trial 5 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 633.6690207837262, 'solve

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727271
  Params: 
    classifier: Logistic Regression
    C: 973.0058351768657
    solver: lbfgs
Iteration 1, Accuracy: 0.9227272727272727, Improvement: 0.9227272727272727
Model saved at /content/exports/modelization/testing_data/constant_0/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:46:32,639] Trial 2 finished with value: 0.8863636363636364 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 163, 'learning_rate': 0.01697598786084148, 'max_depth': 5, 'subsample': 0.9578389476091478}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 15:46:32,687] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'XGBoost', 'n_estimators': 160, 'learning_rate': 0.029629554925086413, 'max_depth': 3, 'colsample_bytree': 0.40580768631979125}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 15:46:32,947] Trial 4 finished with value: 0.8954545454545455 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 272, 'learning_rate': 0.03434583556750266, 'max_depth': 4, 'subsample': 0.9341158771574788}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 15:46:32,995] Trial 5 finished with value: 0.9181818181818182 and parameters: {'classifier': 'XGBoost', 'n_estimators': 119, 'learni

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727271
  Params: 
    classifier: Logistic Regression
    C: 981.876031931863
    solver: newton-cg
Iteration 1, Accuracy: 0.9227272727272727, Improvement: 0.9227272727272727
Model saved at /content/exports/modelization/testing_data/constant_0/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:46:39,366] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 124, 'max_depth': 16, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:46:39,494] Trial 4 finished with value: 0.8954545454545455 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 135, 'learning_rate': 0.06327617474284916, 'max_depth': 5, 'subsample': 0.9698730152126752}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:46:39,542] Trial 5 finished with value: 0.9136363636363637 and parameters: {'classifier': 'XGBoost', 'n_estimators': 132, 'learning_rate': 0.04091893513229319, 'max_depth': 4, 'colsample_bytree': 0.5302274236237849}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 15:46:39,568] Trial 6 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 331.5829637516974, 'solver': 'lbfgs'}. B

Number of finished trials:  100
Best trial:
  Value:  0.9181818181818182
  Params: 
    classifier: Logistic Regression
    C: 757.180859988354
    solver: liblinear
Iteration 1, Accuracy: 0.9363636363636364, Improvement: 0.9363636363636364
Model saved at /content/exports/modelization/testing_data/constant_0/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it co

The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:46:47,542] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 290, 'max_depth': 12, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:46:47,581] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 121, 'learning_rate': 0.09125427165116919, 'max_depth': 4, 'colsample_bytree': 0.48812503364801196}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:46:47,749] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 150, 'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 2}. Bes

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 218
    max_depth: 27
    min_samples_split: 3
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667



Processing files:  47%|████▋     | 127/270 [16:55<18:05,  7.59s/it][A[I 2024-07-08 15:47:08,885] A new study created in memory with name: no-name-9f1e2ea8-d347-4195-9303-dc51880666cb


Model saved at /content/exports/modelization/testing_data/constant_0/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:09,105] Trial 0 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 180, 'max_depth': 13, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:09,153] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 165, 'learning_rate': 0.0773732963692518, 'max_depth': 3, 'colsample_bytree': 0.47962672335785606}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:09,340] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 155, 'max_depth': 30, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 180
    max_depth: 13
    min_samples_split: 7
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667



Processing files:  47%|████▋     | 128/270 [17:13<23:11,  9.80s/it][A[I 2024-07-08 15:47:26,457] A new study created in memory with name: no-name-493023f7-5887-4d0f-beb2-f414fd0dce9d


Model saved at /content/exports/modelization/testing_data/constant_0/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:26,717] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 247, 'max_depth': 16, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:26,765] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'XGBoost', 'n_estimators': 300, 'learning_rate': 0.08052742122929736, 'max_depth': 5, 'colsample_bytree': 0.6830854349966171}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 15:47:26,780] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 852.5488021588443, 'solver': 'liblinear'}. Best is trial 0 with value: 0.733333

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Gradient Boosting
    n_estimators: 161
    learning_rate: 0.05354621781686177
    max_depth: 3
    subsample: 0.802501566583255
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333
Model saved at /content/exports/modelization/testing_data/constant_0/DBSCAN/onehot/TARGET/best_Gradient Boosting_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/onehot/application_train.csv


[I 2024-07-08 15:47:40,188] A new study created in memory with name: no-name-c07814a5-2979-478f-8a86-20b40591ead0
[I 2024-07-08 15:47:40,216] Trial 0 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 32.64976559597294, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9533116883116884.
[I 2024-07-08 15:47:40,241] Trial 1 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 971.102685075915, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9533116883116884.


Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Isolation Forest/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:47:40,541] Trial 2 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 270, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.9568831168831169.
[I 2024-07-08 15:47:40,769] Trial 3 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 176, 'max_depth': 22, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9568831168831169.
[I 2024-07-08 15:47:40,957] Trial 4 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 153, 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.9568831168831169.
[I 2024-07-08 15:47:40,993] Trial 5 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 758.8227097623962, 'solver': 'newton-cg'}. Best is trial 2 with value: 0.9568831168831169.

Number of finished trials:  100
Best trial:
  Value:  0.9568831168831169
  Params: 
    classifier: Random Forest
    n_estimators: 270
    max_depth: 18
    min_samples_split: 7
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9568345323741008, Improvement: 0.9568345323741008



Processing files:  49%|████▉     | 133/270 [17:47<17:17,  7.57s/it][A[I 2024-07-08 15:48:01,210] A new study created in memory with name: no-name-2dbb5a17-65ed-47e5-99aa-1880512ccbfd
[I 2024-07-08 15:48:01,270] Trial 0 finished with value: 0.9568831168831169 and parameters: {'classifier': 'XGBoost', 'n_estimators': 281, 'learning_rate': 0.028024985154122835, 'max_depth': 3, 'colsample_bytree': 0.36295005827563004}. Best is trial 0 with value: 0.9568831168831169.


Model saved at /content/exports/modelization/testing_data/constant_0/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:48:01,581] Trial 1 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 259, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 15:48:01,606] Trial 2 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 113.61760027026048, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 15:48:01,703] Trial 3 finished with value: 0.9425324675324676 and parameters: {'classifier': 'Gradient Boosting', 'n_estimators': 120, 'learning_rate': 0.09971602258085055, 'max_depth': 3, 'subsample': 0.9978928937992644}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 15:48:01,729] Trial 4 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 581.0431468184725, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-0

Number of finished trials:  100
Best trial:
  Value:  0.9568831168831169
  Params: 
    classifier: XGBoost
    n_estimators: 281
    learning_rate: 0.028024985154122835
    max_depth: 3
    colsample_bytree: 0.36295005827563004


Parameters: { "use_label_encoder" } are not used.


Processing files:  50%|████▉     | 134/270 [18:06<21:27,  9.47s/it][A

Iteration 1, Accuracy: 0.9568345323741008, Improvement: 0.9568345323741008
Model saved at /content/exports/modelization/testing_data/constant_0/Isolation Forest/label/TARGET/best_XGBoost_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 15:48:19,890] A new study created in memory with name: no-name-cb7aca51-9bbc-4b0f-9d7d-92646e4161b0
[I 2024-07-08 15:48:19,918] Trial 0 finished with value: 0.9496753246753247 and parameters: {'classifier': 'Logistic Regression', 'C': 698.9728402667544, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9496753246753247.
[I 2024-07-08 15:48:19,967] Trial 1 finished with value: 0.9568831168831169 and parameters: {'classifier': 'XGBoost', 'n_estimators': 160, 'learning_rate': 0.013307282015798286, 'max_depth': 3, 'colsample_bytree': 0.6853845908833635}. Best is trial 1 with value: 0.9568831168831169.
[I 2024-07-08 15:48:19,993] Trial 2 finished with value: 0.9496753246753247 and parameters: {'classifier': 'Logistic Regression', 'C': 429.19173762386697, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9568831168831169.
[I 2024-07-08 15:48:20,009] Trial 3 finished with value: 0.9496753246753247 and parameters: {'classifier': 'Logistic Regression', 'C': 684.4407556786003, 

Number of finished trials:  100
Best trial:
  Value:  0.9568831168831169
  Params: 
    classifier: XGBoost
    n_estimators: 160
    learning_rate: 0.013307282015798286
    max_depth: 3
    colsample_bytree: 0.6853845908833635


Parameters: { "use_label_encoder" } are not used.


Processing files:  50%|█████     | 135/270 [18:20<18:20,  8.16s/it][A

Iteration 1, Accuracy: 0.9568345323741008, Improvement: 0.9568345323741008
Model saved at /content/exports/modelization/testing_data/constant_0/Isolation Forest/onehot/TARGET/best_XGBoost_model.pkl



Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encod

In [17]:
# Manual spot check: load one exported training file (path components: bfill / DBSCAN / ordinal).
test_1 = pd.read_csv(
    "./exports/manual_check_patch/testing_data/bfill/DBSCAN/ordinal/application_train.csv"
)

In [18]:
# Peek at the first rows of the TARGET column of the file loaded above.
test_1.loc[:, "TARGET"].head()

0    0
1    1
2    0
3    0
4    0
Name: TARGET, dtype: int64

# Évaluation des Modèles

After training the models, the `evaluate_models` function is used to evaluate the performance of each saved model. It calculates several performance metrics (accuracy, precision, recall, F1 score, ROC AUC) and also computes the business score using the formula below.

Business Score Calculation
The business score is calculated using the following formula:

$$
\text{Business Score} = (\text{Benefit} \times \text{PPV}) - (\text{Cost} \times \text{False Positive Rate})
$$

Where:

Benefit: The benefit obtained from a correct prediction.
PPV (Positive Predictive Value): $\dfrac{TP}{TP + FP}$, where TP is the number of true positives and FP is the number of false positives.
Cost: The cost associated with an incorrect prediction.
False Positive Rate: $\dfrac{FP}{\text{Total Negatives}}$, where Total Negatives $= FP + TN$ (TN is the number of true negatives).


In [53]:
# Make sure the plot_roc_curve and plot_confusion_matrix functions exist
def plot_roc_curve(y_test, y_pred_proba, model_name, output_dir):
    """Draw the ROC curve for one model and save it as a PNG.

    Args:
        y_test: True labels.
        y_pred_proba: Predicted scores/probabilities for the positive class.
        model_name (str): Used in the plot title and in the output file name.
        output_dir (str): Existing directory where
            ``roc_curve_<model_name>.png`` is written.
    """
    # Local import: RocCurveDisplay is not part of the notebook's import cell.
    from sklearn.metrics import RocCurveDisplay

    # Use a dedicated figure instead of the implicit pyplot "current figure",
    # and always close it: this function is called in a loop whose errors are
    # swallowed, so a failing savefig must not leave figures open.
    fig, ax = plt.subplots()
    try:
        RocCurveDisplay.from_predictions(y_test, y_pred_proba, ax=ax)
        ax.set_title(f'ROC Curve - {model_name}')
        fig.savefig(os.path.join(output_dir, f'roc_curve_{model_name}.png'))
    finally:
        plt.close(fig)

def plot_confusion_matrix(y_test, y_pred, model_name, output_dir):
    """Draw the confusion matrix for one model as a heatmap and save it as a PNG.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
        model_name (str): Used in the plot title and in the output file name.
        output_dir (str): Existing directory where
            ``confusion_matrix_<model_name>.png`` is written.
    """
    cm = confusion_matrix(y_test, y_pred)

    # Draw on a dedicated figure: sns.heatmap otherwise targets whatever axes
    # happen to be current, overdrawing (and then closing) an unrelated figure.
    fig, ax = plt.subplots()
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(f'Confusion Matrix - {model_name}')
        ax.set_ylabel('Actual')
        ax.set_xlabel('Predicted')
        fig.savefig(os.path.join(output_dir, f'confusion_matrix_{model_name}.png'))
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)

def _compute_binary_metrics(y_test, y_pred, y_pred_proba, benefit, cost):
    """Compute the standard metrics and the business score for 0/1 labels.

    Returns a dict with the keys ``accuracy``, ``precision``, ``recall``,
    ``f1_score``, ``roc_auc`` and ``score_metier``.
    """
    # Local import: these three metrics are missing from the notebook's import cell.
    from sklearn.metrics import precision_score, recall_score, f1_score

    # Force a 2x2 matrix so the unpacking below also works when a class is absent.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, _fn, tp = cm.ravel()

    # PPV (positive predictive value) and false positive rate, guarded against 0/0.
    vpp = tp / (tp + fp) if (tp + fp) > 0 else 0
    taux_fp = fp / (fp + tn) if (fp + tn) > 0 else 0

    # ROC AUC is undefined (roc_auc_score raises) when only one class is present.
    both_classes_present = len(np.unique(y_test)) > 1

    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'f1_score': f1_score(y_test, y_pred, zero_division=0),
        'roc_auc': roc_auc_score(y_test, y_pred_proba) if both_classes_present else float('nan'),
        'score_metier': (benefit * vpp) - (cost * taux_fp),
    }


def evaluate_models(base_path, output_folder, target_columns, chunk_size=1000, benefit=1.0, cost=1.0):
    """Evaluate every saved ``.pkl`` model found under ``base_path``.

    For each model file whose directory path contains one of ``target_columns``,
    the function looks for ``application_test.csv`` in the model's own directory,
    predicts on it chunk by chunk, and computes one set of metrics over the
    whole file (accuracy, precision, recall, F1, ROC AUC and the business score
    ``benefit * PPV - cost * FPR``). ROC-curve and confusion-matrix plots are
    saved next to the model; the metrics table is written to
    ``<output_folder>/model_performance_metrics.csv``.

    Args:
        base_path (str): Root directory walked recursively for ``.pkl`` files.
        output_folder (str): Directory for the metrics CSV (created if missing).
        target_columns (list[str]): Candidate target names; the first one that
            appears in the model's directory path is used.
        chunk_size (int): Number of CSV rows read per chunk.
        benefit (float): Weight applied to the PPV in the business score.
        cost (float): Weight applied to the false positive rate.

    Returns:
        pandas.DataFrame: One row per evaluated model (empty if none could be
        evaluated). Columns: model, target, accuracy, precision, recall,
        f1_score, roc_auc, score_metier, model_path.

    Notes:
        Errors raised while processing a model are printed and the model is
        skipped, so one broken file does not stop the run.
        NOTE(review): models are unpickled with joblib; only point ``base_path``
        at trusted files.
    """
    # Create the output folder if it does not exist
    os.makedirs(output_folder, exist_ok=True)

    print(f"Output folder: {output_folder}")
    performance_metrics = []

    # Traverse base_path and find .pkl files
    for root, _dirs, files in os.walk(base_path):
        for file in files:
            if not file.endswith('.pkl'):
                continue

            model_path = os.path.join(root, file)
            name_parts = file.split('_')
            model_name = name_parts[1] if len(name_parts) > 1 else "unknown_model"
            target_column = next((col for col in target_columns if col in root), None)
            if not target_column:
                continue

            print(f"Analyzing model: {model_name} for target: {target_column}")
            try:
                # Check for the test data first so models without any are not unpickled.
                test_file_path = os.path.join(os.path.dirname(model_path), 'application_test.csv')
                if not os.path.exists(test_file_path):
                    print(f"Test file not found: {test_file_path}")
                    continue

                best_pipeline = joblib.load(model_path)

                # Predict chunk by chunk, but score once over the whole file:
                # per-chunk metrics would yield several rows per model and
                # plots that overwrite each other.
                y_true_parts, y_pred_parts, y_proba_parts = [], [], []
                with pd.read_csv(test_file_path, chunksize=chunk_size) as reader:
                    for chunk in reader:
                        if target_column not in chunk.columns:
                            print(f"Target column {target_column} not in chunk columns")
                            break  # every chunk shares the same columns

                        X_test = chunk.drop(target_column, axis=1)
                        y_true_parts.append(chunk[target_column].to_numpy())
                        y_pred_parts.append(np.ravel(best_pipeline.predict(X_test)))
                        y_proba_parts.append(np.asarray(best_pipeline.predict_proba(X_test))[:, 1])

                if not y_true_parts:
                    continue

                y_test = np.concatenate(y_true_parts)
                y_pred = np.concatenate(y_pred_parts)
                y_pred_proba = np.concatenate(y_proba_parts)

                # Performance metrics and business score
                metrics = _compute_binary_metrics(y_test, y_pred, y_pred_proba, benefit, cost)
                performance_metrics.append({
                    'model': model_name,
                    'target': target_column,
                    **metrics,
                    # Lets rows from different preprocessing folders be told apart.
                    'model_path': model_path,
                })

                print(f"Accuracy: {metrics['accuracy']}")
                print(f"Precision: {metrics['precision']}")
                print(f"Recall: {metrics['recall']}")
                print(f"F1 Score: {metrics['f1_score']}")
                print(f"ROC AUC: {metrics['roc_auc']}")
                print(f"Score Métier: {metrics['score_metier']}")

                # Plots are saved next to the model file
                if len(np.unique(y_test)) > 1:
                    plot_roc_curve(y_test, y_pred_proba, model_name, root)
                plot_confusion_matrix(y_test, y_pred, model_name, root)
            except Exception as e:
                print(f"Error processing model {model_path}: {e}")

    if performance_metrics:
        # Convert the collected metrics to a DataFrame and export them
        performance_df = pd.DataFrame(performance_metrics)
        performance_df.to_csv(os.path.join(output_folder, 'model_performance_metrics.csv'), index=False)
        print("Performance metrics exported.")
        return performance_df
    else:
        print("No performance metrics to export.")
        return pd.DataFrame()

In [54]:
# Keep the returned metrics table: the model-selection and summary steps take it as input.
performance_df = evaluate_models(
    base_path=LOCAL_EXPORT_MODELIZATION_FOLDER_PATH,
    output_folder=LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH,
    target_columns=TARGET_COLUMNS,  # from the Settings cell instead of a hard-coded list
    chunk_size=GENERAL_CHUNK_SIZE,
    benefit=1.0,
    cost=1.0,
)
performance_df

Output folder: /content/exports/modelization_evaluation
Analyzing model: Logistic Regression for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/LOF/ordinal/TARGET/application_test.csv
Analyzing model: Logistic Regression for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/LOF/label/TARGET/application_test.csv
Analyzing model: Logistic Regression for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/LOF/onehot/TARGET/application_test.csv
Analyzing model: Random Forest for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/DBSCAN/ordinal/TARGET/application_test.csv
Analyzing model: Random Forest for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/DBSCAN/label/TARGET/application_test.csv
Analyzing model: Gradient Boosting for target: TARGET
Test file not found: /content/exports/modelization/testing_data/mean/DBSCAN/

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

# Sélection du Modèle Final et Hyperparameter Tuning

After evaluating the models, the best performing model can be selected and further tuned using hyperparameter optimization.

def select_and_tune_best_model(performance_df, base_path, output_folder, target_column):
    """Pick the model with the highest ROC AUC, re-tune it with Optuna and save it.

    Args:
        performance_df (pandas.DataFrame): Metrics table with at least the
            columns ``model`` and ``roc_auc``.
        base_path (str): Directory containing ``application_train.csv``.
        output_folder (str): Root directory under which the tuned pipeline is
            saved as ``<name>/<target_column>/best_<name>_model.pkl``.
        target_column (str): Name of the target column in the training file.

    Returns:
        sklearn.pipeline.Pipeline: The fitted single-step pipeline.

    Raises:
        ValueError: If ``performance_df`` is empty.

    Notes:
        Relies on the notebook-level ``objective`` function and ``models``
        registry, which are defined outside this function.
        NOTE(review): ``objective`` appears to sample the classifier itself
        (the best trial carries a ``classifier`` entry), so the tuned classifier
        can differ from the one selected by ROC AUC. The parameters are applied
        to the classifier the trial actually used; confirm whether the search
        should instead be restricted to the selected model.
    """
    # Local import: clone is not part of the notebook's import cell.
    from sklearn.base import clone

    if performance_df.empty:
        raise ValueError("performance_df is empty: there is no evaluated model to select.")

    best_model_row = performance_df.loc[performance_df['roc_auc'].idxmax()]
    best_model_name = best_model_row['model']
    print(f"Best model: {best_model_name} with ROC AUC: {best_model_row['roc_auc']}")

    # Read the training file once and split it into features and target.
    train_df = pd.read_csv(os.path.join(base_path, 'application_train.csv'))
    X_train = train_df.drop(target_column, axis=1)
    y_train = train_df[target_column]

    # Hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=100)

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('  Value: ', trial.value)
    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    # Apply the parameters to the classifier they were sampled for; applying
    # them to a different estimator either fails or silently mis-configures it.
    tuned_model_name = trial.params.get('classifier', best_model_name)
    if tuned_model_name != best_model_name:
        print(f"Best trial used {tuned_model_name} instead of {best_model_name}; saving {tuned_model_name}.")
    best_params = {k: v for k, v in trial.params.items() if k != 'classifier'}

    # Clone so the shared `models` registry entry is not mutated in place.
    best_classifier = clone(models[tuned_model_name]['model'])
    best_classifier.set_params(**best_params)

    best_pipeline = Pipeline(steps=[
        ('classifier', best_classifier)
    ])
    best_pipeline.fit(X_train, y_train)

    # The previous pipeline is not loaded: its contents were never used, and
    # the file does not have to exist at this location.
    model_path = os.path.join(output_folder, tuned_model_name, target_column, f'best_{tuned_model_name}_model.pkl')
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(best_pipeline, model_path)
    print(f"Model saved at {model_path}")

    return best_pipeline

# Interprétabilité et Explicabilité
Using SHAP to explain the predictions of the model.

In [None]:
def explain_model(best_pipeline, X_train, output_folder):
    """Explain a fitted tree-based pipeline with SHAP and save two plots.

    Writes ``shap_summary_plot.png`` (global feature importance) and
    ``shap_force_plot.png`` (explanation of the first row of ``X_train``)
    into ``output_folder``.

    Args:
        best_pipeline: Fitted pipeline exposing ``named_steps['classifier']``.
            The classifier must be supported by ``shap.TreeExplainer``.
        X_train (pandas.DataFrame): Feature rows to explain.
        output_folder (str): Directory for the PNG files (created if missing).
    """
    os.makedirs(output_folder, exist_ok=True)

    explainer = shap.TreeExplainer(best_pipeline.named_steps['classifier'])
    shap_values = explainer.shap_values(X_train)

    # Depending on the model and SHAP version, the values come back per class
    # (a list of arrays or a 3-D array). Keep the positive class (index 1 when
    # available) so both plots receive a 2-D (rows, features) array.
    if isinstance(shap_values, list):
        class_index = min(1, len(shap_values) - 1)
        shap_values = shap_values[class_index]
    elif np.ndim(shap_values) == 3:
        class_index = min(1, np.shape(shap_values)[2] - 1)
        shap_values = shap_values[:, :, class_index]
    else:
        class_index = 0
    base_values = np.ravel(explainer.expected_value)
    expected_value = base_values[min(class_index, base_values.size - 1)]

    # show=False keeps the figure alive; with the default show=True the plot is
    # displayed and cleared, and savefig would write a blank image.
    shap.summary_plot(shap_values, X_train, show=False)
    plt.savefig(os.path.join(output_folder, 'shap_summary_plot.png'), bbox_inches='tight')
    plt.close()

    # Kept so interactive (JavaScript) SHAP plots in later cells still render.
    shap.initjs()
    # matplotlib=True renders a real figure; the default JavaScript output
    # cannot be captured by plt.savefig.
    shap.force_plot(expected_value, shap_values[0, :], X_train.iloc[0, :], matplotlib=True, show=False)
    plt.savefig(os.path.join(output_folder, 'shap_force_plot.png'), bbox_inches='tight')
    plt.close()

In [None]:
# Summary

In [None]:
def summarize_results(performance_df, output_folder=None):
    """Print and save a text summary of the model with the highest ROC AUC.

    Args:
        performance_df (pandas.DataFrame): Metrics table with the columns
            ``model``, ``accuracy``, ``precision``, ``recall``, ``f1_score``,
            ``roc_auc`` and a business score stored as ``business_score`` or
            ``score_metier``.
        output_folder (str | None): Directory where ``model_summary.txt`` is
            written (created if missing). Defaults to the notebook setting
            ``LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH``.

    Returns:
        None. Nothing is written when ``performance_df`` is empty.
    """
    if performance_df is None or performance_df.empty:
        print("No performance metrics to summarize.")
        return

    if output_folder is None:
        # Fall back to the evaluation export folder from the Settings cell.
        output_folder = LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH

    best_model_row = performance_df.loc[performance_df['roc_auc'].idxmax()]
    best_model_name = best_model_row['model']

    # evaluate_models stores the business score under 'score_metier';
    # accept either column name.
    business_score = best_model_row.get('business_score', best_model_row.get('score_metier'))

    summary = f"Best model: {best_model_name}\n"
    summary += f"Accuracy: {best_model_row['accuracy']}\n"
    summary += f"Precision: {best_model_row['precision']}\n"
    summary += f"Recall: {best_model_row['recall']}\n"
    summary += f"F1 Score: {best_model_row['f1_score']}\n"
    summary += f"ROC AUC: {best_model_row['roc_auc']}\n"
    summary += f"Business Score: {business_score}\n"

    print(summary)

    os.makedirs(output_folder, exist_ok=True)
    with open(os.path.join(output_folder, 'model_summary.txt'), 'w', encoding='utf-8') as f:
        f.write(summary)

    print("Summary saved.")