# Imports

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, roc_curve, auc
import joblib
import optuna
import mlflow
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from tqdm import tqdm

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


# Settings

In [2]:
# ---------------------------------------------------------------------------
# Export locations
# ---------------------------------------------------------------------------
# Root folder; every other export path below is derived from it.
LOCAL_EXPORT_FOLDER_PATH = '/content/exports'
# Input folder scanned by the preview and training cells of this notebook.
LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/manual_check_patch'
# Output folder passed to the training routine for the saved models.
LOCAL_EXPORT_MODELIZATION_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/modelization'
# NOTE(review): not referenced by the cells visible in this notebook section.
LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH = f'{LOCAL_EXPORT_FOLDER_PATH}/modelization_evaluation'

# ---------------------------------------------------------------------------
# Modelling
# ---------------------------------------------------------------------------
# Label columns; one model is tuned per entry.
TARGET_COLUMNS = ['TARGET']
# NOTE(review): the training routine currently hard-codes its own MLflow
# experiment name and does not read this constant - confirm which is intended.
MLFLOW_EXPERIMENT_NAME = 'generic_model_experiment'

# ---------------------------------------------------------------------------
# General settings
# ---------------------------------------------------------------------------
TESTING_MODE = True
TESTING_MODE_MAX_LINES = 1000
TESTING_MODE_SUB_FOLDER_NAME = 'testing_data'
# Number of rows per chunk when reading the training CSV files.
GENERAL_CHUNK_SIZE = 100000


In [3]:
def display_head_of_files(base_path, file_extension='csv', chunk_size=1000):
    """
    Walk ``base_path`` recursively and print the first rows of every matching CSV file.

    Args:
        base_path (str): Root directory that is searched recursively.
        file_extension (str): Extension of the files to process, with or without
            the leading dot (default 'csv'). Matching is case-insensitive. An
            empty string matches every file.
        chunk_size (int): Maximum number of rows read from each file before the
            head is taken.

    Returns:
        None
    """
    # Match on a real extension ('.csv'), not on a bare suffix: a plain
    # endswith('csv') would also pick up names such as 'data.xcsv'.
    suffix = ('.' + file_extension.lstrip('.')).lower() if file_extension else ''

    for root, _dirs, files in os.walk(base_path):
        for file in files:
            if not file.lower().endswith(suffix):
                continue

            file_path = os.path.join(root, file)
            print(f"Processing file: {file_path}")

            # nrows reads the same leading rows as the first chunk would, but
            # returns a DataFrame directly, so no half-consumed reader (and its
            # open file handle) is left behind.
            print(pd.read_csv(file_path, nrows=chunk_size).head())
            print("\n" + "="*80 + "\n")

# Example usage

In [4]:
# Preview the first rows of every CSV found under the manual-check export folder.
display_head_of_files(base_path=LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH)

Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/ordinal/application_test.csv
   AMT_INCOME_TOTAL  NONLIVINGAPARTMENTS_MODE  FLAG_DOCUMENT_9  \
0            180000                  0.007361                0   
1            180000                  0.007361                0   
2            166500                  0.007361                0   
3             67500                  0.007361                0   
4            247500                  0.007361                0   

   TOTAL_CREDIT_BUREAU_REQUESTS  FLAG_EMAIL  HOUSETYPE_MODE  \
0                             0           0             0.0   
1                             1           0             0.0   
2                             2           0             0.0   
3                             0           0             0.0   
4                             4           0             0.0   

   LIVINGAPARTMENTS_MODE  FLAG_CONT_MOBILE  REGION_RATING_CLIENT  \
0                0.10255                 1          

# Models

In [5]:
# Candidate models and their hyperparameter search grids.
#
# Each entry maps a display name to:
#   'model'  - the estimator instance to tune;
#   'params' - {hyperparameter name: list/array of reference values}.
#
# `objective` picks one entry by name and derives a search space from each
# grid: grids whose first value is an int become an integer range [min, max],
# grids whose first value is a float become a float range [min, max], and any
# other grid is treated as a categorical choice over the listed values.
# 'hidden_layer_sizes' is special-cased: its choices are comma-separated
# strings that `objective` converts to a tuple of ints.
#
# Entries that are commented out are disabled and are not part of the search.
models = {
    'Logistic Regression': {
        'model': LogisticRegression(),
        'params': {
            # Float grid: sampled as a float between the smallest and largest value.
            'C': np.logspace(-3, 3, 7),
            'solver': ['newton-cg', 'lbfgs', 'liblinear']
        }
    },
    'Random Forest': {
        'model': RandomForestClassifier(),
        'params': {
            # Integer grids: only min and max of each list bound the search.
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    },
    # 'Gradient Boosting': {
    #     'model': GradientBoostingClassifier(),
    #     'params': {
    #         'n_estimators': [100, 200, 300],
    #         'learning_rate': [0.01, 0.05, 0.1],
    #         'max_depth': [3, 4, 5],
    #         'subsample': [0.8, 0.9, 1.0]
    #     }
    # },
    # 'XGBoost': {
    #     'model': xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    #     'params': {
    #         'n_estimators': [100, 200, 300],
    #         'learning_rate': [0.01, 0.05, 0.1],
    #         'max_depth': [3, 4, 5],
    #         'colsample_bytree': [0.3, 0.7]
    #     }
    # },
    # 'LightGBM': {
    #     'model': lgb.LGBMClassifier(),
    #     'params': {
    #         'n_estimators': [100, 200, 300],
    #         'learning_rate': [0.01, 0.05, 0.1],
    #         'num_leaves': [31, 62, 127],
    #         'boosting_type': ['gbdt', 'dart']
    #     }
    # },
    # 'CatBoost': {
    #     'model': cb.CatBoostClassifier(verbose=0),
    #     'params': {
    #         'iterations': [100, 200, 300],
    #         'learning_rate': [0.01, 0.05, 0.1],
    #         'depth': [3, 4, 5],
    #         'l2_leaf_reg': [3, 5, 7]
    #     }
    # },
    # 'SVM': {
    #     'model': SVC(probability=True),
    #     'params': {
    #         'C': np.logspace(-3, 3, 7),
    #         'kernel': ['linear', 'rbf', 'poly'],
    #         'degree': [3, 4, 5]
    #     }
    # },
    # 'KNN': {
    #     'model': KNeighborsClassifier(),
    #     'params': {
    #         'n_neighbors': [5, 10, 20],
    #         'weights': ['uniform', 'distance'],
    #         'metric': ['euclidean', 'manhattan']
    #     }
    # },
    # 'Neural Network': {
    #     'model': MLPClassifier(max_iter=500),
    #     'params': {
    #         'hidden_layer_sizes': ['50,50', '100', '100,50'],
    #         'activation': ['tanh', 'relu'],
    #         'alpha': [0.0001, 0.001, 0.01]
    #     }
    # }
}

# Training

## Hyperparameter optimization method

In [6]:
# Optuna objective: sample one classifier configuration and score it by cross-validation
# NOTE(review): `objective` is defined in more than one cell of this notebook;
# the definition executed last is the one that is actually used.
def objective(trial, X_train, y_train):
    """
    Sample a classifier and its hyperparameters, and return its cross-validated accuracy.

    The classifier name is drawn from the keys of the module-level ``models``
    dict. One value is then suggested for every key of that classifier's
    ``'params'`` grid:

    * ``hidden_layer_sizes``: categorical over comma-separated strings,
      converted to a tuple of ints;
    * grids whose first value is an ``int``: integer in ``[min, max]`` of the grid;
    * grids whose first value is a ``float``: float in ``[min, max]`` of the grid;
    * anything else: categorical over the listed values.

    Args:
        trial: Optuna trial used to suggest the values.
        X_train: Training features, forwarded unchanged to ``cross_val_score``.
        y_train: Training labels, forwarded unchanged to ``cross_val_score``.

    Returns:
        float: Mean accuracy over the 5 cross-validation folds.

    Raises:
        Exception: Any error raised while fitting or scoring a fold is
            propagated (``error_score='raise'``).
    """
    # Imported locally so this function does not rely on an extra import cell.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Choices are comma-separated strings; the estimator needs a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # NOTE(review): the range is sampled uniformly (no log=True), so a
            # log-spaced grid such as np.logspace(-3, 3, 7) is explored on a
            # linear scale - confirm this is intended.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure an unfitted copy: calling set_params on the instance stored in
    # `models` would leave this trial's hyperparameters on the shared object.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

## Optuna hyperparameter optimization

In [7]:
# Optuna objective: sample one classifier configuration and score it by cross-validation
# NOTE(review): `objective` is defined in more than one cell of this notebook;
# the definition executed last is the one that is actually used.
def objective(trial, X_train, y_train):
    """
    Sample a classifier and its hyperparameters, and return its cross-validated accuracy.

    The classifier name is drawn from the keys of the module-level ``models``
    dict. One value is then suggested for every key of that classifier's
    ``'params'`` grid:

    * ``hidden_layer_sizes``: categorical over comma-separated strings,
      converted to a tuple of ints;
    * grids whose first value is an ``int``: integer in ``[min, max]`` of the grid;
    * grids whose first value is a ``float``: float in ``[min, max]`` of the grid;
    * anything else: categorical over the listed values.

    Args:
        trial: Optuna trial used to suggest the values.
        X_train: Training features, forwarded unchanged to ``cross_val_score``.
        y_train: Training labels, forwarded unchanged to ``cross_val_score``.

    Returns:
        float: Mean accuracy over the 5 cross-validation folds.

    Raises:
        Exception: Any error raised while fitting or scoring a fold is
            propagated (``error_score='raise'``).
    """
    # Imported locally so this function does not rely on an extra import cell.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Choices are comma-separated strings; the estimator needs a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # NOTE(review): the range is sampled uniformly (no log=True), so a
            # log-spaced grid such as np.logspace(-3, 3, 7) is explored on a
            # linear scale - confirm this is intended.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure an unfitted copy: calling set_params on the instance stored in
    # `models` would leave this trial's hyperparameters on the shared object.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

# Retraining method for best improvement

In [8]:
# Refit a model until the held-out accuracy stops improving significantly
# NOTE(review): a later cell redefines `retrain_model` without X_test / y_test,
# and `train_and_evaluate_models` calls it as (best_pipeline, X_train, y_train),
# which does not match this five-argument signature.
def retrain_model(best_pipeline, X_train, y_train, X_test, y_test, threshold=0.01, max_iter=10):
    """
    Refit ``best_pipeline`` until its accuracy on the test data stops improving.

    Every iteration fits the pipeline on ``(X_train, y_train)``, scores it with
    ``accuracy_score`` on ``(X_test, y_test)`` and stops as soon as the gain
    over the previous iteration is smaller than ``threshold``. The iteration
    that triggers the stop is not printed.

    Args:
        best_pipeline: Object exposing ``fit`` and ``predict``; fitted in place.
        X_train: Training features.
        y_train: Training labels.
        X_test: Features used for scoring.
        y_test: Labels used for scoring.
        threshold (float): Minimum accuracy gain required to keep iterating.
        max_iter (int): Maximum number of fits.

    Returns:
        tuple: ``(best_pipeline, current_score)`` where ``current_score`` is the
        accuracy of the last fit, or ``0`` when no fit was performed
        (``max_iter <= 0``).
    """
    previous_score = 0
    # Bound before the loop so the return is valid even if the loop never runs.
    current_score = previous_score
    for iteration in range(max_iter):
        best_pipeline.fit(X_train, y_train)
        y_pred = best_pipeline.predict(X_test)
        current_score = accuracy_score(y_test, y_pred)
        improvement = current_score - previous_score
        # NOTE(review): data and hyperparameters are identical on every pass, so
        # for an estimator whose fit is deterministic the second pass yields no
        # improvement and the loop exits here.
        if improvement < threshold:
            break
        previous_score = current_score
        print(f"Iteration {iteration + 1}, Accuracy: {current_score}, Improvement: {improvement}")
    return best_pipeline, current_score

## Main model-training routine

In [None]:
# Optuna objective: sample one classifier configuration and score it by cross-validation
# NOTE(review): `objective` is defined in more than one cell of this notebook;
# the definition executed last is the one that is actually used.
def objective(trial, X_train, y_train):
    """
    Sample a classifier and its hyperparameters, and return its cross-validated accuracy.

    The classifier name is drawn from the keys of the module-level ``models``
    dict. One value is then suggested for every key of that classifier's
    ``'params'`` grid:

    * ``hidden_layer_sizes``: categorical over comma-separated strings,
      converted to a tuple of ints;
    * grids whose first value is an ``int``: integer in ``[min, max]`` of the grid;
    * grids whose first value is a ``float``: float in ``[min, max]`` of the grid;
    * anything else: categorical over the listed values.

    Args:
        trial: Optuna trial used to suggest the values.
        X_train: Training features, forwarded unchanged to ``cross_val_score``.
        y_train: Training labels, forwarded unchanged to ``cross_val_score``.

    Returns:
        float: Mean accuracy over the 5 cross-validation folds.

    Raises:
        Exception: Any error raised while fitting or scoring a fold is
            propagated (``error_score='raise'``).
    """
    # Imported locally so this function does not rely on an extra import cell.
    from sklearn.base import clone

    classifier_name = trial.suggest_categorical('classifier', list(models.keys()))
    classifier_info = models[classifier_name]
    params = classifier_info['params']

    trial_params = {}
    for param, values in params.items():
        if param == 'hidden_layer_sizes':
            # Choices are comma-separated strings; the estimator needs a tuple of ints.
            hidden_layer_size_str = trial.suggest_categorical(param, values)
            trial_params[param] = tuple(map(int, hidden_layer_size_str.split(',')))
        elif isinstance(values[0], int):
            trial_params[param] = trial.suggest_int(param, min(values), max(values))
        elif isinstance(values[0], float):
            # NOTE(review): the range is sampled uniformly (no log=True), so a
            # log-spaced grid such as np.logspace(-3, 3, 7) is explored on a
            # linear scale - confirm this is intended.
            trial_params[param] = trial.suggest_float(param, min(values), max(values))
        else:
            trial_params[param] = trial.suggest_categorical(param, values)

    # Configure an unfitted copy: calling set_params on the instance stored in
    # `models` would leave this trial's hyperparameters on the shared object.
    classifier = clone(classifier_info['model'])
    classifier.set_params(**trial_params)

    pipeline = Pipeline(steps=[
        ('classifier', classifier)
    ])

    score = cross_val_score(pipeline, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy', error_score='raise')
    return score.mean()

# Refit a model until the training accuracy stops improving significantly
def retrain_model(best_pipeline, X_train, y_train, threshold=0.01, max_iter=10):
    """
    Refit ``best_pipeline`` until its accuracy on the training data stops improving.

    Every iteration fits the pipeline on ``(X_train, y_train)``, scores it with
    ``accuracy_score`` on the same data and stops as soon as the gain over the
    previous iteration is smaller than ``threshold``. The iteration that
    triggers the stop is not printed.

    Args:
        best_pipeline: Object exposing ``fit`` and ``predict``; fitted in place.
        X_train: Training features, also used for scoring.
        y_train: Training labels, also used for scoring.
        threshold (float): Minimum accuracy gain required to keep iterating.
        max_iter (int): Maximum number of fits.

    Returns:
        tuple: ``(best_pipeline, current_score)`` where ``current_score`` is the
        training accuracy of the last fit, or ``0`` when no fit was performed
        (``max_iter <= 0``).
    """
    previous_score = 0
    # Bound before the loop so the return is valid even if the loop never runs.
    current_score = previous_score
    for iteration in range(max_iter):
        best_pipeline.fit(X_train, y_train)
        # The score is measured on the data the model was just fitted on.
        y_pred = best_pipeline.predict(X_train)
        current_score = accuracy_score(y_train, y_pred)
        improvement = current_score - previous_score
        # NOTE(review): data and hyperparameters are identical on every pass, so
        # for an estimator whose fit is deterministic the second pass yields no
        # improvement and the loop exits here.
        if improvement < threshold:
            break
        previous_score = current_score
        print(f"Iteration {iteration + 1}, Accuracy: {current_score}, Improvement: {improvement}")
    return best_pipeline, current_score

def train_and_evaluate_models(base_path, output_folder, target_columns, max_features=5, testing=False, chunk_size=1000, testing_sub_path_name='test'):
    """
    Tune, refit, save and optionally evaluate a model for every training file under ``base_path``.

    For each file whose name ends with ``application_train.csv`` the sibling
    ``application_test.csv`` is loaded once, and for every chunk / target
    column pair:

    1. targets with fewer than two classes in the chunk are skipped;
    2. the ``max_features`` columns with the highest absolute correlation to
       the target are kept as features (target columns themselves and columns
       with an undefined correlation are never selected);
    3. an Optuna study of 100 trials maximizes ``objective``;
    4. the best classifier is refit through ``retrain_model`` and dumped with
       joblib under ``output_folder/<path relative to base_path>/<target>/``;
    5. if the test file contains the target column, accuracy, ROC AUC, the
       confusion matrix and the classification report are printed and the run
       is logged to MLflow.

    Finally a box plot of the per-trial cross-validation scores, grouped by
    classifier, is written to ``output_folder`` via ``plot_cross_val_scores``.

    Args:
        base_path (str): Root directory searched recursively for training files.
        output_folder (str): Root directory for the saved models and the plot.
        target_columns (list[str]): Label columns; one model is tuned per column.
        max_features (int): Number of most-correlated features to keep.
        testing (bool): Accepted for interface compatibility; not used by this
            function body.
        chunk_size (int): Number of rows per chunk when reading a training file.
        testing_sub_path_name (str): Accepted for interface compatibility; not
            used by this function body.

    Returns:
        None
    """
    # Imported locally so this function does not rely on an extra import cell.
    from sklearn.base import clone

    train_suffix = 'application_train.csv'
    test_suffix = 'application_test.csv'

    # classifier name -> list of cross-validation scores, one per finished trial
    all_scores = {}

    # Count training files only: the bar advances once per training file, so
    # counting every file of a matching directory would overstate the total.
    total_files = sum(
        1
        for _root, _dirs, walked_files in os.walk(base_path)
        for walked_file in walked_files
        if walked_file.endswith(train_suffix)
    )

    # The context manager closes the bar even if a file fails half-way.
    with tqdm(total=total_files, desc="Processing files") as pbar:
        for root, _dirs, files in os.walk(base_path):
            for file in files:
                if not file.endswith(train_suffix):
                    continue

                file_path = os.path.join(root, file)
                # Swap only the file-name suffix so directory names are never rewritten.
                test_file_path = os.path.join(root, file[:-len(train_suffix)] + test_suffix)

                print(f"Processing file: {file_path}")

                # The test set does not depend on the chunk: read it once per file.
                test_data = pd.read_csv(test_file_path)

                # Read the training CSV chunk by chunk.
                # NOTE(review): every chunk is tuned and saved independently; a
                # later chunk that selects the same classifier overwrites the
                # model file written for an earlier chunk.
                for chunk in pd.read_csv(file_path, chunksize=chunk_size):
                    for target_column in target_columns:
                        print(f"Using target column: {target_column}")

                        y_train = chunk[target_column]

                        # A single-class target cannot be tuned; check before any
                        # feature selection so no work is done on such chunks.
                        if y_train.nunique() < 2:
                            print(f"Skipping optimization for {target_column} as it contains only one class in the training data.")
                            continue

                        # Rank features by absolute correlation with the target.
                        # Target columns are removed by name rather than assuming
                        # the target sorts first, and undefined correlations
                        # (e.g. constant columns) are discarded.
                        correlations = chunk.corr()[target_column].abs()
                        top_features = (
                            correlations
                            .drop(labels=list(target_columns), errors='ignore')
                            .dropna()
                            .sort_values(ascending=False)
                            .head(max_features)
                            .index
                            .tolist()
                        )
                        if not top_features:
                            print(f"Skipping optimization for {target_column} as no feature correlates with it.")
                            continue

                        # Split features and target.
                        X_train = chunk[top_features]
                        X_test = test_data[top_features]
                        y_test = test_data[target_column] if target_column in test_data.columns else None

                        # Hyperparameter optimization with Optuna.
                        study = optuna.create_study(direction='maximize')
                        study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=100)

                        # Keep every finished trial's score for the final comparison plot.
                        for finished_trial in study.trials:
                            trial_classifier = finished_trial.params.get('classifier')
                            if finished_trial.value is not None and trial_classifier is not None:
                                all_scores.setdefault(trial_classifier, []).append(finished_trial.value)

                        print('Number of finished trials: ', len(study.trials))
                        print('Best trial:')
                        trial = study.best_trial

                        print('  Value: ', trial.value)
                        print('  Params: ')
                        for key, value in trial.params.items():
                            print('    {}: {}'.format(key, value))

                        # Rebuild the best model from the winning trial.
                        best_classifier_name = trial.params['classifier']
                        best_params = {k: v for k, v in trial.params.items() if k != 'classifier'}

                        # Optuna stores the raw categorical choice; `objective`
                        # turns the comma-separated string into a tuple of ints
                        # before use, so the same conversion is needed here.
                        hidden_layers = best_params.get('hidden_layer_sizes')
                        if isinstance(hidden_layers, str):
                            best_params['hidden_layer_sizes'] = tuple(map(int, hidden_layers.split(',')))

                        # Work on a copy so the estimator stored in `models` is
                        # neither reconfigured nor fitted.
                        best_classifier = clone(models[best_classifier_name]['model'])
                        best_classifier.set_params(**best_params)

                        # Pipeline wrapping the best model.
                        best_pipeline = Pipeline(steps=[
                            ('classifier', best_classifier)
                        ])

                        # Retrain the model with the entire training chunk.
                        best_pipeline, _ = retrain_model(best_pipeline, X_train, y_train)

                        # Mirror the input directory layout under the output folder.
                        relative_path = os.path.relpath(root, base_path)
                        output_dir = os.path.join(output_folder, relative_path, target_column)
                        os.makedirs(output_dir, exist_ok=True)

                        model_path = os.path.join(output_dir, f'best_{best_classifier_name}_model.pkl')
                        joblib.dump(best_pipeline, model_path)

                        if y_test is not None:
                            # Model evaluation on the test file.
                            y_pred = best_pipeline.predict(X_test)
                            y_pred_proba = best_pipeline.predict_proba(X_test)[:, 1]  # Only use the probability for the positive class
                            accuracy = accuracy_score(y_test, y_pred)
                            roc_auc = roc_auc_score(y_test, y_pred_proba)

                            print(f"Accuracy: {accuracy}")
                            print(f"ROC AUC: {roc_auc}")
                            print(confusion_matrix(y_test, y_pred))
                            print(classification_report(y_test, y_pred))

                            # Logging with MLflow.
                            # NOTE(review): the settings cell defines
                            # MLFLOW_EXPERIMENT_NAME, which is not used here -
                            # confirm which experiment name is intended.
                            mlflow.set_experiment('credit_scoring')
                            with mlflow.start_run():
                                mlflow.log_params(trial.params)
                                mlflow.log_metric('accuracy', accuracy)
                                mlflow.log_metric('roc_auc', roc_auc)
                                mlflow.sklearn.log_model(best_pipeline, 'model')
                                mlflow.log_artifact(file_path)
                                mlflow.log_artifact(test_file_path)

                        print(f'Model saved at {model_path}')

                pbar.update(1)

    # Cross-validation score comparison across classifiers.
    if not all_scores:
        print("No cross-validation scores were collected; skipping the comparison plot.")
        return

    model_scores = [score for scores in all_scores.values() for score in scores]
    model_names_repeated = [name for name, scores in all_scores.items() for _ in scores]

    # The plot is saved directly in output_folder, which may not exist yet.
    os.makedirs(output_folder, exist_ok=True)
    plot_cross_val_scores(model_scores, model_names_repeated, output_folder)

# Visualization helper for the cross-validation scores
def plot_cross_val_scores(model_scores, model_names, output_dir):
    """
    Save a box plot of cross-validation scores grouped by model.

    Args:
        model_scores (list[float]): One score per observation.
        model_names (list[str]): Model name for each entry of ``model_scores``
            (same length, same order).
        output_dir (str): Directory that receives ``cross_val_scores.png``;
            created if it does not exist.

    Returns:
        None
    """
    # savefig does not create missing directories.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.boxplot(x=model_names, y=model_scores, ax=ax)
        ax.set_xlabel('Model')
        ax.set_ylabel('Cross-Validation Score')
        ax.set_title('Model Comparison - Cross-Validation Scores')
        ax.tick_params(axis='x', labelrotation=45)
        fig.savefig(os.path.join(output_dir, 'cross_val_scores.png'))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

## Running the modeling routine

In [None]:
# Tune and train one model per training file found under the manual-check
# export folder; the models are written below the modelization export folder.
train_and_evaluate_models(
    base_path=LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH, 
    output_folder=LOCAL_EXPORT_MODELIZATION_FOLDER_PATH, 
    target_columns=TARGET_COLUMNS, 
    testing=TESTING_MODE, 
    chunk_size=GENERAL_CHUNK_SIZE, 
    testing_sub_path_name=TESTING_MODE_SUB_FOLDER_NAME)

Processing files:   0%|          | 0/270 [00:00<?, ?it/s][I 2024-07-08 13:42:49,918] A new study created in memory with name: no-name-9bd4d80a-de25-46d8-a5ed-ef3515f65ee6


Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:42:50,974] Trial 0 finished with value: 0.909090909090909 and parameters: {'classifier': 'Random Forest', 'n_estimators': 182, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.909090909090909.
[I 2024-07-08 13:42:51,529] Trial 1 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 875.8042162447258, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 13:42:52,103] Trial 2 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 441.2152889093382, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 13:42:52,543] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 621.0754984500451, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9181818181818182.
[I 2024-07-08 13:42:52,611] Trial 4 finished with value: 0.91818181818

Number of finished trials:  100
Best trial:
  Value:  0.9227272727272726
  Params: 
    classifier: Logistic Regression
    C: 26.983505462631456
    solver: newton-cg
Iteration 1, Accuracy: 0.9272727272727272, Improvement: 0.9272727272727272
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:43:01,426] Trial 2 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 197, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:01,451] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 103.41640540004624, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:01,507] Trial 4 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 355.3519147165943, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:01,533] Trial 5 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 449.7592311633252, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:01,558] Trial 6 finished with value: 0.91818181818

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727274
  Params: 
    classifier: Logistic Regression
    C: 9.445575921495674
    solver: newton-cg
Iteration 1, Accuracy: 0.9272727272727272, Improvement: 0.9272727272727272
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:43:09,685] Trial 0 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 257, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:09,893] Trial 1 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 170, 'max_depth': 19, 'min_samples_split': 8, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:10,050] Trial 2 finished with value: 0.909090909090909 and parameters: {'classifier': 'Random Forest', 'n_estimators': 117, 'max_depth': 29, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:43:10,238] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 148, 'max_depth': 21, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 0 w

Number of finished trials:  100
Best trial:
  Value:  0.9181818181818182
  Params: 
    classifier: Random Forest
    n_estimators: 257
    max_depth: 11
    min_samples_split: 9
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9181818181818182, Improvement: 0.9181818181818182


Processing files:   1%|          | 3/270 [00:37<58:56, 13.24s/it]

[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/LOF/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/Z-Score/onehot/application_train.csv
Using target column: TARGET


Processing files:   2%|▏         | 6/270 [00:37<19:50,  4.51s/it][I 2024-07-08 13:43:27,269] A new study created in memory with name: no-name-485a66fd-2d0c-4aa3-b244-61bbeb085837
The least populated class in y has only 4 members, which is less than n_splits=5.


Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/MAD/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/MAD/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/MAD/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:43:27,519] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 219, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:27,547] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 520.2820257447038, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:27,574] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 412.22830767282295, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:27,925] Trial 3 finishe

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 300
    max_depth: 22
    min_samples_split: 2
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:   4%|▎         | 10/270 [01:00<22:50,  5.27s/it][I 2024-07-08 13:43:50,745] A new study created in memory with name: no-name-23411dc5-259a-4fc1-9a19-1ded31d1dffa
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:50,771] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 447.64388087079226, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:50,797] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 886.1476059062028, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.6666666666666666.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:51,086] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 255, 'max_depth': 13, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:51,255] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 143, 'max_depth': 20, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:43:51,281] Trial 4 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 587.3813974501451, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.7999999999999999.
The least popula

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 290
    max_depth: 23
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:   4%|▍         | 11/270 [01:26<37:35,  8.71s/it][I 2024-07-08 13:44:16,705] A new study created in memory with name: no-name-4af33930-a046-4110-b964-5c4d3520d761
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:44:16,731] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 935.9033951054794, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:44:17,001] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 235, 'max_depth': 11, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:44:17,130] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 102, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:44:17,146] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 974.6525530867623, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7333333333333332.
The least po

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 122
    max_depth: 24
    min_samples_split: 3
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:   4%|▍         | 12/270 [01:43<43:59, 10.23s/it][I 2024-07-08 13:44:33,406] A new study created in memory with name: no-name-acf74e16-075f-4264-9e57-2646a70e824f


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/mean/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports

[I 2024-07-08 13:44:33,534] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 102, 'max_depth': 22, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:33,782] Trial 1 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 212, 'max_depth': 30, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:33,797] Trial 2 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Logistic Regression', 'C': 650.726569891483, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:33,825] Trial 3 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Logistic Regression', 'C': 286.87623813422454, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:34,013] Tria

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: Random Forest
    n_estimators: 102
    max_depth: 22
    min_samples_split: 7
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9537366548042705, Improvement: 0.9537366548042705


Processing files:   6%|▌         | 16/270 [01:57<28:32,  6.74s/it][I 2024-07-08 13:44:47,599] A new study created in memory with name: no-name-1b5657f8-7a34-4d77-a160-e408e8b6c334


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:44:47,808] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 169, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:48,036] Trial 1 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 201, 'max_depth': 27, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:48,245] Trial 2 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 161, 'max_depth': 18, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:44:48,525] Trial 3 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 255, 'max_depth': 19, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: Random Forest
    n_estimators: 169
    max_depth: 25
    min_samples_split: 8
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9537366548042705, Improvement: 0.9537366548042705


Processing files:   6%|▋         | 17/270 [02:16<36:27,  8.65s/it][I 2024-07-08 13:45:06,460] A new study created in memory with name: no-name-eab79460-84f9-4c27-b12a-b6956d0d5574


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/mean/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:45:06,699] Trial 0 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 201, 'max_depth': 16, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:45:06,724] Trial 1 finished with value: 0.9501879699248119 and parameters: {'classifier': 'Logistic Regression', 'C': 759.9452226174272, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:45:06,842] Trial 2 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 104, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9537593984962406.
[I 2024-07-08 13:45:07,172] Trial 3 finished with value: 0.9537593984962406 and parameters: {'classifier': 'Random Forest', 'n_estimators': 287, 'max_depth': 29, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9537593984962406.
[I 

Number of finished trials:  100
Best trial:
  Value:  0.9537593984962406
  Params: 
    classifier: Random Forest
    n_estimators: 201
    max_depth: 16
    min_samples_split: 5
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9537366548042705, Improvement: 0.9537366548042705


Processing files:   7%|▋         | 18/270 [02:39<47:05, 11.21s/it][I 2024-07-08 13:45:29,072] A new study created in memory with name: no-name-a5807cec-55ce-4a59-8dd3-d1d0fd3617e8
[I 2024-07-08 13:45:29,088] Trial 0 finished with value: 0.9359999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 805.8773276077034, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9359999999999999.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/mean/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/mean/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:45:29,336] Trial 1 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 211, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.952.
[I 2024-07-08 13:45:29,605] Trial 2 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 241, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.952.
[I 2024-07-08 13:45:29,630] Trial 3 finished with value: 0.9359999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 437.7005437656337, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.952.
[I 2024-07-08 13:45:29,657] Trial 4 finished with value: 0.9359999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 172.83390117628278, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.952.
[I 2024-07-08 13:45:29,682] Trial 5 finished with value: 0.9359999999999999 and parameters: {'classifier': '

Number of finished trials:  100
Best trial:
  Value:  0.952
  Params: 
    classifier: Random Forest
    n_estimators: 211
    max_depth: 23
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.96, Improvement: 0.96


Processing files:   7%|▋         | 19/270 [03:00<55:37, 13.30s/it][I 2024-07-08 13:45:50,314] A new study created in memory with name: no-name-56d1f3d0-3d0f-4b46-a4fd-5ff2099cf5fe


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:45:50,573] Trial 0 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 217, 'max_depth': 15, 'min_samples_split': 3, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:45:50,589] Trial 1 finished with value: 0.9359999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 147.364206566457, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:45:50,604] Trial 2 finished with value: 0.9359999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 161.97673378405068, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:45:50,813] Trial 3 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 176, 'max_depth': 27, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:45:51,021] Trial 4 finished with value: 0.952 and parameters: {'classifier': 'Random Forest',

Number of finished trials:  100
Best trial:
  Value:  0.952
  Params: 
    classifier: Random Forest
    n_estimators: 217
    max_depth: 15
    min_samples_split: 3
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.952, Improvement: 0.952


Processing files:   7%|▋         | 20/270 [03:22<1:03:58, 15.36s/it][I 2024-07-08 13:46:12,616] A new study created in memory with name: no-name-c9365b85-d59e-45a3-bf73-aca2eaadbce9


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:46:12,774] Trial 0 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 129, 'max_depth': 26, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:46:13,084] Trial 1 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 290, 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:46:13,099] Trial 2 finished with value: 0.952 and parameters: {'classifier': 'Logistic Regression', 'C': 473.51968715368, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:46:13,268] Trial 3 finished with value: 0.952 and parameters: {'classifier': 'Random Forest', 'n_estimators': 138, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952.
[I 2024-07-08 13:46:13,294] Trial 4 finished with value: 0.952 and parameters: {'classifier': 'Logistic Re

Number of finished trials:  100
Best trial:
  Value:  0.968
  Params: 
    classifier: Logistic Regression
    C: 10.710571988484276
    solver: liblinear
Iteration 1, Accuracy: 0.976, Improvement: 0.976
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/

[I 2024-07-08 13:46:28,155] Trial 1 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 51.26144560655317, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:28,181] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 67.93097708910399, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:28,461] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 253, 'max_depth': 19, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:28,477] Trial 4 finishe

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 51.26144560655317
    solver: newton-cg
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:43,932] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 239, 'max_depth': 14, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:43,948] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 205.8629138649502, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:46:44,198] Trial 3 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 194, 'max_depth': 17, 'min_samples_split': 3, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7999999999999999.
The least po

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 180
    max_depth: 24
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:  10%|▉         | 26/270 [04:13<46:28, 11.43s/it][I 2024-07-08 13:47:03,435] A new study created in memory with name: no-name-2334114f-8a73-4be2-aece-12664105896a
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:47:03,452] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 215.22382440805296, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:47:03,713] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 229, 'max_depth': 25, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:47:03,729] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 125.3753262261714, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:47:03,746] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 37.12000345832179, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 membe

Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Random Forest
    n_estimators: 200
    max_depth: 25
    min_samples_split: 8
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:  10%|█         | 27/270 [04:36<57:39, 14.24s/it][I 2024-07-08 13:47:26,696] A new study created in memory with name: no-name-d0e12e90-d1c6-444b-8a6d-83243ed04d0b
[I 2024-07-08 13:47:26,712] Trial 0 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 433.29344510634127, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9547619047619047.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/bfill/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/ex

[I 2024-07-08 13:47:26,921] Trial 1 finished with value: 0.958843537414966 and parameters: {'classifier': 'Random Forest', 'n_estimators': 162, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.958843537414966.
[I 2024-07-08 13:47:27,099] Trial 2 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 129, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.958843537414966.
[I 2024-07-08 13:47:27,289] Trial 3 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 152, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.958843537414966.
[I 2024-07-08 13:47:27,630] Trial 4 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 279, 'max_depth': 26, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 1 with

Number of finished trials:  100
Best trial:
  Value:  0.9629251700680272
  Params: 
    classifier: Random Forest
    n_estimators: 104
    max_depth: 17
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9711934156378601, Improvement: 0.9711934156378601


Processing files:  11%|█▏        | 31/270 [04:59<37:15,  9.36s/it][I 2024-07-08 13:47:49,749] A new study created in memory with name: no-name-ff91ac0a-d968-4b98-94bd-bb27b3853f7a
[I 2024-07-08 13:47:49,775] Trial 0 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 937.8942384588714, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:49,800] Trial 1 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 750.3001300725645, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:49,826] Trial 2 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 375.6505468314587, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:49,842] Trial 3 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Logistic Regression', 'C': 657.7397183

[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:47:50,141] Trial 4 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 231, 'max_depth': 14, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:50,318] Trial 5 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 144, 'max_depth': 30, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:50,457] Trial 6 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 119, 'max_depth': 27, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9547619047619047.
[I 2024-07-08 13:47:50,726] Trial 7 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 221, 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 0 

Number of finished trials:  100
Best trial:
  Value:  0.9629251700680272
  Params: 
    classifier: Random Forest
    n_estimators: 104
    max_depth: 30
    min_samples_split: 5
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9711934156378601, Improvement: 0.9711934156378601


Processing files:  12%|█▏        | 32/270 [05:17<42:54, 10.82s/it][I 2024-07-08 13:48:07,679] A new study created in memory with name: no-name-197dedf1-420c-4387-ae7e-c73c5e1b1838
[I 2024-07-08 13:48:07,695] Trial 0 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 61.92328241831801, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9505952380952382.
[I 2024-07-08 13:48:07,720] Trial 1 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 112.59747836702678, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9505952380952382.
[I 2024-07-08 13:48:07,737] Trial 2 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 184.3694236210577, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9505952380952382.
[I 2024-07-08 13:48:07,766] Trial 3 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 255.3183596120

[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/bfill/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:48:07,933] Trial 4 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 134, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 4 with value: 0.9547619047619047.
[I 2024-07-08 13:48:07,959] Trial 5 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 323.0280794828422, 'solver': 'newton-cg'}. Best is trial 4 with value: 0.9547619047619047.
[I 2024-07-08 13:48:08,218] Trial 6 finished with value: 0.9547619047619047 and parameters: {'classifier': 'Random Forest', 'n_estimators': 230, 'max_depth': 18, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 4 with value: 0.9547619047619047.
[I 2024-07-08 13:48:08,234] Trial 7 finished with value: 0.9505952380952382 and parameters: {'classifier': 'Logistic Regression', 'C': 467.1524134725633, 'solver': 'liblinear'}. Best is trial 4 with value: 0.9547619047619047.
[I 2024-07-08 13:48:08,524] Tria

Number of finished trials:  100
Best trial:
  Value:  0.9547619047619047
  Params: 
    classifier: Random Forest
    n_estimators: 134
    max_depth: 10
    min_samples_split: 2
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9547325102880658, Improvement: 0.9547325102880658


Processing files:  12%|█▏        | 33/270 [05:39<51:08, 12.95s/it][I 2024-07-08 13:48:29,335] A new study created in memory with name: no-name-bcb34490-5bb1-49e2-b788-6fe2a328cc44


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/bfill/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/bfill/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:48:29,483] Trial 0 finished with value: 0.9 and parameters: {'classifier': 'Random Forest', 'n_estimators': 134, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9.
[I 2024-07-08 13:48:29,833] Trial 1 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 299, 'max_depth': 11, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9099999999999999.
[I 2024-07-08 13:48:29,859] Trial 2 finished with value: 0.9 and parameters: {'classifier': 'Logistic Regression', 'C': 47.215014335886565, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9099999999999999.
[I 2024-07-08 13:48:30,189] Trial 3 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 293, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9099999999999999.
[I 2024-07-08 13:48:30,245] Trial 4 finished wit

Number of finished trials:  100
Best trial:
  Value:  0.9200000000000002
  Params: 
    classifier: Logistic Regression
    C: 621.8953521346266
    solver: liblinear
Iteration 1, Accuracy: 0.91, Improvement: 0.91
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:48:35,703] Trial 4 finished with value: 0.9100000000000001 and parameters: {'classifier': 'Logistic Regression', 'C': 925.2854929210192, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9100000000000001.
[I 2024-07-08 13:48:35,779] Trial 5 finished with value: 0.9100000000000001 and parameters: {'classifier': 'Logistic Regression', 'C': 863.9360686456572, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9100000000000001.
[I 2024-07-08 13:48:35,856] Trial 6 finished with value: 0.9100000000000001 and parameters: {'classifier': 'Logistic Regression', 'C': 758.0229299504498, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9100000000000001.
[I 2024-07-08 13:48:35,973] Trial 7 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 103, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9100000000000001.
[I 2024-07-08 13:48:36,039] Trial 8 finished with value: 0.91000

Number of finished trials:  100
Best trial:
  Value:  0.9200000000000002
  Params: 
    classifier: Logistic Regression
    C: 663.4431150064177
    solver: lbfgs
Iteration 1, Accuracy: 0.92, Improvement: 0.92
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:48:41,877] Trial 0 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 286, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 13:48:42,197] Trial 1 finished with value: 0.8699999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 291, 'max_depth': 19, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 13:48:42,447] Trial 2 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 225, 'max_depth': 14, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9099999999999999.
[I 2024-07-08 13:48:42,746] Trial 3 finished with value: 0.9099999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 274, 'max_depth': 28, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0

Number of finished trials:  100
Best trial:
  Value:  0.9099999999999999
  Params: 
    classifier: Random Forest
    n_estimators: 286
    max_depth: 17
    min_samples_split: 2
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.91, Improvement: 0.91


Processing files:  13%|█▎        | 36/270 [06:12<47:30, 12.18s/it]

[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/LOF/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/Z-Score/onehot/application_train.csv
Using target column: TARGET


Processing files:  14%|█▍        | 39/270 [06:12<25:46,  6.70s/it][I 2024-07-08 13:49:02,313] A new study created in memory with name: no-name-e2199fe4-2dca-4727-afed-ce9e29f368da
The least populated class in y has only 4 members, which is less than n_splits=5.


Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/MAD/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/MAD/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/MAD/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:49:02,533] Trial 0 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 204, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:02,549] Trial 1 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 438.476794479858, 'solver': 'liblinear'}. Best is trial 1 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:02,787] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 202, 'max_depth': 27, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 438.476794479858
    solver: liblinear
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/DBSCAN/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:49:08,080] Trial 1 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 297, 'max_depth': 19, 'min_samples_split': 6, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:08,219] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 116, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:08,458] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 198, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is l

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 20.12130486853442
    solver: liblinear
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/DBSCAN/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:49:13,241] Trial 3 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 153, 'max_depth': 25, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 3 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:13,369] Trial 4 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 104, 'max_depth': 27, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:49:13,384] Trial 5 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 41.21007011797094, 'solver': 'lbfgs'}. Best is trial 3 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Random Forest
    n_estimators: 153
    max_depth: 25
    min_samples_split: 6
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:  17%|█▋        | 45/270 [06:43<23:47,  6.34s/it][I 2024-07-08 13:49:33,274] A new study created in memory with name: no-name-417181db-f256-466c-a7be-4acc306ae611


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/knn/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manu

[I 2024-07-08 13:49:33,483] Trial 0 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 183, 'max_depth': 19, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 13:49:33,498] Trial 1 finished with value: 0.952627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 776.4134052528361, 'solver': 'liblinear'}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 13:49:33,686] Trial 2 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 148, 'max_depth': 21, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 13:49:33,752] Trial 3 finished with value: 0.944627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 921.4677372206743, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.952627450980392.
[I 2024-07-08 13:49:34,101] Trial 4 fini

Number of finished trials:  100
Best trial:
  Value:  0.9605490196078431
  Params: 
    classifier: Random Forest
    n_estimators: 194
    max_depth: 14
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9841897233201581, Improvement: 0.9841897233201581


Processing files:  18%|█▊        | 49/270 [07:07<22:48,  6.19s/it][I 2024-07-08 13:49:57,139] A new study created in memory with name: no-name-1b680804-e426-4e12-a4e3-9bc242d254ca
[I 2024-07-08 13:49:57,195] Trial 0 finished with value: 0.944627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 818.0414422690401, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.944627450980392.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:49:57,252] Trial 1 finished with value: 0.944627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 37.890045109038105, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.944627450980392.
[I 2024-07-08 13:49:57,268] Trial 2 finished with value: 0.952627450980392 and parameters: {'classifier': 'Logistic Regression', 'C': 751.3847272634623, 'solver': 'liblinear'}. Best is trial 2 with value: 0.952627450980392.
[I 2024-07-08 13:49:57,457] Trial 3 finished with value: 0.9565490196078432 and parameters: {'classifier': 'Random Forest', 'n_estimators': 158, 'max_depth': 26, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.9565490196078432.
[I 2024-07-08 13:49:57,726] Trial 4 finished with value: 0.9566274509803921 and parameters: {'classifier': 'Random Forest', 'n_estimators': 234, 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 4 with value: 0.9566274509803921.
[I 2024-07-08 13:49:57,924] Trial 5

Number of finished trials:  100
Best trial:
  Value:  0.9605490196078431
  Params: 
    classifier: Random Forest
    n_estimators: 194
    max_depth: 27
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9802371541501976, Improvement: 0.9802371541501976


Processing files:  19%|█▊        | 50/270 [07:30<31:00,  8.46s/it][I 2024-07-08 13:50:20,890] A new study created in memory with name: no-name-13294656-c759-43f3-b71d-02463e879931


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/Isolation Forest/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/knn/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:50:21,069] Trial 0 finished with value: 0.944627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 136, 'max_depth': 19, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.944627450980392.
[I 2024-07-08 13:50:21,230] Trial 1 finished with value: 0.952627450980392 and parameters: {'classifier': 'Random Forest', 'n_estimators': 110, 'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.952627450980392.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as s

Number of finished trials:  100
Best trial:
  Value:  0.9565490196078432
  Params: 
    classifier: Logistic Regression
    C: 557.9331682209156
    solver: liblinear
Iteration 1, Accuracy: 0.9604743083003953, Improvement: 0.9604743083003953
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/knn/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/knn/Isolation Forest/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:50:26,922] Trial 0 finished with value: 0.9105050505050505 and parameters: {'classifier': 'Random Forest', 'n_estimators': 295, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9105050505050505.
[I 2024-07-08 13:50:27,202] Trial 1 finished with value: 0.9238383838383838 and parameters: {'classifier': 'Random Forest', 'n_estimators': 256, 'max_depth': 15, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9238383838383838.
[I 2024-07-08 13:50:27,227] Trial 2 finished with value: 0.915050505050505 and parameters: {'classifier': 'Logistic Regression', 'C': 760.0602144801826, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9238383838383838.
[I 2024-07-08 13:50:27,396] Trial 3 finished with value: 0.9238383838383838 and parameters: {'classifier': 'Random Forest', 'n_estimators': 138, 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9238383838383838.
[

Number of finished trials:  100
Best trial:
  Value:  0.9238383838383838
  Params: 
    classifier: Random Forest
    n_estimators: 256
    max_depth: 15
    min_samples_split: 4
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.9282511210762332, Improvement: 0.9282511210762332


Processing files:  19%|█▉        | 52/270 [07:57<37:52, 10.42s/it][I 2024-07-08 13:50:47,777] A new study created in memory with name: no-name-51a63185-27db-4937-9728-da22225377d6


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:50:48,098] Trial 0 finished with value: 0.9238383838383838 and parameters: {'classifier': 'Random Forest', 'n_estimators': 278, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9238383838383838.
[I 2024-07-08 13:50:48,367] Trial 1 finished with value: 0.9238383838383838 and parameters: {'classifier': 'Random Forest', 'n_estimators': 240, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9238383838383838.
[I 2024-07-08 13:50:48,383] Trial 2 finished with value: 0.915050505050505 and parameters: {'classifier': 'Logistic Regression', 'C': 963.0268465114627, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9238383838383838.
[I 2024-07-08 13:50:48,409] Trial 3 finished with value: 0.9194949494949494 and parameters: {'classifier': 'Logistic Regression', 'C': 293.71348323123783, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9238383838383838.
[I 2024-07-08 13:50:48,566] Trial 4

Number of finished trials:  100
Best trial:
  Value:  0.9238383838383838
  Params: 
    classifier: Random Forest
    n_estimators: 278
    max_depth: 10
    min_samples_split: 10
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9237668161434978, Improvement: 0.9237668161434978


Processing files:  20%|█▉        | 53/270 [08:22<48:17, 13.35s/it][I 2024-07-08 13:51:12,410] A new study created in memory with name: no-name-a59e6fac-d74b-4f89-a744-1bd146e2735a
[I 2024-07-08 13:51:12,436] Trial 0 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 897.0634536986647, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:12,462] Trial 1 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 413.48827845293755, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:12,478] Trial 2 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 203.4841832670238, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9193939393939393.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:51:12,494] Trial 3 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 756.7461053481726, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:12,509] Trial 4 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 910.8409624180541, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:12,524] Trial 5 finished with value: 0.9193939393939393 and parameters: {'classifier': 'Logistic Regression', 'C': 582.9786413453537, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:12,794] Trial 6 finished with value: 0.914949494949495 and parameters: {'classifier': 'Random Forest', 'n_estimators': 241, 'max_depth': 22, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9193939393939393.
[I 2024-07-08 13:51:13,083] Trial 7 finished with value: 0.8878787878

Number of finished trials:  100
Best trial:
  Value:  0.9282828282828282
  Params: 
    classifier: Random Forest
    n_estimators: 158
    max_depth: 30
    min_samples_split: 10
    min_samples_leaf: 3
Iteration 1, Accuracy: 0.9237668161434978, Improvement: 0.9237668161434978


Processing files:  21%|██        | 57/270 [08:41<26:21,  7.42s/it]

[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/LOF/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: 

[I 2024-07-08 13:51:31,479] A new study created in memory with name: no-name-6c41b6fc-af7a-4211-9b17-b9d960141115
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:31,638] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 148, 'max_depth': 12, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.


Processing file: /content/exports/manual_check_patch/testing_data/median/MAD/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/MAD/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/DBSCAN/ordinal/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:31,969] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 296, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:32,219] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 219, 'max_depth': 17, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:32,246] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 909.5423203063417, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.
The least popul

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 159
    max_depth: 18
    min_samples_split: 5
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:  23%|██▎       | 61/270 [08:59<21:04,  6.05s/it][I 2024-07-08 13:51:49,790] A new study created in memory with name: no-name-77a1e14f-063d-4304-bea6-9a265640b063
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:49,806] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 138.86278928730746, 'solver': 'liblinear'}. Best is trial 0 with value: 0.6666666666666666.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:50,075] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 251, 'max_depth': 14, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:50,091] Trial 2 finished with value: 0.5999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 291.3843725405936, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:51:50,116] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 894.3520262700202, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 me

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 120
    max_depth: 13
    min_samples_split: 6
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:  23%|██▎       | 62/270 [09:20<28:25,  8.20s/it][I 2024-07-08 13:52:10,416] A new study created in memory with name: no-name-f2db3e09-9ece-4cfc-83e5-90c135279122


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:52:10,585] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 147, 'max_depth': 27, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:52:10,600] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 789.5399507547118, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:52:10,760] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 118, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least po

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 187
    max_depth: 16
    min_samples_split: 4
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:  23%|██▎       | 63/270 [09:40<35:16, 10.23s/it][I 2024-07-08 13:52:30,301] A new study created in memory with name: no-name-4876a82a-0342-434d-bda1-efeacdd1ae71
[I 2024-07-08 13:52:30,358] Trial 0 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 102.24321156555573, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9523232323232322.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/median/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /conte

[I 2024-07-08 13:52:30,688] Trial 1 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 291, 'max_depth': 16, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.955959595959596.
[I 2024-07-08 13:52:30,703] Trial 2 finished with value: 0.955959595959596 and parameters: {'classifier': 'Logistic Regression', 'C': 315.8699516341847, 'solver': 'liblinear'}. Best is trial 1 with value: 0.955959595959596.
[I 2024-07-08 13:52:31,013] Trial 3 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 269, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.955959595959596.
[I 2024-07-08 13:52:31,029] Trial 4 finished with value: 0.955959595959596 and parameters: {'classifier': 'Logistic Regression', 'C': 291.6542797912254, 'solver': 'liblinear'}. Best is trial 1 with value: 0.955959595959596.
[I 2024-07-08 13:52:31,247] Trial 5 fini

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: Random Forest
    n_estimators: 291
    max_depth: 16
    min_samples_split: 6
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9558823529411765, Improvement: 0.9558823529411765


Processing files:  25%|██▍       | 67/270 [09:57<24:39,  7.29s/it][I 2024-07-08 13:52:47,613] A new study created in memory with name: no-name-994a0665-ee4d-49b7-8775-b29f7edd87b1
[I 2024-07-08 13:52:47,628] Trial 0 finished with value: 0.955959595959596 and parameters: {'classifier': 'Logistic Regression', 'C': 719.135281200081, 'solver': 'liblinear'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:52:47,685] Trial 1 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 514.0976147252701, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.955959595959596.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:52:47,741] Trial 2 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 222.5434216338269, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:52:47,909] Trial 3 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 122, 'max_depth': 15, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:52:48,219] Trial 4 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Random Forest', 'n_estimators': 243, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:52:48,244] Trial 5 finished with value: 0.9523232323232322 and parameters: {'classifier': 'Logistic Regression', 'C': 468.2957244725501, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:52:48,270] Trial 6 finis

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: Logistic Regression
    C: 719.135281200081
    solver: liblinear
Iteration 1, Accuracy: 0.9558823529411765, Improvement: 0.9558823529411765
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/Isolation Forest/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/median/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:53:02,115] Trial 1 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 148, 'max_depth': 25, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:53:02,131] Trial 2 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Logistic Regression', 'C': 577.4070613257563, 'solver': 'liblinear'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:53:02,147] Trial 3 finished with value: 0.9522558922558921 and parameters: {'classifier': 'Logistic Regression', 'C': 402.0055073183871, 'solver': 'liblinear'}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:53:02,426] Trial 4 finished with value: 0.955959595959596 and parameters: {'classifier': 'Random Forest', 'n_estimators': 239, 'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.955959595959596.
[I 2024-07-08 13:53:02,442] Trial 5 fi

Number of finished trials:  100
Best trial:
  Value:  0.955959595959596
  Params: 
    classifier: Logistic Regression
    C: 0.5167753470688179
    solver: newton-cg
Iteration 1, Accuracy: 0.9558823529411765, Improvement: 0.9558823529411765
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/median/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/median/Isolation Forest/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:53:19,937] Trial 0 finished with value: 0.927741935483871 and parameters: {'classifier': 'Random Forest', 'n_estimators': 152, 'max_depth': 15, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.927741935483871.
[I 2024-07-08 13:53:19,953] Trial 1 finished with value: 0.9146236559139785 and parameters: {'classifier': 'Logistic Regression', 'C': 441.9567181845209, 'solver': 'liblinear'}. Best is trial 0 with value: 0.927741935483871.
[I 2024-07-08 13:53:19,979] Trial 2 finished with value: 0.9210752688172044 and parameters: {'classifier': 'Logistic Regression', 'C': 80.98842210515437, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.927741935483871.
[I 2024-07-08 13:53:20,188] Trial 3 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 179, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.927741935483871.
[I 2024-07-08 13:53:20,386] Trial 4 fini

Number of finished trials:  100
Best trial:
  Value:  0.927741935483871
  Params: 
    classifier: Random Forest
    n_estimators: 152
    max_depth: 15
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9407894736842105, Improvement: 0.9407894736842105


Processing files:  26%|██▌       | 70/270 [10:49<39:46, 11.93s/it][I 2024-07-08 13:53:39,160] A new study created in memory with name: no-name-7f63a278-e81c-4c97-a799-d502b3c5aad0
[I 2024-07-08 13:53:39,227] Trial 0 finished with value: 0.9210752688172044 and parameters: {'classifier': 'Logistic Regression', 'C': 168.8764182654239, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9210752688172044.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:53:39,313] Trial 1 finished with value: 0.9210752688172044 and parameters: {'classifier': 'Logistic Regression', 'C': 329.65238278941854, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9210752688172044.
[I 2024-07-08 13:53:39,339] Trial 2 finished with value: 0.9210752688172044 and parameters: {'classifier': 'Logistic Regression', 'C': 148.1930519667025, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9210752688172044.
[I 2024-07-08 13:53:39,476] Trial 3 finished with value: 0.927741935483871 and parameters: {'classifier': 'Random Forest', 'n_estimators': 105, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.927741935483871.
[I 2024-07-08 13:53:39,644] Trial 4 finished with value: 0.927741935483871 and parameters: {'classifier': 'Random Forest', 'n_estimators': 129, 'max_depth': 21, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.927741935483871.
[I 2024-07-08 13:53:39,720] Trial 5 fin

Number of finished trials:  100
Best trial:
  Value:  0.927741935483871
  Params: 
    classifier: Random Forest
    n_estimators: 105
    max_depth: 17
    min_samples_split: 5
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9276315789473685, Improvement: 0.9276315789473685
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/LOF/label
[test9999] target_column=TARGET


Processing files:  26%|██▋       | 71/270 [11:10<46:12, 13.93s/it][I 2024-07-08 13:54:00,020] A new study created in memory with name: no-name-e03f0cd8-0de7-4468-9b4b-194aafb01abc


Model saved at /content/exports/modelization/testing_data/testing_data/iterative/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:54:00,270] Trial 0 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 232, 'max_depth': 21, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 13:54:00,549] Trial 1 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Random Forest', 'n_estimators': 243, 'max_depth': 29, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 13:54:00,564] Trial 2 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Logistic Regression', 'C': 631.7241041002302, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 13:54:00,579] Trial 3 finished with value: 0.9212903225806451 and parameters: {'classifier': 'Logistic Regression', 'C': 343.415820174482, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9212903225806451.
[I 2024-07-08 13:54:00,828] Trial 4 f

Number of finished trials:  100
Best trial:
  Value:  0.9212903225806451
  Params: 
    classifier: Random Forest
    n_estimators: 232
    max_depth: 21
    min_samples_split: 2
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9210526315789473, Improvement: 0.9210526315789473
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/LOF/onehot
[test9999] target_column=TARGET


Processing files:  28%|██▊       | 75/270 [11:28<23:48,  7.33s/it][I 2024-07-08 13:54:18,085] A new study created in memory with name: no-name-ccafac97-8f88-4ca0-ad79-486c52408287


Model saved at /content/exports/modelization/testing_data/testing_data/iterative/LOF/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Z-Score/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/MAD/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one

The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:18,103] Trial 0 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 68.55982673718887, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:18,281] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 160, 'max_depth': 22, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:18,298] Trial 2 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 209.61562583712413, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 m

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 100
    max_depth: 27
    min_samples_split: 3
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/DBSCAN/ordinal
[test9999] target_column=TARGET


Processing files:  29%|██▉       | 79/270 [11:46<19:12,  6.03s/it][I 2024-07-08 13:54:36,841] A new study created in memory with name: no-name-95fc1b4f-8220-46d0-bb6e-913457b4f51e
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:36,857] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 433.6667753848224, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:36,882] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 700.164775134815, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.


Model saved at /content/exports/modelization/testing_data/testing_data/iterative/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:37,042] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 145, 'max_depth': 25, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:37,068] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 683.6078021214036, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:37,247] Trial 4 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 162, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7333333333333332.
The least p

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 203
    max_depth: 17
    min_samples_split: 6
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:  30%|██▉       | 80/270 [12:09<26:46,  8.45s/it][I 2024-07-08 13:54:59,059] A new study created in memory with name: no-name-fa30ef8f-c7c9-4778-a59b-d8b640f90fe2
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:59,086] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 919.819025782918, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.7333333333333332.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:59,385] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 265, 'max_depth': 16, 'min_samples_split': 5, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:59,401] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 188.69785274393834, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:54:59,559] Trial 3 finished with value: 0.8 and parameters: {'classifier': 'Random Forest', 'n_estimators': 135, 'max_depth': 20, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.8.
The least populated class in y has only 4

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 168
    max_depth: 13
    min_samples_split: 3
    min_samples_leaf: 1
Iteration 1, Accuracy: 1.0, Improvement: 1.0


Processing files:  30%|███       | 81/270 [12:29<33:11, 10.53s/it][I 2024-07-08 13:55:19,384] A new study created in memory with name: no-name-da1cccf0-4b0f-4fba-932b-b364918dc204
[I 2024-07-08 13:55:19,410] Trial 0 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 794.4831710942606, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:19,426] Trial 1 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 215.64794153896864, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9578947368421054.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/iterative/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processi

[I 2024-07-08 13:55:19,644] Trial 2 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 174, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:19,669] Trial 3 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 535.9429853599927, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:19,918] Trial 4 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 214, 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:19,944] Trial 5 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 265.6064630257818, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:19,960] Trial 6

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: Logistic Regression
    C: 794.4831710942606
    solver: newton-cg
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/Isolation Forest/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:55:32,400] Trial 1 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 265, 'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:32,639] Trial 2 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 190, 'max_depth': 28, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:32,664] Trial 3 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 595.4765488903157, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:32,679] Trial 4 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Logistic Regression', 'C': 375.7284676328175, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:32,999] Tri

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: Logistic Regression
    C: 281.9331164863273
    solver: liblinear
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/Isolation Forest/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/iterative/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:55:46,716] Trial 0 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 229, 'max_depth': 29, 'min_samples_split': 6, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:46,731] Trial 1 finished with value: 0.9508771929824562 and parameters: {'classifier': 'Logistic Regression', 'C': 451.42823655258945, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:46,746] Trial 2 finished with value: 0.9508771929824562 and parameters: {'classifier': 'Logistic Regression', 'C': 650.0529404109286, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:47,016] Trial 3 finished with value: 0.9578947368421054 and parameters: {'classifier': 'Random Forest', 'n_estimators': 224, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9578947368421054.
[I 2024-07-08 13:55:47,295] Tr

Number of finished trials:  100
Best trial:
  Value:  0.9578947368421054
  Params: 
    classifier: Random Forest
    n_estimators: 229
    max_depth: 29
    min_samples_split: 6
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9578947368421052, Improvement: 0.9578947368421052


Processing files:  32%|███▏      | 87/270 [13:20<33:18, 10.92s/it][I 2024-07-08 13:56:10,860] A new study created in memory with name: no-name-09fec949-605d-4a79-98e0-99678abe0591
[I 2024-07-08 13:56:10,896] Trial 0 finished with value: 0.9242524916943522 and parameters: {'classifier': 'Logistic Regression', 'C': 346.88945689544175, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9242524916943522.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/iterative/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/iterative/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:56:11,064] Trial 1 finished with value: 0.9053156146179402 and parameters: {'classifier': 'Random Forest', 'n_estimators': 134, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:11,232] Trial 2 finished with value: 0.909966777408638 and parameters: {'classifier': 'Random Forest', 'n_estimators': 133, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:11,481] Trial 3 finished with value: 0.9100775193798449 and parameters: {'classifier': 'Random Forest', 'n_estimators': 210, 'max_depth': 28, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:11,517] Trial 4 finished with value: 0.9242524916943522 and parameters: {'classifier': 'Logistic Regression', 'C': 516.706724914949, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9242524916943522.
[I

Number of finished trials:  100
Best trial:
  Value:  0.9242524916943522
  Params: 
    classifier: Logistic Regression
    C: 346.88945689544175
    solver: newton-cg
Iteration 1, Accuracy: 0.9289099526066351, Improvement: 0.9289099526066351
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:56:17,768] Trial 1 finished with value: 0.9005537098560354 and parameters: {'classifier': 'Random Forest', 'n_estimators': 222, 'max_depth': 27, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:17,987] Trial 2 finished with value: 0.9005537098560354 and parameters: {'classifier': 'Random Forest', 'n_estimators': 155, 'max_depth': 26, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:18,023] Trial 3 finished with value: 0.9194905869324475 and parameters: {'classifier': 'Logistic Regression', 'C': 587.7773639088649, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:18,059] Trial 4 finished with value: 0.9147286821705427 and parameters: {'classifier': 'Logistic Regression', 'C': 836.3892308179993, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9242524916943522.
[I 2024-07-08 13:56:18,238] Tria

Number of finished trials:  100
Best trial:
  Value:  0.9242524916943522
  Params: 
    classifier: Logistic Regression
    C: 346.67978265565625
    solver: lbfgs
Iteration 1, Accuracy: 0.9289099526066351, Improvement: 0.9289099526066351
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:56:23,626] Trial 0 finished with value: 0.9147286821705427 and parameters: {'classifier': 'Random Forest', 'n_estimators': 185, 'max_depth': 24, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9147286821705427.
[I 2024-07-08 13:56:23,815] Trial 1 finished with value: 0.9149501661129568 and parameters: {'classifier': 'Random Forest', 'n_estimators': 163, 'max_depth': 21, 'min_samples_split': 3, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.9149501661129568.
[I 2024-07-08 13:56:23,831] Trial 2 finished with value: 0.9243632336655592 and parameters: {'classifier': 'Logistic Regression', 'C': 604.5959815948116, 'solver': 'liblinear'}. Best is trial 2 with value: 0.9243632336655592.
[I 2024-07-08 13:56:23,959] Trial 3 finished with value: 0.9149501661129568 and parameters: {'classifier': 'Random Forest', 'n_estimators': 113, 'max_depth': 28, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9243632336655592.


Number of finished trials:  100
Best trial:
  Value:  0.9243632336655592
  Params: 
    classifier: Logistic Regression
    C: 604.5959815948116
    solver: liblinear
Iteration 1, Accuracy: 0.9289099526066351, Improvement: 0.9289099526066351
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains o

[I 2024-07-08 13:56:29,657] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 262, 'max_depth': 26, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:29,673] Trial 1 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 560.9798543603526, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:29,882] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 165, 'max_depth': 15, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Logistic Regression
    C: 560.9798543603526
    solver: liblinear
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/DBSCAN/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:56:35,183] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 201, 'max_depth': 11, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:35,210] Trial 3 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Logistic Regression', 'C': 787.7556586531504, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:35,419] Trial 4 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 182, 'max_depth': 17, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7999999999999999.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Logistic Regression
    C: 244.41918575197948
    solver: liblinear
Iteration 1, Accuracy: 0.8, Improvement: 0.8
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/DBSCAN/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:56:40,887] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 251, 'max_depth': 11, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:40,912] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 473.67312790916515, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:56:41,102] Trial 4 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 171, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 

Number of finished trials:  100
Best trial:
  Value:  0.7999999999999999
  Params: 
    classifier: Logistic Regression
    C: 14.437048829961043
    solver: liblinear
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/DBSCAN/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains on

[I 2024-07-08 13:56:47,896] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 127, 'max_depth': 14, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:56:48,055] Trial 1 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 123, 'max_depth': 26, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:56:48,082] Trial 2 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 386.83841676591607, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:56:48,271] Trial 3 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 137, 'max_depth': 29, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9534415584415583.
[I 

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Random Forest
    n_estimators: 127
    max_depth: 14
    min_samples_split: 9
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147


Processing files:  38%|███▊      | 103/270 [14:11<10:43,  3.85s/it][I 2024-07-08 13:57:01,771] A new study created in memory with name: no-name-f467b55e-516f-4067-a6a9-9a02811fb3e3
[I 2024-07-08 13:57:01,787] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 234.78736294997768, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9534415584415583.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:57:02,055] Trial 1 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 218, 'max_depth': 29, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:02,071] Trial 2 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 308.3664856063218, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:02,086] Trial 3 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 140.15011390649525, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:02,102] Trial 4 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Logistic Regression', 'C': 337.6729612939674, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:02,127] Trial 5 finished with value: 0.95344155

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Logistic Regression
    C: 234.78736294997768
    solver: lbfgs
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/Isolation Forest/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/most_frequent/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:57:12,650] Trial 0 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 190, 'max_depth': 11, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:12,665] Trial 1 finished with value: 0.9498051948051947 and parameters: {'classifier': 'Logistic Regression', 'C': 450.0356993708561, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:12,692] Trial 2 finished with value: 0.9498051948051947 and parameters: {'classifier': 'Logistic Regression', 'C': 889.3620962510493, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:12,970] Trial 3 finished with value: 0.9534415584415583 and parameters: {'classifier': 'Random Forest', 'n_estimators': 251, 'max_depth': 14, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9534415584415583.
[I 2024-07-08 13:57:12,996] Trial 4 fini

Number of finished trials:  100
Best trial:
  Value:  0.9534415584415583
  Params: 
    classifier: Random Forest
    n_estimators: 190
    max_depth: 11
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.953405017921147, Improvement: 0.953405017921147


Processing files:  39%|███▉      | 105/270 [14:46<22:38,  8.24s/it][I 2024-07-08 13:57:36,560] A new study created in memory with name: no-name-2d3594ab-e482-49f4-9feb-ef324582cc2a
[I 2024-07-08 13:57:36,576] Trial 0 finished with value: 0.9333333333333333 and parameters: {'classifier': 'Logistic Regression', 'C': 669.2890548940526, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9333333333333333.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/most_frequent/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/most_frequent/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:57:36,805] Trial 1 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 188, 'max_depth': 22, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:37,024] Trial 2 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 196, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:37,041] Trial 3 finished with value: 0.9333333333333333 and parameters: {'classifier': 'Logistic Regression', 'C': 299.81660531330596, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:37,199] Trial 4 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 125, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.9416666666666667.
[I

Number of finished trials:  100
Best trial:
  Value:  0.9416666666666667
  Params: 
    classifier: Random Forest
    n_estimators: 188
    max_depth: 22
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.975, Improvement: 0.975
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/LOF/ordinal
[test9999] target_column=TARGET


Processing files:  39%|███▉      | 106/270 [15:06<28:52, 10.57s/it][I 2024-07-08 13:57:56,456] A new study created in memory with name: no-name-79b58adc-5f9b-4c75-9984-7efe93f03ee1
[I 2024-07-08 13:57:56,473] Trial 0 finished with value: 0.9333333333333333 and parameters: {'classifier': 'Logistic Regression', 'C': 800.6197532459367, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9333333333333333.


Model saved at /content/exports/modelization/testing_data/testing_data/ffill/LOF/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:57:56,763] Trial 1 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 235, 'max_depth': 27, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:56,971] Trial 2 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 156, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:56,987] Trial 3 finished with value: 0.9333333333333333 and parameters: {'classifier': 'Logistic Regression', 'C': 248.1086839909405, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9416666666666667.
[I 2024-07-08 13:57:57,145] Trial 4 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 133, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9416666666666667.


Number of finished trials:  100
Best trial:
  Value:  0.9416666666666667
  Params: 
    classifier: Random Forest
    n_estimators: 235
    max_depth: 27
    min_samples_split: 5
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9416666666666667, Improvement: 0.9416666666666667


Processing files:  40%|███▉      | 107/270 [15:28<35:51, 13.20s/it][I 2024-07-08 13:58:18,888] A new study created in memory with name: no-name-811abe1a-023f-4310-afea-aab0fa4386fc
[I 2024-07-08 13:58:18,905] Trial 0 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 774.2889472729137, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9333333333333332.
[I 2024-07-08 13:58:18,931] Trial 1 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 703.0930894609545, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9333333333333332.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/LOF/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:58:19,089] Trial 2 finished with value: 0.9166666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 121, 'max_depth': 14, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9333333333333332.
[I 2024-07-08 13:58:19,399] Trial 3 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 298, 'max_depth': 11, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 3 with value: 0.9416666666666667.
[I 2024-07-08 13:58:19,699] Trial 4 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 245, 'max_depth': 25, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.9416666666666667.
[I 2024-07-08 13:58:19,867] Trial 5 finished with value: 0.9416666666666667 and parameters: {'classifier': 'Random Forest', 'n_estimators': 130, 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 3 

Number of finished trials:  100
Best trial:
  Value:  0.95
  Params: 
    classifier: Logistic Regression
    C: 7.16951383392367
    solver: newton-cg
Iteration 1, Accuracy: 0.9666666666666667, Improvement: 0.9666666666666667
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing 

[I 2024-07-08 13:58:25,052] Trial 0 finished with value: 0.7333333333333333 and parameters: {'classifier': 'Random Forest', 'n_estimators': 300, 'max_depth': 11, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:25,272] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 187, 'max_depth': 23, 'min_samples_split': 10, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7333333333333333.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:25,288] Trial 2 finished with value: 0.9333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 688.8679898191872, 'solver': 'newton-cg'}. Best is trial 2 with value: 0.9333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-

Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Logistic Regression
    C: 688.8679898191872
    solver: newton-cg
Iteration 1, Accuracy: 1.0, Improvement: 1.0
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/DBSCAN/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/DBSCAN/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:58:30,469] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 194, 'max_depth': 28, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:30,708] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 230, 'max_depth': 29, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:30,917] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 195, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is l

Number of finished trials:  100
Best trial:
  Value:  0.9333333333333332
  Params: 
    classifier: Logistic Regression
    C: 689.7989539591151
    solver: liblinear
Iteration 1, Accuracy: 1.0, Improvement: 1.0
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/DBSCAN/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/DBSCAN/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:58:36,241] Trial 5 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 267, 'max_depth': 25, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:36,257] Trial 6 finished with value: 0.8666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 178.35178890501618, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:58:36,547] Trial 7 finished with value: 0.7999999999999999 and parameters: {'classifier': 'Random Forest', 'n_estimators': 268, 'max_depth': 13, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.8666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Logistic Regression
    C: 510.5856955660494
    solver: lbfgs
Iteration 1, Accuracy: 1.0, Improvement: 1.0
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/DBSCAN/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/ffill/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/export

[I 2024-07-08 13:58:41,951] Trial 2 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 157, 'max_depth': 19, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:42,251] Trial 3 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 250, 'max_depth': 17, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:42,622] Trial 4 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 284, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:42,943] Trial 5 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 240, 'max_depth': 27, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Logistic Regression
    C: 230.55773149920012
    solver: newton-cg
Iteration 1, Accuracy: 0.9635627530364372, Improvement: 0.9635627530364372
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/Isolation Forest/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:58:57,319] Trial 2 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Logistic Regression', 'C': 638.9152762037546, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:57,547] Trial 3 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 175, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:57,837] Trial 4 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 228, 'max_depth': 30, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:57,872] Trial 5 finished with value: 0.9554285714285715 and parameters: {'classifier': 'Logistic Regression', 'C': 451.6724924806405, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:58:57,888] Trial 6 

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Logistic Regression
    C: 801.4474073265637
    solver: newton-cg
Iteration 1, Accuracy: 0.9635627530364372, Improvement: 0.9635627530364372
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/Isolation Forest/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/Isolation Forest/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/ffill/Isolation Forest/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:59:12,143] Trial 0 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 160, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:59:12,332] Trial 1 finished with value: 0.9595102040816326 and parameters: {'classifier': 'Random Forest', 'n_estimators': 170, 'max_depth': 22, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:59:12,357] Trial 2 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Logistic Regression', 'C': 978.3131122143639, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:59:12,372] Trial 3 finished with value: 0.9514285714285714 and parameters: {'classifier': 'Logistic Regression', 'C': 508.33842194617307, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9595102040816326.
[I 2024-07-08 13:59:12,621] Tri

Number of finished trials:  100
Best trial:
  Value:  0.9595102040816326
  Params: 
    classifier: Random Forest
    n_estimators: 160
    max_depth: 10
    min_samples_split: 2
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.9595141700404858, Improvement: 0.9595141700404858


Processing files:  44%|████▍     | 120/270 [16:43<23:29,  9.40s/it][I 2024-07-08 13:59:33,814] A new study created in memory with name: no-name-ec1a2c15-3055-4f8c-bb35-bda3591ca3cc


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/ffill/Isolation Forest/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/ffill/Isolation Forest/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/ordinal/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:59:34,144] Trial 0 finished with value: 0.8954545454545455 and parameters: {'classifier': 'Random Forest', 'n_estimators': 264, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.8954545454545455.
[I 2024-07-08 13:59:34,170] Trial 1 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 852.8960147137003, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 13:59:34,399] Trial 2 finished with value: 0.9045454545454545 and parameters: {'classifier': 'Random Forest', 'n_estimators': 203, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 13:59:34,425] Trial 3 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 681.4574856905866, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.9227272727272726.
[I 2024-07-08 13:59:34,593] Tria

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727271
  Params: 
    classifier: Logistic Regression
    C: 979.2413342480891
    solver: newton-cg
Iteration 1, Accuracy: 0.9227272727272727, Improvement: 0.9227272727272727
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/LOF/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/LOF/ordinal/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:59:40,272] Trial 4 finished with value: 0.9 and parameters: {'classifier': 'Random Forest', 'n_estimators': 207, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 13:59:40,298] Trial 5 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 22.717828702229667, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 13:59:40,323] Trial 6 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 889.2071974076774, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 13:59:40,348] Trial 7 finished with value: 0.9227272727272726 and parameters: {'classifier': 'Logistic Regression', 'C': 215.98355372347072, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9227272727272726.
[I 2024-07-08 13:59:40,547] Trial 8 finished with value: 0.9181818181818182 and p

Number of finished trials:  100
Best trial:
  Value:  0.9272727272727271
  Params: 
    classifier: Logistic Regression
    C: 986.4399010629197
    solver: newton-cg
Iteration 1, Accuracy: 0.9227272727272727, Improvement: 0.9227272727272727
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/LOF/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/LOF/label/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/LOF/onehot/application_train.csv
Using target column: TARGET


[I 2024-07-08 13:59:45,869] Trial 2 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 274, 'max_depth': 13, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:59:46,068] Trial 3 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Random Forest', 'n_estimators': 156, 'max_depth': 14, 'min_samples_split': 3, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:59:46,337] Trial 4 finished with value: 0.909090909090909 and parameters: {'classifier': 'Random Forest', 'n_estimators': 216, 'max_depth': 26, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.9181818181818182.
[I 2024-07-08 13:59:46,363] Trial 5 finished with value: 0.9181818181818182 and parameters: {'classifier': 'Logistic Regression', 'C': 243.4794537743991, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9181818181818182.
[

Number of finished trials:  100
Best trial:
  Value:  0.9181818181818182
  Params: 
    classifier: Logistic Regression
    C: 726.5682416711796
    solver: lbfgs
Iteration 1, Accuracy: 0.9363636363636364, Improvement: 0.9363636363636364
[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/LOF/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/LOF/onehot/TARGET/best_Logistic Regression_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Z-Score/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Z-Score/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in

[I 2024-07-08 13:59:57,786] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 181, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.6666666666666666.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:59:58,137] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 299, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13:59:58,163] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 691.5788299590207, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 13

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 233
    max_depth: 28
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:  47%|████▋     | 127/270 [17:31<19:26,  8.16s/it][I 2024-07-08 14:00:21,227] A new study created in memory with name: no-name-41f24de1-4b6f-44a2-96c8-6f716ecfdf92
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:21,260] Trial 0 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 614.5860930870105, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.6666666666666666.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/DBSCAN/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/DBSCAN/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/DBSCAN/label/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:21,531] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 217, 'max_depth': 19, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:21,557] Trial 2 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Logistic Regression', 'C': 96.74328889221262, 'solver': 'newton-cg'}. Best is trial 1 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:21,836] Trial 3 finished with value: 0.6666666666666666 and parameters: {'classifier': 'Random Forest', 'n_estimators': 233, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.7333333333333332.
The least p

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 238
    max_depth: 17
    min_samples_split: 4
    min_samples_leaf: 2
Iteration 1, Accuracy: 0.8666666666666667, Improvement: 0.8666666666666667


Processing files:  47%|████▋     | 128/270 [17:57<28:45, 12.15s/it][I 2024-07-08 14:00:47,434] A new study created in memory with name: no-name-2a6a972e-8d68-42b9-b72e-e97908fb556a
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:47,450] Trial 0 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 696.3685672372397, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7333333333333332.


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/DBSCAN/label
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/DBSCAN/label/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/DBSCAN/onehot/application_train.csv
Using target column: TARGET


The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:47,669] Trial 1 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 185, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:48,020] Trial 2 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Random Forest', 'n_estimators': 292, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.7333333333333332.
The least populated class in y has only 4 members, which is less than n_splits=5.
[I 2024-07-08 14:00:48,045] Trial 3 finished with value: 0.7333333333333332 and parameters: {'classifier': 'Logistic Regression', 'C': 134.80705557893683, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7333333333333332.
The least popul

Number of finished trials:  100
Best trial:
  Value:  0.8666666666666666
  Params: 
    classifier: Random Forest
    n_estimators: 185
    max_depth: 11
    min_samples_split: 5
    min_samples_leaf: 1
Iteration 1, Accuracy: 0.9333333333333333, Improvement: 0.9333333333333333


Processing files:  48%|████▊     | 129/270 [18:13<30:35, 13.02s/it][I 2024-07-08 14:01:03,249] A new study created in memory with name: no-name-a993a8a2-5faa-4d48-ada7-a2ee153d9305


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/DBSCAN/onehot
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/DBSCAN/onehot/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/ordinal/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/label/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/IQR/onehot/application_train.csv
Using target column: TARGET
Skipping optimization for TARGET as it contains only one class in the training data.
Pro

[I 2024-07-08 14:01:03,418] Trial 0 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 149, 'max_depth': 29, 'min_samples_split': 8, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:03,434] Trial 1 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 588.3533718479761, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:03,572] Trial 2 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 120, 'max_depth': 16, 'min_samples_split': 2, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:03,588] Trial 3 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 773.1846075162648, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:03,877] Tria

Number of finished trials:  100
Best trial:
  Value:  0.9568831168831169
  Params: 
    classifier: Random Forest
    n_estimators: 149
    max_depth: 29
    min_samples_split: 8
    min_samples_leaf: 4
Iteration 1, Accuracy: 0.9568345323741008, Improvement: 0.9568345323741008


Processing files:  49%|████▉     | 133/270 [18:34<19:29,  8.54s/it][I 2024-07-08 14:01:24,308] A new study created in memory with name: no-name-986ca064-464d-4c2a-ae90-467883f5d46e


[test9999] output_folder=/content/exports/modelization
[test9999] testing_sub_path_name=testing_data
[test9999] relative_path=testing_data/constant_0/Isolation Forest/ordinal
[test9999] target_column=TARGET
Model saved at /content/exports/modelization/testing_data/testing_data/constant_0/Isolation Forest/ordinal/TARGET/best_Random Forest_model.pkl
Processing file: /content/exports/manual_check_patch/testing_data/constant_0/Isolation Forest/label/application_train.csv
Using target column: TARGET


[I 2024-07-08 14:01:24,547] Trial 0 finished with value: 0.9568831168831169 and parameters: {'classifier': 'Random Forest', 'n_estimators': 208, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:24,573] Trial 1 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 451.51917287823056, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:24,600] Trial 2 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Logistic Regression', 'C': 305.8843054156911, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:24,929] Trial 3 finished with value: 0.9533116883116884 and parameters: {'classifier': 'Random Forest', 'n_estimators': 286, 'max_depth': 11, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9568831168831169.
[I 2024-07-08 14:01:24,945] Tr

In [17]:
# Load one exported training file (bfill / DBSCAN / ordinal variant) for a manual look.
test_1 = pd.read_csv(
    filepath_or_buffer="./exports/manual_check_patch/testing_data/bfill/DBSCAN/ordinal/application_train.csv"
)

In [18]:
# Peek at the first five target values of the loaded export.
test_1.loc[:, "TARGET"].head(n=5)

0    0
1    1
2    0
3    0
4    0
Name: TARGET, dtype: int64

# Évaluation des Modèles

After training the models, the evaluate_models function is used to evaluate the performance of each model. This function calculates several performance metrics and also computes the business score based on the given formula.

Business Score Calculation
The business score is calculated using the following formula:

$$\text{Business Score} = (\text{Benefit} \times \text{PPV}) - (\text{Cost} \times \text{False Positive Rate})$$

Where:

Benefit: The benefit obtained from a correct prediction.
PPV (Positive Predictive Value): $\frac{TP}{TP + FP}$, where TP is the number of true positives and FP is the number of false positives.
Cost: The cost associated with an incorrect prediction.
False Positive Rate: $\frac{FP}{\text{Total Negatives}}$, where Total Negatives $= FP + TN$ (TN is the number of true negatives).


In [None]:
def evaluate_models(base_path, output_folder, target_columns, chunk_size=1000, benefit=1.0, cost=1.0):
    """
    Evaluate every saved model found under ``output_folder`` on its test file.

    The function walks ``output_folder`` and picks up each ``.pkl`` file stored in a
    directory whose path contains one of ``target_columns``. For each model it reads
    ``application_test.csv`` from the mirrored directory (the model's directory with
    ``output_folder`` replaced by ``base_path``) chunk by chunk, and computes the
    metrics on every chunk that contains the target column.

    The business score is ``(benefit * PPV) - (cost * false positive rate)`` with
    ``PPV = TP / (TP + FP)`` and ``false positive rate = FP / (FP + TN)``.

    The target is assumed to be binary and encoded as 0/1 (1 being the positive
    class), which is what the default ``pos_label`` of the scikit-learn metrics
    used here expects as well.

    Args:
        base_path (str): Root folder that holds the test CSV files.
        output_folder (str): Root folder that holds the saved models; the metrics
            CSV is written there too.
        target_columns (list[str]): Candidate target column names.
        chunk_size (int): Number of rows read from the test CSV at a time.
        benefit (float): Weight applied to the PPV in the business score.
        cost (float): Weight applied to the false positive rate in the business score.

    Returns:
        pandas.DataFrame: One row per evaluated chunk with the columns ``model``,
        ``target``, ``accuracy``, ``precision``, ``recall``, ``f1_score``,
        ``roc_auc`` and ``score_metier``. The same table is written to
        ``model_performance_metrics.csv`` in ``output_folder``.
    """
    # Imported here because the notebook's import cell does not bring these
    # three metrics into scope.
    from sklearn.metrics import precision_score, recall_score, f1_score

    performance_metrics = []
    for root, _dirs, files in os.walk(output_folder):
        # Only directories whose path mentions a target column hold models to evaluate.
        target_column = next((col for col in target_columns if col in root), None)
        if target_column is None:
            continue

        for file in files:
            if not file.endswith('.pkl'):
                continue

            model_path = os.path.join(root, file)
            model_name = file.split('_')[1]

            print(f"Analyzing model: {model_name} for target: {target_column}")

            # Load the model and the test data.
            # NOTE: joblib.load unpickles arbitrary code; only use it on model
            # files produced by a trusted run.
            best_pipeline = joblib.load(model_path)
            test_file_path = os.path.join(
                os.path.dirname(model_path).replace(output_folder, base_path),
                'application_test.csv',
            )
            test_data = pd.read_csv(test_file_path, chunksize=chunk_size)

            for chunk in test_data:
                # Chunks without the target cannot be scored.
                if target_column not in chunk.columns:
                    continue

                X_test = chunk.drop(target_column, axis=1)
                y_test = chunk[target_column]

                # Predictions
                y_pred = best_pipeline.predict(X_test)
                y_pred_proba = best_pipeline.predict_proba(X_test)[:, 1]

                # ROC AUC is undefined (roc_auc_score raises) when the chunk
                # contains a single class.
                has_both_classes = y_test.nunique() > 1

                # Performance metrics; zero_division=0 keeps the value sklearn
                # already returns for an empty denominator, without the warning.
                accuracy = accuracy_score(y_test, y_pred)
                precision = precision_score(y_test, y_pred, zero_division=0)
                recall = recall_score(y_test, y_pred, zero_division=0)
                f1 = f1_score(y_test, y_pred, zero_division=0)
                roc_auc = roc_auc_score(y_test, y_pred_proba) if has_both_classes else float('nan')

                # Confusion matrix; labels=[0, 1] forces a 2x2 matrix even when
                # the chunk only contains one class.
                cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
                TN, FP, FN, TP = cm.ravel()

                # PPV and false positive rate
                VPP = TP / (TP + FP) if (TP + FP) > 0 else 0
                taux_fp = FP / (FP + TN) if (FP + TN) > 0 else 0

                # Business score
                score_metier = (benefit * VPP) - (cost * taux_fp)

                performance_metrics.append({
                    'model': model_name,
                    'target': target_column,
                    'accuracy': accuracy,
                    'precision': precision,
                    'recall': recall,
                    'f1_score': f1,
                    'roc_auc': roc_auc,
                    'score_metier': score_metier
                })

                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1 Score: {f1}")
                print(f"ROC AUC: {roc_auc}")
                print(f"Score Métier: {score_metier}")

                # Plots (helpers defined elsewhere in the notebook); a ROC curve
                # is only drawn when both classes are present.
                if has_both_classes:
                    plot_roc_curve(y_test, y_pred_proba, model_name, root)
                plot_confusion_matrix(y_test, y_pred, model_name, root)

    # Turn the collected metrics into a DataFrame and persist them.
    performance_df = pd.DataFrame(performance_metrics)
    performance_df.to_csv(os.path.join(output_folder, 'model_performance_metrics.csv'), index=False)

    return performance_df


In [None]:
# evaluate_models walks `output_folder` for saved models and looks for the test CSV
# in the mirrored directory under `base_path`.
# NOTE(review): the run logs show models under the modelization folder and data
# under the manual-check folder, hence this mapping; confirm it is the intended one.
performance_df = evaluate_models(
    base_path=LOCAL_EXPORT_MANUAL_CHECK_PATCH_FOLDER_PATH,
    output_folder=LOCAL_EXPORT_MODELIZATION_FOLDER_PATH,
    target_columns=TARGET_COLUMNS,
    chunk_size=1000,
    benefit=1.0,
    cost=1.0,
)
performance_df

# Sélection du Modèle Final et Hyperparameter Tuning

After evaluating the models, the best performing model can be selected and further tuned using hyperparameter optimization.

def select_and_tune_best_model(performance_df, base_path, output_folder, target_column):
    """
    Pick the model with the highest ROC AUC, re-tune it with Optuna and save it.

    The row of ``performance_df`` with the largest ``roc_auc`` designates the best
    model. An Optuna study (100 trials, maximised) is then run with the
    notebook-level ``objective`` on ``application_train.csv`` read from
    ``base_path``; the best trial's parameters (except ``classifier``) are applied
    to ``models[best_model_name]['model']``, which is fitted inside a one-step
    pipeline and dumped over the existing model file.

    Args:
        performance_df (pandas.DataFrame): Metrics table with at least the
            ``model`` and ``roc_auc`` columns.
        base_path (str): Folder containing ``application_train.csv``.
        output_folder (str): Root folder of the saved models.
        target_column (str): Name of the target column in the training file.

    Returns:
        sklearn.pipeline.Pipeline: The refitted pipeline that was saved.
    """
    best_model_row = performance_df.loc[performance_df['roc_auc'].idxmax()]
    best_model_name = best_model_row['model']
    print(f"Best model: {best_model_name} with ROC AUC: {best_model_row['roc_auc']}")

    model_path = os.path.join(output_folder, best_model_name, target_column, f'best_{best_model_name}_model.pkl')
    # Loading here makes a missing or unreadable model file fail before the
    # tuning run starts; the loaded object itself is replaced further down.
    joblib.load(model_path)

    # Hyperparameter tuning of the best model with Optuna.
    # The training file is read once and then split into features and target.
    train_df = pd.read_csv(os.path.join(base_path, 'application_train.csv'))
    X_train = train_df.drop(target_column, axis=1)
    y_train = train_df[target_column]

    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=100)

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('  Value: ', trial.value)
    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    # NOTE(review): `objective` and `models` are defined elsewhere in the notebook.
    # If `objective` can sample a classifier other than `best_model_name`, the
    # parameters below may not be valid for this estimator and set_params would
    # raise -- verify against `objective`.
    # NOTE(review): set_params mutates the estimator object stored in `models`.
    best_classifier = models[best_model_name]['model']
    best_params = {k: v for k, v in trial.params.items() if k != 'classifier'}
    best_classifier.set_params(**best_params)

    best_pipeline = Pipeline(steps=[
        ('classifier', best_classifier)
    ])

    best_pipeline.fit(X_train, y_train)
    joblib.dump(best_pipeline, model_path)
    print(f"Model saved at {model_path}")

    return best_pipeline

# Interprétabilité et Explicabilité
Using SHAP to explain the predictions of the model.

In [None]:
def explain_model(best_pipeline, X_train, output_folder):
    """
    Explain a fitted tree-based pipeline with SHAP and save two plots.

    A ``shap.TreeExplainer`` is built on the pipeline's ``'classifier'`` step and
    SHAP values are computed for ``X_train``. A summary plot of all rows is saved
    as ``shap_summary_plot.png`` and a force plot of the first row as
    ``shap_force_plot.png``, both in ``output_folder`` (created if missing).

    Args:
        best_pipeline (sklearn.pipeline.Pipeline): Fitted pipeline with a
            ``'classifier'`` step supported by ``shap.TreeExplainer``.
        X_train (pandas.DataFrame): Feature rows to explain.
        output_folder (str): Folder where the PNG files are written.
    """
    os.makedirs(output_folder, exist_ok=True)

    explainer = shap.TreeExplainer(best_pipeline.named_steps['classifier'])
    shap_values = explainer.shap_values(X_train)

    # show=False: by default summary_plot displays the figure itself, which
    # leaves nothing for savefig to write.
    shap.summary_plot(shap_values, X_train, show=False)
    plt.savefig(os.path.join(output_folder, 'shap_summary_plot.png'), bbox_inches='tight')
    plt.close()

    # Classifiers can yield one set of SHAP values per class, either as a list of
    # (rows, features) arrays or as a single (rows, features, classes) array.
    # The force plot needs a single output, so use class 1 when there are several.
    if isinstance(shap_values, list):
        class_index = 1 if len(shap_values) > 1 else 0
        single_output_values = np.asarray(shap_values[class_index])
    elif np.ndim(shap_values) == 3:
        class_index = 1 if np.shape(shap_values)[2] > 1 else 0
        single_output_values = np.asarray(shap_values)[:, :, class_index]
    else:
        class_index = 0
        single_output_values = np.asarray(shap_values)

    expected_values = np.atleast_1d(explainer.expected_value)
    base_value = expected_values[class_index] if expected_values.size > class_index else expected_values[0]

    shap.initjs()
    # matplotlib=True renders a matplotlib figure (the default is a JS widget
    # that savefig cannot capture); show=False keeps it open for savefig.
    shap.force_plot(
        base_value,
        single_output_values[0, :],
        X_train.iloc[0, :],
        matplotlib=True,
        show=False,
    )
    plt.savefig(os.path.join(output_folder, 'shap_force_plot.png'), bbox_inches='tight')
    plt.close()

In [None]:
# Summary

In [None]:
def summarize_results(performance_df, output_folder=None):
    """
    Print and save a short text summary of the best model (highest ROC AUC).

    Args:
        performance_df (pandas.DataFrame): Metrics table with the columns
            ``model``, ``accuracy``, ``precision``, ``recall``, ``f1_score``,
            ``roc_auc`` and either ``business_score`` or ``score_metier``
            (the name written by ``evaluate_models``).
        output_folder (str, optional): Folder where ``model_summary.txt`` is
            written (created if missing). Defaults to
            ``LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH``.

    Raises:
        ValueError: If ``performance_df`` has no rows.
    """
    if performance_df.empty:
        raise ValueError("performance_df is empty: there is no model to summarize.")

    if output_folder is None:
        output_folder = LOCAL_EXPORT_MODELIZATION_EVALUATION_RESULT_FOLDER_PATH
    os.makedirs(output_folder, exist_ok=True)

    best_model_row = performance_df.loc[performance_df['roc_auc'].idxmax()]
    best_model_name = best_model_row['model']

    # evaluate_models stores the business score under 'score_metier'; accept
    # either column name.
    business_key = 'business_score' if 'business_score' in best_model_row.index else 'score_metier'

    summary = f"Best model: {best_model_name}\n"
    summary += f"Accuracy: {best_model_row['accuracy']}\n"
    summary += f"Precision: {best_model_row['precision']}\n"
    summary += f"Recall: {best_model_row['recall']}\n"
    summary += f"F1 Score: {best_model_row['f1_score']}\n"
    summary += f"ROC AUC: {best_model_row['roc_auc']}\n"
    summary += f"Business Score: {best_model_row[business_key]}\n"

    print(summary)

    with open(os.path.join(output_folder, 'model_summary.txt'), 'w', encoding='utf-8') as f:
        f.write(summary)

    print("Summary saved.")