### Import libraries

In [45]:
import pandas as pd
import numpy as np 
import tqdm
import os 
import joblib

# Preprocessing
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder,RobustScaler


# Models
from sklearn.ensemble import (
    RandomForestClassifier,
    VotingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Parameter tuning, splitting
from sklearn.model_selection import (
    GridSearchCV,
    RepeatedStratifiedKFold,
    cross_val_score,
    train_test_split,
)

# Metrics
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)


### Read dataset

In [46]:
# Load the training table; the first CSV row (header=0) supplies the column names.
df = pd.read_csv(
    filepath_or_buffer="../data/dataset_for_training.csv",
    header=0,
)

### Get models

In [47]:
def get_classification_models(random_state=42):
    """Build a fresh, unfitted instance of every supported base classifier.

    Parameters
    ----------
    random_state : int or None, default=42
        Seed passed to every estimator that accepts one, so repeated runs
        give the same fitted models. The default matches the seed the SMOTE
        sampler in ``create_pipeline`` uses. Pass ``None`` to get unseeded
        estimators.

    Returns
    -------
    dict
        Maps the abbreviations "KNN", "LOR", "SVM", "RF" and "XGB" to the
        corresponding estimator instance.
    """
    return {
        # KNeighborsClassifier takes no seed.
        "KNN": KNeighborsClassifier(),
        "LOR": LogisticRegression(random_state=random_state),
        # probability=True is required so the SVM exposes predict_proba.
        "SVM": SVC(probability=True, random_state=random_state),
        "RF": RandomForestClassifier(random_state=random_state),
        "XGB": XGBClassifier(
            use_label_encoder=False,
            eval_metric="logloss",
            random_state=random_state,
        ),
    }

### Define pipeline

In [48]:
def create_pipeline(model_abbreviation, model, categorical_indices, continuous_indices):
    """Assemble the preprocessing -> SMOTE -> classifier pipeline.

    Parameters
    ----------
    model_abbreviation : str
        "ensemble" means ``model`` is used as-is; any other value wraps
        ``model`` in a 5-fold ``CalibratedClassifierCV``.
    model : estimator
        The classifier (or ready-made ensemble) placed in the final step.
    categorical_indices : list of int
        Column positions that are one-hot encoded.
    continuous_indices : list of int
        Column positions that are median-imputed and robust-scaled.

    Returns
    -------
    imblearn.pipeline.Pipeline
        Unfitted pipeline with the steps "preprocess", "sampler" and "model".
    """
    # Continuous columns: fill gaps with the median, then scale.
    scaling_pipeline = Pipeline(
        steps=[
            ("cont_imputer", SimpleImputer(strategy="median")),
            ("continuous", RobustScaler()),
        ]
    )
    # Categorical columns: one-hot encode; unseen categories are ignored.
    encoding_pipeline = Pipeline(
        steps=[
            (
                "categorical",
                OneHotEncoder(handle_unknown="ignore", drop="if_binary"),
            )
        ]
    )
    # Columns in neither index list are passed through unchanged.
    preprocess = ColumnTransformer(
        transformers=[
            ("scal", scaling_pipeline, continuous_indices),
            ("cat", encoding_pipeline, categorical_indices),
        ],
        remainder="passthrough",
    )

    if model_abbreviation == "ensemble":
        finalmodel = model
    else:
        # The estimator is passed positionally on purpose: its keyword was
        # renamed from `base_estimator` to `estimator` in newer scikit-learn,
        # and the positional form works with both spellings.
        finalmodel = CalibratedClassifierCV(model, cv=5, ensemble=True)

    # SMOTE sits after preprocessing, so it receives the transformed columns.
    return Pipeline(
        steps=[
            ("preprocess", preprocess),
            ("sampler", SMOTE(random_state=42)),
            ("model", finalmodel),
        ]
    )

In [49]:
def train(pipeline, X_train, y_train, classificationtype, features):
    """Fit ``pipeline`` on the training data and report in-sample metrics.

    Parameters
    ----------
    pipeline : Pipeline
        Must contain a step named "model" and support ``predict_proba``.
    X_train, y_train
        Training features and labels; ``y_train`` is flattened before fitting.
    classificationtype : str
        "binary" scores AUC on the second probability column; any other
        value uses the multi-class one-vs-rest macro AUC.
    features : sized collection
        Only its length is used, in the printed header.

    Returns
    -------
    dict
        Keys "train_acc", "train_auc" and "train_report".
    """
    pipeline.fit(X_train, np.ravel(y_train))
    y_hat_train = pipeline.predict(X_train)
    y_prob_train = pipeline.predict_proba(X_train)

    train_acc = accuracy_score(y_train, y_hat_train)
    if classificationtype == "binary":
        train_auc = roc_auc_score(y_train, y_prob_train[:, 1])
    else:
        # "ovr" matches the scheme test() uses, so train and test AUC are
        # computed the same way and can be compared.
        train_auc = roc_auc_score(
            y_train, y_prob_train, multi_class="ovr", average="macro"
        )
    report = classification_report(y_train, y_hat_train)

    print("Train results of {} with {} features".format(pipeline.named_steps["model"], len(features)))
    print("Train accuracy: {:.2f}".format(train_acc))
    print("Train AUC: {:.2f}".format(train_auc))
    print(report)
    return {"train_acc": train_acc, "train_auc": train_auc, "train_report": report}

In [50]:
def test(pipeline, X_train, y_train, X_test, y_test, classificationtype, features):
    """Re-fit ``pipeline`` on the training data and score it on the test data.

    Parameters
    ----------
    pipeline : Pipeline
        Must contain a step named "model" and support ``predict_proba``.
    X_train, y_train
        Data the pipeline is (re-)fitted on; ``y_train`` is flattened first.
    X_test, y_test
        Held-out data used for every reported metric.
    classificationtype : str
        "binary" scores AUC on the second probability column and fills the
        confusion-matrix counts; any other value uses the one-vs-rest macro
        AUC and sets the four counts to the placeholder "nvt".
    features : sized collection
        Only its length is used, in the printed header.

    Returns
    -------
    dict
        Keys "test_acc", "test_auc", "test_report", "tn", "fp", "fn", "tp",
        "test_F1", "test_recall" and "test_precision".
    """
    # Flatten y the same way train() does.
    pipeline.fit(X_train, np.ravel(y_train))
    y_hat = pipeline.predict(X_test)
    y_prob = pipeline.predict_proba(X_test)

    test_acc = accuracy_score(y_test, y_hat)
    if classificationtype == "binary":
        test_auc = roc_auc_score(y_test, y_prob[:, 1], average="macro")
        # Pin the labels to the fitted classes so the matrix keeps one
        # row/column per class even if a class is missing from both y_test
        # and y_hat; otherwise the 4-way unpack below raises.
        fitted_classes = pipeline.named_steps["model"].classes_
        tn, fp, fn, tp = confusion_matrix(
            y_test, y_hat, labels=fitted_classes
        ).ravel()
    else:
        test_auc = roc_auc_score(
            y_test, y_prob, multi_class="ovr", average="macro"
        )
        tn, fp, fn, tp = "nvt", "nvt", "nvt", "nvt"

    F1 = f1_score(y_test, y_hat, average="macro")
    precision = precision_score(y_test, y_hat, average="macro")
    recall = recall_score(y_test, y_hat, average="macro")

    print("Test results of {} with {} features".format(pipeline.named_steps["model"], len(features)))
    print("Test accuracy: {:.2f}".format(test_acc))
    print("Test AUC: {:.2f}".format(test_auc))
    print("Test F1-score: {:.2f}".format(F1))
    report = classification_report(y_test, y_hat)
    print(report)
    return {
        "test_acc": test_acc,
        "test_auc": test_auc,
        "test_report": report,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "tp": tp,
        "test_F1": F1,
        "test_recall": recall,
        "test_precision": precision,
    }

### Create ensemble

In [51]:
def create_classification_ensemble_model(estimators, X_train, y_train, X_test, y_test, classificationtype, features, categorical_indices, continuous_indices):
    """Build a soft-voting ensemble weighted by each member's squared test AUC.

    Every abbreviation in ``estimators`` is turned into a pipeline, trained
    and tested on its own. Its test metrics are recorded, and its (calibrated)
    model step becomes one member of the returned ``VotingClassifier``.

    Parameters
    ----------
    estimators : iterable of str
        Keys of the dict returned by ``get_classification_models``.
    X_train, y_train, X_test, y_test
        Split used to train and score each member.
    classificationtype : str
        Forwarded to ``train`` and ``test``.
    features : sized collection
        Forwarded to ``train`` and ``test`` for logging.
    categorical_indices, continuous_indices : list of int
        Forwarded to ``create_pipeline``.

    Returns
    -------
    tuple
        ``(ensemble, results)``: the unfitted ``VotingClassifier`` and a dict
        mapping each abbreviation to its "ACC", "AUC", "RECALL", "PRECISION"
        and "F1" test scores.
    """
    models = get_classification_models()
    base_estimators = []
    weights = []
    results = {}

    for counter, abbreviation in enumerate(estimators):
        pipeline = create_pipeline(
            abbreviation, models[abbreviation], categorical_indices, continuous_indices
        )
        # Called for its printed training report; the return value is not needed here.
        train(pipeline, X_train, y_train, classificationtype, features)
        test_results = test(pipeline, X_train, y_train, X_test, y_test, classificationtype, features)

        # NOTE(review): the weights come from test-set AUC, and the ensemble is
        # later evaluated on that same test set, so its reported score is
        # optimistically biased. Consider cross-validated AUC on the training
        # data instead.
        weights.append(test_results["test_auc"] * test_results["test_auc"])
        results[abbreviation] = {
            "ACC": test_results["test_acc"],
            "AUC": test_results["test_auc"],
            "RECALL": test_results["test_recall"],
            "PRECISION": test_results["test_precision"],
            "F1": test_results["test_F1"],
        }

        # The ensemble is given only the model step, wrapped in a one-step
        # pipeline under a unique name.
        member_name = f"{abbreviation}_{counter}"
        base_estimators.append(
            (
                member_name,
                Pipeline(steps=[(member_name, pipeline.named_steps["model"])]),
            )
        )

    # Sum once instead of once per weight. A zero or NaN total would give NaN
    # weights, so fall back to equal weighting (weights=None) in that case.
    total_weight = sum(weights)
    if total_weight > 0:
        normalized_weights = [float(weight / total_weight) for weight in weights]
    else:
        normalized_weights = None

    ensemble = VotingClassifier(
        base_estimators, voting="soft", weights=normalized_weights
    )
    return ensemble, results

### Confidence interval

In [52]:
def confidence_interval_classification(all_accuracy, all_aucs, all_recall, all_precision, all_f1):
    """Print the mean and a mean +/- 2 standard deviations range per metric.

    The range is computed from the spread of the collected scores (NumPy's
    default standard deviation); it is not a formal confidence interval of
    the mean.

    Parameters
    ----------
    all_accuracy, all_aucs, all_recall, all_precision, all_f1 : sequence of float
        One score per train/test repetition. The inputs are not modified.

    Returns
    -------
    None
        The five summary lines are written to standard output.
    """
    labelled_scores = (
        ("accuracy", all_accuracy),
        ("AUC", all_aucs),
        ("recall", all_recall),
        ("precision", all_precision),
        ("F1-score", all_f1),
    )
    for label, scores in labelled_scores:
        # np.sort returns a sorted copy, so the caller's list keeps its
        # per-split order while the values are still summed in sorted order.
        values = np.sort(np.asarray(scores))
        mean = values.mean()
        spread = 2 * values.std()
        print(f"Confidence interval {label}: {mean} [{mean - spread}-{mean + spread}]\n")

### Save results

In [58]:
def save_pipeline(pipeline, model_abbreviation, features, train_results, test_results, split):
    """Persist a fitted pipeline and a plain-text summary of its scores.

    Writes ``../results/<abbr>_bestpipeline.pkl`` (via joblib) and
    ``../results/<abbr>_bestpipeline_results.txt``, overwriting existing files.

    Parameters
    ----------
    pipeline : estimator
        Object to serialise.
    model_abbreviation : str
        Prefix of both output file names.
    features : iterable
        Feature names, written one per line at the end of the report.
    train_results : dict
        Needs "train_acc", "train_auc" and "train_report".
    test_results : dict
        Needs "test_acc", "test_auc", "test_report", "tn", "fp", "fn", "tp".
    split
        The ``random_state`` that produced the train/test split.
    """
    results_dir = "../results"
    # Create the output folder on first use instead of failing with FileNotFoundError.
    os.makedirs(results_dir, exist_ok=True)

    joblib.dump(pipeline, f"{results_dir}/{model_abbreviation}_bestpipeline.pkl")
    report_path = f"{results_dir}/{model_abbreviation}_bestpipeline_results.txt"
    with open(report_path, "w", encoding="utf-8") as file:
        file.write("Data was split with random_state={}\n".format(split))
        file.write("Train accuracy: {:.2f}\n".format(train_results["train_acc"]))
        file.write("Train AUC of {:.2f}\n".format(train_results["train_auc"]))
        file.write("{}\n".format(train_results["train_report"]))
        file.write("Test accuracy: {:.2f}\n".format(test_results["test_acc"]))
        file.write("Test AUC of {:.2f}\n".format(test_results["test_auc"]))
        file.write("TN: {}\tFP: {}\tFN: {}\tTP: {}\n".format(test_results["tn"], test_results["fp"], test_results["fn"], test_results["tp"]))
        file.write("{}\n\n".format(test_results["test_report"]))
        file.write("Features:\n")
        for feat in features:
            file.write(f"{feat}\n")

### Train loop

In [59]:
# Base-estimator abbreviations handed to the ensemble builder by train_test_loop.
models = [
    "LOR",
    "KNN",
    "SVM",
    "RF",
    "XGB",
]
# Read by train_test_loop: "binary" selects the positive-class AUC and the
# confusion-matrix counts; any other value takes the multi-class branch.
classificationtype = "binary"

In [60]:
def train_test_loop(dataset, model_abbreviation, loops=1, save=False):
    """Repeatedly split, train and test one model, collecting the test scores.

    Iteration ``i`` makes a stratified 80/20 split with ``random_state=i``,
    builds the requested pipeline, and trains and tests it. Reads the
    notebook-level ``models`` and ``classificationtype`` variables.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the target column "y"; every other column is a feature.
    model_abbreviation : str
        A key of ``get_classification_models()`` or "ensemble".
    loops : int, default=1
        Number of differently seeded splits to evaluate.
    save : bool, default=False
        When true, the pipeline with the highest test AUC so far is written
        to disk with ``save_pipeline``.

    Returns
    -------
    None
        Results are printed and, optionally, saved.
    """
    # The feature/target split and column positions do not depend on the
    # random split, so derive them once, not once per iteration.
    y = dataset["y"]
    X = dataset.drop(["y"], axis=1)
    categorical_features = list(X.select_dtypes(include=["object"]))
    categorical_indices = [X.columns.get_loc(cat) for cat in categorical_features]
    continuous_features = list(X.select_dtypes(include=["float", "int"]))
    continuous_indices = [X.columns.get_loc(cont) for cont in continuous_features]
    features = X.columns

    best_auc = 0
    all_accuracies = []
    all_aucs = []
    all_recall = []
    all_precision = []
    all_f1 = []

    for i in tqdm.tqdm(range(int(loops))):
        # The loop index doubles as the split seed, which is what save_pipeline records.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.20, stratify=y, random_state=i
        )
        print("Training set: {}, Test set: {}".format(len(X_train), len(X_test)))

        if model_abbreviation == "ensemble":
            # The per-estimator results are not used here.
            voting_classifier, _ = create_classification_ensemble_model(
                models, X_train, y_train, X_test, y_test, classificationtype,
                features, categorical_indices, continuous_indices
            )
            pipeline = create_pipeline(
                model_abbreviation, voting_classifier, categorical_indices, continuous_indices
            )
        else:
            model = get_classification_models()[model_abbreviation]
            pipeline = create_pipeline(
                model_abbreviation, model, categorical_indices, continuous_indices
            )

        train_results = train(pipeline, X_train, y_train, classificationtype, features)
        test_results = test(pipeline, X_train, y_train, X_test, y_test, classificationtype, features)
        all_accuracies.append(test_results["test_acc"])
        all_aucs.append(test_results["test_auc"])
        all_recall.append(test_results["test_recall"])
        all_precision.append(test_results["test_precision"])
        all_f1.append(test_results["test_F1"])

        # NOTE(review): "best" is chosen by test AUC across splits, so the
        # saved pipeline's reported test score is an optimistic estimate.
        if save and test_results["test_auc"] > best_auc:
            save_pipeline(pipeline, model_abbreviation, features, train_results, test_results, i)
            best_auc = test_results["test_auc"]

    # NOTE(review): strictly greater than 20, so a run with loops=20 prints
    # no summary; confirm whether `>=` was intended.
    if loops > 20:
        confidence_interval_classification(all_accuracies, all_aucs, all_recall, all_precision, all_f1)

In [56]:
# Quick check: one stratified 80/20 split of the calibrated random forest, nothing saved.
train_test_loop(dataset=df, model_abbreviation="RF")

  0%|          | 0/1 [00:00<?, ?it/s]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=RandomForestClassifier(), cv=5) with 30 features
Train accuracy: 1.00
Train AUC: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       246
           1       1.00      1.00      1.00        78

    accuracy                           1.00       324
   macro avg       1.00      1.00      1.00       324
weighted avg       1.00      1.00      1.00       324



100%|██████████| 1/1 [00:01<00:00,  1.75s/it]

Test results of CalibratedClassifierCV(base_estimator=RandomForestClassifier(), cv=5) with 30 features
Test accuracy: 0.81
Test AUC: 0.80
Test F1-score: 0.70
              precision    recall  f1-score   support

           0       0.83      0.95      0.89        61
           1       0.73      0.40      0.52        20

    accuracy                           0.81        81
   macro avg       0.78      0.68      0.70        81
weighted avg       0.80      0.81      0.79        81






In [57]:
# Evaluate the soft-voting ensemble on 20 differently seeded splits and keep the
# best-scoring pipeline on disk.
# NOTE(review): loops=20 does not satisfy the `loops > 20` check inside
# train_test_loop, so this run prints no summary intervals.
train_test_loop(dataset=df, model_abbreviation="ensemble", loops=20, save=True)

  0%|          | 0/20 [00:00<?, ?it/s]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.83
Train AUC: 0.89
              precision    recall  f1-score   support

           0       0.92      0.85      0.88       246
           1       0.61      0.77      0.68        78

    accuracy                           0.83       324
   macro avg       0.77      0.81      0.78       324
weighted avg       0.85      0.83      0.83       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.74
Test AUC: 0.76
Test F1-score: 0.69
              precision    recall  f1-score   support

           0       0.87      0.77      0.82        61
           1       0.48      0.65      0.55        20

    accuracy                           0.74        81
   macro avg       0.68      0.71      0.69        81
weighted avg       0.77      0.74      0.75        81

Train results of Calibr

  5%|▌         | 1/20 [00:17<05:27, 17.23s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.85
Train AUC: 0.89
              precision    recall  f1-score   support

           0       0.93      0.86      0.89       246
           1       0.65      0.79      0.71        78

    accuracy                           0.85       324
   macro avg       0.79      0.83      0.80       324
weighted avg       0.86      0.85      0.85       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.77
Test AUC: 0.80
Test F1-score: 0.70
              precision    recall  f1-score   support

           0       0.86      0.82      0.84        61
           1       0.52      0.60      0.56        20

    accuracy                           0.77        81
   macro avg       0.69      0.71      0.70        81
weighted avg       0.78      0.77      0.77        81

Train results of Calibr

 10%|█         | 2/20 [00:37<05:38, 18.80s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.83
Train AUC: 0.89
              precision    recall  f1-score   support

           0       0.93      0.83      0.88       246
           1       0.61      0.81      0.69        78

    accuracy                           0.83       324
   macro avg       0.77      0.82      0.79       324
weighted avg       0.85      0.83      0.83       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.78
Test AUC: 0.83
Test F1-score: 0.73
              precision    recall  f1-score   support

           0       0.89      0.80      0.84        61
           1       0.54      0.70      0.61        20

    accuracy                           0.78        81
   macro avg       0.71      0.75      0.73        81
weighted avg       0.80      0.78      0.79        81

Train results of Calibr

 15%|█▌        | 3/20 [00:56<05:20, 18.84s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.82
Train AUC: 0.91
              precision    recall  f1-score   support

           0       0.91      0.85      0.88       246
           1       0.61      0.74      0.67        78

    accuracy                           0.82       324
   macro avg       0.76      0.80      0.78       324
weighted avg       0.84      0.82      0.83       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.80
Test AUC: 0.74
Test F1-score: 0.73
              precision    recall  f1-score   support

           0       0.87      0.87      0.87        61
           1       0.60      0.60      0.60        20

    accuracy                           0.80        81
   macro avg       0.73      0.73      0.73        81
weighted avg       0.80      0.80      0.80        81

Train results of Calibr

 20%|██        | 4/20 [01:13<04:55, 18.49s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 25%|██▌       | 5/20 [01:39<05:13, 20.88s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.83
Train AUC: 0.92
              precision    recall  f1-score   support

           0       0.94      0.83      0.88       246
           1       0.61      0.83      0.71        78

    accuracy                           0.83       324
   macro avg       0.78      0.83      0.80       324
weighted avg       0.86      0.83      0.84       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.77
Test AUC: 0.74
Test F1-score: 0.70
              precision    recall  f1-score   support

           0       0.86      0.82      0.84        61
           1       0.52      0.60      0.56        20

    accuracy                           0.77        81
   macro avg       0.69      0.71      0.70        81
weighted avg       0.78      0.77      0.77        81

Train results of Calibr

 30%|███       | 6/20 [01:57<04:41, 20.13s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 35%|███▌      | 7/20 [02:15<04:13, 19.47s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.80
Train AUC: 0.88
              precision    recall  f1-score   support

           0       0.92      0.80      0.86       246
           1       0.56      0.77      0.65        78

    accuracy                           0.80       324
   macro avg       0.74      0.79      0.75       324
weighted avg       0.83      0.80      0.81       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.75
Test AUC: 0.88
Test F1-score: 0.72
              precision    recall  f1-score   support

           0       0.92      0.74      0.82        61
           1       0.50      0.80      0.62        20

    accuracy                           0.75        81
   macro avg       0.71      0.77      0.72        81
weighted avg       0.82      0.75      0.77        81

Train results of Calibr

 40%|████      | 8/20 [02:37<04:02, 20.20s/it]

Training set: 324, Test set: 81
Train results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Train accuracy: 0.82
Train AUC: 0.88
              precision    recall  f1-score   support

           0       0.92      0.83      0.87       246
           1       0.59      0.77      0.67        78

    accuracy                           0.82       324
   macro avg       0.76      0.80      0.77       324
weighted avg       0.84      0.82      0.83       324

Test results of CalibratedClassifierCV(base_estimator=LogisticRegression(), cv=5) with 30 features
Test accuracy: 0.85
Test AUC: 0.87
Test F1-score: 0.79
              precision    recall  f1-score   support

           0       0.89      0.92      0.90        61
           1       0.72      0.65      0.68        20

    accuracy                           0.85        81
   macro avg       0.81      0.78      0.79        81
weighted avg       0.85      0.85      0.85        81

Train results of Calibr

 45%|████▌     | 9/20 [03:00<03:50, 20.92s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 50%|█████     | 10/20 [03:22<03:32, 21.23s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 55%|█████▌    | 11/20 [03:40<03:04, 20.46s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 60%|██████    | 12/20 [04:01<02:43, 20.45s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 65%|██████▌   | 13/20 [04:20<02:21, 20.17s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 70%|███████   | 14/20 [04:41<02:02, 20.37s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 75%|███████▌  | 15/20 [05:01<01:40, 20.16s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 80%|████████  | 16/20 [05:20<01:19, 19.82s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 85%|████████▌ | 17/20 [05:38<00:58, 19.43s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 90%|█████████ | 18/20 [05:57<00:38, 19.16s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

 95%|█████████▌| 19/20 [06:15<00:18, 18.87s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           

100%|██████████| 20/20 [06:32<00:00, 19.62s/it]

Test results of VotingClassifier(estimators=[('LOR_0',
                              Pipeline(steps=[('LOR_0',
                                               CalibratedClassifierCV(base_estimator=LogisticRegression(),
                                                                      cv=5))])),
                             ('KNN_1',
                              Pipeline(steps=[('KNN_1',
                                               CalibratedClassifierCV(base_estimator=KNeighborsClassifier(),
                                                                      cv=5))])),
                             ('SVM_2',
                              Pipeline(steps=[('SVM_2',
                                               CalibratedClassifierCV(base_estimator=SVC(probability=True),
                                                                      cv=5))])),
                             ('RF_3',
                              P...
                                                           


