In [1]:
import os
import sys
import warnings

# Suppress Python warnings raised in this process so the (already very verbose)
# experiment log stays readable.
warnings.filterwarnings('ignore')

# Location of the local jenga checkout that is put on the import path.
# Set the JENGA_DIR environment variable to use a different checkout without
# editing this cell; the default is the original machine-specific path.
JENGA_DIR = os.environ.get('JENGA_DIR', '/home/rupali/Documents/Master Thesis/jenga')

# Re-running this cell must not add the same entry to sys.path again.
if JENGA_DIR not in sys.path:
    sys.path.append(JENGA_DIR)

In [2]:
import random
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

from jenga.basis import Dataset
from jenga.corruptions.generic import MissingValues, SwappedValues, CategoricalShift
from jenga.corruptions.numerical import Scaling, GaussianNoise
from jenga.cleaning.ppp import PipelinePerformancePrediction
from jenga.cleaning.outlier_detection import NoOutlierDetection, PyODKNNOutlierDetection, PyODIsolationForestOutlierDetection
from jenga.cleaning.imputation import NoImputation, MeanModeImputation
from jenga.cleaning.clean import Clean

In [3]:
## use categorical columns as strings
def cat_cols_to_str(df):
    """Convert every categorical column of ``df`` to strings, in place.

    Columns whose dtype is a pandas ``CategoricalDtype`` are replaced by their
    ``astype(str)`` version; all other columns are left untouched.

    Args:
        df: pandas DataFrame to convert. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.columns:
        # isinstance check instead of pd.api.types.is_categorical_dtype, which
        # is deprecated in recent pandas releases. getattr keeps the old
        # "not categorical" outcome when df[col] has no single dtype
        # (e.g. duplicated column labels yield a DataFrame).
        col_dtype = getattr(df[col], 'dtype', None)
        if isinstance(col_dtype, pd.CategoricalDtype):
            df[col] = df[col].astype(str)

    return df

  and should_run_async(code)


In [4]:
def run_experiment(dataset_name, learner, param_grid, corruptions, fraction, cleaners, num_repetitions, categorical_precision_threshold=0.7, numerical_std_error_threshold=2.0):
    """Run one corrupt-then-clean experiment and return its summary.

    Steps: load the dataset, fit the pipeline performance predictor (PPP) on
    the training split, corrupt the test split, run the cleaners on the
    corrupted data and collect the scores.

    Args:
        dataset_name: name passed to ``Dataset``.
        learner: estimator passed to ``PipelinePerformancePrediction``.
        param_grid: parameter grid passed to ``PipelinePerformancePrediction``.
        corruptions: corruption classes passed to ``ppp.get_corrupted``.
        fraction: corruption fraction passed to ``ppp.get_corrupted``.
        cleaners: cleaner combinations passed to ``Clean``.
        num_repetitions: how many times ``ppp.get_corrupted`` is called (also
            forwarded to it); must be at least 1.
        categorical_precision_threshold: forwarded to ``Clean``.
        numerical_std_error_threshold: forwarded to ``Clean``.

    Returns:
        dict with the keys ``'dataset'``, ``'model'``, ``'corruptions'``,
        ``'cleaners'`` and ``'result'``; ``'result'`` is itself a dict holding
        the PPP scores for the clean test data, the corrupted data, the best
        cleaner and every individual cleaner.

    Raises:
        ValueError: if ``num_repetitions`` is smaller than 1.
    """
    # Fail fast: without at least one repetition no corrupted data would be
    # produced and the cleaning step below would hit unbound variables, only
    # after the expensive dataset load and PPP fit.
    if num_repetitions < 1:
        raise ValueError(f"num_repetitions must be >= 1, got {num_repetitions!r}")

    ## dataset
    dataset = Dataset(dataset_name)

    ## categorical and numerical features
    categorical_columns = dataset.categorical_columns
    numerical_columns = dataset.numerical_columns
    print(f"Found {len(categorical_columns)} categorical and {len(numerical_columns)} numeric features \n")

    ## train and test data
    df_train, lab_train, df_test, lab_test = dataset.get_train_test_data()
    # Note: categorical columns are left as-is here. With the swapping
    # corruption this fails with "Cannot setitem on a Categorical with a new
    # category, set the categories first"; converting both frames with
    # cat_cols_to_str first avoids that.

    ## pipeline performance prediction (ppp)
    ppp = PipelinePerformancePrediction(df_train, lab_train, df_test, lab_test, categorical_columns, numerical_columns, learner, param_grid)
    ppp_model = ppp.fit_ppp(df_train)

    ## generate corrupted data
    # NOTE(review): every iteration overwrites the previous outputs, so only
    # the last call's results are used below, and num_repetitions is also
    # passed into get_corrupted itself. Looks like a single call may be
    # enough - confirm against get_corrupted before removing the loop.
    for _ in range(num_repetitions):
        df_corrupted, perturbations, cols_perturbed, summary_col_corrupt = ppp.get_corrupted(df_test, corruptions, fraction, num_repetitions)

    ## cleaning
    clean = Clean(df_train, df_corrupted, categorical_columns, numerical_columns, categorical_precision_threshold, numerical_std_error_threshold, ppp, ppp_model, cleaners)
    df_outliers, df_cleaned, corrupted_score_ppp, best_cleaning_score, cleaner_scores_ppp, summary_cleaners = clean(df_train, df_test, df_corrupted, cols_perturbed)

    ## results
    result = {
        'ppp_score_model': ppp.predict_score_ppp(ppp_model, df_test),
        'ppp_score_corrupted': corrupted_score_ppp,
        'ppp_score_cleaned': best_cleaning_score,
        'ppp_scores_cleaners': cleaner_scores_ppp
    }

    ## summary
    summary = {
        'dataset': dataset_name,
        'model': learner,
        'corruptions': summary_col_corrupt,
        'cleaners': summary_cleaners,
        'result': result
    }

    return summary

### Altogether

In [5]:
# Names of the datasets every experiment configuration is run on.
datasets = ['thoracic_surgery', 'cleve', 'acute-inflammations']

In [6]:
## model parameters
## models maps each learner (key) to the param_grid (value) that is searched for it;
## add further learners with their param_grids as extra entries.
## The 'learner__' prefix presumably addresses a pipeline step named 'learner' - confirm in PipelinePerformancePrediction.
## NOTE(review): newer scikit-learn releases renamed loss='log' to 'log_loss' - check before upgrading.
models = {
    SGDClassifier(loss='log'): {
        'learner__max_iter': [500, 1000, 5000],
        'learner__penalty': ['l2', 'l1', 'elasticnet'],
        'learner__alpha': [0.0001, 0.001, 0.01, 0.1],
    },
    RandomForestClassifier(): {
        'learner__n_estimators': [100, 200, 500],
        'learner__max_depth': [5, 10, 15],
    },
}

In [7]:
# Each entry is the list of corruption classes used for one experiment
# configuration. SwappedValues is currently disabled, both on its own and in
# the combined entry.
corruptions = [
    [MissingValues],
    [CategoricalShift],
    [Scaling],
    [GaussianNoise],
    # all enabled corruptions together
    [MissingValues, CategoricalShift, Scaling, GaussianNoise],
]

In [8]:
# Corruption fractions to sweep over, one experiment per value.
fractions = [
    0.15,
    0.25,
    0.5,
    0.75,
    0.9,
]

In [9]:
# Every cleaner is an (outlier detection, imputation) pair; all of them use
# MeanModeImputation and differ only in the outlier detection class.
cleaners = [
    (outlier_detection, MeanModeImputation)
    for outlier_detection in (
        NoOutlierDetection,
        PyODKNNOutlierDetection,
        PyODIsolationForestOutlierDetection,
    )
]

In [10]:
# Run every (dataset, learner, corruption set, fraction) combination once and
# collect the summary dict returned by each run in ind_results.
#
# The log is very long; to write it to a file instead of the notebook, stdout
# can be redirected around the loop:
#   stdoutOrigin = sys.stdout
#   sys.stdout = open("/home/rupali/Documents/Master Thesis/jenga/out/experiments.txt", "w")
#   ... loop below ...
#   sys.stdout.close()
#   sys.stdout = stdoutOrigin
ind_results = []

for dataset in datasets:
    for learner, param_grid in models.items():
        for corruption in corruptions:
            for fraction in fractions:
                print(learner)
                ind_results.append(
                    run_experiment(
                        dataset_name=dataset,
                        learner=learner,
                        param_grid=param_grid,
                        corruptions=corruption,
                        fraction=fraction,
                        cleaners=cleaners,
                        num_repetitions=100,
                    )
                )

SGDClassifier(loss='log')
Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	per


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturba

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.990625, 'Recall': 0.9511363636363637, 'F1-score': 0.9687483870967742, 'Accuracy': 0.9680851063829787, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6489451476793249, 'classification_report': {'F': {'precision': 0.8409090909090909, 'recall': 0.9367088607594937, 'f1-score': 0.8862275449101796, 'support': 79}, 'T': {'precision': 0.16666666666666666, 'recall': 0.06666666666666667, 'f1-score': 0.09523809523809522, 'support': 15}, 'accuracy': 0.7978723404255319, 'macro avg': {'precision': 0.5037878787878788, 'recall': 0.5016877637130802, 'f1-score': 0.49073282007413743, 'support': 94}, 'weighted avg': {'precision': 0.7333172147001935, 'recall': 0.7978723404255319, 'f1-score': 0.7600058242178257, 'support': 94}}}

Outlier 


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9791666666666667, 'Recall': 0.9929577464788732, 'F1-score': 0.9858156028368794, 'Accuracy': 0.9893617021276596}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 25.003100830259196}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6784810126582278, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7875, 'Recall': 0.880281690140845, 'F1-score': 0.797079365079365, 'Accuracy': 0.8191489361702128}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 48.198864293075715}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6831223628691983, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6831223628691983, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'colu


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.989010989010989, 'Recall': 0.8, 'F1-score': 0.8694444444444444, 'Accuracy': 0.9787234042553191, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5905271199388846, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.989010989010989, 'Recall': 0.8, 'F1-score': 0.8694444444444444, 'Accuracy': 0.9787234042553191, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5974025974025974, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5974025974025974, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5721153846153846, 'classification_report': {'F': {'precision': 0.8703703703703703, 'recall': 0.6025641025641025, 'f1-score': 0.712121212121212, 'support': 78}, 'T': {'precision': 0.225, 'recall': 0.5625, 'f1-score': 0.3214285714285714, 'support': 16}, 'accuracy': 0.5957446808510638, 'macro avg': {'precision': 0.5476851851851852, 'recall': 0.5825320512820513, 'f1-score': 0.5167748917748918, 'support': 94}, 'weighted avg': {'precision': 0.7605200945626477, 'recall': 0.5957446808510638, 'f1-score': 0.6456203371096988, 'support': 94}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: 


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: Missing

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05319148936170213, 'Recall': 0.5, 'F1-score': 0.09615384615384615, 'Accuracy': 0.10638297872340426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9184782608695652, 'Recall': 0.5588235294117647, 'F1-score': 0.5608844596698848, 'Accuracy': 0.8404255319148937, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6839430894308942, 'classification_report': {'F': {'precision': 0.8764044943820225, 'recall': 0.9512195121951219, 'f1-score': 0.912280701754386, 'support': 82}, 'T': {'precision': 0.2, 'recall': 0.08333333333333333, 'f1-score': 0.11764705882352941, 'support': 12}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.5382022471910113, 'recall': 0.5172764227642276, 'f1-score': 0.5149638802889577, 'support': 94}, 'weighted avg': {'precision': 0.7900549844609134, 'recall': 0.8404255319148937, 'f1-score': 0.81083810903980


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.15, 'sampling': 'MAR'}

Gene

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7250459277403551, 'Recall': 0.849002849002849, 'F1-score': 0.7595029239766082, 'Accuracy': 0.851063829787234, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.4888888888888889, 'classification_report': {'F': {'precision': 0.896551724137931, 'recall': 0.9176470588235294, 'f1-score': 0.9069767441860465, 'support': 85}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4482758620689655, 'recall': 0.4588235294117647, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.8107116654438739, 'recall': 0.8297872340425532, 'f1-score': 0.8201385452746165, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7250459277403551, 'Recall': 0.849002849002849, 'F1-score': 0.7595029239766082, 'Accuracy': 0.851063829787234, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.54640522875817, 'classification_report': {'F': {'precision': 0.898876404494382, 'recall': 0.9411764705882353, 'f1-score': 0.9195402298850575, 'support': 85}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.449438202247191, 'recall': 0.47058823529411764, 'f1-score': 0.45977011494252873, 'support': 94}, 'weighted avg': {'precision': 0.8128137700215156, 'recall': 0.851063829787234, 'f1-score': 0.8314991440449988, 'supp


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MAR'}

Gener


Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5897435897435898, 'classification_report': {'F': {'precision': 0.8571428571428571, 'recall': 0.9629629629629629, 'f1-score': 0.9069767441860465, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.42857142857142855, 'recall': 0.48148148148148145, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.7386018237082067, 'recall': 0.8297872340425532, 'f1-score': 0.7815437902028698, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')
Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupte


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MCAR'}

Generati


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.75, 'sampling': 'MNAR'}

G

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4444444444444444, 'Recall': 0.13186813186813187, 'F1-score': 0.2033898305084746, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.584045584045584, 'classification_report': {'F': {'precision': 0.8522727272727273, 'recall': 0.9259259259259259, 'f1-score': 0.8875739644970415, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.7978723404255319, 'macro avg': {'precision': 0.42613636363636365, 'recall': 0.46296296296296297, 'f1-score': 0.44378698224852076, 'support': 94}, 'weighted avg': {'precision': 0.7344052224371374, 'recall': 0.7978723404255319, 'f1-score': 0.7648243736623443, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4444444444444444, 'Recall': 0.13186813186813187, 'F1-score': 0.2033898305084746, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6201329534662868, 'classification_report': {'F': {'precision': 0.8539325842696629, 'recall': 0.9382716049382716, 'f1-score': 0.8941176470588234, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.42696629213483145, 'recall': 0.4691358024691358, 'f1-score': 0.4470588235294117, 'support': 94}, 'weighted avg': {'precision': 0.7358355247430075, 'recall': 0.8085106382978723, 'f1-score': 0.7704630788485606, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4444444444444444, 'Recall': 0.13186813186813187, 'F1-score': 0.2033898305084746, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6258309591642925, 'classification_report': {'F': {'precision': 0.8539325842696629, 'recall': 0.9382716049382716, 'f1-score': 0.8941176470588234, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.42696629213483145, 'recall': 0.4691358024691358, 'f1-score': 0.4470588235294117, 'support': 94}, 'weighted avg': {'precision': 0.7358355247430075, 'recall': 0.8085106382978723, 'f1-score': 0.77046307884


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.9, 'sampling': 'MAR'}

Generating c


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6102678571428571, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8043478260869565, 'Recall': 0.94375, 'F1-score': 0.8485770538750672, 'Accuracy': 0.9042553191489362}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 54.1224745931778}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5924107142

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7, 'Recall': 0.86875, 'F1-score': 0.7101747173689621, 'Accuracy': 0.776595744680851}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 63.294093297607475}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5977678571428571, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, Me

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scal


Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.5707502374169041, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.3776595744680851, 'Recall': 0.5, 'F1-score': 0.43030303030303035, 'Accuracy': 0.7553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 302.31957446808514}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8709677419354839, 'Recall': 0.943661971830986, 'F1-score': 0.8960751796572692, 'Accuracy': 0.9148936170212766}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.37978891592391567}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.717948717948718, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8026315789473684, 'Recall': 0.8943661971830985, 'F1-score': 0.8179940622176327, 'Accuracy': 0.8404255319148937}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.5073255513439939}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7027540360873694, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.717948717948718, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'co


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9433962264150944, 'Recall': 0.9361702127659575, 'F1-score': 0.935909090909091, 'Accuracy': 0.9361702127659575}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 232.4760276132516}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7449186991869918, 'classification_report': {'F': {'precision': 0.8717948717948718, 'recall': 0.8292682926829268, 'f1-score': 0.8500000000000001, 'support': 82}, 'T': {'precision': 0.125, 'recall': 0.16666666666666666, 'f1-score': 0.14285714285714288, 'support': 12}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.4983974358974359, 'recall': 0.4979674796747967, 'f1-score': 0.4964285714285715, 'support': 94}, 'weighted avg': {'precision': 0.7764593562465903, 'recall': 0.7446808510638298, 'f1-score': 0.7597264437689969, 'support': 94}}}




Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 738.9315478723405}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.475, 'classification_report': {'F': {'precision': 0.7446808510638298, 'recall': 1.0, 'f1-score': 0.8536585365853657, 'support': 70}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.3723404255319149, 'recall': 0.5, 'f1-score': 0.42682926829268286, 'support': 94}, 'weighted avg': {'precision': 0.5545495699411498, 'recall': 0.7446808510638298, 'f1-score': 0.6357031655422937, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Rec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.9794520547945205, 'Recall': 0.9375, 'F1-score': 0.9561771561771562, 'Accuracy': 0.9680851063829787}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.60625000821711}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.4803571428571428, 'classification_report': {'F': {'precision': 0.7446808510638298, 'recall': 1.0, 'f1-score': 0.8536585365853657, 'support': 70}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.3723404255319149, 'recall': 0.5, 'f1-score': 0.42682926829268286, 'support': 94}, 'weighted avg': {'precision': 0.5545495699411498, 'recall': 0.7446808510638298, 'f1-score': 0.6357031655422937, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.4803571428571428, 'classification_report': {'F': {'precision': 0.7446808510638298, 'recall': 1.0, 'f1-score': 0.8536585365853657, 'support': 70}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.3723404255319149, 'recall': 0.5, 'f1-score': 0.42682926829268286, 'support': 94}, 'weighted avg': {'precision': 0.5545495699411498, 'recall': 0.7446808510638298, 'f1-score': 0.6357031655422937, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'c

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05319148936170213, 'Recall': 0.5, 'F1-score': 0.09615384615384615, 'Accuracy': 0.10638297872340426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 5948082.774890426}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.45912910618792974, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.44680851063829785, 'Recall': 0.5, 'F1-score': 0.47191011235955055, 'Accuracy': 0.8936170212765957}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 121.0870487211408}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7020626432391138, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.44680851063829785, 'Recall': 0.5, 'F1-score': 0.47191011235955055, 'Accuracy': 0.8936170212765957}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 121.0870487211408}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6936592818945759, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}

Best cleaning method:
Cleaning score: Cleaner: (PyO

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training da


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted traini

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 29.545651853102175}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6602564102564102, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6109375, 'Recall': 0.6901785714285715, 'F1-score': 0.6142676767676768, 'Accuracy': 0.723404255319149}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 64.0551745173665}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6626602564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6626602564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}} 

Cleaning improved the overall score 



SGDClassifier(loss='log')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.859375, 'Recall': 0.9366197183098591, 'F1-score': 0.8843472317156527, 'Accuracy': 0.9042553191489362}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 154.25744054448438}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6440306681270537, 'classification_report': {'F': {'precision': 0.8953488372093024, 'recall': 0.927710843373494, 'f1-score': 0.9112426035502958, 'support': 83}, 'T': {'precision': 0.25, 'recall': 0.18181818181818182, 'f1-score': 0.2105263157894737, 'support': 11}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.5726744186046512, 'recall': 0.5547645125958379, 'f1-score': 0.5608844596698848, 'support': 94}, 'weighted avg': {'precision': 0.8198292924294903, 'recall': 0.8404255319148937, 'f1-score': 0.8292438890250932, 'support': 94}}}

Outlier det


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data o

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8615384615384616, 'Recall': 0.8085106382978724, 'F1-score': 0.8012218045112781, 'Accuracy': 0.8085106382978723}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 22.940526935389965}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5552742616033756, 'classification_report': {'F': {'precision': 0.8488372093023255, 'recall': 0.9240506329113924, 'f1-score': 0.8848484848484849, 'support': 79}, 'T': {'precision': 0.25, 'recall': 0.13333333333333333, 'f1-score': 0.1739130434782609, 'support': 15}, 'accuracy': 0.7978723404255319, 'macro avg': {'precision': 0.5494186046511628, 'recall': 0.5286919831223629, 'f1-score': 0.5293807641633729, 'support': 94}, 'weighted avg': {'precision': 0.7532780801583374, 'recall': 0.7978723404255319, 'f1-score': 0.771401339948981, 'support': 94}}}

O


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training d

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.525, 'classification_report': {'F': {'precision': 0.8936170212765957, 'recall': 1.0, 'f1-score': 0.9438202247191011, 'support': 84}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10}, 'accuracy': 0.8936170212765957, 'macro avg': {'precision': 0.44680851063829785, 'recall': 0.5, 'f1-score': 0.47191011235955055, 'support': 94}, 'weighted avg': {'precision': 0.7985513807152558, 'recall': 0.8936170212765957, 'f1-score': 0.8434138178340903, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.553932540511683}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.525, 'classification_report':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6600907029478458, 'Recall': 0.7101190476190475, 'F1-score': 0.6451102180002435, 'Accuracy': 0.6702127659574468}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 4.090290184064689}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5249999999999999, 'classification_report': {'F': {'precision': 0.8936170212765957, 'recall': 1.0, 'f1-score': 0.9438202247191011, 'support': 84}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10}, 'accuracy': 0.8936170212765957, 'macro avg': {'precision': 0.44680851063829785, 'recall': 0.5, 'f1-score': 0.47191011235955055, 'support': 94}, 'weighted avg': {'precision': 0.7985513807152558, 'recall': 0.8936170212765957, 'f1-score': 0.8434138178340903, 'support': 94}}}

Best cleaning method:
Cleaning score:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.5570639668660837, 'Recall': 0.6476190476190475, 'F1-score': 0.5022184056104194, 'Accuracy': 0.6063829787234043}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 86.69873795703718}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6987179487179488, 'classification_report': {'F': {'precision': 0.8279569892473119, 'recall': 0.9871794871794872, 'f1-score': 0.9005847953216375, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.41397849462365593, 'recall': 0.4935897435897436, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6870281400137268, 'recall': 0.8191489361702128, 'f1-score': 0.7472937663307205, 'support': 94}}}

Outlier detection method: PyODI

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Gener


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: Cat

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Gen

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6144349477682811, 'classification_report': {'F': {'precision': 0.8586956521739131, 'recall': 0.9753086419753086, 'f1-score': 0.9132947976878614, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42934782608695654, 'recall': 0.4876543209876543, 'f1-score': 0.4566473988439307, 'support': 94}, 'weighted avg': {'precision': 0.7399398704902868, 'recall': 0.8404255319148937, 'f1-score': 0.7869880703480507, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.925438596491228, 'Recall': 0.98125, 'F1-score': 0.9463093937477239, 'Accuracy': 0.9680851063829787}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9943181818181819, 'Recall': 0.9375, 'F1-score': 0.9614121510673235, 'Accuracy': 0.9893617021276595, 'Mean Squared Error': 8.454394875232365}
Cl

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Gene

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Genera

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8044148936170212, 'Recall': 0.8365384615384617, 'F1-score': 0.8150032938076416, 'Accuracy': 0.8962765957446808}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7706702544031312, 'Recall': 0.8728932584269663, 'F1-score': 0.7403565877921877, 'Accuracy': 0.8404255319148937, 'Mean Squared Error': 0.4193181183510639}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7002032520325203, 'classification_report': {'F': {'precision': 0.8681318681318682, 'recall': 0.9634146341463414, 'f1-score': 0.9132947976878613, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.4340659340659341, 'recall': 0.4817073170731707, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7573065232639701, 'recall': 0.8404255319148937, 'f1-score': 0.7967039

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7511313115274691, 'Recall': 0.7548076923076923, 'F1-score': 0.7211223839838403, 'Accuracy': 0.8058510638297873}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7706702544031312, 'Recall': 0.8728932584269663, 'F1-score': 0.7403565877921877, 'Accuracy': 0.8404255319148937, 'Mean Squared Error': 0.6790953956117021}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7073170731707317, 'classification_report': {'F': {'precision': 0.8681318681318682, 'recall': 0.9634146341463414, 'f1-score': 0.9132947976878613, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.4340659340659341, 'recall': 0.4817073170731707, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7573065232639701, 'recall': 0.840425531914893


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: Categorical

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corru

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.4723214285714286, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.25398936170212766, 'Recall': 0.5, 'F1-score': 0.3368055555555555, 'Accuracy': 0.5079787234042553}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7729215473003124, 'Recall': 0.6853422619047619, 'F1-score': 0.6488171291333213, 'Accuracy': 0.6648936170212766, 'Mean Squared Error': 10795328.632197767}
Cleaner: (NoOutlierDetection, Mea

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7987233163304905, 'Recall': 0.8563829787234043, 'F1-score': 0.8181592855025428, 'Accuracy': 0.8643617021276596}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7729215473003124, 'Recall': 0.6853422619047619, 'F1-score': 0.6488171291333213, 'Accuracy': 0.6648936170212766, 'Mean Squared Error': 127.8322444557395}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6526785714285714, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7566870356591792, 'Recall': 0.800531914893617, 'F1-score': 0.7608759544749536, 'Accuracy': 0.8085106382978723}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7729215473003124, 'Recall': 0.6853422619047619, 'F1-score': 0.6488171291333213, 'Accuracy': 0.6648936170212766, 'Mean Squared Error': 137.22875790829218}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6508928571428572, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 's

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 


	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.75

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Gener


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: Catego

	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.30053191489361697, 'Recall': 0.625, 'F1-score': 0.3355009229736533, 'Accuracy': 0.35106382978723405}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7420212765957447, 'Recall': 0.75, 'F1-score': 0.745945945945946, 'Accuracy': 0.9840425531914894, 'Mean Squared Error': 1155.63292527346}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6016877637130802, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8970238095238097, 'Recall': 0.9527914614121511, 'F1-score': 0.9191606467148263, 'Accuracy': 0.9734042553191489}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7420212765957447, 'Recall': 0.75, 'F1-score': 0.745945945945946, 'Accuracy': 0.9840425531914894, 'Mean Squared Error': 147.13546278578104}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5434599156118144, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.922824302134647, 'Recall': 0.922824302134647, 'F1-score': 0.922824302134647, 'Accuracy': 0.9787234042553192}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7420212765957447, 'Recall': 0.75, 'F1-score': 0.745945945945946, 'Accuracy': 0.9840425531914894, 'Mean Squared Error': 147.28765246201985}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5350210970464135, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupte

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.9, 'sampling

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.46239837398373984, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7703764320785598, 'Recall': 0.7630952380952382, 'F1-score': 0.7439010117171038, 'Accuracy': 0.8962765957446808}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.38388297872340427, 'Recall': 0.3799145299145299, 'F1-score': 0.3784267298079453, 'Accuracy': 0.8404255319148937, 'Mean Squared Error': 31.465884361052762}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6473577235772359, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.66415273556231, 'Recall': 0.7080357142857143, 'F1-score': 0.6835889999513596, 'Accuracy': 0.8962765957446808}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.38388297872340427, 'Recall': 0.3799145299145299, 'F1-score': 0.3784267298079453, 'Accuracy': 0.8404255319148937, 'Mean Squared Error': 31.564945304240663}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6529471544715447, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'colum


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbatio

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.705118411000764, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.703590527119939, 'classification_report': {'F': {'precision': 0.819

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6585179526355996, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.705

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE9', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'colum


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: M

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7251602564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.734375, 'classification_report': {'F': {'precision': 0.829787234042

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7776442307692307, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7776442307692307, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 


	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE14', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.5, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7973856209150326, 'classification_report': {'F': {'precision': 0.9010989010989011, 'recall': 0.9647058823529412, 'f1-score': 0.9318181818181819, 'support': 85}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.45054945054945056, 'recall': 0.4823529411764706, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.8148234743979426, 'recall': 0.8723404255319149, 'f1-score': 0.8426015473887815, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7241379310344828, 'Recall': 0.7421875, 'F1-score': 0.7323953823953824, 'Accuracy': 0.9361702127659575, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7751633986928105, 'classification_report': {'F': {'precision': 0.9010989010989011, 'recall': 0.9647058823529412, 'f1-score': 0.9318181818181819, 'support': 85}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.45054945054945056, 'recall': 0.4823529411764706, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.8148234743979426, 'recall': 0.8723404255319149, 'f1-score': 0.8426015473887815, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7241379310344828, 'Recall': 0.7421875, 'F1-score': 0.7323953823953824, 'Accuracy': 0.9361702127659575, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7450980392156863, 'classification_report': {'F': {'precision': 0.9010989010989011, 'recall': 0.9647058823529412, 'f1-score': 0.9318181818181819, 'support': 85}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.45054945054945056, 'recall': 0.4823529411764706, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.8148234743979426, 'recall': 0.8723404255319149, 'f1-score': 0.8426015473887815, 'support': 94}}}

Best cleaning method:
Cleaning 


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE9', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.49957805907173003, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8932926829268293, 'Recall': 0.4566441441441441, 'F1-score': 0.5049966295921806, 'Accuracy': 0.6276595744680851, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.51434

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8932926829268293, 'Recall': 0.4566441441441441, 'F1-score': 0.5049966295921806, 'Accuracy': 0.6276595744680851, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.460337552742616, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5143459915611814, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE9', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'P


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 94 rows... 

	perturbation: Missi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.7056030389363723, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05319148936170213, 'Recall': 0.5, 'F1-score': 0.09615384615384615, 'Accuracy': 0.10638297872340426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.46808510638297873, 'Recall': 0.5, 'F1-score': 0.4835164835164835, 'Accuracy': 0.9361702127659575, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.46808510638297873, 'Recall': 0.5, 'F1-score': 0.4835164835164835, 'Accuracy': 0.9361702127659575, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7179487179487181, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.46808510638297873, 'Recall': 0.5, 'F1-score': 0.4835164835164835, 'Accuracy': 0.9361702127659575, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.731244064577398, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}

Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForest

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating cor

	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	pert

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6503378378378378, 'classification_report': {'F': {'precision': 0.7872340425531915, 'recall': 1.0, 'f1-score': 0.880952380952381, 'support': 74}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 20}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.39361702127659576, 'recall': 0.5, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6197374377546402, 'recall': 0.7872340425531915, 'f1-score': 0.6935157041540022, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5, 'Recall': 0.425531914893617, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6405405405405407, 'classification_report': {'F': {'precision': 0.7872340425531915, 'recall': 1.0, 'f1-score': 0.880952380952381, 'support': 74}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 20}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.39361702127659576, 'recall': 0.5, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6197374377546402, 'recall': 0.7872340425531915, 'f1-score': 0.6935157041540022, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5, 'Recall': 0.425531914893617, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6222972972972973, 'classification_report': {'F': {'precision': 0.7872340425531915, 'recall': 1.0, 'f1-score': 0.880952380952381, 'support': 74}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 20}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.39361702127659576, 'recall': 0.5, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6197374377546402, 'recall': 0.7872340425531915, 'f1-score': 0.6935157041540022, 'support': 94}}}

Best cleaning method:
Clean


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.25, 'sampling': 'MAR'}

Gener

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5892661555312158, 'classification_report': {'F': {'precision': 0.8850574712643678, 'recall': 0.927710843373494, 'f1-score': 0.9058823529411766, 'support': 83}, 'T': {'precision': 0.14285714285714285, 'recall': 0.09090909090909091, 'f1-score': 0.1111111111111111, 'support': 11}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.5139573070607554, 'recall': 0.5093099671412924, 'f1-score': 0.5084967320261439, 'support': 94}, 'weighted avg': {'precision': 0.7982042413443734, 'recall': 0.8297872340425532, 'f1-score': 0.8128772076206371, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.3776595744680851, 'Recall': 0.5, 'F1-score': 0.43030303030303035, 'Accuracy': 0.7553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6060150375939849, 'Recall': 0.5973684210526315, 'F1-score': 0.5572409284079765, 'Accuracy': 0.7553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5991237677984667, 'classification_report': {'F': {'precision': 0.8850574712643678, 'recall': 0.927710843373494, 'f1-score': 0.9058823529411766, 'support': 83}, 'T': {'precision': 0.14285714285714285, 'recall': 0.09090909090909091, 'f1-score': 0.1111111111111111, 'support': 11}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.5139573070607554, 'recall': 0.5093099671412924, 'f1-score': 0.5084967320261439, 'support': 94}, 'weighted avg': {'precision': 0.7982042413443734, 'recall': 0.8297872340425532, 'f1-s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.3776595744680851, 'Recall': 0.5, 'F1-score': 0.43030303030303035, 'Accuracy': 0.7553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6060150375939849, 'Recall': 0.5973684210526315, 'F1-score': 0.5572409284079765, 'Accuracy': 0.7553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5750273822562979, 'classification_report': {'F': {'precision': 0.8850574712643678, 'recall': 0.927710843373494, 'f1-score': 0.9058823529411766, 'support': 83}, 'T': {'precision': 0.14285714285714285, 'recall': 0.09090909090909091, 'f1-score': 0.1111111111111111, 'support': 11}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.5139573070607554, 'recall': 0.5093099671412924, 'f1-score': 0.5084967320261439, 'support': 94}, 'weighted avg': {'precision': 0.7982042413443734, 'recall': 0


	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbatio

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.24731182795698925, 'Recall': 0.48936170212765956, 'F1-score': 0.32857142857142857, 'Accuracy': 0.48936170212765956}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4744897959183674, 'Recall': 0.48791208791208796, 'F1-score': 0.44385156439485807, 'Accuracy': 0.48936170212765956, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.630579297245964, 'classification_report': {'F': {'precision': 0.8539325842696629, 'recall': 0.9382716049382716, 'f1-score': 0.8941176470588234, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.42696629213483145, 'recall': 0.4691358024691358, 'f1-score': 0.4470588235294117, 'support': 94}, 'weighted avg': {'precision': 0.7358355247430075, 'recall': 0.8085106382978723, 'f1-score': 0.7704630788485606

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.24731182795698925, 'Recall': 0.48936170212765956, 'F1-score': 0.32857142857142857, 'Accuracy': 0.48936170212765956}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4744897959183674, 'Recall': 0.48791208791208796, 'F1-score': 0.44385156439485807, 'Accuracy': 0.48936170212765956, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6533713200379867, 'classification_report': {'F': {'precision': 0.8555555555555555, 'recall': 0.9506172839506173, 'f1-score': 0.9005847953216374, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.42777777777777776, 'recall': 0.47530864197530864, 'f1-score': 0.4502923976608187, 'support': 94}, 'weighted avg': {'precision': 0.7372340425531915, 'recall': 0.8191489361702128, 'f1-


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE19', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.75, 'sampling': 'MNAR'}

Gener

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.5393430099312453, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.48, 'Recall': 0.12903225806451613, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5393430099312453, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.48, 'Recall': 0.12903225806451613, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5576776165011459, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.1276595744680851, 'Recall': 0.5, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.48, 'Recall': 0.12903225806451613, 'F1-score': 0.20338983050847456, 'Accuracy': 0.2553191489361702, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5389610389610389, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}

Best cleaning meth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupte


Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6562261268143621, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Im

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6485867074102368, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6699770817417877, 'classification_report': {'F': {'precision': 0.8191489361702128, 'recall': 1.0, 'f1-score': 0.9005847953216375, 'support': 77}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.4095744680851064, 'recall': 0.5, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6710049796287912, 'recall': 0.8191489361702128, 'f1-score': 0.7377130770187882, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.4950711938663746, 'classification_report': {'F': {'precision': 0.8829787234042553, 'recall': 1.0, 'f1-score': 0.9378531073446328, 'support': 83}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11}, 'accuracy': 0.8829787234042553, 'macro avg': {'precision': 0.44148936170212766, 'recall': 0.5, 'f1-score': 0.4689265536723164, 'support': 94}, 'weighted avg': {'precision': 0.7796514259846085, 'recall': 0.8829787234042553, 'f1-score': 0.828104339463878, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9375, 'Recall': 0.9875, 'F1-score': 0.960337552742616, 'Accuracy': 0.9787234042553191}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 15.02775998574497}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5377875136911281, 'classification_report': {'F': {'precision': 0.8829787234042553, 'recall': 1.0, 'f1-score': 0.9378531073446328, 'support': 83}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11}, 'accuracy': 0.8829787234042553, 'macro avg': {'precision': 0.44148936170212766, 'recall': 0.5, 'f1-score': 0.4689265536723164, 'support': 94}, 'weighted avg': {'precision': 0.7796514259846085, 'recall': 0.8829787234042553, 'f1-score': 0.828104339463878, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7121212121212122, 'Recall': 0.88125, 'F1-score': 0.7304964539007092, 'Accuracy': 0.7978723404255319}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 35.29769181985206}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5279299014238774, 'classification_report': {'F': {'precision': 0.8829787234042553, 'recall': 1.0, 'f1-score': 0.9378531073446328, 'support': 83}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11}, 'accuracy': 0.8829787234042553, 'macro avg': {'precision': 0.44148936170212766, 'recall': 0.5, 'f1-score': 0.4689265536723164, 'support': 94}, 'weighted avg': {'precision': 0.7796514259846085, 'recall': 0.8829787234042553, 'f1-score': 0.828104339463878, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5377875136911281, 'classification_report': {'F': {'precision': 0.8829787234042553, 'recall': 1.0, 'f1-score': 0.9378531073446328, 'support': 83}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11}, 'accuracy': 0.8829787234042553, 'macro avg': {'precision': 0.44148936170212766, 'recall': 0.5, 'f1-score': 0.4689265536723164, 'support': 94}, 'weighted avg': {'precision': 0.7796514259846085, 'recall': 0.8829787234042553, 'f1-score': 0.828104339463878, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: 


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Sca

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6447368421052632, 'classification_report': {'F': {'precision': 0.8085106382978723, 'recall': 1.0, 'f1-score': 0.8941176470588235, 'support': 76}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.40425531914893614, 'recall': 0.5, 'f1-score': 0.44705882352941173, 'support': 94}, 'weighted avg': {'precision': 0.6536894522408329, 'recall': 0.8085106382978723, 'f1-score': 0.7229036295369211, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9107142857142857, 'Recall': 0.9647887323943662, 'F1-score': 0.932732216974381, 'Accuracy': 0.9468085106382979}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.21271183814641287}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.73, 'Recall': 0.8098591549295775, 'F1-score': 0.6976771888028588, 'Accuracy': 0.7127659574468085}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.5385317058991024}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6582602339181287, 'classification_report': {'F': {'precision': 0.8085106382978723, 'recall': 1.0, 'f1-score': 0.8941176470588235, 'support': 76}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.40425531914893614, 'recall': 0.5, 'f1-score': 0.44705882352941173, 'support': 94}, 'weighted avg': {'precision': 0.6536894522408329, 'recall': 0.8085106382978723, 'f1-score': 0.7229036295369211, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6761695906432748, 'classification_report': {'F': {'precision': 0.8085106382978723, 'recall': 1.0, 'f1-score': 0.8941176470588235, 'support': 76}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18}, 'accuracy': 0.8085106382978723, 'macro avg': {'precision': 0.40425531914893614, 'recall': 0.5, 'f1-score': 0.44705882352941173, 'support': 94}, 'weighted avg': {'precision': 0.6536894522408329, 'recall': 0.8085106382978723, 'f1-score': 0.7229036295369211, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'co


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'col

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7511904761904762, 'classification_report': {'F': {'precision': 0.8924731182795699, 'recall': 0.9880952380952381, 'f1-score': 0.9378531073446327, 'support': 84}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10}, 'accuracy': 0.8829787234042553, 'macro avg': {'precision': 0.44623655913978494, 'recall': 0.49404761904761907, 'f1-score': 0.46892655367231634, 'support': 94}, 'weighted avg': {'precision': 0.7975291695264242, 'recall': 0.8829787234042553, 'f1-score': 0.8380815001803101, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9519230769230769, 'Recall': 0.9468085106382979, 'F1-score': 0.9466575871070253, 'Accuracy': 0.9468085106382979}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.4657166908163401}
Cleaner: (PyODKNNOutlierDetection, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scalin


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 124.4697897230593}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5696138211382114, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.972972972972973, 'Recall': 0.9166666666666667, 'F1-score': 0.9406565656565656, 'Accuracy': 0.9574468085106383}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 124.48982368569885}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.598069105691057, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.598069105691057, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'c


Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: Scaling: {'col

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.6866096866096866, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05319148936170213, 'Recall': 0.5, 'F1-score': 0.09615384615384615, 'Accuracy': 0.10638297872340426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 9343377.730177661}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.686609686609686

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.5978664530017433}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6961063627730295, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.9772727272727273, 'Recall': 0.8, 'F1-score': 0.8633720930232558, 'Accuracy': 0.9574468085106383}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.668244520963563}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7084520417853752, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7084520417853752, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data o


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6220322886989553, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.5952380952380952, 'Recall': 0.5232142857142857, 'F1-score': 0.5149638802889577, 'Accuracy': 0.8404255319148937}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 27.48476332969889}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.5332821300563236, 'Recall': 0.5580357142857143, 'F1-score': 0.5179487179487179, 'Accuracy': 0.648936170212766}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 60.05912028822711}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6020892687559354, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6248812915479582, 'classification_report': {'F': {'precision': 0.8617021276595744, 'recall': 1.0, 'f1-score': 0.9257142857142857, 'support': 81}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.4308510638297872, 'recall': 0.5, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.7425305568130375, 'recall': 0.8617021276595744, 'f1-score': 0.7976899696048633, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 9

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PPP score no cleaning: {'roc_auc_score': 0.6051829268292682, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.3776595744680851, 'Recall': 0.5, 'F1-score': 0.43030303030303035, 'Accuracy': 0.7553191489361702}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 166.67462475721675}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.605182926829268

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8533231474407945, 'Recall': 0.7832210655235763, 'F1-score': 0.8094594594594595, 'Accuracy': 0.8723404255319149}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 39.60219094714161}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6316056910569106, 'classification_report': {'F': {'precision': 0.8723404255319149, 'recall': 1.0, 'f1-score': 0.9318181818181819, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8723404255319149, 'macro avg': {'precision': 0.43617021276595747, 'recall': 0.5, 'f1-score': 0.46590909090909094, 'support': 94}, 'weighted avg': {'precision': 0.7609778180172024, 'recall': 0.8723404255319149, 'f1-score': 0.8128626692456481, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6769607843137255, 'Recall': 0.72106552357624, 'F1-score': 0.6852819070577205, 'Accuracy': 0.7340425531914894}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 55.78025390401974}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.608739837398374, 'classification_report': {'F': {'precision': 0.8709677419354839, 'recall': 0.9878048780487805, 'f1-score': 0.9257142857142857, 'support': 82}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12}, 'accuracy': 0.8617021276595744, 'macro avg': {'precision': 0.43548387096774194, 'recall': 0.49390243902439024, 'f1-score': 0.46285714285714286, 'support': 94}, 'weighted avg': {'precision': 0.759780370624571, 'recall': 0.8617021276595744, 'f1-score': 0.8075379939209727, 'support': 94}}}

Best clean

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 9

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5809294871794872, 'classification_report': {'F': {'precision': 0.8369565217391305, 'recall': 0.9871794871794872, 'f1-score': 0.9058823529411766, 'support': 78}, 'T': {'precision': 0.5, 'recall': 0.0625, 'f1-score': 0.1111111111111111, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.6684782608695652, 'recall': 0.5248397435897436, 'f1-score': 0.5084967320261439, 'support': 94}, 'weighted avg': {'precision': 0.7796022201665125, 'recall': 0.8297872340425532, 'f1-score': 0.7706021415658463, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8454243717401613, 'Recall': 0.8297872340425532, 'F1-score': 0.8278388278388278, 'Accuracy': 0.8297872340425532}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.521581459925425}
Cleaner: (PyODKNNOutl


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training da

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6573839662447258, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6174603174603175, 'Recall': 0.6541666666666667, 'F1-score': 0.5993179880647911, 'Accuracy': 0.6276595744680851}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.9445755317387077}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6488095238095238, 'Recall': 0.6636904761904762, 'F1-score': 0.6544117647058824, 'Accuracy': 0.723404255319149}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.8943970438005817}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6481012658227848, 'classification_report': {'F': {'precision': 0.8404255319148937, 'recall': 1.0, 'f1-score': 0.9132947976878613, 'support': 79}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15}, 'accuracy': 0.8404255319148937, 'macro avg': {'precision': 0.42021276595744683, 'recall': 0.5, 'f1-score': 0.45664739884393063, 'support': 94}, 'weighted avg': {'precision': 0.7063150746944319, 'recall': 0.8404255319148937, 'f1-score': 0.767556266141926, 'support': 94}}}

Best cleaning method:
Cleaning score: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 row

PPP score no cleaning: {'roc_auc_score': 0.5712280701754385, 'classification_report': {'F': {'precision': 0.8021978021978022, 'recall': 0.9733333333333334, 'f1-score': 0.8795180722891567, 'support': 75}, 'T': {'precision': 0.3333333333333333, 'recall': 0.05263157894736842, 'f1-score': 0.09090909090909091, 'support': 19}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.5677655677655677, 'recall': 0.5129824561403509, 'f1-score': 0.4852135815991238, 'support': 94}, 'weighted avg': {'precision': 0.707427324448601, 'recall': 0.7872340425531915, 'f1-score': 0.7201183845633987, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05319148936170213, 'Recall': 0.5, 'F1-score': 0.09615384615384615, 'Accuracy': 0.10638297872340426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 4.408708207734185}
Cleaner: (N

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.4944444444444444, 'Recall': 0.4857142857142857, 'F1-score': 0.42782608695652175, 'Accuracy': 0.5531914893617021}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.8818406458580547}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5859649122807017, 'classification_report': {'F': {'precision': 0.7956989247311828, 'recall': 0.9866666666666667, 'f1-score': 0.880952380952381, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.3978494623655914, 'recall': 0.49333333333333335, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6348661633493479, 'recall': 0.7872340425531915, 'f1-score': 0.7028875379939211, 'support': 94}}}

Outlier detection method: PyODI

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6070175438596491, 'classification_report': {'F': {'precision': 0.7978723404255319, 'recall': 1.0, 'f1-score': 0.8875739644970414, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7978723404255319, 'macro avg': {'precision': 0.39893617021276595, 'recall': 0.5, 'f1-score': 0.4437869822485207, 'support': 94}, 'weighted avg': {'precision': 0.6366002716161159, 'recall': 0.7978723404255319, 'f1-score': 0.7081707163540224, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()
Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalS

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MCAR'}

Genera


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: C

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generat

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.5691489361702128, 'Recall': 0.625, 'F1-score': 0.5948275862068966, 'Accuracy': 0.8882978723404255}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': 5064.770219917155}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7263621794871795, 'classification_report': {'F': {'precision': 0.8279569892473119, 'recall': 0.9871794871794872, 'f1-score': 0.9005847953216375, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.41397849462365593, 'recall': 0.4935897435897436, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6870281400137268, 'recall': 0.8191489361702128, 'f1-score': 0.7472937663307205, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Out

	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'DGN', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}
	per


Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE11', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: Cate

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Genera

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.25, 'sampling': 'MNAR'}

Generati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6659649122807018, 'classification_report': {'F': {'precision': 0.7956989247311828, 'recall': 0.9866666666666667, 'f1-score': 0.880952380952381, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.3978494623655914, 'recall': 0.49333333333333335, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6348661633493479, 'recall': 0.7872340425531915, 'f1-score': 0.7028875379939211, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8035058027079303, 'Recall': 0.828125, 'F1-score': 0.809436460887468, 'Accuracy': 0.8909574468085106}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7523310023310024, 'Recall': 0.8118131868131868, 'F1-score': 0.6757301074438795, 'Accuracy': 0.8031914893617021, 'Mean Squared Error': 0.8227557982019271}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6922807017543859, 'classification_report': {'F': {'precision': 0.7956989247311828, 'recall': 0.9866666666666667, 'f1-score': 0.880952380952381, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7872340425531915, 'macro avg': {'precision': 0.3978494623655914, 'recall': 0.49333333333333335, 'f1-score': 0.4404761904761905, 'support': 94}, 'weighted avg': {'precision': 0.6348661633493479, 'recall': 0.7872340425531915, 'f1-score': 0.7028875379939211, '

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7893301478543093, 'Recall': 0.8072916666666665, 'F1-score': 0.7874667501173526, 'Accuracy': 0.8696808510638299}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7523310023310024, 'Recall': 0.8118131868131868, 'F1-score': 0.6757301074438795, 'Accuracy': 0.8031914893617021, 'Mean Squared Error': 0.9120085938553472}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6656140350877193, 'classification_report': {'F': {'precision': 0.7978723404255319, 'recall': 1.0, 'f1-score': 0.8875739644970414, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7978723404255319, 'macro avg': {'precision': 0.39893617021276595, 'recall': 0.5, 'f1-score': 0.4437869822485207, 'support': 94}, 'weighted avg': {'precision': 0.6366002716161159, 'recall': 0.7978723404255319, 'f1-score': 0.7081707163540

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE10', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 


	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE30', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'DGN', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrup

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE8', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted 

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.5, 'sampling': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.6690705128205129, 'classification_report': {'F': {'precision': 0.8351648351648352, 'recall': 0.9743589743589743, 'f1-score': 0.8994082840236687, 'support': 78}, 'T': {'precision': 0.3333333333333333, 'recall': 0.0625, 'f1-score': 0.10526315789473684, 'support': 16}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.5842490842490843, 'recall': 0.5184294871794872, 'f1-score': 0.5023357209592028, 'support': 94}, 'weighted avg': {'precision': 0.7497467071935157, 'recall': 0.8191489361702128, 'f1-score': 0.7642346455336378, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.25, 'Recall': 0.5, 'F1-score': 0.3333333333333333, 'Accuracy': 0.5}
Imputation method: MeanModeImputa

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7292311661506709, 'Recall': 0.7712765957446808, 'F1-score': 0.719798938962702, 'Accuracy': 0.7712765957446809}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7369132226469184, 'Recall': 0.5890628978864273, 'F1-score': 0.5901795866734891, 'Accuracy': 0.7180851063829787, 'Mean Squared Error': 12.95889722595951}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7740384615384617, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6759698634698634, 'Recall': 0.699468085106383, 'F1-score': 0.6437752493327714, 'Accuracy': 0.699468085106383}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7369132226469184, 'Recall': 0.5890628978864273, 'F1-score': 0.5901795866734891, 'Accuracy': 0.7180851063829787, 'Mean Squared Error': 35.861163570887115}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7836538461538461, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE17', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Gene

	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE7', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generatin

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE14', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE17', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'col

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5616071428571429, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.991390791476408, 'Recall': 0.9739583333333333, 'F1-score': 0.9818489430553117, 'Accuracy': 0.9867021276595744}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9835164835164836, 'Recall': 0.8333333333333333, 'F1-score': 0.8664772727272727, 'Accuracy': 0.9680851063829787, 'Mean Squared Error': 41.68430815483443}
Cleaner: (PyODK

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8853741496598639, 'Recall': 0.8735119047619047, 'F1-score': 0.8695362109838183, 'Accuracy': 0.8829787234042553}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9835164835164836, 'Recall': 0.8333333333333333, 'F1-score': 0.8664772727272727, 'Accuracy': 0.9680851063829787, 'Mean Squared Error': 46.66412963355129}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6674107142857143, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 's

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6674107142857143, 'classification_report': {'F': {'precision': 0.851063829787234, 'recall': 1.0, 'f1-score': 0.9195402298850576, 'support': 80}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14}, 'accuracy': 0.851063829787234, 'macro avg': {'precision': 0.425531914893617, 'recall': 0.5, 'f1-score': 0.4597701149425288, 'support': 94}, 'weighted avg': {'precision': 0.724309642372114, 'recall': 0.851063829787234, 'f1-score': 0.7825874296894108, 'support': 94}}} 

Cleaning improved the overall score 



RandomForestClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE32', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE8', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE9', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows..

	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE10', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE11', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrup

	perturbation: Scaling: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE7', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE25', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE25', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrup

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE5', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE6', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE19', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'PRE30', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'PRE4', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: MissingValues: {'column': 'PRE4'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.28989361702127653, 'Recall': 0.625, 'F1-score': 0.3221153846153847, 'Accuracy': 0.3297872340425532}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9836956521739131, 'Recall': 0.8125, 'F1-score': 0.8415730337078652, 'Accuracy': 0.9680851063829787, 'Mean Squared Error': 47768.10679855961}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5524839743589743, 'classification_report': {'F': {'precision': 0.8369565217391305, 'recall': 0.9871794871794872, 'f1-score': 0.9058823529411766, 'support': 78}, 'T': {'precision': 0.5, 'recall': 0.0625, 'f1-score': 0.1111111111111111, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.6684782608695652, 'recall': 0.5248397435897436, 'f1-score': 0.5084967320261439, 'support': 94}, 'weighted avg': {'precision': 0.7796022201665125, 'recall': 0.8297872340425532, 'f1-score': 0.7706021415658463, 'suppor

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7121923237380058, 'Recall': 0.6860119047619048, 'F1-score': 0.6939034540158135, 'Accuracy': 0.8324468085106382}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9836956521739131, 'Recall': 0.8125, 'F1-score': 0.8415730337078652, 'Accuracy': 0.9680851063829787, 'Mean Squared Error': 49.246908629520775}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5645032051282051, 'classification_report': {'F': {'precision': 0.8279569892473119, 'recall': 0.9871794871794872, 'f1-score': 0.9005847953216375, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8191489361702128, 'macro avg': {'precision': 0.41397849462365593, 'recall': 0.4935897435897436, 'f1-score': 0.45029239766081874, 'support': 94}, 'weighted avg': {'precision': 0.6870281400137268, 'recall': 0.8191489361702128, 'f1-scor


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrup

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9516129032258065, 'Recall': 0.8854166666666666, 'F1-score': 0.9084302325581395, 'Accuracy': 0.9016393442622951, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9698924731182796, 'classification_report': {'0': {'precision': 0.875, 'recall': 0.9032258064516129, 'f1-score': 0.8888888888888888, 'support': 31}, '1': {'precision': 0.896551724137931, 'recall': 0.8666666666666667, 'f1-score': 0.8813559322033899, 'support': 30}, 'accuracy': 0.8852459016393442, 'macro avg': {'precision': 0.8857758620689655, 'recall': 0.8849462365591398, 'f1-score': 0.8851224105461394, 'support': 61}, 'weighted avg': {'precision': 0.8855992085924251, 'recall': 0.8852459016393442, 'f1-score': 0.8851841560927418, 'support': 61}}}

Outlier detectio


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9210526315789473, 'Recall': 0.7969348659003831, 'F1-score': 0.8352238805970149, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8604395604395603, 'classification_report': {'0': {'precision': 0.6388888888888888, 'recall': 0.8846153846153846, 'f1-score': 0.7419354838709676, 'support': 26}, '1': {'precision': 0.88, 'recall': 0.6285714285714286, 'f1-score': 0.7333333333333334, 'support': 35}, 'accuracy': 0.7377049180327869, 'macro avg': {'precision': 0.7594444444444444, 'recall': 0.7565934065934066, 'f1-score': 0.7376344086021505, 'support': 61}, 'weighted avg': {'precision': 0.7772313296903461, 'recall': 0.7377049180327869, 'f1-score': 0.7369998237264234, 'support': 61}}}

Outlier detectio


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	pertu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9268292682926829, 'Recall': 0.88, 'F1-score': 0.8857333778817239, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8868131868131868, 'classification_report': {'0': {'precision': 0.8421052631578947, 'recall': 0.9142857142857143, 'f1-score': 0.8767123287671234, 'support': 35}, '1': {'precision': 0.8695652173913043, 'recall': 0.7692307692307693, 'f1-score': 0.8163265306122449, 'support': 26}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.8558352402745995, 'recall': 0.8417582417582418, 'f1-score': 0.8465194296896841, 'support': 61}, 'weighted avg': {'precision': 0.8538095059459054, 'recall': 0.8524590163934426, 'f1-score': 0.850974119717503, 'support': 61}}}

Outlier detection


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.75, 'sampling': 'MC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9568965517241379, 'Recall': 0.6875, 'F1-score': 0.7502047502047502, 'Accuracy': 0.9180327868852459, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8966666666666666, 'classification_report': {'0': {'precision': 0.8, 'recall': 0.8888888888888888, 'f1-score': 0.8421052631578948, 'support': 36}, '1': {'precision': 0.8095238095238095, 'recall': 0.68, 'f1-score': 0.7391304347826089, 'support': 25}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8047619047619048, 'recall': 0.7844444444444445, 'f1-score': 0.7906178489702518, 'support': 61}, 'weighted avg': {'precision': 0.8039032006245121, 'recall': 0.8032786885245902, 'f1-score': 0.7999024646434334, 'support': 61}}}

Outlier detection method: PyODIsolationFore


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training d

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9736842105263157, 'Recall': 0.7857142857142857, 'F1-score': 0.8501228501228502, 'Accuracy': 0.9508196721311475}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 371.1700426379546}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8714285714285714, 'classification_report': {'0': {'precision': 0.6388888888888888, 'recall': 0.8846153846153846, 'f1-score': 0.7419354838709676, 'support': 26}, '1': {'precision': 0.88, 'recall': 0.6285714285714286, 'f1-score': 0.7333333333333334, 'support': 35}, 'accuracy': 0.7377049180327869, 'macro avg': {'precision': 0.7594444444444444, 'recall': 0.7565934065934066, 'f1-score': 0.7376344086021505, 'support': 61}, 'weighted avg': {'precision': 0.7772313296903461, 'recall': 0.7377049180327869, 'f1-score': 0.7369998237264234, 'support': 61}}}

Ou


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Categoric

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7980769230769231, 'Recall': 0.9037037037037037, 'F1-score': 0.8179241393527107, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9344086021505376, 'classification_report': {'0': {'precision': 0.7777777777777778, 'recall': 0.9333333333333333, 'f1-score': 0.8484848484848485, 'support': 30}, '1': {'precision': 0.92, 'recall': 0.7419354838709677, 'f1-score': 0.8214285714285714, 'support': 31}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8488888888888889, 'recall': 0.8376344086021505, 'f1-score': 0.8349567099567099, 'support': 61}, 'weighted avg': {'precision': 0.8500546448087432, 'recall': 0.8360655737704918, 'f1-score': 0.8347349

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7980769230769231, 'Recall': 0.9037037037037037, 'F1-score': 0.8179241393527107, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9193548387096774, 'classification_report': {'0': {'precision': 0.7567567567567568, 'recall': 0.9333333333333333, 'f1-score': 0.835820895522388, 'support': 30}, '1': {'precision': 0.9166666666666666, 'recall': 0.7096774193548387, 'f1-score': 0.7999999999999999, 'support': 31}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8367117117117118, 'recall': 0.821505376344086, 'f1-score': 0.817910447761194, 'support': 61}, 'weighted avg': {'precision': 0.8380224486781863, 'recall': 0.81967


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.3770491803278688, 'Recall': 0.5, 'F1-score': 0.42990654205607476, 'Accuracy': 0.7540983606557377}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5641025641025641, 'Recall': 0.5335483870967741, 'F1-score': 0.5335782638414217, 'Accuracy': 0.7540983606557377, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9389978213507625, 'classification_report': {'0': {'precision': 0.7714285714285715, 'recall': 1.0, 'f1-score': 0.870967741935484, 'support': 27}, '1': {'precision': 1.0, 'recall': 0.7647058823529411, 'f1-score': 0.8666666666666666, 'support': 34}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8857142857142857, 'recall': 0.8823529411764706, 'f1-score': 0.8688172043010753, 'support': 61}, 'weighted avg': {'precision': 0.8988290398126464, 'recall': 0.8688524590163934, 'f1-score': 0.8685704212938481, 'supp

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.3770491803278688, 'Recall': 0.5, 'F1-score': 0.42990654205607476, 'Accuracy': 0.7540983606557377}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5641025641025641, 'Recall': 0.5335483870967741, 'F1-score': 0.5335782638414217, 'Accuracy': 0.7540983606557377, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9270152505446623, 'classification_report': {'0': {'precision': 0.75, 'recall': 1.0, 'f1-score': 0.8571428571428571, 'support': 27}, '1': {'precision': 1.0, 'recall': 0.7352941176470589, 'f1-score': 0.8474576271186441, 'support': 34}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.875, 'recall': 0.8676470588235294, 'f1-score': 0.8523002421307506, 'support': 61}, 'weighted avg': {'precision': 0.889344262295082, 'recall': 0.8524590163934426, 'f1-score': 0.8517445322113285, 'support


	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope'


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturba

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8459051724137931, 'classification_report': {'0': {'precision': 0.8064516129032258, 'recall': 0.78125, 'f1-score': 0.7936507936507936, 'support': 32}, '1': {'precision': 0.7666666666666667, 'recall': 0.7931034482758621, 'f1-score': 0.7796610169491527, 'support': 29}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7865591397849463, 'recall': 0.787176724137931, 'f1-score': 0.7866559052999731, 'support': 61}, 'weighted avg': {'precision': 0.7875374581350255, 'recall': 0.7868852459016393, 'f1-score': 0.7869999162024726, 'support': 61}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.13114754098360656, 'Recall': 0.5, 'F1-score': 0.20779220779220778, 'Accuracy': 0.26229508196721313}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.44000000000000006, 'Recall': 0.3538461538461538, 'F1-score': 0.27213114754098366, 'Accuracy': 0.26229508196721313, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8459051724137931, 'classification_report': {'0': {'precision': 0.78125, 'recall': 0.78125, 'f1-score': 0.78125, 'support': 32}, '1': {'precision': 0.7586206896551724, 'recall': 0.7586206896551724, 'f1-score': 0.7586206896551724, 'support': 29}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.7699353448275862, 'recall': 0.7699353448275862, 'f1-score': 0.7699353448275862, 'support': 61}, 'weighted avg': {'precision': 0.7704918032786885, 'recall': 0.7704918032786885, 'f1-score': 0.7704918032786885, 's

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.13114754098360656, 'Recall': 0.5, 'F1-score': 0.20779220779220778, 'Accuracy': 0.26229508196721313}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.44000000000000006, 'Recall': 0.3538461538461538, 'F1-score': 0.27213114754098366, 'Accuracy': 0.26229508196721313, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.84375, 'classification_report': {'0': {'precision': 0.7941176470588235, 'recall': 0.84375, 'f1-score': 0.8181818181818182, 'support': 32}, '1': {'precision': 0.8148148148148148, 'recall': 0.7586206896551724, 'f1-score': 0.7857142857142857, 'support': 29}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8044662309368191, 'recall': 0.8011853448275862, 'f1-score': 0.801948051948052, 'support': 61}, 'weighted avg': {'precision': 0.803957284188721, 'recall': 0.8032786885245902, '


	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fract

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.05737704918032787, 'Recall': 0.5, 'F1-score': 0.10294117647058824, 'Accuracy': 0.11475409836065574}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.11097560975609756, 'Recall': 0.13501144164759726, 'F1-score': 0.11260775862068965, 'Accuracy': 0.11475409836065574, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8439024390243902, 'classification_report': {'0': {'precision': 0.813953488372093, 'recall': 0.8536585365853658, 'f1-score': 0.8333333333333333, 'support': 41}, '1': {'precision': 0.6666666666666666, 'recall': 0.6, 'f1-score': 0.631578947368421, 'support': 20}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.7403100775193798, 'recall': 0.7268292682926829, 'f1-score': 0.7324561403508771, 'support': 61}, 'weighted avg': {'precision': 0.7656627271571991, 'recall': 0.7704918032786885, 'f1-score': 0.767

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.05737704918032787, 'Recall': 0.5, 'F1-score': 0.10294117647058824, 'Accuracy': 0.11475409836065574}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.11097560975609756, 'Recall': 0.13501144164759726, 'F1-score': 0.11260775862068965, 'Accuracy': 0.11475409836065574, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8768292682926829, 'classification_report': {'0': {'precision': 0.8409090909090909, 'recall': 0.9024390243902439, 'f1-score': 0.8705882352941177, 'support': 41}, '1': {'precision': 0.7647058823529411, 'recall': 0.65, 'f1-score': 0.7027027027027027, 'support': 20}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.802807486631016, 'recall': 0.776219512195122, 'f1-score': 0.7866454689984101, 'support': 61}, 'weighted avg': {'precision': 0.8159244323660909, 'recall': 0.81967213114


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8461538461538461, 'Recall': 0.9615384615384616, 'F1-score': 0.8890909090909092, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2141.410596337928}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9395604395604396, 'classification_report': {'0': {'precision': 0.8888888888888888, 'recall': 0.9142857142857143, 'f1-score': 0.9014084507042254, 'support': 35}, '1': {'precision': 0.88, 'recall': 0.8461538461538461, 'f1-score': 0.8627450980392156, 'support': 26}, 'accuracy': 0.8852459016393442, 'macro avg': {'precision': 0.8844444444444444, 'recall': 0.8802197802197802, 'f1-score': 0.8820767743717205, 'support': 61}, 'weighted avg': {'precision': 0.8851001821493625, 'recall': 0.8852459016393442, 'f1-score': 0.8849289889125819, 'support': 61}}}

Ou

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'colum

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9411764705882353, 'Recall': 0.9782608695652174, 'F1-score': 0.9576388888888889, 'Accuracy': 0.9672131147540983}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 26.481468501322922}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8939024390243901, 'classification_report': {'0': {'precision': 0.8823529411764706, 'recall': 0.7317073170731707, 'f1-score': 0.8, 'support': 41}, '1': {'precision': 0.5925925925925926, 'recall': 0.8, 'f1-score': 0.6808510638297872, 'support': 20}, 'accuracy': 0.7540983606557377, 'macro avg': {'precision': 0.7374727668845316, 'recall': 0.7658536585365854, 'f1-score': 0.7404255319148936, 'support': 61}, 'weighted avg': {'precision': 0.7873495481981498, 'recall': 0.7540983606557377, 'f1-score': 0.7609347750261598, 'support': 61}}}

Outlier detection


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8888888888888888, 'Recall': 0.925, 'F1-score': 0.8969594594594594, 'Accuracy': 0.9016393442622951}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.908768739481873}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9245689655172413, 'classification_report': {'0': {'precision': 0.8125, 'recall': 0.896551724137931, 'f1-score': 0.8524590163934426, 'support': 29}, '1': {'precision': 0.896551724137931, 'recall': 0.8125, 'f1-score': 0.8524590163934426, 'support': 32}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.8545258620689655, 'recall': 0.8545258620689655, 'f1-score': 0.8524590163934426, 'support': 61}, 'weighted avg': {'precision': 0.8565927077444884, 'recall': 0.8524590163934426, 'f1-score': 0.8524590163934426, 'support': 61}}}

Outlier detection method: P


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9326923076923077, 'Recall': 0.78125, 'F1-score': 0.8239175257731959, 'Accuracy': 0.8852459016393442}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 3638.9259725383804}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8692307692307691, 'classification_report': {'0': {'precision': 0.8205128205128205, 'recall': 0.9142857142857143, 'f1-score': 0.8648648648648648, 'support': 35}, '1': {'precision': 0.8636363636363636, 'recall': 0.7307692307692307, 'f1-score': 0.7916666666666666, 'support': 26}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.842074592074592, 'recall': 0.8225274725274725, 'f1-score': 0.8282657657657657, 'support': 61}, 'weighted avg': {'precision': 0.8388933470900684, 'recall': 0.8360655737704918, 'f1-score': 0.8336656328459607, 'support': 61}}}



Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9655172413793103, 'Recall': 0.7142857142857143, 'F1-score': 0.7821428571428571, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2388.971266407719}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.908008658008658, 'classification_report': {'0': {'precision': 0.8709677419354839, 'recall': 0.8181818181818182, 'f1-score': 0.84375, 'support': 33}, '1': {'precision': 0.8, 'recall': 0.8571428571428571, 'f1-score': 0.8275862068965518, 'support': 28}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8354838709677419, 'recall': 0.8376623376623377, 'f1-score': 0.8356681034482759, 'support': 61}, 'weighted avg': {'precision': 0.8383923849814913, 'recall': 0.8360655737704918, 'f1-score': 0.8363305539853025, 'support': 61}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.4426229508196721, 'Recall': 0.5, 'F1-score': 0.4695652173913043, 'Accuracy': 0.8852459016393442}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2680.7343200824994}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9123376623376623, 'classification_report': {'0': {'precision': 0.8709677419354839, 'recall': 0.8181818181818182, 'f1-score': 0.84375, 'support': 33}, '1': {'precision': 0.8, 'recall': 0.8571428571428571, 'f1-score': 0.8275862068965518, 'support': 28}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8354838709677419, 'recall': 0.8376623376623377, 'f1-score': 0.8356681034482759, 'support': 61}, 'weighted avg': {'precision': 0.8383923849814913, 'recall': 0.8360655737704918, 'f1-score': 0.8363305539853025, 'support': 61}}}

Best


	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbat

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 127.10487538629512}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9311827956989247, 'classification_report': {'0': {'precision': 0.8484848484848485, 'recall': 0.9032258064516129, 'f1-score': 0.875, 'support': 31}, '1': {'precision': 0.8928571428571429, 'recall': 0.8333333333333334, 'f1-score': 0.8620689655172413, 'support': 30}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8706709956709957, 'recall': 0.8682795698924731, 'f1-score': 0.8685344827586207, 'support': 61}, 'weighted avg': {'precision': 0.8703072883400752, 'recall': 0.8688524590163934, 'f1-score': 0.8686404748445449, 'support': 61}}}

Outlier detection method:


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8814102564102564, 'Recall': 0.844927536231884, 'F1-score': 0.8609422492401215, 'Accuracy': 0.9016393442622951}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.4640856543722111}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9365591397849463, 'classification_report': {'0': {'precision': 0.8620689655172413, 'recall': 0.8064516129032258, 'f1-score': 0.8333333333333334, 'support': 31}, '1': {'precision': 0.8125, 'recall': 0.8666666666666667, 'f1-score': 0.8387096774193549, 'support': 30}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8372844827586207, 'recall': 0.8365591397849462, 'f1-score': 0.8360215053763441, 'support': 61}, 'weighted avg': {'precision': 0.8376907857546636, 'recall': 0.8360655737704918, 'f1-score': 0.8359774369821964, 'support': 61}}}




Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MAR'}

Generating

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7299465240641712, 'Recall': 0.6849462365591398, 'F1-score': 0.6712056737588652, 'Accuracy': 0.6885245901639344}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 70.04887352855789}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8387096774193548, 'classification_report': {'0': {'precision': 0.7857142857142857, 'recall': 0.7333333333333333, 'f1-score': 0.7586206896551724, 'support': 30}, '1': {'precision': 0.7575757575757576, 'recall': 0.8064516129032258, 'f1-score': 0.7812499999999999, 'support': 31}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.7716450216450217, 'recall': 0.7698924731182795, 'f1-score': 0.7699353448275861, 'support': 61}, 'weighted avg': {'precision': 0.7714143779717549, 'recall': 0.7704918032786885, 'f1-score': 0.7701208309779536, 'suppo


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MAR'}



PPP score no cleaning: {'roc_auc_score': 0.8397932816537468, 'classification_report': {'0': {'precision': 0.8444444444444444, 'recall': 0.8837209302325582, 'f1-score': 0.8636363636363636, 'support': 43}, '1': {'precision': 0.6875, 'recall': 0.6111111111111112, 'f1-score': 0.6470588235294118, 'support': 18}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.7659722222222223, 'recall': 0.7474160206718347, 'f1-score': 0.7553475935828877, 'support': 61}, 'weighted avg': {'precision': 0.7981329690346084, 'recall': 0.8032786885245902, 'f1-score': 0.7997282370474271, 'support': 61}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.13114754098360656, 'Recall': 0.5, 'F1-score': 0.20779220779220778, 'Accuracy': 0.26229508196721313}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 11.306136616016467}
Cleaner: (NoOutlierDete

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7527056277056277, 'Recall': 0.8243055555555556, 'F1-score': 0.7511655011655012, 'Accuracy': 0.7704918032786885}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.357725308841117}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8449612403100775, 'classification_report': {'0': {'precision': 0.8604651162790697, 'recall': 0.8604651162790697, 'f1-score': 0.8604651162790697, 'support': 43}, '1': {'precision': 0.6666666666666666, 'recall': 0.6666666666666666, 'f1-score': 0.6666666666666666, 'support': 18}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.7635658914728682, 'recall': 0.7635658914728682, 'f1-score': 0.7635658914728682, 'support': 61}, 'weighted avg': {'precision': 0.8032786885245902, 'recall': 0.8032786885245902, 'f1-score': 0.8032786885245902, 'suppo


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.5729166666666666, 'Recall': 0.6203703703703703, 'F1-score': 0.321302578018996, 'Accuracy': 0.32786885245901637}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 69.53750798584915}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9084668192219679, 'classification_report': {'0': {'precision': 0.8378378378378378, 'recall': 0.8157894736842105, 'f1-score': 0.8266666666666665, 'support': 38}, '1': {'precision': 0.7083333333333334, 'recall': 0.7391304347826086, 'f1-score': 0.723404255319149, 'support': 23}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7730855855855856, 'recall': 0.7774599542334095, 'f1-score': 0.7750354609929078, 'support': 61}, 'weighted avg': {'precision': 0.7890082705656477, 'recall': 0.7868852459016393, 'f1-score': 0.7877316591094059, 'suppor

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Chest_pain_type', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column':

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column'

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Chest_pain_type', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Gau

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.569672131147541, 'Recall': 0.625, 'F1-score': 0.5951327433628317, 'Accuracy': 0.889344262295082}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9883720930232558, 'Recall': 0.975, 'F1-score': 0.980889724310777, 'Accuracy': 0.9836065573770492, 'Mean Squared Error': 226407389.81516418}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8268817204301075, 'classification_report': {'0': {'precision': 0.88, 'recall': 0.7333333333333333, 'f1-score': 0.8, 'support': 30}, '1': {'precision': 0.7777777777777778, 'recall': 0.9032258064516129, 'f1-score': 0.835820895522388, 'support': 31}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8288888888888889, 'recall': 0.8182795698924731, 'f1-score': 0.817910447761194, 'support': 61}, 'weighted avg': {'precision': 0.8280510018214935, 'recall': 0.819672131147541, 'f1-score': 0.8182040616589185, 'support': 61}}}


	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbatio

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating co

	perturbation: MissingValues: {'column': 'Chest_pain_type', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Chest_pain_type', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': n

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MNAR'}

Gener

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.38114754098360654, 'Recall': 0.5, 'F1-score': 0.4325216496613221, 'Accuracy': 0.7622950819672131}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7601010101010102, 'Recall': 0.8803475935828877, 'F1-score': 0.7529279440119974, 'Accuracy': 0.7868852459016393, 'Mean Squared Error': 20432460.931747813}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7894736842105263, 'classification_report': {'0': {'precision': 0.8048780487804879, 'recall': 0.868421052631579, 'f1-score': 0.8354430379746836, 'support': 38}, '1': {'precision': 0.75, 'recall': 0.6521739130434783, 'f1-score': 0.6976744186046512, 'support': 23}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7774390243902439, 'recall': 0.7602974828375286, 'f1-score': 0.7665587282896673, 'support': 61}, 'weighted avg': {'precision': 0.784186325469812, 'recall': 0.7868852459016393, 'f1-score': 0.783

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8829803047194352, 'Recall': 0.8876428372352286, 'F1-score': 0.8802390416130034, 'Accuracy': 0.9180327868852459}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7023809523809523, 'Recall': 0.6818181818181819, 'F1-score': 0.6842105263157895, 'Accuracy': 0.8688524590163934, 'Mean Squared Error': 1105.5585737083861}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8855835240274599, 'classification_report': {'0': {'precision': 0.8181818181818182, 'recall': 0.9473684210526315, 'f1-score': 0.8780487804878049, 'support': 38}, '1': {'precision': 0.8823529411764706, 'recall': 0.6521739130434783, 'f1-score': 0.75, 'support': 23}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8502673796791445, 'recall': 0.799771167048055, 'f1-score': 0.8140243902439024, 'support': 61}, 'weighted avg': {'precision': 0.8423774875076708, 'recall': 0.8360655737

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7748947335344394, 'Recall': 0.8142732720178372, 'F1-score': 0.7659843695461165, 'Accuracy': 0.8073770491803278}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7023809523809523, 'Recall': 0.6818181818181819, 'F1-score': 0.6842105263157895, 'Accuracy': 0.8688524590163934, 'Mean Squared Error': 1418.3315284656494}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8935926773455377, 'classification_report': {'0': {'precision': 0.813953488372093, 'recall': 0.9210526315789473, 'f1-score': 0.8641975308641974, 'support': 38}, '1': {'precision': 0.8333333333333334, 'recall': 0.6521739130434783, 'f1-score': 0.7317073170731708, 'support': 23}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8236434108527132, 'recall': 0.7866132723112128, 'f1-score': 0.7979524239686842, 'support': 61}, 'weighted avg': {'precision': 0.821

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	pert

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 



	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling':

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Ga

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7022894711163153, 'Recall': 0.7560483870967742, 'F1-score': 0.7128412929758967, 'Accuracy': 0.7581967213114753}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4892241379310345, 'Recall': 0.4735449735449735, 'F1-score': 0.4089147286821705, 'Accuracy': 0.5081967213114754, 'Mean Squared Error': 1271.309366413792}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9301075268817205, 'classification_report': {'0': {'precision': 0.8709677419354839, 'recall': 0.8709677419354839, 'f1-score': 0.8709677419354839, 'support': 31}, '1': {'precision': 0.8666666666666667, 'recall': 0.8666666666666667, 'f1-score': 0.8666666666666667, 'support': 30}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8688172043010753, 'recall': 0.8688172043010753, 'f1-score': 0.8688172043010753, 'support': 61}, 'weighted avg': {'precision': 0.8688524590163934, 'recall'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6826436824537585, 'Recall': 0.7288978494623656, 'F1-score': 0.684950527226337, 'Accuracy': 0.7295081967213114}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4892241379310345, 'Recall': 0.4735449735449735, 'F1-score': 0.4089147286821705, 'Accuracy': 0.5081967213114754, 'Mean Squared Error': 1415.9757469533226}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9258064516129033, 'classification_report': {'0': {'precision': 0.9032258064516129, 'recall': 0.9032258064516129, 'f1-score': 0.9032258064516129, 'support': 31}, '1': {'precision': 0.9, 'recall': 0.9, 'f1-score': 0.9, 'support': 30}, 'accuracy': 0.9016393442622951, 'macro avg': {'precision': 0.9016129032258065, 'recall': 0.9016129032258065, 'f1-score': 0.9016129032258065, 'support': 61}, 'weighted avg': {'precision': 0.9016393442622951, 'recall': 0.9016393442622951,

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'T

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}
	per

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Ga

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6996127064834875, 'Recall': 0.7564236111111111, 'F1-score': 0.699082929420008, 'Accuracy': 0.7459016393442622}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.3615853658536585, 'Recall': 0.2774509803921569, 'F1-score': 0.24192212096106047, 'Accuracy': 0.26229508196721313, 'Mean Squared Error': 196.27833692944316}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9302832244008715, 'classification_report': {'0': {'precision': 0.8529411764705882, 'recall': 0.8529411764705882, 'f1-score': 0.8529411764705882, 'support': 34}, '1': {'precision': 0.8148148148148148, 'recall': 0.8148148148148148, 'f1-score': 0.8148148148148148, 'support': 27}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8338779956427015, 'recall': 0.8338779956427015, 'f1-score': 0.8338779956427015, 'support': 61}, 'weighted avg': {'precision': 0.8360655737704918, 'recal

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6099873592570163, 'Recall': 0.6557291666666667, 'F1-score': 0.5942343221754987, 'Accuracy': 0.6270491803278688}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.3615853658536585, 'Recall': 0.2774509803921569, 'F1-score': 0.24192212096106047, 'Accuracy': 0.26229508196721313, 'Mean Squared Error': 201.38402538065122}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8943355119825707, 'classification_report': {'0': {'precision': 0.8108108108108109, 'recall': 0.8823529411764706, 'f1-score': 0.8450704225352113, 'support': 34}, '1': {'precision': 0.8333333333333334, 'recall': 0.7407407407407407, 'f1-score': 0.7843137254901961, 'support': 27}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8220720720720721, 'recall': 0.8115468409586056, 'f1-score': 0.8146920740127037, 'support': 61}, 'weighted avg': {'precision': 0.

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_he

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted trai

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr


Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.514344262295082, 'Recall': 0.6209016393442623, 'F1-score': 0.5236691784151678, 'Accuracy': 0.7704918032786885}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.49531386840692715, 'Recall': 0.3805652680652681, 'F1-score': 0.3664742574532522, 'Accuracy': 0.3770491803278689, 'Mean Squared Error': 2539.9252139056925}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8297413793103449, 'classification_report': {'0': {'precision': 0.6666666666666666, 'recall': 0.896551724137931, 'f1-score': 0.7647058823529411, 'support': 29}, '1': {'precision': 0.8636363636363636, 'recall': 0.59375, 'f1-score': 0.7037037037037037, 'support': 32}, 'accuracy': 0.7377049180327869, 'macro avg': {'precision': 0.7651515151515151, 'recall': 0.7451508620689655, 'f1-score': 0.7342047930283224, 'support': 61}, 'weighted avg': {'precision': 0.7699950322901142, 'recall': 0.7377049

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.514344262295082, 'Recall': 0.6209016393442623, 'F1-score': 0.5236691784151678, 'Accuracy': 0.7704918032786885}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.49531386840692715, 'Recall': 0.3805652680652681, 'F1-score': 0.3664742574532522, 'Accuracy': 0.3770491803278689, 'Mean Squared Error': 2539.9252139056925}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8362068965517241, 'classification_report': {'0': {'precision': 0.627906976744186, 'recall': 0.9310344827586207, 'f1-score': 0.7499999999999999, 'support': 29}, '1': {'precision': 0.8888888888888888, 'recall': 0.5, 'f1-score': 0.64, 'support': 32}, 'accuracy': 0.7049180327868853, 'macro avg': {'precision': 0.7583979328165374, 'recall': 0.7155172413793103, 'f1-score': 0.695, 'support': 61}, 'weighted avg': {'precision': 0.7648155208200956, 'recall': 0.70491803278688


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9861111111111112, 'Recall': 0.9895833333333334, 'F1-score': 0.9876167961274344, 'Accuracy': 0.9836065573770492, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9480519480519481, 'classification_report': {'0': {'precision': 0.7941176470588235, 'recall': 0.9642857142857143, 'f1-score': 0.8709677419354839, 'support': 28}, '1': {'precision': 0.9629629629629629, 'recall': 0.7878787878787878, 'f1-score': 0.8666666666666665, 'support': 33}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8785403050108932, 'recall': 0.876082251082251, 'f1-score': 0.8688172043010751, 'support': 61}, 'weighted avg': {'precision': 0.8854601950069644, 'recall': 0.8688524590163934, 'f1-score': 0.8686409307244843, 'support': 61}}}

Out


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8947368421052632, 'Recall': 0.9565217391304348, 'F1-score': 0.9184491978609626, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 353.85396360546014}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9366666666666668, 'classification_report': {'0': {'precision': 0.8888888888888888, 'recall': 0.8888888888888888, 'f1-score': 0.8888888888888888, 'support': 36}, '1': {'precision': 0.84, 'recall': 0.84, 'f1-score': 0.8399999999999999, 'support': 25}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8644444444444443, 'recall': 0.8644444444444443, 'f1-score': 0.8644444444444443, 'support': 61}, 'weighted avg': {'precision': 0.8688524590163934, 'recall': 0.8688524590163934, 'f1-score': 0.8688524590163934, 'support': 61}}}

Outlier detecti


	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9411764705882353, 'Recall': 0.935483870967742, 'F1-score': 0.9342672413793103, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 233.01415013531494}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8924485125858124, 'classification_report': {'0': {'precision': 0.8571428571428571, 'recall': 0.7894736842105263, 'f1-score': 0.8219178082191781, 'support': 38}, '1': {'precision': 0.6923076923076923, 'recall': 0.782608695652174, 'f1-score': 0.7346938775510203, 'support': 23}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7747252747252746, 'recall': 0.7860411899313502, 'f1-score': 0.7783058428850993, 'support': 61}, 'weighted avg': {'precision': 0.794991893352549, 'recall': 0.7868852459016393, 'f1-score': 0.7890300966557744, 'support


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Exercise_induced_angina', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8303571428571428, 'Recall': 0.6041666666666666, 'F1-score': 0.5702632554690397, 'Accuracy': 0.6885245901639344, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9725400457665903, 'classification_report': {'0': {'precision': 0.9444444444444444, 'recall': 0.8947368421052632, 'f1-score': 0.918918918918919, 'support': 38}, '1': {'precision': 0.84, 'recall': 0.9130434782608695, 'f1-score': 0.8749999999999999, 'support': 23}, 'accuracy': 0.9016393442622951, 'macro avg': {'precision': 0.8922222222222222, 'recall': 0.9038901601830663, 'f1-score': 0.8969594594594594, 'support': 61}, 'weighted avg': {'precision': 0.905063752276867, 'recall': 0.9016393442622951, 'f1-score': 0.9023593265396544, 'support': 61}}}

Outlier detection 


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Chest_pain_type', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MNAR',

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8644444444444445, 'classification_report': {'0': {'precision': 0.8125, 'recall': 0.7222222222222222, 'f1-score': 0.7647058823529411, 'support': 36}, '1': {'precision': 0.6551724137931034, 'recall': 0.76, 'f1-score': 0.7037037037037037, 'support': 25}, 'accuracy': 0.7377049180327869, 'macro avg': {'precision': 0.7338362068965517, 'recall': 0.741111111111111, 'f1-score': 0.7342047930283224, 'support': 61}, 'weighted avg': {'precision': 0.7480214810627474, 'recall': 0.7377049180327869, 'f1-score': 0.7397049894639094, 'support': 61}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8220338983050848, 'Recall': 0.5434782608695652, 'F1-score': 0.4717525773195876, 'Accuracy': 0.6557377049180327, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, 


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShif

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.4672131147540984, 'Recall': 0.5, 'F1-score': 0.48305084745762716, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8095238095238096, 'Recall': 0.9540229885057471, 'F1-score': 0.8419753086419753, 'Accuracy': 0.9344262295081968, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8885135135135135, 'classification_report': {'0': {'precision': 0.775, 'recall': 0.8378378378378378, 'f1-score': 0.8051948051948051, 'support': 37}, '1': {'precision': 0.7142857142857143, 'recall': 0.625, 'f1-score': 0.6666666666666666, 'support': 24}, 'accuracy': 0.7540983606557377, 'macro avg': {'precision': 0.7446428571428572, 'recall': 0.7314189189189189, 'f1-score': 0.7359307359307359, 'support': 61}, 'weighted avg': {'precision': 0.751112412177986, 'recall': 0.7540983606557377, 'f1-score': 0.7506919310197998, '

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.4672131147540984, 'Recall': 0.5, 'F1-score': 0.48305084745762716, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8095238095238096, 'Recall': 0.9540229885057471, 'F1-score': 0.8419753086419753, 'Accuracy': 0.9344262295081968, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8885135135135135, 'classification_report': {'0': {'precision': 0.7804878048780488, 'recall': 0.8648648648648649, 'f1-score': 0.8205128205128206, 'support': 37}, '1': {'precision': 0.75, 'recall': 0.625, 'f1-score': 0.6818181818181818, 'support': 24}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.7652439024390244, 'recall': 0.7449324324324325, 'f1-score': 0.7511655011655012, 'support': 61}, 'weighted avg': {'precision': 0.7684926029588164, 'recall': 0.7704918032786885, 'f1-score


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fracti

PPP score no cleaning: {'roc_auc_score': 0.9313186813186813, 'classification_report': {'0': {'precision': 0.825, 'recall': 0.9428571428571428, 'f1-score': 0.88, 'support': 35}, '1': {'precision': 0.9047619047619048, 'recall': 0.7307692307692307, 'f1-score': 0.8085106382978723, 'support': 26}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.8648809523809524, 'recall': 0.8368131868131867, 'f1-score': 0.8442553191489361, 'support': 61}, 'weighted avg': {'precision': 0.8589968774395004, 'recall': 0.8524590163934426, 'f1-score': 0.8495291245204045, 'support': 61}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9313186813186813, 'classificat


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'colu

PPP score no cleaning: {'roc_auc_score': 0.943956043956044, 'classification_report': {'0': {'precision': 0.85, 'recall': 0.9714285714285714, 'f1-score': 0.9066666666666667, 'support': 35}, '1': {'precision': 0.9523809523809523, 'recall': 0.7692307692307693, 'f1-score': 0.8510638297872339, 'support': 26}, 'accuracy': 0.8852459016393442, 'macro avg': {'precision': 0.9011904761904761, 'recall': 0.8703296703296703, 'f1-score': 0.8788652482269503, 'support': 61}, 'weighted avg': {'precision': 0.8936377829820452, 'recall': 0.8852459016393442, 'f1-score': 0.8829670968492036, 'support': 61}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.943956043956044, '


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.75, 'sampling': 'M

Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8966450216450217, 'classification_report': {'0': {'precision': 0.8333333333333334, 'recall': 0.7575757575757576, 'f1-score': 0.7936507936507938, 'support': 33}, '1': {'precision': 0.7419354838709677, 'recall': 0.8214285714285714, 'f1-score': 0.7796610169491526, 'support': 28}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7876344086021505, 'recall': 0.7895021645021645, 'f1-score': 0.7866559052999732, 'support': 61}, 'weighted avg': {'precision': 0.7913802221047065, 'recall': 0.7868852459016393, 'f1-score': 0.7872292568041388, 'support': 61}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_sc


Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: CategoricalS

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.07377049180327869, 'Recall': 0.5, 'F1-score': 0.1285714285714286, 'Accuracy': 0.14754098360655737}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.35540540540540533, 'Recall': 0.32555555555555554, 'F1-score': 0.3108407871198569, 'Accuracy': 0.14754098360655737, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8461538461538463, 'classification_report': {'0': {'precision': 0.8235294117647058, 'recall': 0.717948717948718, 'f1-score': 0.767123287671233, 'support': 39}, '1': {'precision': 0.5925925925925926, 'recall': 0.7272727272727273, 'f1-score': 0.6530612244897959, 'support': 22}, 'accuracy': 0.7213114754098361, 'macro avg': {'precision': 0.7080610021786492, 'recall': 0.7226107226107226, 'f1-score': 0.7100922560805144, 'support': 61}, 'weighted avg': {'precision': 0.7402407228829602, 'recall': 0.7213114754098361, 'f1-score': 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.07377049180327869, 'Recall': 0.5, 'F1-score': 0.1285714285714286, 'Accuracy': 0.14754098360655737}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.35540540540540533, 'Recall': 0.32555555555555554, 'F1-score': 0.3108407871198569, 'Accuracy': 0.14754098360655737, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8391608391608392, 'classification_report': {'0': {'precision': 0.8181818181818182, 'recall': 0.6923076923076923, 'f1-score': 0.7500000000000001, 'support': 39}, '1': {'precision': 0.5714285714285714, 'recall': 0.7272727272727273, 'f1-score': 0.64, 'support': 22}, 'accuracy': 0.7049180327868853, 'macro avg': {'precision': 0.6948051948051948, 'recall': 0.7097902097902098, 'f1-score': 0.6950000000000001, 'support': 61}, 'weighted avg': {'precision': 0.7291888439429424, 'recall': 0.7049180327868853, 'f1-score': 0.71

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.07377049180327869, 'Recall': 0.5, 'F1-score': 0.1285714285714286, 'Accuracy': 0.14754098360655737}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.35540540540540533, 'Recall': 0.32555555555555554, 'F1-score': 0.3108407871198569, 'Accuracy': 0.14754098360655737, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.837995337995338, 'classification_report': {'0': {'precision': 0.8275862068965517, 'recall': 0.6153846153846154, 'f1-score': 0.7058823529411765, 'support': 39}, '1': {'precision': 0.53125, 'recall': 0.7727272727272727, 'f1-score': 0.6296296296296297, 'support': 22}, 'accuracy': 0.6721311475409836, 'macro avg': {'precision': 0.6794181034482758, 'recall': 0.6940559440559441, 'f1-score': 0.6677559912854031, 'support': 61}, 'weighted avg': {'precision': 0.7207108535895986, 'recall': 0.67213114


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7368421052631579, 'Recall': 0.9038461538461539, 'F1-score': 0.7682370820668694, 'Accuracy': 0.8360655737704918}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 260.47150770181645}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9015151515151515, 'classification_report': {'0': {'precision': 0.8387096774193549, 'recall': 0.7878787878787878, 'f1-score': 0.8125, 'support': 33}, '1': {'precision': 0.7666666666666667, 'recall': 0.8214285714285714, 'f1-score': 0.793103448275862, 'support': 28}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8026881720430108, 'recall': 0.8046536796536796, 'f1-score': 0.802801724137931, 'support': 61}, 'weighted avg': {'precision': 0.8056407544509079, 'recall': 0.8032786885245902, 'f1-score': 0.8035966647823629, 'support': 61}}}

O


	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'T

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.96875, 'Recall': 0.9891304347826086, 'F1-score': 0.97837646224743, 'Accuracy': 0.9836065573770492}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 20.820843331269366}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.843010752688172, 'classification_report': {'0': {'precision': 0.7142857142857143, 'recall': 0.8333333333333334, 'f1-score': 0.7692307692307692, 'support': 30}, '1': {'precision': 0.8076923076923077, 'recall': 0.6774193548387096, 'f1-score': 0.7368421052631579, 'support': 31}, 'accuracy': 0.7540983606557377, 'macro avg': {'precision': 0.760989010989011, 'recall': 0.7553763440860215, 'f1-score': 0.7530364372469636, 'support': 61}, 'weighted avg': {'precision': 0.7617546388038192, 'recall': 0.7540983606557377, 'f1-score': 0.75277095639477, 'support': 61}}}

Outl

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9090909090909092, 'classification_report': {'0': {'precision': 0.8055555555555556, 'recall': 0.8787878787878788, 'f1-score': 0.8405797101449276, 'support': 33}, '1': {'precision': 0.84, 'recall': 0.75, 'f1-score': 0.7924528301886793, 'support': 28}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8227777777777778, 'recall': 0.8143939393939394, 'f1-score': 0.8165162701668034, 'support': 61}, 'weighted avg': {'precision': 0.8213661202185794, 'recall': 0.819672131147541, 'f1-score': 0.8184886832797644, 'support': 61}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.96875, 'Recall': 0.967741935483871, 'F1-score': 0.9672043010752688, 'Accuracy': 0.9672131147540983}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 58.30610759589341}
Cleaner: (PyODKNNOutlierDetection, M


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9891304347826086, 'Recall': 0.96875, 'F1-score': 0.97837646224743, 'Accuracy': 0.9836065573770492}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 391.4684453382092}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8795045045045046, 'classification_report': {'0': {'precision': 0.8823529411764706, 'recall': 0.8108108108108109, 'f1-score': 0.8450704225352113, 'support': 37}, '1': {'precision': 0.7407407407407407, 'recall': 0.8333333333333334, 'f1-score': 0.7843137254901961, 'support': 24}, 'accuracy': 0.819672131147541, 'macro avg': {'precision': 0.8115468409586056, 'recall': 0.8220720720720721, 'f1-score': 0.8146920740127037, 'support': 61}, 'weighted avg': {'precision': 0.8266366655951999, 'recall': 0.819672131147541, 'f1-score': 0.8211661482879923, 'support': 61}}}

Out


Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training da

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9821428571428572, 'Recall': 0.8571428571428572, 'F1-score': 0.9075757575757575, 'Accuracy': 0.9672131147540983}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1901.4329107234237}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9194444444444444, 'classification_report': {'0': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 36}, '1': {'precision': 0.7857142857142857, 'recall': 0.88, 'f1-score': 0.830188679245283, 'support': 25}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.8474025974025974, 'recall': 0.8566666666666667, 'f1-score': 0.8498769483182936, 'support': 61}, 'weighted avg': {'precision': 0.8585267191824569, 'recall': 0.8524590163934426, 'f1-score': 0.8534272919216235, 'support': 61}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.4426229508196721, 'Recall': 0.5, 'F1-score': 0.4695652173913043, 'Accuracy': 0.8852459016393442}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2256.0171413983417}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9233333333333332, 'classification_report': {'0': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 36}, '1': {'precision': 0.7857142857142857, 'recall': 0.88, 'f1-score': 0.830188679245283, 'support': 25}, 'accuracy': 0.8524590163934426, 'macro avg': {'precision': 0.8474025974025974, 'recall': 0.8566666666666667, 'f1-score': 0.8498769483182936, 'support': 61}, 'weighted avg': {'precision': 0.8585267191824569, 'recall': 0.8524590163934426, 'f1-score': 0.8534272919216235, 'support': 


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7464285714285714, 'Recall': 0.6474358974358975, 'F1-score': 0.6772486772486772, 'Accuracy': 0.8688524590163934}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 37.0897468661295}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9354838709677419, 'classification_report': {'0': {'precision': 0.8235294117647058, 'recall': 0.9333333333333333, 'f1-score': 0.8749999999999999, 'support': 30}, '1': {'precision': 0.9259259259259259, 'recall': 0.8064516129032258, 'f1-score': 0.8620689655172414, 'support': 31}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8747276688453158, 'recall': 0.8698924731182796, 'f1-score': 0.8685344827586207, 'support': 61}, 'weighted avg': {'precision': 0.8755669845351619, 'recall': 0.8688524590163934, 'f1-score': 0.8684284906726963, 'suppor


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9428879310344827, 'classification_report': {'0': {'precision': 0.8529411764705882, 'recall': 0.90625, 'f1-score': 0.8787878787878787, 'support': 32}, '1': {'precision': 0.8888888888888888, 'recall': 0.8275862068965517, 'f1-score': 0.8571428571428572, 'support': 29}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8709150326797386, 'recall': 0.8669181034482758, 'f1-score': 0.8679653679653679, 'support': 61}, 'weighted avg': {'precision': 0.8700310725383049, 'recall': 0.8688524590163934, 'f1-score': 0.8684976225959832, 'support': 61}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9302884615384616, 'Recall': 0.8891304347826088, 'F1-score': 0.9072948328267477, 'Accuracy': 0.9344262295081968}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.5739487975304502}
Clea


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MNA

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8032036613272311, 'Recall': 0.7849462365591398, 'F1-score': 0.783155592015313, 'Accuracy': 0.7868852459016393}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 348.6870775873954}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9120879120879121, 'classification_report': {'0': {'precision': 0.8787878787878788, 'recall': 0.8285714285714286, 'f1-score': 0.8529411764705883, 'support': 35}, '1': {'precision': 0.7857142857142857, 'recall': 0.8461538461538461, 'f1-score': 0.8148148148148148, 'support': 26}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8322510822510822, 'recall': 0.8373626373626374, 'f1-score': 0.8338779956427016, 'support': 61}, 'weighted avg': {'precision': 0.8391171669860193, 'recall': 0.8360655737704918, 'f1-score': 0.8366905960927176, 'suppor


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.75, 'sampling': 'MAR'}

G

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6866883116883117, 'Recall': 0.7395833333333333, 'F1-score': 0.68006993006993, 'Accuracy': 0.7049180327868853}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.007868922705724}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9666666666666667, 'classification_report': {'0': {'precision': 0.7837837837837838, 'recall': 0.9354838709677419, 'f1-score': 0.8529411764705881, 'support': 31}, '1': {'precision': 0.9166666666666666, 'recall': 0.7333333333333333, 'f1-score': 0.8148148148148148, 'support': 30}, 'accuracy': 0.8360655737704918, 'macro avg': {'precision': 0.8502252252252251, 'recall': 0.8344086021505376, 'f1-score': 0.8338779956427014, 'support': 61}, 'weighted avg': {'precision': 0.8491360212671688, 'recall': 0.8360655737704918, 'f1-score': 0.8341905068038143, 'support


Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MCAR

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05737704918032787, 'Recall': 0.5, 'F1-score': 0.10294117647058824, 'Accuracy': 0.11475409836065574}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 3.2771415871757914}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.981981981981982, 'classification_report': {'0': {'precision': 0.8974358974358975, 'recall': 0.9459459459459459, 'f1-score': 0.9210526315789475, 'support': 37}, '1': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 24}, 'accuracy': 0.9016393442622951, 'macro avg': {'precision': 0.9032634032634033, 'recall': 0.8896396396396397, 'f1-score': 0.8953089244851259, 'support': 61}, 'weighted avg': {'precision': 0.9020214757919675, 'recall': 0.9016393442622951, 'f1-score': 0.900795288292006, 'support': 61}}}

Outlier det

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampli

	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	

	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8322222222222222, 'Recall': 0.8646341463414634, 'F1-score': 0.8415584415584414, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': 3006889925.35601}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8769230769230769, 'classification_report': {'0': {'precision': 0.7894736842105263, 'recall': 0.8571428571428571, 'f1-score': 0.8219178082191781, 'support': 35}, '1': {'precision': 0.782608695652174, 'recall': 0.6923076923076923, 'f1-score': 0.7346938775510203, 'support': 26}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.7860411899313502, 'recall': 0.7747252747252746, 'f1-score': 0.7783058428850993, 'support': 61}, 'weighted avg': {'precision': 0.7865476235135236, 'recall': 0.7868852459016393, 'f1-s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7654443234361268, 'Recall': 0.8026175213675214, 'F1-score': 0.7748582065061873, 'Accuracy': 0.9180327868852459}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8322222222222222, 'Recall': 0.8646341463414634, 'F1-score': 0.8415584415584414, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': 470.13808998980727}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8747252747252747, 'classification_report': {'0': {'precision': 0.7692307692307693, 'recall': 0.8571428571428571, 'f1-score': 0.8108108108108107, 'support': 35}, '1': {'precision': 0.7727272727272727, 'recall': 0.6538461538461539, 'f1-score': 0.7083333333333333, 'support': 26}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.770979020979021, 'recall': 0.7554945054945055, 'f1-score': 0.759572072072072, 'support': 61}, 'weighted avg': {'precision': 0.770721082196492, 'recall': 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.592569281811085, 'Recall': 0.7064636752136751, 'F1-score': 0.5961022879249346, 'Accuracy': 0.7540983606557377}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8322222222222222, 'Recall': 0.8646341463414634, 'F1-score': 0.8415584415584414, 'Accuracy': 0.8524590163934426, 'Mean Squared Error': 899.3712022955075}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8670329670329671, 'classification_report': {'0': {'precision': 0.7560975609756098, 'recall': 0.8857142857142857, 'f1-score': 0.8157894736842105, 'support': 35}, '1': {'precision': 0.8, 'recall': 0.6153846153846154, 'f1-score': 0.6956521739130435, 'support': 26}, 'accuracy': 0.7704918032786885, 'macro avg': {'precision': 0.7780487804878049, 'recall': 0.7505494505494505, 'f1-score': 0.755720823798627, 'support': 61}, 'weighted avg': {'precision': 0.7748100759696122, '

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': '

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted trai

	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.25, '

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: Missi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.47950819672131145, 'Recall': 0.625, 'F1-score': 0.5330085504076357, 'Accuracy': 0.7090163934426229}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9127906976744187, 'Recall': 0.8863636363636364, 'F1-score': 0.8736536868268434, 'Accuracy': 0.8770491803278688, 'Mean Squared Error': 121310.9949621037}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9752252252252253, 'classification_report': {'0': {'precision': 0.8809523809523809, 'recall': 1.0, 'f1-score': 0.9367088607594937, 'support': 37}, '1': {'precision': 1.0, 'recall': 0.7916666666666666, 'f1-score': 0.8837209302325582, 'support': 24}, 'accuracy': 0.9180327868852459, 'macro avg': {'precision': 0.9404761904761905, 'recall': 0.8958333333333333, 'f1-score': 0.9102148954960259, 'support': 61}, 'weighted avg': {'precision': 0.9277907884465262, 'recall': 0.9180327868852459, 'f1-score': 0.9158611503882403


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sl

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Cholesterol', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling

	perturbation: CategoricalShift: {'column': 'Chest_pain_type', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Slope', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': '

PPP score no cleaning: {'roc_auc_score': 0.9105603448275862, 'classification_report': {'0': {'precision': 0.8846153846153846, 'recall': 0.71875, 'f1-score': 0.7931034482758621, 'support': 32}, '1': {'precision': 0.7428571428571429, 'recall': 0.896551724137931, 'f1-score': 0.8125, 'support': 29}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8137362637362637, 'recall': 0.8076508620689655, 'f1-score': 0.802801724137931, 'support': 61}, 'weighted avg': {'precision': 0.8172221221401549, 'recall': 0.8032786885245902, 'f1-score': 0.8023247597512719, 'support': 61}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.2540983606557377, 'Recall': 0.5, 'F1-score': 0.3369565217391305, 'Accuracy': 0.5081967213114754}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6407624633431086, 'Recall': 0.7, 'F1-score': 0.5050595238095238, 'Accuracy': 0.5081967213114754, 'Mean Squared Error': 28563724.65959

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6955391002533873, 'Recall': 0.7442204301075268, 'F1-score': 0.7008036022736179, 'Accuracy': 0.7459016393442622}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6407624633431086, 'Recall': 0.7, 'F1-score': 0.5050595238095238, 'Accuracy': 0.5081967213114754, 'Mean Squared Error': 775.1359872335287}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8836206896551725, 'classification_report': {'0': {'precision': 0.88, 'recall': 0.6875, 'f1-score': 0.7719298245614036, 'support': 32}, '1': {'precision': 0.7222222222222222, 'recall': 0.896551724137931, 'f1-score': 0.7999999999999999, 'support': 29}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8011111111111111, 'recall': 0.7920258620689655, 'f1-score': 0.7859649122807018, 'support': 61}, 'weighted avg': {'precision': 0.8049908925318761, 'recall': 0.7868852459016393, 'f1-score': 0.785274

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6443009506420139, 'Recall': 0.6725806451612903, 'F1-score': 0.6203305877889429, 'Accuracy': 0.6721311475409837}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6407624633431086, 'Recall': 0.7, 'F1-score': 0.5050595238095238, 'Accuracy': 0.5081967213114754, 'Mean Squared Error': 862.2963586786249}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8911637931034483, 'classification_report': {'0': {'precision': 0.88, 'recall': 0.6875, 'f1-score': 0.7719298245614036, 'support': 32}, '1': {'precision': 0.7222222222222222, 'recall': 0.896551724137931, 'f1-score': 0.7999999999999999, 'support': 29}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8011111111111111, 'recall': 0.7920258620689655, 'f1-score': 0.7859649122807018, 'support': 61}, 'weighted avg': {'precision': 0.8049908925318761, 'recall': 0.78688524590163


Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.75, 'sampling':

	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Resting_ecg', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbat

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Max_heart_rate', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 61

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.13114754098360656, 'Recall': 0.5, 'F1-score': 0.20779220779220778, 'Accuracy': 0.26229508196721313}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5511386639676114, 'Recall': 0.40902777777777777, 'F1-score': 0.3944515306122449, 'Accuracy': 0.5, 'Mean Squared Error': 697109.1049444135}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8084415584415585, 'classification_report': {'0': {'precision': 0.6923076923076923, 'recall': 0.8181818181818182, 'f1-score': 0.7500000000000001, 'support': 33}, '1': {'precision': 0.7272727272727273, 'recall': 0.5714285714285714, 'f1-score': 0.64, 'support': 28}, 'accuracy': 0.7049180327868853, 'macro avg': {'precision': 0.7097902097902098, 'recall': 0.6948051948051948, 'f1-score': 0.6950000000000001, 'support': 61}, 'weighted avg': {'precision': 0.7083572165539379, 'recall': 0.7049180327868853, 'f1-score': 0.69950819672131

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7219757454630495, 'Recall': 0.81875, 'F1-score': 0.7390746519937196, 'Accuracy': 0.7622950819672131}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5511386639676114, 'Recall': 0.40902777777777777, 'F1-score': 0.3944515306122449, 'Accuracy': 0.5, 'Mean Squared Error': 1627.0784169763526}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7878787878787878, 'classification_report': {'0': {'precision': 0.7222222222222222, 'recall': 0.7878787878787878, 'f1-score': 0.7536231884057971, 'support': 33}, '1': {'precision': 0.72, 'recall': 0.6428571428571429, 'f1-score': 0.6792452830188679, 'support': 28}, 'accuracy': 0.7213114754098361, 'macro avg': {'precision': 0.721111111111111, 'recall': 0.7153679653679654, 'f1-score': 0.7164342357123326, 'support': 61}, 'weighted avg': {'precision': 0.7212021857923497, 'recall': 0.7213114754098361, 'f1-score': 0.719

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7408119702799152, 'Recall': 0.8119791666666667, 'F1-score': 0.7470009268214056, 'Accuracy': 0.7745901639344263}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5511386639676114, 'Recall': 0.40902777777777777, 'F1-score': 0.3944515306122449, 'Accuracy': 0.5, 'Mean Squared Error': 1465.3444648626212}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7911255411255411, 'classification_report': {'0': {'precision': 0.6842105263157895, 'recall': 0.7878787878787878, 'f1-score': 0.732394366197183, 'support': 33}, '1': {'precision': 0.6956521739130435, 'recall': 0.5714285714285714, 'f1-score': 0.6274509803921569, 'support': 28}, 'accuracy': 0.6885245901639344, 'macro avg': {'precision': 0.6899313501144164, 'recall': 0.6796536796536796, 'f1-score': 0.6799226732946699, 'support': 61}, 'weighted avg': {'precision': 0.6894624301309226

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Slope', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Number_of_vessels_colored', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation:

	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Fasting_blood_sugar_&lt;_120', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Resting_ecg', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Exercise_induced_angina', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Trestbps', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'

	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Sex', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Sex', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'Oldpeak', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: MissingValues: {'column': 'Age', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'Thal', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'Max_heart_rate', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.9,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.7947510822510823, 'Recall': 0.9157442216652742, 'F1-score': 0.8027730193407473, 'Accuracy': 0.8770491803278688}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.41379310344827586, 'Recall': 0.47058823529411764, 'F1-score': 0.4403669724770642, 'Accuracy': 0.7868852459016393, 'Mean Squared Error': 1371.7013794552827}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8884615384615385, 'classification_report': {'0': {'precision': 0.8484848484848485, 'recall': 0.8, 'f1-score': 0.823529411764706, 'support': 35}, '1': {'precision': 0.75, 'recall': 0.8076923076923077, 'f1-score': 0.7777777777777779, 'support': 26}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.7992424242424243, 'recall': 0.8038461538461539, 'f1-score': 0.8006535947712419, 'support': 61}, 'weighted avg': {'precision': 0.806507699950323, 'recall': 0.8032786885245902, 'f1-sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6554552364517852, 'Recall': 0.7937204121414647, 'F1-score': 0.6946963191528409, 'Accuracy': 0.8811475409836066}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.41379310344827586, 'Recall': 0.47058823529411764, 'F1-score': 0.4403669724770642, 'Accuracy': 0.7868852459016393, 'Mean Squared Error': 1205.7108083163946}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8807692307692309, 'classification_report': {'0': {'precision': 0.8484848484848485, 'recall': 0.8, 'f1-score': 0.823529411764706, 'support': 35}, '1': {'precision': 0.75, 'recall': 0.8076923076923077, 'f1-score': 0.7777777777777779, 'support': 26}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.7992424242424243, 'recall': 0.8038461538461539, 'f1-score': 0.8006535947712419, 'support': 61}, 'weighted avg': {'precision': 0.806507699950323, 'recall': 0.


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingVal

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9705882352941176, 'Recall': 0.9375, 'F1-score': 0.9515151515151515, 'Accuracy': 0.9583333333333334, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.9230769230769231, 'f1-score': 0.9600000000000001, 'support': 13}, 'yes': {'precision': 0.9166666666666666, 'recall': 1.0, 'f1-score': 0.9565217391304348, 'support': 11}, 'accuracy': 0.9583333333333334, 'macro avg': {'precision': 0.9583333333333333, 'recall': 0.9615384615384616, 'f1-score': 0.9582608695652175, 'support': 24}, 'weighted avg': {'precision': 0.9618055555555555, 'recall': 0.9583333333333334, 'f1-score': 0.9584057971014494, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetect


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValue

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9375, 'Recall': 0.9, 'F1-score': 0.9111111111111112, 'Accuracy': 0.9166666666666666, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 0.8461538461538461, 'recall': 1.0, 'f1-score': 0.9166666666666666, 'support': 11}, 'yes': {'precision': 1.0, 'recall': 0.8461538461538461, 'f1-score': 0.9166666666666666, 'support': 13}, 'accuracy': 0.9166666666666666, 'macro avg': {'precision': 0.9230769230769231, 'recall': 0.9230769230769231, 'f1-score': 0.9166666666666666, 'support': 24}, 'weighted avg': {'precision': 0.9294871794871794, 'recall': 0.9166666666666666, 'f1-score': 0.9166666666666666, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: 


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'col

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9615384615384616, 'Recall': 0.9583333333333333, 'F1-score': 0.9582608695652175, 'Accuracy': 0.9583333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.7329711009837976}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 15}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8333333333333333, 'Recall': 0.75, 'F1-score': 0.7333333333333334, 'Accuracy': 0.75}
Imputat

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.75, 'sampling': '

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.2408992513020856}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8913043478260869, 'Recall': 0.5833333333333334, 'F1-score': 0.5818815331010454, 'Accuracy': 0.7916666666666666}
Imputation method: MeanModeImputation,


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'c

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.0625, 'Recall': 0.5, 'F1-score': 0.1111111111111111, 'Accuracy': 0.125}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8695652173913043, 'Recall': 0.5714285714285714, 'F1-score': 0.55, 'Accuracy': 0.75, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.86956521739


	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Catego


Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall':


Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.375, 'Recall': 0.5, 'F1-score': 0.42857142857142855, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8125, 'Recall': 0.7857142857142857, 'F1-score': 0.7482517482517483, 'Accuracy': 0.75, 'Mean Squared Error': nan}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'reca

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.375, 'Recall': 0.5, 'F1-score': 0.42857142857142855, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8125, 'Recall': 0.7857142857142857, 'F1-score': 0.7482517482517483, 'Accuracy': 0.75, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted trainin


Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall'


	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Categor

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.125, 'Recall': 0.5, 'F1-score': 0.2, 'Accuracy': 0.25}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.3445378151260504, 'Recall': 0.30526315789473685, 'F1-score': 0.24475524475524474, 'Accuracy': 0.25, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9851851851851852, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.3333333333333333, 'f1-score': 0.5, 'support': 15}, 'yes': {'precision': 0.47368421052631576, 'recall': 1.0, 'f1-score': 0.6428571428571429, 'support': 9}, 'accuracy': 0.5833333333333334, 'macro avg': {'precision': 0.7368421052631579, 'recall': 0.6666666666666666, 'f1-score': 0.5714285714285714, 'support': 24}, 'weighted avg': {'precision': 0.8026315789473685, 'recall': 0.5833333333333334, 'f1-score': 0.5535714285714286, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.125, 'Recall': 0.5, 'F1-score': 0.2, 'Accuracy': 0.25}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.3445378151260504, 'Recall': 0.30526315789473685, 'F1-score': 0.24475524475524474, 'Accuracy': 0.25, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9185185185185185, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.4, 'f1-score': 0.5714285714285715, 'support': 15}, 'yes': {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666666666666666, 'support': 9}, 'accuracy': 0.625, 'macro avg': {'precision': 0.75, 'recall': 0.7, 'f1-score': 0.6190476190476191, 'support': 24}, 'weighted avg': {'precision': 0.8125, 'recall': 0.625, 'f1-score': 0.6071428571428572, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.985


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted traini

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.0625, 'Recall': 0.5, 'F1-score': 0.1111111111111111, 'Accuracy': 0.125}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.075, 'Recall': 0.21428571428571427, 'F1-score': 0.1111111111111111, 'Accuracy': 0.125, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9300699300699302, 'classification_report': {'no': {'precision': 0.8571428571428571, 'recall': 0.9230769230769231, 'f1-score': 0.888888888888889, 'support': 13}, 'yes': {'precision': 0.9, 'recall': 0.8181818181818182, 'f1-score': 0.8571428571428572, 'support': 11}, 'accuracy': 0.875, 'macro avg': {'precision': 0.8785714285714286, 'recall': 0.8706293706293706, 'f1-score': 0.873015873015873, 'support': 24}, 'weighted avg': {'precision': 0.8767857142857144, 'recall': 0.875, 'f1-score': 0.8743386243386245, 'support': 24}}}

Best cleaning method:
C


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6875, 'Recall': 0.8809523809523809, 'F1-score': 0.7051597051597052, 'Accuracy': 0.7916666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.114922236689815}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-sc


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': '

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5804195804195804, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.5384615384615384, 'f1-score': 0.7000000000000001, 'support': 13}, 'yes': {'precision': 0.6470588235294118, 'recall': 1.0, 'f1-score': 0.7857142857142858, 'support': 11}, 'accuracy': 0.75, 'macro avg': {'precision': 0.8235294117647058, 'recall': 0.7692307692307692, 'f1-score': 0.7428571428571429, 'support': 24}, 'weighted avg': {'precision': 0.8382352941176471, 'recall': 0.75, 'f1-score': 0.7392857142857144, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9285714285714286, 'Recall': 0.9722222222222222, 'F1-score': 0.9472527472527472, 'Accuracy': 0.9583333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.5867602991174711}
Cleaner: (PyODKNNOutlierDetection, MeanModeImput


	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.9615384615384616, 'Recall': 0.9583333333333333, 'F1-score': 0.9582608695652175, 'Accuracy': 0.9583333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.9931075032552032}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.8666666666666667, 'f1-score': 0.9285714285714286, 'support': 15}, 'yes': {'precision': 0.8181818181818182, 'recall': 1.0, 'f1-score': 0.9, 'support': 9}, 'accuracy': 0.9166666666666666, 'macro avg': {'precision': 0.9090909090909092, 'recall': 0.9333333333333333, 'f1-score': 0.9142857142857144, 'support': 24}, 'weighted avg': {'precision': 0.9318181818181818, 'recall': 0.9166666666666666, 'f1-score': 0.9178571428571428, 'support': 24}}}

Best cleaning method:


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'colum

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8913043478260869, 'Recall': 0.5833333333333334, 'F1-score': 0.5818815331010454, 'Accuracy': 0.7916666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 3.3275769947193337}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 're


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.4230769230769231, 'class

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.9565217391304348, 'Recall': 0.6666666666666666, 'F1-score': 0.7272727272727273, 'Accuracy': 0.9166666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 3.0799276620370333}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.958041958041958, 'classification_report': {'no': {'precision': 0.8333333333333334, 'recall': 0.45454545454545453, 'f1-score': 0.5882352941176471, 'support': 11}, 'yes': {'precision': 0.6666666666666666, 'recall': 0.9230769230769231, 'f1-score': 0.7741935483870968, 'support': 13}, 'accuracy': 0.7083333333333334, 'macro avg': {'precision': 0.75, 'recall': 0.6888111888111889, 'f1-score': 0.6812144212523719, 'support': 24}, 'weighted avg': {'precision': 0.7430555555555557, 'recall': 0.7083333333333334, 'f1-score': 0.6889626818


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.59375, 'Recall': 0.6904761904761905, 'F1-score': 0.5872235872235873, 'Accuracy': 0.7083333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.4616917893261643}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 row

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.375, 'Recall': 0.5, 'F1-score': 0.42857142857142855, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.9936968630618215}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9111111111111111, 'classification_report': {'no': {'precision': 0.9375, 'recall': 1.0, 'f1-score': 0.967741935483871, 'support': 15}, 'yes': {'precision': 1.0, 'recall': 0.8888888888888888, 'f1-score': 0.9411764705882353, 'support': 9}, 'accuracy': 0.9583333333333334, 'macro avg': {'precision': 0.96875, 'recall': 0.9444444444444444, 'f1-score': 0.9544592030360531, 'support': 24}, 'weighted avg': {'precision': 0.9609375, 'recall': 0.9583333333333334, 'f1-score': 0.9577798861480075, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8473684


	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'f

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7222222222222222, 'Recall': 0.7083333333333334, 'F1-score': 0.7037037037037037, 'Accuracy': 0.7083333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.0990731470982333}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 0.9230769230769231, 'recall': 1.0, 'f1-score': 0.9600000000000001, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, 'accuracy': 0.9583333333333334, 'macro avg': {'precision': 0.9615384615384616, 'recall': 0.9583333333333333, 'f1-score': 0.9582608695652175, 'support': 24}, 'weighted avg': {'precision': 0.9615384615384616, 'recall': 0.9583333333333334, 'f1-score': 0.9582608695652176, 'support': 24}}}

Best 


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7444444444444445, 'Recall': 0.8055555555555556, 'F1-score': 0.7575757575757576, 'Accuracy': 0.7916666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.464670763207703}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'rec


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows..

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6666666666666666, 'Recall': 0.8571428571428572, 'F1-score': 0.6666666666666666, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 4.060925070810225}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 0.8333333333333334, 'recall': 1.0, 'f1-score': 0.9090909090909091, 'support': 15}, 'yes': {'precision': 1.0, 'recall': 0.6666666666666666, 'f1-score': 0.8, 'support': 9}, 'accuracy': 0.875, 'macro avg': {'precision': 0.9166666666666667, 'recall': 0.8333333333333333, 'f1-score': 0.8545454545454545, 'support': 24}, 'weighted avg': {'precision': 0.8958333333333334, 'recall': 0.875, 'f1-score': 0.8681818181818182, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted tra

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted tra


	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MNAR'}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.40625, 'Recall': 0.5, 'F1-score': 0.44761904761904764, 'Accuracy': 0.8125}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9615384615384616, 'Recall': 0.9583333333333333, 'F1-score': 0.9582608695652175, 'Accuracy': 0.9583333333333334, 'Mean Squared Error': 252764997.183156}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.82421875, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.8125, 'f1-score': 0.896551724137931, 'support': 16}, 'yes': {'precision': 0.7272727272727273, 'recall': 1.0, 'f1-score': 0.8421052631578948, 'support': 8}, 'accuracy': 0.875, 'macro avg': {'precision': 0.8636363636363636, 'recall': 0.90625, 'f1-score': 0.8693284936479129, 'support': 24}, 'weighted avg': {'precision': 0.9090909090909092, 'recall': 0.875, 'f1-score': 0.8784029038112523, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Det

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianN

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted trai

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.33333333333333337, 'Recall': 0.5, 'F1-score': 0.39849624060150374, 'Accuracy': 0.6666666666666667}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9071637426900585, 'Recall': 0.827922077922078, 'F1-score': 0.8330013736263736, 'Accuracy': 0.8541666666666667, 'Mean Squared Error': 79910.10488071553}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8210227272727273, 'Recall': 0.8571428571428572, 'F1-score': 0.83597

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8020833333333335, 'Recall': 0.8392857142857144, 'F1-score': 0.8151848151848151, 'Accuracy': 0.8958333333333333}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9071637426900585, 'Recall': 0.827922077922078, 'F1-score': 0.8330013736263736, 'Accuracy': 0.8541666666666667, 'Mean Squared Error': 2.3723052300347356}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0,

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data 


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data o

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9583333333333334, 'Recall': 0.9166666666666666, 'F1-score': 0.9272727272727272, 'Accuracy': 0.9375, 'Mean Squared Error': 1.763567708333329}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 0.8571428571428571, 'recall': 1.0, 'f1-score': 0.923076923076923, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 0.8333333333333334, 'f1-score': 0.9090909090909091, 'support': 12}, 'accuracy': 0.9166666666666666, 'macro avg': {'precision': 0.9285714285714286, 'recall': 0.9166666666666667, 'f1-score': 0.916083916083916, 'support': 24}, 'weighted avg': {'precision': 0.9285714285714285, 'recall': 0.9166666666666666, 'f1-score': 0.9160839160839161, 'support': 24}}}

Outlier detection method: PyODIsolationFores


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'co

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianN

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted traini

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.07291666666666666, 'Recall': 0.5, 'F1-score': 0.12, 'Accuracy': 0.14583333333333331}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5710526315789474, 'Recall': 0.43863636363636366, 'F1-score': 0.459375, 'Accuracy': 0.5, 'Mean Squared Error': 2545306359.0323396}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5666666666666667, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.13333333333333333, 'f1-score': 0.23529411764705882, 'support': 15}, 'yes': {'precision': 0.4090909090909091, 'recall': 1.0, 'f1-score': 0.5806451612903226, 'support': 9}, 'accuracy': 0.4583333333333333, 'macro avg': {'precision': 0.7045454545454546, 'recall': 0.5666666666666667, 'f1-score': 0.4079696394686907, 'support': 24}, 'weighted avg': {'precision': 0.7784090909090909, 'recall': 0.4583333333333333, 'f1-score': 0.36480075901328274, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.5208333333333334, 'Recall': 0.625, 'F1-score': 0.5446808510638298, 'Accuracy': 0.7916666666666667}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5710526315789474, 'Recall': 0.43863636363636366, 'F1-score': 0.459375, 'Accuracy': 0.5, 'Mean Squared Error': 3.110174696180548}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9111111111111111, 'classification_report': {'no': {'precision': 0.75, 'recall': 0.8, 'f1-score': 0.7741935483870969, 'support': 15}, 'yes': {'precision': 0.625, 'recall': 0.5555555555555556, 'f1-score': 0.5882352941176471, 'support': 9}, 'accuracy': 0.7083333333333334, 'macro avg': {'precision': 0.6875, 'recall': 0.6777777777777778, 'f1-score': 0.681214421252372, 'support': 24}, 'weighted avg': {'precision': 0.703125, 'recall': 0.7083333333333334, 'f1-score': 0.7044592030360532, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.5208333333333334, 'Recall': 0.625, 'F1-score': 0.5446808510638298, 'Accuracy': 0.7916666666666667}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.5710526315789474, 'Recall': 0.43863636363636366, 'F1-score': 0.459375, 'Accuracy': 0.5, 'Mean Squared Error': 3.110174696180548}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9111111111111111, 'classification_report': {'no': {'precision': 0.75, 'recall': 0.8, 'f1-score': 0.7741935483870969, 'support': 15}, 'yes': {'precision': 0.625, 'recall': 0.5555555555555556, 'f1-score': 0.5882352941176471, 'support': 9}, 'accuracy': 0.7083333333333334, 'macro avg': {'precision': 0.6875, 'recall': 0.6777777777777778, 'f1-score': 0.681214421252372, 'support': 24}, 'weighted avg': {'precision': 0.703125, 'recall': 0.7083333333333334, 'f1-score': 0.7044592030360532, 'support': 24}}}

Best

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'co

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.27604166666666663, 'Recall': 0.625, 'F1-score': 0.2977777777777778, 'Accuracy': 0.30208333333333337}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.9318181818181819, 'Recall': 0.8125, 'F1-score': 0.8105263157894738, 'Accuracy': 0.875, 'Mean Squared Error': 5244393076.677745}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5925925925925926, 'classification_report': {'no': {'precision': 0.5, 'recall': 0.5555555555555556, 'f1-score': 0.5263157894736842, 'support': 9}, 'yes': {'precision': 0.7142857142857143, 'recall': 0.6666666666666666, 'f1-score': 0.689655172413793, 'support': 15}, 'accuracy': 0.625, 'macro avg': {'precision': 0.6071428571428572, 'recall': 0.6111111111111112, 'f1-score': 0.6079854809437386, 'support': 24}, 'weighted avg': {'precision': 0.6339285714285715, 'recall': 0.625, 'f1-score': 0.6284029038112522, 'support': 24}}}

Outlier detect


	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accu


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.7236200629340255}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8333333333333333, 'Recall': 0.9166666666666667, 'F1-score': 0.8545454545454545, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation S


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'c

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 18}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accur


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7380952380952381, 'Recall': 0.6071428571428571, 'F1-score': 0.4990512333965844, 'Accuracy': 0.5416666666666666, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Sco


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'c

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7857142857142857, 'Recall': 0.625, 'F1-score': 0.5636363636363637, 'Accuracy': 0.625, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 17}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 7}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.7857142


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted t

Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support':


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.375, 'Recall': 0.5, 'F1-score': 0.42857142857142855, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6875, 'Recall': 0.7222222222222222, 'F1-score': 0.6974789915966387, 'Accuracy': 0.75, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.375, 'Recall': 0.5, 'F1-score': 0.42857142857142855, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.6875, 'Recall': 0.7222222222222222, 'F1-score': 0.6974789915966387, 'Accuracy': 0.75, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.125, 'Recall': 0.5, 'F1-score': 0.2, 'Accuracy': 0.25}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.2222222222222222, 'Recall': 0.23776223776223776, 'F1-score': 0.22857142857142854, 'Accuracy': 0.25, 'Mean Squared Error': nan}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.125, 'Recall': 0.5, 'F1-score': 0.2, 'Accuracy': 0.25}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.2222222222222222, 'Recall': 0.23776223776223776, 'F1-score': 0.22857142857142854, 'Accuracy': 0.25, 'Mean Squared Error': nan}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9999999999999999, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, '


Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MCAR'}

Applying cleaners... 

PPP sc


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.875, 'Recall': 0.9761904761904762, 'F1-score': 0.9163763066202091, 'Accuracy': 0.9583333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.3572614655671309}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7142857142857143, 'Recall': 0.9047619047619048, 'F1-score': 0.7473684210526316, 'Accuracy': 0.833333333


	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro a

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.75, 'Recall': 0.8333333333333333, 'F1-score': 0.7333333333333334, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.5562695312500017}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.799166666666668}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8333333333333333, 'Recall': 0.75, 'F1-score': 0.7333333333333334, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precisio


Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weight

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.347618001302064}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-scor


	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.8389723036024321}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.5969455295138917}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13},


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 r

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8, 'Recall': 0.9523809523809523, 'F1-score': 0.85, 'Accuracy': 0.9166666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.099617513020834}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 13}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6666666666666666, 'Recall': 0.8571428571428572, 'F1-score': 0.6666666666666666, 'Accuracy': 0.75}
Imputation method: Mea


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 row

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8, 'Recall': 0.7222222222222222, 'F1-score': 0.7473684210526316, 'Accuracy': 0.8333333333333334}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.9723483859313498}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 15}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7444444444444445, 'Recall': 0.8055555555555556, 'F1-score': 0.7575757575757576, 'Accuracy': 0.791666666666


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.9, 'Recall': 0.875, 'F1-score': 0.873015873015873, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.0470287387010713}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.8428571428571429, 'Recall': 0.8333333333333333, 'F1-score': 0.8321678321678322, 'Accuracy': 0.8333333333333334}
Imputation method: Me


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 ro

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 2.0185989040798558}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 18}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.95, 'Recall': 0.8333333333333333, 'F1-score': 0.8736842105263158, 'Accuracy': 0.9166666666666666}
Imputation method: MeanModeImputation, Imputation Sco


Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6666666666666666, 'Recall': 0.8571428571428572, 'F1-score': 0.6666666666666666, 'Accuracy': 0.75}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 3.090675575145774}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 18}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7142857142857143, 'Recall': 0.9047619047619048, 'F1-score': 0.7473684210526316, 'Accuracy': 0.833333333333

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training d

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fracti

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.15, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.15, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}
	perturbation: GaussianNoi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8863636363636362, 'Recall': 0.9305555555555556, 'F1-score': 0.8861480075901329, 'Accuracy': 0.8958333333333333}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0, 'Mean Squared Error': 1.0813512731481476}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fractio

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.25, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted trai

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.25, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.25, 'sampling': 'MCAR'}

Generating corrupted traini

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9930069930069929, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, 'yes': {'precision': 0.9285714285714286, 'recall': 1.0, 'f1-score': 0.962962962962963, 'support': 13}, 'accuracy': 0.9583333333333334, 'macro avg': {'precision': 0.9642857142857143, 'recall': 0.9545454545454546, 'f1-score': 0.9576719576719577, 'support': 24}, 'weighted avg': {'precision': 0.9613095238095237, 'recall': 0.9583333333333334, 'f1-score': 0.9581128747795414, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8245192307692308, 'Recall': 0.8541666666666665, 'F1-score': 0.8362732919254658, 'Accuracy': 0.9166666666666667}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8342105263157894, 'Recall': 0.7860644257703082, 'F1-score': 0.8038493038493038, 'Accuracy': 0.8541666666666667, 'Me

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7702205882352942, 'Recall': 0.7708333333333335, 'F1-score': 0.7482499351827845, 'Accuracy': 0.8333333333333333}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8342105263157894, 'Recall': 0.7860644257703082, 'F1-score': 0.8038493038493038, 'Accuracy': 0.8541666666666667, 'Mean Squared Error': 2.306667073567707}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.993006993006993, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, 'yes': {'precision': 0.9285714285714286, 'recall': 1.0, 'f1-score': 0.962962962962963, 'support': 13}, 'accuracy': 0.9583333333333334, 'macro avg': {'precision': 0.9642857142857143, 'recall': 0.9545454545454546, 'f1-score': 0.9576719576719577, 'support': 24}, 'weighted avg': {'precision': 0.9613095238095237, 'recall': 0.9

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data o

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.5, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5,

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.5, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.5, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.5, 'sampling': 'MNAR'}

Generating corrupted training data on 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9453125, 'classification_report': {'no': {'precision': 0.9, 'recall': 0.5625, 'f1-score': 0.6923076923076923, 'support': 16}, 'yes': {'precision': 0.5, 'recall': 0.875, 'f1-score': 0.6363636363636364, 'support': 8}, 'accuracy': 0.6666666666666666, 'macro avg': {'precision': 0.7, 'recall': 0.71875, 'f1-score': 0.6643356643356644, 'support': 24}, 'weighted avg': {'precision': 0.7666666666666666, 'recall': 0.6666666666666666, 'f1-score': 0.6736596736596736, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 1.0, 'Recall': 1.0, 'F1-score': 1.0, 'Accuracy': 1.0}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.8695652173913043, 'Recall': 0.7692307692307692, 'F1-score': 0.6974789915966386, 'Accuracy': 0.75, 'Mean Squared Error': 3.1928466796875035}
Cleaner: (PyODKNNOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.953125, 'classifica


Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'

	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNois

	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V6', 'fraction': 0.75, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.75, 'sampling': 'MAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.75, 'sampling': 'MNAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.75, 'sampling': 'MAR'}
	perturbation: GaussianN

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.05208333333333333, 'Recall': 0.5, 'F1-score': 0.09401709401709402, 'Accuracy': 0.10416666666666666}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4421052631578948, 'Recall': 0.45378151260504196, 'F1-score': 0.4444444444444444, 'Accuracy': 0.5833333333333334, 'Mean Squared Error': 5240666653.057351}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9785714285714285, 'classification_report': {'no': {'precision': 0.9, 'recall': 0.9, 'f1-score': 0.9, 'support': 10}, 'yes': {'precision': 0.9285714285714286, 'recall': 0.9285714285714286, 'f1-score': 0.9285714285714286, 'support': 14}, 'accuracy': 0.9166666666666666, 'macro avg': {'precision': 0.9142857142857144, 'recall': 0.9142857142857144, 'f1-score': 0.9142857142857144, 'support': 24}, 'weighted avg': {'precision': 0.9166666666666666, 'recall': 0.9166666666666666, 'f1-score': 0.9166666666666666, 'support'

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V6', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.

	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MNAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data o


	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V5', 'fraction': 0.9, 'sampling': 'MNAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V3', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbation: Scaling: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}
	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.9, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.9, 'sampling': 'MCAR', 'na_value': nan}
	perturbation: CategoricalShift: {'column': 'V4', 'fraction': 0.9, 'sampling': 'MCAR'}
	perturbati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.8464285714285714, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.7142857142857143, 'f1-score': 0.8333333333333333, 'support': 14}, 'yes': {'precision': 0.7142857142857143, 'recall': 1.0, 'f1-score': 0.8333333333333333, 'support': 10}, 'accuracy': 0.8333333333333334, 'macro avg': {'precision': 0.8571428571428572, 'recall': 0.8571428571428572, 'f1-score': 0.8333333333333333, 'support': 24}, 'weighted avg': {'precision': 0.8809523809523809, 'recall': 0.8333333333333334, 'f1-score': 0.8333333333333334, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.765625, 'Recall': 0.875, 'F1-score': 0.7777777777777778, 'Accuracy': 0.78125}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': 0.4635265700483091, 'Recall': 0.3352272727272727, 'F1-score': 0.28396135265700484, 'Accuracy': 0.3541666666666667, 'Mean Squared Error': 3.33060185185

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# Display the collected per-experiment results (`ind_results`) as this cell's
# output by leaving it as the last bare expression.
#
# NOTE(review): the commented-out lines below are a disabled alternative that
# would point sys.stdout at a results file, print `ind_results` into it, and
# then restore the saved stdout. They rely on an absolute, machine-specific
# path; if this is ever re-enabled, consider `contextlib.redirect_stdout`
# inside a `with open(...)` block so the file is closed and stdout is restored
# even when printing fails.
# stdoutOrigin=sys.stdout 
# sys.stdout = open("/home/rupali/Documents/Master Thesis/jenga/out/results.txt", "w")

# print(ind_results)
ind_results

# sys.stdout.close()
# sys.stdout=stdoutOrigin

  and should_run_async(code)


[{'dataset': 'thoracic_surgery',
  'model': SGDClassifier(loss='log'),
  'corruptions': defaultdict(list,
              {('DGN',): [MissingValues: {'column': 'DGN', 'fraction': 0.15, 'sampling': 'MAR', 'na_value': nan}]}),
  'cleaners': [{'Outlier detection method': NoOutlierDetection,
    'Outlier Detection Score': {'Precision': 0.425531914893617,
     'Recall': 0.5,
     'F1-score': 0.4597701149425288,
     'Accuracy': 0.851063829787234},
    'Imputation method': MeanModeImputation,
    'Imputation Score': {'Precision': 0.990625,
     'Recall': 0.9511363636363637,
     'F1-score': 0.9687483870967742,
     'Accuracy': 0.9680851063829787,
     'Mean Squared Error': nan},
    'PPP score with cleaning': {'roc_auc_score': 0.6151898734177215,
     'classification_report': {'F': {'precision': 0.8426966292134831,
       'recall': 0.9493670886075949,
       'f1-score': 0.8928571428571428,
       'support': 79},
      'T': {'precision': 0.2,
       'recall': 0.06666666666666667,
       'f1-sco