In [1]:
import os
import pickle
import sys
import warnings

warnings.filterwarnings('ignore')

sys.path.append('/home/rupali/Documents/Master Thesis/jenga')


In [2]:
import random
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

from jenga.basis import Dataset
from jenga.corruptions.generic import MissingValues, SwappedValues, CategoricalShift
from jenga.corruptions.numerical import Scaling, GaussianNoise
from jenga.cleaning.ppp import PipelinePerformancePrediction
from jenga.cleaning.outlier_detection import NoOutlierDetection, PyODKNNOutlierDetection, PyODIsolationForestOutlierDetection, PyODPCAOutlierDetection, PyODCBLOFOutlierDetection, PyODSOSOutlierDetection, SklearnOutlierDetection
from jenga.cleaning.imputation import MeanModeImputation, SklearnImputation, DatawigImputation
from jenga.cleaning.clean import Clean

import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [20, 10]

In [3]:
## use categorical columns as strings
def cat_cols_to_str(df):
    """Convert every categorical-dtype column of ``df`` to strings, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so callers can reassign it.
    """
    # df.dtypes is evaluated once, so replacing columns inside the loop does
    # not disturb the iteration.
    for col, dtype in df.dtypes.items():
        # isinstance on the dtype replaces pd.api.types.is_categorical_dtype,
        # which is deprecated in recent pandas releases.
        if isinstance(dtype, pd.CategoricalDtype):
            df[col] = df[col].astype(str)

    return df

  and should_run_async(code)


In [4]:
def run_experiment(dataset_name, learner, param_grid, corruptions, fraction, cleaners, num_repetitions, categorical_precision_threshold=0.85, numerical_std_error_threshold=0.9):
    """Run one corrupt-then-clean experiment on a dataset and summarise the scores.

    The function loads the dataset, fits a PipelinePerformancePrediction (ppp)
    model on the training split, corrupts the test split, runs the given
    cleaners on the corrupted data and collects the resulting ppp scores.

    Parameters
    ----------
    dataset_name : str
        Name handed to ``Dataset``.
    learner : estimator or None
        Model handed to ``PipelinePerformancePrediction``. When it is None the
        'class' column is dropped from the training frame after fitting.
    param_grid : dict or None
        Hyper-parameter grid handed to ``PipelinePerformancePrediction``.
    corruptions : list
        Corruption classes handed to ``ppp.get_corrupted``.
    fraction : float
        Corruption fraction handed to ``ppp.get_corrupted``.
    cleaners : list
        (outlier detection, imputation) pairs handed to ``Clean``.
    num_repetitions : int
        Number of times ``ppp.get_corrupted`` is called (also forwarded to it).
        Must be at least 1.
    categorical_precision_threshold : float, optional
        Threshold forwarded to ``Clean``.
    numerical_std_error_threshold : float, optional
        Threshold forwarded to ``Clean``.

    Returns
    -------
    dict
        Keys 'dataset', 'model', 'corruptions', 'cleaners' and 'result';
        'result' holds the ppp scores for the model, the corrupted data, the
        best cleaner and every individual cleaner.

    Raises
    ------
    ValueError
        If ``num_repetitions`` is smaller than 1.
    """
    ## without at least one repetition no corrupted frame would exist for the cleaning step
    if num_repetitions < 1:
        raise ValueError(f"num_repetitions must be >= 1, got {num_repetitions}")

    ## dataset
    dataset = Dataset(dataset_name)

    ## categorical and numerical features
    categorical_columns = dataset.categorical_columns
    numerical_columns = dataset.numerical_columns
    print(f"Found {len(categorical_columns)} categorical and {len(numerical_columns)} numeric features \n")

    ## train and test data
    df_train, lab_train, df_test, lab_test = dataset.get_train_test_data()
    ### if we don't convert the categorical columns to str, the swapping corruption doesn't let us assign new values to the column: "Cannot setitem on a Categorical with a new category, set the categories first"
    df_train = cat_cols_to_str(df_train)
    df_test = cat_cols_to_str(df_test)

    ## pipeline performance prediction (ppp)
    ppp = PipelinePerformancePrediction(df_train, lab_train, df_test, lab_test, categorical_columns, numerical_columns, learner, param_grid)
    ppp_model = ppp.fit_ppp(df_train)
    ## the class column is added to the training data for Autogluon model fit
    if learner is None:
        df_train = df_train.loc[:, df_train.columns != 'class']

    ## generate corrupted data
    ## NOTE(review): every iteration overwrites the previous result, so only the
    ## output of the last get_corrupted call reaches the cleaning step - confirm
    ## whether the earlier repetitions were meant to be kept or aggregated
    for _ in range(num_repetitions):
        df_corrupted, perturbations, cols_perturbed, summary_col_corrupt = ppp.get_corrupted(df_test, corruptions, fraction, num_repetitions)

    ## cleaning
    clean = Clean(df_train, df_corrupted, categorical_columns, numerical_columns, categorical_precision_threshold, numerical_std_error_threshold, ppp, ppp_model, cleaners)
    ## the outlier and cleaned frames are returned by the cleaner but not used in the summary
    _df_outliers, _df_cleaned, corrupted_score_ppp, best_cleaning_score, cleaner_scores_ppp, summary_cleaners = clean(df_train, df_test, df_corrupted, cols_perturbed)

    ## results
    result = {
        'ppp_score_model': ppp.predict_score_ppp(ppp_model, df_test),
        'ppp_score_corrupted': corrupted_score_ppp,
        'ppp_score_cleaned': best_cleaning_score,
        'ppp_scores_cleaners': cleaner_scores_ppp
    }

    ## summary
    summary = {
        'dataset': dataset_name,
        'model': learner,
        'corruptions': summary_col_corrupt,
        'cleaners': summary_cleaners,
        'result': result
    }

    return summary

In [5]:
def save_obj(obj, name, results_dir='/home/rupali/Documents/Master Thesis/jenga/results/'):
    """Pickle ``obj`` to ``<results_dir>/<name>.pkl``.

    Parameters
    ----------
    obj : object
        Any picklable object.
    name : str
        File name without the '.pkl' extension.
    results_dir : str, optional
        Target directory. Defaults to the original hard-coded results folder so
        existing calls keep working; pass another path to run elsewhere.
    """
    ## create the target folder if needed so a finished experiment is not lost
    os.makedirs(results_dir, exist_ok=True)
    with open(os.path.join(results_dir, name + '.pkl'), 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name, results_dir='/home/rupali/Documents/Master Thesis/jenga/results/'):
    """Load and return the object pickled at ``<results_dir>/<name>.pkl``.

    Parameters
    ----------
    name : str
        File name without the '.pkl' extension.
    results_dir : str, optional
        Directory to read from. Defaults to the original hard-coded results
        folder so existing calls keep working.

    Returns
    -------
    object
        The unpickled object.
    """
    ## NOTE: unpickling can execute arbitrary code - only load files you trust
    with open(os.path.join(results_dir, name + '.pkl'), 'rb') as f:
        return pickle.load(f)

In [6]:
## dataset names the experiments iterate over; each one is passed to run_experiment as dataset_name
datasets = [
    'thoracic_surgery',
    'cleve',
    'acute-inflammations',
    'hill-valley'
]

In [7]:
## model parameters
## models is a dict where key = learner & value = param_grid
## the 'learner__' prefix on the grid keys presumably targets a pipeline step named 'learner' - TODO confirm
## the None: None entry runs the experiment without an explicit learner; per the comment in
## run_experiment this appears to be the Autogluon path - TODO confirm
## NOTE(review): loss='log' was renamed to 'log_loss' in newer scikit-learn releases - adjust when upgrading
models = {SGDClassifier(loss='log'): {'learner__max_iter': [500, 1000, 5000], 
                                         'learner__penalty': ['l2', 'l1', 'elasticnet'], 
                                         'learner__alpha': [0.0001, 0.001, 0.01, 0.1]
                                        },
          RandomForestClassifier():{'learner__n_estimators': [100, 200, 500], 
                                    'learner__max_depth': [5, 10, 15]
                                   },
          None:None
         }

## make dict of multiple learners and corresponding param_grids

In [8]:
## corruption fractions to sweep; each value is passed to run_experiment as `fraction`
fractions = [0.15, 0.25, 0.5, 0.75, 0.9]

In [9]:
## every (outlier detection, imputation) pairing; the outlier detector varies slowest
cleaners = [
    (detector, imputer)
    for detector in [NoOutlierDetection, PyODKNNOutlierDetection, PyODIsolationForestOutlierDetection, PyODPCAOutlierDetection, PyODCBLOFOutlierDetection, PyODSOSOutlierDetection, SklearnOutlierDetection]
    for imputer in [MeanModeImputation, SklearnImputation, DatawigImputation]
]

## Gaussian

In [10]:
## each inner list is one corruption setup, passed to run_experiment as `corruptions`
corruptions = [[GaussianNoise]]

In [11]:
## results per corruption fraction: {fraction: [summary dict returned by run_experiment, ...]}
ind_results = {}
## number of corruption repetitions requested from run_experiment
NUM_REPETITIONS = 5

for fraction in fractions:
    ind_results[fraction] = []
    for learner, param_grid in models.items():
        for corruption in corruptions:
            for dataset in datasets:
                try:
                    summary = run_experiment(dataset, learner, param_grid, corruption, fraction, cleaners, NUM_REPETITIONS)
                except ConnectionError as err:
                    ## best effort: skip this dataset and keep the sweep running
                    ## NOTE(review): only the built-in ConnectionError is caught here - confirm it matches what the download layer raises
                    print(f'Connection refused for dataset: {dataset} ({err})')
                except ValueError as err:
                    ## report which run failed and why instead of discarding the error detail
                    print(f"Something went wrong with a value :( [dataset={dataset}, learner={learner}, fraction={fraction}]: {err}")
                else:
                    ind_results[fraction].append(summary)

Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
2021-04-19 01:06:11,931 [INFO]  Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
0.2469578s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
2021-04-19 01:06:12,179 [INFO]  0.2469578s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
pickle load data thoracic_surgery
2021-04-19 01:06:12,195 [INFO]  pickle load data thoracic_surgery
NumExpr defaulting to 4 threads.
2021-04-19 01:06:12,216 [INFO]  NumExpr defaulting to 4 threads.


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    3.4s finished
  _warn_prf(average, modifier, msg_start, len(result))



Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.5017543859649123, 'classification_report': {'F': {'precision': 0.797752808988764, 'recall': 0.9466666666666667, 'f1-score': 0.8658536585365854, 'support': 75}, 'T': {'precision': 0.2, 'recall': 0.05263157894736842, 'f1-score': 0.083333333333



Classifier for col: PRE25 reached 0.97
Classifier for col: PRE30 reached 0.8133333333333332
Classifier for col: PRE32 reached 0.9866666666666666
Regressor for col: PRE4/lower reached 0.2263164087891963
Regressor for col: PRE4/median reached 0.6922809325135914
Regressor for col: PRE4/upper reached 0.4576467283411829
Regressor for col: PRE5/lower reached -0.037137180801056324
Regressor for col: PRE5/median reached -0.05062477702933377
Regressor for col: PRE5/upper reached -0.5287667963683063
Regressor for col: AGE/lower reached -0.7877976973292447
Regressor for col: AGE/median reached 0.08800932475682721


  _warn_prf(average, modifier, msg_start, len(result))
CategoricalEncoder for column DGN                                found only 37 occurrences of value DGN2
2021-04-19 01:06:26,708 [INFO]  CategoricalEncoder for column DGN                                found only 37 occurrences of value DGN2
CategoricalEncoder for column DGN                                found only 37 occurrences of value DGN4
2021-04-19 01:06:26,709 [INFO]  CategoricalEncoder for column DGN                                found only 37 occurrences of value DGN4
CategoricalEncoder for column DGN                                found only 9 occurrences of value DGN5
2021-04-19 01:06:26,711 [INFO]  CategoricalEncoder for column DGN                                found only 9 occurrences of value DGN5
CategoricalEncoder for column DGN                                found only 3 occurrences of value DGN6
2021-04-19 01:06:26,713 [INFO]  CategoricalEncoder for column DGN                                found only 3 occurre

Regressor for col: AGE/upper reached -1.1968498733376893

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.500081422710122}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.4743859649122807, 'classification_report': {'F': {'precision': 0.7931034482758621, 'recall': 0.92, 'f1-score': 0.851851851851852, 'support': 75}, 'T': {'precision': 0.14285714285714285, 'recall': 0.05263157894736842, 'f1-score': 0.07692307692307693, 'support': 19}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.46798029556650245, 'recall': 0.48631578947368426, 'f1-score': 0.4643874643874645, 'support': 94}, 'weighted avg': {'precision': 0.6616706844146316, 'recall': 0.7446808510638298, 'f1-score': 0.695217

Test set does not contain any ocurrences of values [DGN1] in column [DGN], consider using a more representative test set.

2021-04-19 01:06:27,003 [INFO]  
Already bound, ignoring bind()
Epoch[0] Batch [0-11]	Speed: 1261.71 samples/sec	cross-entropy=1.383834	DGN-accuracy=0.661458
2021-04-19 01:06:27,182 [INFO]  Epoch[0] Batch [0-11]	Speed: 1261.71 samples/sec	cross-entropy=1.383834	DGN-accuracy=0.661458
Epoch[0] Train-cross-entropy=1.103991
2021-04-19 01:06:27,347 [INFO]  Epoch[0] Train-cross-entropy=1.103991
Epoch[0] Train-DGN-accuracy=0.724432
2021-04-19 01:06:27,348 [INFO]  Epoch[0] Train-DGN-accuracy=0.724432
Epoch[0] Time cost=0.336
2021-04-19 01:06:27,350 [INFO]  Epoch[0] Time cost=0.336
Saved checkpoint to "imputer_model/model-0000.params"
2021-04-19 01:06:27,357 [INFO]  Saved checkpoint to "imputer_model/model-0000.params"
Epoch[0] Validation-cross-entropy=0.753086
2021-04-19 01:06:27,401 [INFO]  Epoch[0] Validation-cross-entropy=0.753086
Epoch[0] Validation-DGN-accuracy=0.7916

Epoch[8] Validation-cross-entropy=0.703255
2021-04-19 01:06:30,604 [INFO]  Epoch[8] Validation-cross-entropy=0.703255
Epoch[8] Validation-DGN-accuracy=0.791667
2021-04-19 01:06:30,607 [INFO]  Epoch[8] Validation-DGN-accuracy=0.791667
Epoch[9] Batch [0-11]	Speed: 997.45 samples/sec	cross-entropy=0.818952	DGN-accuracy=0.713542
2021-04-19 01:06:30,810 [INFO]  Epoch[9] Batch [0-11]	Speed: 997.45 samples/sec	cross-entropy=0.818952	DGN-accuracy=0.713542
Epoch[9] Train-cross-entropy=0.758553
2021-04-19 01:06:30,959 [INFO]  Epoch[9] Train-cross-entropy=0.758553
Epoch[9] Train-DGN-accuracy=0.750000
2021-04-19 01:06:30,961 [INFO]  Epoch[9] Train-DGN-accuracy=0.750000
Epoch[9] Time cost=0.353
2021-04-19 01:06:30,963 [INFO]  Epoch[9] Time cost=0.353
Saved checkpoint to "imputer_model/model-0009.params"
2021-04-19 01:06:30,972 [INFO]  Saved checkpoint to "imputer_model/model-0009.params"
Epoch[9] Validation-cross-entropy=0.701743
2021-04-19 01:06:31,015 [INFO]  Epoch[9] Validation-cross-entropy=0.7

Epoch[17] Validation-DGN-accuracy=0.791667
2021-04-19 01:06:34,113 [INFO]  Epoch[17] Validation-DGN-accuracy=0.791667
Epoch[18] Batch [0-11]	Speed: 996.22 samples/sec	cross-entropy=0.794910	DGN-accuracy=0.718750
2021-04-19 01:06:34,310 [INFO]  Epoch[18] Batch [0-11]	Speed: 996.22 samples/sec	cross-entropy=0.794910	DGN-accuracy=0.718750
Epoch[18] Train-cross-entropy=0.735817
2021-04-19 01:06:34,439 [INFO]  Epoch[18] Train-cross-entropy=0.735817
Epoch[18] Train-DGN-accuracy=0.755682
2021-04-19 01:06:34,440 [INFO]  Epoch[18] Train-DGN-accuracy=0.755682
Epoch[18] Time cost=0.329
2021-04-19 01:06:34,443 [INFO]  Epoch[18] Time cost=0.329
Saved checkpoint to "imputer_model/model-0018.params"
2021-04-19 01:06:34,449 [INFO]  Saved checkpoint to "imputer_model/model-0018.params"
Epoch[18] Validation-cross-entropy=0.691564
2021-04-19 01:06:34,493 [INFO]  Epoch[18] Validation-cross-entropy=0.691564
Epoch[18] Validation-DGN-accuracy=0.791667
2021-04-19 01:06:34,495 [INFO]  Epoch[18] Validation-DGN-

Epoch[27] Batch [0-11]	Speed: 1251.58 samples/sec	cross-entropy=0.779398	DGN-accuracy=0.713542
2021-04-19 01:06:37,867 [INFO]  Epoch[27] Batch [0-11]	Speed: 1251.58 samples/sec	cross-entropy=0.779398	DGN-accuracy=0.713542
Epoch[27] Train-cross-entropy=0.719565
2021-04-19 01:06:38,007 [INFO]  Epoch[27] Train-cross-entropy=0.719565
Epoch[27] Train-DGN-accuracy=0.752841
2021-04-19 01:06:38,009 [INFO]  Epoch[27] Train-DGN-accuracy=0.752841
Epoch[27] Time cost=0.300
2021-04-19 01:06:38,011 [INFO]  Epoch[27] Time cost=0.300
Saved checkpoint to "imputer_model/model-0027.params"
2021-04-19 01:06:38,021 [INFO]  Saved checkpoint to "imputer_model/model-0027.params"
Epoch[27] Validation-cross-entropy=0.684747
2021-04-19 01:06:38,061 [INFO]  Epoch[27] Validation-cross-entropy=0.684747
Epoch[27] Validation-DGN-accuracy=0.791667
2021-04-19 01:06:38,063 [INFO]  Epoch[27] Validation-DGN-accuracy=0.791667
Epoch[28] Batch [0-11]	Speed: 1218.35 samples/sec	cross-entropy=0.777866	DGN-accuracy=0.713542
202

2021-04-19 01:06:41,229 [INFO]  Epoch[36] Batch [0-11]	Speed: 1213.17 samples/sec	cross-entropy=0.766539	DGN-accuracy=0.718750
Epoch[36] Train-cross-entropy=0.706411
2021-04-19 01:06:41,363 [INFO]  Epoch[36] Train-cross-entropy=0.706411
Epoch[36] Train-DGN-accuracy=0.755682
2021-04-19 01:06:41,364 [INFO]  Epoch[36] Train-DGN-accuracy=0.755682
Epoch[36] Time cost=0.300
2021-04-19 01:06:41,366 [INFO]  Epoch[36] Time cost=0.300
Saved checkpoint to "imputer_model/model-0036.params"
2021-04-19 01:06:41,371 [INFO]  Saved checkpoint to "imputer_model/model-0036.params"
Epoch[36] Validation-cross-entropy=0.681202
2021-04-19 01:06:41,410 [INFO]  Epoch[36] Validation-cross-entropy=0.681202
Epoch[36] Validation-DGN-accuracy=0.791667
2021-04-19 01:06:41,412 [INFO]  Epoch[36] Validation-DGN-accuracy=0.791667
Epoch[37] Batch [0-11]	Speed: 1128.16 samples/sec	cross-entropy=0.765559	DGN-accuracy=0.718750
2021-04-19 01:06:41,589 [INFO]  Epoch[37] Batch [0-11]	Speed: 1128.16 samples/sec	cross-entropy=0.

Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.500081422710122}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.4743859649122807, 'classification_report': {'F': {'precision': 0.7931034482758621, 'recall': 0.92, 'f1-score': 0.851851851851852, 'support': 75}, 'T': {'precision': 0.14285714285714285, 'recall': 0.05263157894736842, 'f1-score': 0.07692307692307693, 'support': 19}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.46798029556650245, 'recall': 0.48631578947368426, 'f1-score': 0.4643874643874645, 'support': 94}, 'weighted avg': {'precision': 0.6616706844146316, 'recall': 0.7446808510638298, 'f1-score': 0.695217312238589, 'support': 94}}}

Outlier detection method: Py



Classifier for col: DGN reached 0.7333333333333334
Classifier for col: PRE6 reached 0.8333333333333333
Classifier for col: PRE7 reached 0.9266666666666666
Classifier for col: PRE8 reached 0.8433333333333333
Classifier for col: PRE9 reached 0.91
Classifier for col: PRE10 reached 0.8733333333333333
Classifier for col: PRE11 reached 0.8566666666666667
Classifier for col: PRE14 reached 0.5233333333333334
Classifier for col: PRE17 reached 0.9233333333333333
Classifier for col: PRE25 reached 0.9633333333333334
Classifier for col: PRE30 reached 0.8200000000000001
Classifier for col: PRE32 reached 0.9866666666666667
Regressor for col: PRE4/lower reached 0.36404449677456474
Regressor for col: PRE4/median reached 0.7072967697435153
Regressor for col: PRE4/upper reached 0.2954172995356528
Regressor for col: PRE5/lower reached -0.05762379563459763
Regressor for col: PRE5/median reached -0.03922787050127019
Regressor for col: PRE5/upper reached -0.5663685818034161
Regressor for col: AGE/lower reach

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6963330786860198, 'Recall': 0.7294642857142857, 'F1-score': 0.7102938154920896, 'Accuracy': 0.8404255319148937}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.1639847654554384}
Cleaner: (PyODKNNOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5508771929824561, 'classification_report': {'F': {'precision': 0.797752808988764, 'recall': 0.9466666666666667, 'f1-score': 0.8658536585365854, 'support': 75}, 'T': {'precision': 0.2, 'recall': 0.05263157894736842, 'f1-score': 0.08333333333333333, 'support': 19}, 'accuracy': 0.7659574468085106, 'macro avg': {'precision': 0.498876404494382, 'recall': 0.49964912280701756, 'f1-score': 0.47459349593



Classifier for col: DGN reached 0.6933333333333334
Classifier for col: PRE6 reached 0.8233333333333334
Classifier for col: PRE7 reached 0.9133333333333333
Classifier for col: PRE8 reached 0.8533333333333334
Classifier for col: PRE9 reached 0.9233333333333333
Classifier for col: PRE10 reached 0.8899999999999999
Classifier for col: PRE11 reached 0.8766666666666667
Classifier for col: PRE14 reached 0.4966666666666667
Classifier for col: PRE17 reached 0.9366666666666666




Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.9833333333333334
Classifier for col: PRE30 reached 0.8133333333333334
Classifier for col: PRE32 reached 0.99
Regressor for col: PRE4/lower reached 0.35636245531316146
Regressor for col: PRE4/median reached 0.6979089158097767
Regressor for col: PRE4/upper reached 0.3204889661580295
Regressor for col: PRE5/lower reached -0.034115886105380855
Regressor for col: PRE5/median reached -0.02588232302357374
Regressor for col: PRE5/upper reached -0.11694504707246045
Regressor for col: AGE/lower reached -0.865025278213488
Regressor for col: AGE/median reached 0.09698275841204662
Regressor for col: AGE/upper reached -1.0199214022772725
Imputed 34 values in column PRE4
Imputed 25 values in column PRE5
Imputed 28 values in column AGE

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.5906862745098039, 'Recall': 0.6651785714285714, 'F1-score': 0.5803571428571428, 'Accuracy':

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.623991935483871, 'Recall': 0.7196428571428571, 'F1-score': 0.6258420085731782, 'Accuracy': 0.723404255319149}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.690621910639571}
Cleaner: (PyODIsolationForestOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5449122807017543, 'classification_report': {'F': {'precision': 0.797752808988764, 'recall': 0.9466666666666667, 'f1-score': 0.8658536585365854, 'support': 75}, 'T': {'precision': 0.2, 'recall': 0.05263157894736842, 'f1-score': 0.08333333333333333, 'support': 19}, 'accuracy': 0.7659574468085106, 'macro avg': {'precision': 0.498876404494382, 'recall': 0.49964912280701756, 'f1-



Classifier for col: DGN reached 0.6966666666666667
Classifier for col: PRE6 reached 0.8566666666666667
Classifier for col: PRE7 reached 0.9233333333333333
Classifier for col: PRE8 reached 0.86
Classifier for col: PRE9 reached 0.9299999999999999
Classifier for col: PRE10 reached 0.8766666666666667
Classifier for col: PRE11 reached 0.8666666666666667
Classifier for col: PRE14 reached 0.45666666666666667
Classifier for col: PRE17 reached 0.9233333333333333




Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.9766666666666667
Classifier for col: PRE30 reached 0.8233333333333334




Classifier for col: PRE32 reached nan
Regressor for col: PRE4/lower reached 0.2626124114212312
Regressor for col: PRE4/median reached 0.6712056736963097
Regressor for col: PRE4/upper reached 0.40448529058465177
Regressor for col: PRE5/lower reached -0.04010779802539377
Regressor for col: PRE5/median reached -0.02963392898038808
Regressor for col: PRE5/upper reached -0.14765134786299894
Regressor for col: AGE/lower reached -1.0081018077786312
Regressor for col: AGE/median reached 0.07470053646621988
Regressor for col: AGE/upper reached -1.0267351138084733
Imputed 26 values in column PRE4
Imputed 21 values in column PRE5
Imputed 18 values in column AGE

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.6363122171945701, 'Recall': 0.7151785714285714, 'F1-score': 0.6506756756756757, 'Accuracy': 0.7659574468085106}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Square

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.6363122171945701, 'Recall': 0.7151785714285714, 'F1-score': 0.6506756756756757, 'Accuracy': 0.7659574468085106}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.154476366482772}
Cleaner: (PyODPCAOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5480701754385965, 'classification_report': {'F': {'precision': 0.7912087912087912, 'recall': 0.96, 'f1-score': 0.8674698795180723, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7659574468085106, 'macro avg': {'precision': 0.3956043956043956, 'recall': 0.48, 'f1-score': 0.43373493975903615, 'support': 94}, 'weighted avg': {'precision': 0.6312



Classifier for col: DGN reached 0.6966666666666667
Classifier for col: PRE6 reached 0.8333333333333333
Classifier for col: PRE7 reached 0.9333333333333333
Classifier for col: PRE8 reached 0.8500000000000001
Classifier for col: PRE9 reached 0.9366666666666666
Classifier for col: PRE10 reached 0.8933333333333333
Classifier for col: PRE11 reached 0.8833333333333333
Classifier for col: PRE14 reached 0.48
Classifier for col: PRE17 reached 0.9466666666666667




Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.97
Classifier for col: PRE30 reached 0.8433333333333333
Classifier for col: PRE32 reached 0.9866666666666666
Regressor for col: PRE4/lower reached 0.34692083771561405
Regressor for col: PRE4/median reached 0.6416844256114453
Regressor for col: PRE4/upper reached 0.3491668007051449
Regressor for col: PRE5/lower reached -0.049141314579051576
Regressor for col: PRE5/median reached -0.018532438392937833
Regressor for col: PRE5/upper reached -0.06938313361284404
Regressor for col: AGE/lower reached -0.9157556623169649
Regressor for col: AGE/median reached -0.003014052586051031
Regressor for col: AGE/upper reached -1.2735985022090675
Imputed 31 values in column PRE4
Imputed 26 values in column PRE5
Imputed 20 values in column AGE

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.605478750640041, 'Recall': 0.6839285714285714, 'F1-score': 0.6055944055944056, 'Accuracy': 0.7127

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.6003024193548387, 'Recall': 0.6776785714285715, 'F1-score': 0.5970606246172688, 'Accuracy': 0.7021276595744681}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.46637987378185114}
Cleaner: (PyODCBLOFOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5459649122807017, 'classification_report': {'F': {'precision': 0.7954545454545454, 'recall': 0.9333333333333333, 'f1-score': 0.8588957055214724, 'support': 75}, 'T': {'precision': 0.16666666666666666, 'recall': 0.05263157894736842, 'f1-score': 0.08, 'support': 19}, 'accuracy': 0.7553191489361702, 'macro avg': {'precision': 0.481060606060606, 'recall': 0.49298245614035086, 'f1-score': 0.4694

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.500081422710122}
Cleaner: (PyODSOSOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.4743859649122807, 'classification_report': {'F': {'precision': 0.7931034482758621, 'recall': 0.92, 'f1-score': 0.851851851851852, 'support': 75}, 'T': {'precision': 0.14285714285714285, 'recall': 0.05263157894736842, 'f1-score': 0.07692307692307693, 'support': 19}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.46798029556650245, 'recall': 0.48631578947368426, 'f1-score': 0.4643874643874645, 'support': 94}, 'weighted avg': {'precision': 0.6616706844146316, 'recall': 0.7446808510638298, 'f1-score': 0.695217312238589, 'support': 94}}}




Classifier for col: DGN reached 0.69
Classifier for col: PRE6 reached 0.8400000000000001
Classifier for col: PRE7 reached 0.9366666666666666
Classifier for col: PRE8 reached 0.8500000000000001
Classifier for col: PRE9 reached 0.9266666666666666
Classifier for col: PRE10 reached 0.87
Classifier for col: PRE11 reached 0.85
Classifier for col: PRE14 reached 0.5133333333333333
Classifier for col: PRE17 reached 0.9266666666666666
Classifier for col: PRE25 reached 0.9733333333333334
Classifier for col: PRE30 reached 0.8233333333333334
Classifier for col: PRE32 reached 0.9866666666666666
Regressor for col: PRE4/lower reached 0.1122282088475759
Regressor for col: PRE4/median reached 0.6717753028702672
Regressor for col: PRE4/upper reached 0.3501247284415564
Regressor for col: PRE5/lower reached -0.03467624366764177
Regressor for col: PRE5/median reached 0.005600028044360269
Regressor for col: PRE5/upper reached -0.4894808975176565
Regressor for col: AGE/lower reached -1.017150473778933
Regress

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.500081422710122}
Cleaner: (PyODSOSOutlierDetection, DatawigImputation): {'roc_auc_score': 0.4743859649122807, 'classification_report': {'F': {'precision': 0.7931034482758621, 'recall': 0.92, 'f1-score': 0.851851851851852, 'support': 75}, 'T': {'precision': 0.14285714285714285, 'recall': 0.05263157894736842, 'f1-score': 0.07692307692307693, 'support': 19}, 'accuracy': 0.7446808510638298, 'macro avg': {'precision': 0.46798029556650245, 'recall': 0.48631578947368426, 'f1-score': 0.4643874643874645, 'support': 94}, 'weighted avg': {'precision': 0.6616706844146316, 'recall': 0.7446808510638298, 'f1-score': 0.695217312238589, 'support': 94}}}
Classifier for col:



Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.9733333333333334
Classifier for col: PRE30 reached 0.8066666666666666
Classifier for col: PRE32 reached 0.99
Regressor for col: PRE4/lower reached 0.19508606479420004
Regressor for col: PRE4/median reached 0.6773632037914089
Regressor for col: PRE4/upper reached 0.38227744668897046
Regressor for col: PRE5/lower reached -0.04608510921526865
Regressor for col: PRE5/median reached 0.046266984933385824
Regressor for col: PRE5/upper reached -1.6546393265107113
Regressor for col: AGE/lower reached -0.9110485498142515
Regressor for col: AGE/median reached 0.0863367060574447
Regressor for col: AGE/upper reached -1.294028839047094
Column DGN contained 0 nans before, now 0
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 0
Column PRE11 contained 0 nans before, 



Classifier for col: DGN reached 0.7033333333333334
Classifier for col: PRE6 reached 0.8266666666666667
Classifier for col: PRE7 reached 0.94
Classifier for col: PRE8 reached 0.8366666666666667
Classifier for col: PRE9 reached 0.9099999999999999
Classifier for col: PRE10 reached 0.8766666666666667
Classifier for col: PRE11 reached 0.86
Classifier for col: PRE14 reached 0.53
Classifier for col: PRE17 reached 0.9166666666666667
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.9633333333333334
Classifier for col: PRE30 reached 0.8133333333333334
Classifier for col: PRE32 reached 0.9933333333333333
Regressor for col: PRE4/lower reached 0.3196343865556816
Regressor for col: PRE4/median reached 0.7568720980356626
Regressor for col: PRE4/upper reached 0.4451400187979921
Regressor for col: PRE5/lower reached -0.03395516075538452
Regressor for col: PRE5/median reached -0.0015158808700960766
Regressor for col: PRE5/upper reached -0.26697371558886085
Regressor for col: AGE/lower reached -0.9174506689340304
Regressor for col: AGE/median reached 0.044122662726241935
Regressor for col: AGE/upper reached -1.0749276705204374
Column DGN contained 0 nans before, now 30
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 0
Column PRE11 contained 0 nans before, now 0
Column PRE14 



Classifier for col: DGN reached 0.69
Classifier for col: PRE6 reached 0.8433333333333334
Classifier for col: PRE7 reached 0.9366666666666666
Classifier for col: PRE8 reached 0.85
Classifier for col: PRE9 reached 0.9299999999999999
Classifier for col: PRE10 reached 0.88
Classifier for col: PRE11 reached 0.8766666666666667
Classifier for col: PRE14 reached 0.5266666666666667
Classifier for col: PRE17 reached 0.9133333333333333
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.9666666666666667
Classifier for col: PRE30 reached 0.8233333333333333
Classifier for col: PRE32 reached 0.9833333333333334
Regressor for col: PRE4/lower reached 0.3726222252621777
Regressor for col: PRE4/median reached 0.712262161491636
Regressor for col: PRE4/upper reached 0.4597069478091055
Regressor for col: PRE5/lower reached -0.02996566178959592
Regressor for col: PRE5/median reached 0.005097003739231221
Regressor for col: PRE5/upper reached -0.24893093884850226
Regressor for col: AGE/lower reached -0.7661043045620678
Regressor for col: AGE/median reached 0.10680544559064747
Regressor for col: AGE/upper reached -1.3301070190628008
Column DGN contained 0 nans before, now 31
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 0
Column PRE11 contained 0 nans before, now 0
Column PRE14 cont

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.6719348659003831, 'Recall': 0.8205357142857144, 'F1-score': 0.6730434782608696, 'Accuracy': 0.7446808510638298}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.4728377023591101}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5312280701754386, 'classification_report': {'F': {'precision': 0.7934782608695652, 'recall': 0.9733333333333334, 'f1-score': 0.874251497005988, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.776595744680851, 'macro avg': {'precision': 0.3967391304347826, 'recall': 0.4866666666666667, 'f1-score': 0.437125748502994, 'support': 94}, 'weighted a



Classifier for col: DGN reached 0.7166666666666667
Classifier for col: PRE6 reached 0.8733333333333333
Classifier for col: PRE7 reached 0.9266666666666666
Classifier for col: PRE8 reached 0.8433333333333334
Classifier for col: PRE9 reached 0.9166666666666667
Classifier for col: PRE10 reached 0.8866666666666667
Classifier for col: PRE11 reached 0.8666666666666667
Classifier for col: PRE14 reached 0.53
Classifier for col: PRE17 reached 0.9166666666666666
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.9633333333333334
Classifier for col: PRE30 reached 0.8133333333333334
Classifier for col: PRE32 reached 0.9833333333333334
Regressor for col: PRE4/lower reached 0.13798996636858285
Regressor for col: PRE4/median reached 0.6401993077014713
Regressor for col: PRE4/upper reached 0.24917137685426766
Regressor for col: PRE5/lower reached -0.037473431357004205
Regressor for col: PRE5/median reached -0.01636503307666271
Regressor for col: PRE5/upper reached -0.13247566261810007
Regressor for col: AGE/lower reached -0.9310641034357994
Regressor for col: AGE/median reached 0.12328087666075904
Regressor for col: AGE/upper reached -1.297038926598134
Column DGN contained 0 nans before, now 30
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 6
Column PRE10 contained 0 nans before, now 16
Column PRE11 contained 0 nans before, now 0
Column PRE14 

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/cleve


Column PRE4 contained 0 nans before, now 41
Column PRE5 contained 0 nans before, now 46
Column AGE contained 0 nans before, now 30

Best cleaning method:
Cleaning score: Cleaner: (SklearnOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5803508771929824, 'classification_report': {'F': {'precision': 0.7912087912087912, 'recall': 0.96, 'f1-score': 0.8674698795180723, 'support': 75}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19}, 'accuracy': 0.7659574468085106, 'macro avg': {'precision': 0.3956043956043956, 'recall': 0.48, 'f1-score': 0.43373493975903615, 'support': 94}, 'weighted avg': {'precision': 0.6312836100070142, 'recall': 0.7659574468085106, 'f1-score': 0.6921302230197385, 'support': 94}}} 

Cleaning improved the overall score 





INFO:root:0.2198110s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/cleve
INFO:openml.datasets.dataset:pickle load data cleve
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: cleve
Found 8 categorical and 5 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done 151 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    1.5s finished



Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Oldpeak', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.9494505494505494, 'classification_report': {'0': {'precision': 0.85, 'recall': 0.9714285714285714, 'f1-score': 0.9066666666666667, 'support': 35}, '1': {'precision': 0.9523809523809523, 'recall': 0.7692307692307693, 'f

  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: Sex reached 0.6993664089347079
Classifier for col: Chest_pain_type reached 0.47132731958762886
Classifier for col: Fasting_blood_sugar_&lt;_120 reached 0.782323883161512
Classifier for col: Resting_ecg reached 0.5905283505154639
Classifier for col: Exercise_induced_angina reached 0.7150987972508591
Classifier for col: Slope reached 0.689110824742268
Classifier for col: Number_of_vessels_colored reached 0.5182023195876289




Classifier for col: Thal reached 0.6319265463917525
Regressor for col: Age/lower reached -0.6919881419617806
Regressor for col: Age/median reached 0.2105392503575021
Regressor for col: Age/upper reached -0.6632035330238587
Regressor for col: Trestbps/lower reached -0.8604612585131177
Regressor for col: Trestbps/median reached 0.027459430267686158
Regressor for col: Trestbps/upper reached -1.0316822878249794
Regressor for col: Cholesterol/lower reached -0.8083001878405554
Regressor for col: Cholesterol/median reached -0.05497128024200309
Regressor for col: Cholesterol/upper reached -0.843409962804786
Regressor for col: Max_heart_rate/lower reached -0.6934644470788621
Regressor for col: Max_heart_rate/median reached 0.32358543297614994
Regressor for col: Max_heart_rate/upper reached -0.5522290852026541
Regressor for col: Oldpeak/lower reached -0.5679134688525362
Regressor for col: Oldpeak/median reached 0.279495105173393


  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:CategoricalEncoder for column Sex                                found only 68 occurrences of value 0


Regressor for col: Oldpeak/upper reached -0.39590030623716

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.704832001944271}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.9186813186813186, 'classification_report': {'0': {'precision': 0.8461538461538461, 'recall': 0.9428571428571428, 'f1-score': 0.8918918918918919, 'support': 35}, '1': {'precision': 0.9090909090909091, 'recall': 0.7692307692307693, 'f1-score': 0.8333333333333333, 'support': 26}, 'accuracy': 0.8688524590163934, 'macro avg': {'precision': 0.8776223776223776, 'recall': 0.8560439560439561, 'f1-score': 0.8626126126126126, 'support': 61}, 'weighted avg': {'precision': 0.8729794795368566, 'recall': 0.8688524590163934, 'f1-sc

INFO:root:
INFO:root:Epoch[0] Batch [0-7]	Speed: 1356.89 samples/sec	cross-entropy=0.794793	Sex-accuracy=0.585938
INFO:root:Epoch[0] Train-cross-entropy=0.705966
INFO:root:Epoch[0] Train-Sex-accuracy=0.638393
INFO:root:Epoch[0] Time cost=0.175
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.879042
INFO:root:Epoch[0] Validation-Sex-accuracy=0.437500
INFO:root:Epoch[1] Batch [0-7]	Speed: 1287.13 samples/sec	cross-entropy=0.584350	Sex-accuracy=0.710938
INFO:root:Epoch[1] Train-cross-entropy=0.548104
INFO:root:Epoch[1] Train-Sex-accuracy=0.736607
INFO:root:Epoch[1] Time cost=0.187
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=0.860429
INFO:root:Epoch[1] Validation-Sex-accuracy=0.406250
INFO:root:Epoch[2] Batch [0-7]	Speed: 1290.54 samples/sec	cross-entropy=0.555162	Sex-accuracy=0.718750
INFO:root:Epoch[2] Train-cross-entropy=0.511151
INFO:root:Epoch[2] Train-Sex-accur

Fitting model for column: Chest_pain_type
Something went wrong with a value :(
Dataset: acute-inflammations
Found 5 categorical and 1 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    1.3s finished



Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score

  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: V2 reached 1.0
Classifier for col: V3 reached 0.9736842105263157
Classifier for col: V4 reached 0.8947368421052632
Classifier for col: V5 reached 0.8947368421052632
Classifier for col: V6 reached 0.8026315789473684
Regressor for col: V1/lower reached 0.2299526421028555
Regressor for col: V1/median reached 0.7535389798423601
Regressor for col: V1/upper reached 0.051070530986431883

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.360779915420274}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro av

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V1


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.360779915420274}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.875, 'Recall': 0.9761904761904762, 'F1-score': 0.9163763066202091, 'Accuracy': 0.9583333333333334}
Imputation method: MeanModeImputation, Imputation Score:

Classifier for col: V5 reached 0.9342105263157895
Classifier for col: V6 reached 0.8421052631578947
Regressor for col: V1/lower reached 0.4358042747547527
Regressor for col: V1/median reached 0.7862197391019679
Regressor for col: V1/upper reached -0.17652457186615
Imputed 9 values in column V1

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.5777777777777777, 'Recall': 0.6666666666666666, 'F1-score': 0.5555555555555556, 'Accuracy': 0.6666666666666666}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.3757774292904525}
Cleaner: (PyODPCAOutlierDetection, SklearnImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 's

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.360779915420274}
Cleaner: (PyODSOSOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
Classifier for col: V2 reached 1.0
Classifier for col: V3 reached 0.9605263157894737
Classifier for col: V4 reached 0.9342105263157894
Classifier for col: V5 reached 0.881578947368421
Classifier for col: V6 reached 0.8421052631578947
Re

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V1


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.360779915420274}
Cleaner: (PyODSOSOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
Classifier for col: V2 reached 1.0
Classifier for col: V3 reached 1.0
Classifier for col: V4 reached 0.8947368421052632
Classifier for col: V5 reached 0.8947368421052632
Classifier for col: V6 reached 0.8421052631578947
Regressor for col:

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/hill-valley



Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.6666666666666666, 'Recall': 0.8571428571428572, 'F1-score': 0.6666666666666666, 'Accuracy': 0.75}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.843980397384276}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Best cleaning method:
Cleaning score: Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 

INFO:root:0.1865757s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/hill-valley
INFO:openml.datasets.dataset:pickle load data hill-valley
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: hill-valley
Found 0 categorical and 100 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.6s finished



Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V88', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V67', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V8', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V43', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V99', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.9583050847457627, 'classification_report': {'0': {'precision': 0.6212121212121212, 'recall': 0.984, 'f1-score': 0.7616099071207432, 'support': 125}, '1': {'precision': 0.9555555555555556, 'recall': 0.3644067796610169, 'f1-score': 0.52760736

  _warn_prf(average, modifier, msg_start, len(result))


Regressor for col: V1/lower reached 0.03297172507535345
Regressor for col: V1/median reached 0.9769592927630304
Regressor for col: V1/upper reached 0.9812322619335373
Regressor for col: V2/lower reached 0.04362283678975065
Regressor for col: V2/median reached 0.9786829457051989
Regressor for col: V2/upper reached 0.9899158599961799
Regressor for col: V3/lower reached 0.036674005805926335
Regressor for col: V3/median reached 0.9756808386258027
Regressor for col: V3/upper reached 0.9820886324258085
Regressor for col: V4/lower reached 0.03371904452531871
Regressor for col: V4/median reached 0.9793307987469455
Regressor for col: V4/upper reached 0.9874465850550993
Regressor for col: V5/lower reached 0.04369709698127133
Regressor for col: V5/median reached 0.9755840269733884
Regressor for col: V5/upper reached 0.9884230003944083
Regressor for col: V6/lower reached 0.03903309554819179
Regressor for col: V6/median reached 0.9812157224901796
Regressor for col: V6/upper reached 0.98598321492555

Regressor for col: V49/upper reached 0.9867775217718437
Regressor for col: V50/lower reached 0.042537868846728444
Regressor for col: V50/median reached 0.9765324599536915
Regressor for col: V50/upper reached 0.983716807255832
Regressor for col: V51/lower reached 0.039256748138865205
Regressor for col: V51/median reached 0.9701316397107773
Regressor for col: V51/upper reached 0.9697073959943976
Regressor for col: V52/lower reached 0.037684900213718564
Regressor for col: V52/median reached 0.9733393788602345
Regressor for col: V52/upper reached 0.9806227103759133
Regressor for col: V53/lower reached 0.03340409767317176
Regressor for col: V53/median reached 0.9757867938188607
Regressor for col: V53/upper reached 0.9828163730800412
Regressor for col: V54/lower reached 0.02679473052956127
Regressor for col: V54/median reached 0.9789214350139446
Regressor for col: V54/upper reached 0.9901171218278579
Regressor for col: V55/lower reached 0.035779591292821544
Regressor for col: V55/median reac

Regressor for col: V98/lower reached 0.03563544268216301
Regressor for col: V98/median reached 0.9736980115594205
Regressor for col: V98/upper reached 0.9847013137239184
Regressor for col: V99/lower reached 0.056949983872916665
Regressor for col: V99/median reached 0.980583132437636
Regressor for col: V99/upper reached 0.9883016174883872
Regressor for col: V100/lower reached 0.03713493337929963
Regressor for col: V100/median reached 0.9799624688118749
Regressor for col: V100/upper reached 0.978891554190108

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.42592592592592593, 'Recall': 0.5, 'F1-score': 0.46, 'Accuracy': 0.8518518518518519}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1107382942.7264836}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.8381016949152542, 'classification_report': {'0': {'precision': 0.601063829787234

  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:
INFO:root:Epoch[0] Batch [0-28]	Speed: 10370.54 samples/sec	cross-entropy=7.442974	V1-accuracy=0.000000
INFO:root:Epoch[0] Train-cross-entropy=4.898033
INFO:root:Epoch[0] Train-V1-accuracy=0.000000
INFO:root:Epoch[0] Time cost=0.080
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.043004
INFO:root:Epoch[0] Validation-V1-accuracy=0.000000
INFO:root:Epoch[1] Batch [0-28]	Speed: 13844.93 samples/sec	cross-entropy=0.081936	V1-accuracy=0.000000
INFO:root:Epoch[1] Train-cross-entropy=0.132945
INFO:root:Epoch[1] Train-V1-accuracy=0.000000
INFO:root:Epoch[1] Time cost=0.071
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=0.024351
INFO:root:Epoch[1] Validation-V1-accuracy=0.000000
INFO:root:Epoch[2] Batch [0-28]	Speed: 11902.43 samples/sec	cross-entropy=0.063788	V1-accuracy=0.000000
INFO:root:Epoch[2] Train-cro

Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for column: V34
Fitting model for 

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.42592592592592593, 'Recall': 0.5, 'F1-score': 0.46, 'Accuracy': 0.8518518518518519}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1107382942.7264836}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.8381016949152542, 'classification_report': {'0': {'precision': 0.601063829787234, 'recall': 0.904, 'f1-score': 0.7220447284345047, 'support': 125}, '1': {'precision': 0.7818181818181819, 'recall': 0.3644067796610169, 'f1-score': 0.49710982658959535, 'support': 118}, 'accuracy': 0.6419753086419753, 'macro avg': {'precision': 0.691441005802708, 'recall': 0.6342033898305085, 'f1-score': 0.60957727751205, 'support': 243}, 'weighted avg': {'precision': 0.6888375480574063, 'recall': 0.6419753086419753, 'f1-score': 0.6128170806250425, 'support': 243}}}

Outlier detection method: PyODKNNOutlie

Regressor for col: V37/upper reached 0.9837841439022723
Regressor for col: V38/lower reached 0.0112797695461756
Regressor for col: V38/median reached 0.9779787258124224
Regressor for col: V38/upper reached 0.9906184137097329
Regressor for col: V39/lower reached 0.0104195049894652
Regressor for col: V39/median reached 0.9783447130357444
Regressor for col: V39/upper reached 0.9898139710334114
Regressor for col: V40/lower reached 0.013423741384742505
Regressor for col: V40/median reached 0.978152400536983
Regressor for col: V40/upper reached 0.9876277505597937
Regressor for col: V41/lower reached 0.01132679381121815
Regressor for col: V41/median reached 0.9804858038037623
Regressor for col: V41/upper reached 0.9883585397906929
Regressor for col: V42/lower reached 0.009649107571489013
Regressor for col: V42/median reached 0.9716055119581681
Regressor for col: V42/upper reached 0.9824451715740041
Regressor for col: V43/lower reached 0.01456916936820829
Regressor for col: V43/median reached 

Regressor for col: V85/upper reached 0.9876443279653525
Regressor for col: V86/lower reached 0.01389995595387783
Regressor for col: V86/median reached 0.9839609058029533
Regressor for col: V86/upper reached 0.9828585545784543
Regressor for col: V87/lower reached 0.010692387081056243
Regressor for col: V87/median reached 0.9831831118554928
Regressor for col: V87/upper reached 0.985072911480971
Regressor for col: V88/lower reached 0.014561644360746584
Regressor for col: V88/median reached 0.9791484265433693
Regressor for col: V88/upper reached 0.9832937547705589
Regressor for col: V89/lower reached 0.003662192888362392
Regressor for col: V89/median reached 0.9858602946097066
Regressor for col: V89/upper reached 0.9850990192393914
Regressor for col: V90/lower reached 0.013168143916643615
Regressor for col: V90/median reached 0.9850283020070988
Regressor for col: V90/upper reached 0.9871277567048742
Regressor for col: V91/lower reached -0.0017166206014246899
Regressor for col: V91/median r

Fitting model for column: V48
Fitting model for column: V49
Fitting model for column: V50
Fitting model for column: V51
Fitting model for column: V52
Fitting model for column: V53
Fitting model for column: V54
Fitting model for column: V55
Fitting model for column: V56
Fitting model for column: V57
Fitting model for column: V58
Fitting model for column: V59
Fitting model for column: V60
Fitting model for column: V61
Fitting model for column: V62
Fitting model for column: V63
Fitting model for column: V64
Fitting model for column: V65
Fitting model for column: V66
Fitting model for column: V67
Fitting model for column: V68
Fitting model for column: V69
Fitting model for column: V70
Fitting model for column: V71
Fitting model for column: V72
Fitting model for column: V73
Fitting model for column: V74
Fitting model for column: V75
Fitting model for column: V76
Fitting model for column: V77
Fitting model for column: V78
Fitting model for column: V79
Fitting model for column: V80
Fitting mo

Regressor for col: V28/median reached 0.9762545315169815
Regressor for col: V28/upper reached 0.9885526280475933
Regressor for col: V29/lower reached 0.03296908362617712
Regressor for col: V29/median reached 0.9739589223771091
Regressor for col: V29/upper reached 0.9877711173996042
Regressor for col: V30/lower reached 0.04270826608174433
Regressor for col: V30/median reached 0.9842760102832876
Regressor for col: V30/upper reached 0.9901489542058495
Regressor for col: V31/lower reached 0.03601015953890924
Regressor for col: V31/median reached 0.9789327759570894
Regressor for col: V31/upper reached 0.9887106590376922
Regressor for col: V32/lower reached 0.04235630503723242
Regressor for col: V32/median reached 0.9813147074155485
Regressor for col: V32/upper reached 0.984919757788326
Regressor for col: V33/lower reached 0.03736881308012058
Regressor for col: V33/median reached 0.9787285556038166
Regressor for col: V33/upper reached 0.9908186187843837
Regressor for col: V34/lower reached 0

Regressor for col: V76/upper reached 0.9859608636833308
Regressor for col: V77/lower reached 0.04397382059497945
Regressor for col: V77/median reached 0.98212274110384
Regressor for col: V77/upper reached 0.9883382227208237
Regressor for col: V78/lower reached 0.0625811805385359
Regressor for col: V78/median reached 0.978025774542542
Regressor for col: V78/upper reached 0.9891698456697404
Regressor for col: V79/lower reached 0.04336708369941428
Regressor for col: V79/median reached 0.9860950895042823
Regressor for col: V79/upper reached 0.9894578188959539
Regressor for col: V80/lower reached 0.03987871137339705
Regressor for col: V80/median reached 0.9768556712074069
Regressor for col: V80/upper reached 0.9877536732062608
Regressor for col: V81/lower reached 0.04229028888074127
Regressor for col: V81/median reached 0.9849412833785786
Regressor for col: V81/upper reached 0.990230680797559
Regressor for col: V82/lower reached 0.050920351517316775
Regressor for col: V82/median reached 0.9

Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for c

Regressor for col: V20/lower reached 0.007059355256131827
Regressor for col: V20/median reached 0.9807034411093578
Regressor for col: V20/upper reached 0.9864322388241661
Regressor for col: V21/lower reached -0.000153338883386811
Regressor for col: V21/median reached 0.974424926018443
Regressor for col: V21/upper reached 0.9841102669618564
Regressor for col: V22/lower reached 0.015962365151361624
Regressor for col: V22/median reached 0.9800418474702803
Regressor for col: V22/upper reached 0.9865141719206705
Regressor for col: V23/lower reached 0.0061091983500180524
Regressor for col: V23/median reached 0.9756667436795898
Regressor for col: V23/upper reached 0.984575199598325
Regressor for col: V24/lower reached 0.0010293450477662502
Regressor for col: V24/median reached 0.9735639394455105
Regressor for col: V24/upper reached 0.9835204869604062
Regressor for col: V25/lower reached 0.004993989640292244
Regressor for col: V25/median reached 0.9763794800616323
Regressor for col: V25/upper 

Regressor for col: V68/lower reached 0.014486560199171494
Regressor for col: V68/median reached 0.9845628774608519
Regressor for col: V68/upper reached 0.9904868334840282
Regressor for col: V69/lower reached 0.012772381976970282
Regressor for col: V69/median reached 0.9878802290815094
Regressor for col: V69/upper reached 0.9893358400403198
Regressor for col: V70/lower reached 0.027962612946105747
Regressor for col: V70/median reached 0.9742951534071194
Regressor for col: V70/upper reached 0.9875039833232749
Regressor for col: V71/lower reached 0.01499519334324545
Regressor for col: V71/median reached 0.9822771956416636
Regressor for col: V71/upper reached 0.9884691501062853
Regressor for col: V72/lower reached 0.009924203704070156
Regressor for col: V72/median reached 0.9755290594453163
Regressor for col: V72/upper reached 0.9848797939661161
Regressor for col: V73/lower reached 0.018538606483459952
Regressor for col: V73/median reached 0.9684605846884392
Regressor for col: V73/upper re

Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for c

Regressor for col: V20/lower reached -0.010404967171264545
Regressor for col: V20/median reached 0.9710527390174846
Regressor for col: V20/upper reached 0.9857716269680711
Regressor for col: V21/lower reached 0.01865443890806856
Regressor for col: V21/median reached 0.9768188018012864
Regressor for col: V21/upper reached 0.9870196438904588
Regressor for col: V22/lower reached 0.03383087965710485
Regressor for col: V22/median reached 0.9759094480566879
Regressor for col: V22/upper reached 0.9858392277170562
Regressor for col: V23/lower reached 0.027000627299653246
Regressor for col: V23/median reached 0.9756943595149203
Regressor for col: V23/upper reached 0.9880162416554779
Regressor for col: V24/lower reached 0.022627906013760635
Regressor for col: V24/median reached 0.9722575160291795
Regressor for col: V24/upper reached 0.9878868271925476
Regressor for col: V25/lower reached 0.028757487058197972
Regressor for col: V25/median reached 0.9756578982237599
Regressor for col: V25/upper re

Regressor for col: V68/median reached 0.9791874900795631
Regressor for col: V68/upper reached 0.9897608809736183
Regressor for col: V69/lower reached 0.015047339568439566
Regressor for col: V69/median reached 0.9752626443693071
Regressor for col: V69/upper reached 0.9867274450690822
Regressor for col: V70/lower reached 0.022276937350730545
Regressor for col: V70/median reached 0.9739592630062281
Regressor for col: V70/upper reached 0.9905667461785939
Regressor for col: V71/lower reached 0.020575415465348468
Regressor for col: V71/median reached 0.9762584708520193
Regressor for col: V71/upper reached 0.9861645667618804
Regressor for col: V72/lower reached 0.009362308983304313
Regressor for col: V72/median reached 0.9724837879315398
Regressor for col: V72/upper reached 0.9867919498128149
Regressor for col: V73/lower reached 0.02466384969450313
Regressor for col: V73/median reached 0.9716780723419716
Regressor for col: V73/upper reached 0.985352080996668
Regressor for col: V74/lower reach

Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for c

Regressor for col: V20/lower reached 0.013065613972701173
Regressor for col: V20/median reached 0.9642683024601001
Regressor for col: V20/upper reached 0.9749255773401
Regressor for col: V21/lower reached 0.011144466623849433
Regressor for col: V21/median reached 0.9620330487671165
Regressor for col: V21/upper reached 0.9738282524339597
Regressor for col: V22/lower reached 0.01798304065477213
Regressor for col: V22/median reached 0.9647636739038576
Regressor for col: V22/upper reached 0.9775978412695734
Regressor for col: V23/lower reached 0.017879494722182587
Regressor for col: V23/median reached 0.9681411383567378
Regressor for col: V23/upper reached 0.9725245287314039
Regressor for col: V24/lower reached 0.01784149919250072
Regressor for col: V24/median reached 0.9668180553434881
Regressor for col: V24/upper reached 0.9698789131359586
Regressor for col: V25/lower reached 0.03195781788445262
Regressor for col: V25/median reached 0.9672024442322575
Regressor for col: V25/upper reached

Regressor for col: V68/median reached 0.9639147858669918
Regressor for col: V68/upper reached 0.9681696300566537
Regressor for col: V69/lower reached 0.015353332638616102
Regressor for col: V69/median reached 0.9654939782066679
Regressor for col: V69/upper reached 0.9784314738702917
Regressor for col: V70/lower reached 0.04015385035270064
Regressor for col: V70/median reached 0.9558541783639339
Regressor for col: V70/upper reached 0.958312932056661
Regressor for col: V71/lower reached 0.020944798833844147
Regressor for col: V71/median reached 0.9669895124250368
Regressor for col: V71/upper reached 0.9779191241154708
Regressor for col: V72/lower reached 0.03622196789462966
Regressor for col: V72/median reached 0.9579088494779426
Regressor for col: V72/upper reached 0.9630622138865252
Regressor for col: V73/lower reached 0.027217448085919693
Regressor for col: V73/median reached 0.9665608880807418
Regressor for col: V73/upper reached 0.9689097377098933
Regressor for col: V74/lower reache

Cleaner: (PyODSOSOutlierDetection, SklearnImputation): {'roc_auc_score': 0.6175593220338983, 'classification_report': {'0': {'precision': 0.5647668393782384, 'recall': 0.872, 'f1-score': 0.6855345911949685, 'support': 125}, '1': {'precision': 0.68, 'recall': 0.288135593220339, 'f1-score': 0.40476190476190477, 'support': 118}, 'accuracy': 0.588477366255144, 'macro avg': {'precision': 0.6223834196891191, 'recall': 0.5800677966101695, 'f1-score': 0.5451482479784366, 'support': 243}, 'weighted avg': {'precision': 0.6207236828077358, 'recall': 0.588477366255144, 'f1-score': 0.5491922990175959, 'support': 243}}}
Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitti

Regressor for col: V22/median reached 0.9732319409227104
Regressor for col: V22/upper reached 0.9877343702689911
Regressor for col: V23/lower reached 0.03293331948465417
Regressor for col: V23/median reached 0.974595846378066
Regressor for col: V23/upper reached 0.9822651621402643
Regressor for col: V24/lower reached 0.026668930665420032
Regressor for col: V24/median reached 0.9699005844638218
Regressor for col: V24/upper reached 0.9806416225978429
Regressor for col: V25/lower reached 0.03203128978680825
Regressor for col: V25/median reached 0.9740390025920916
Regressor for col: V25/upper reached 0.9815286273227282
Regressor for col: V26/lower reached 0.032049475787923165
Regressor for col: V26/median reached 0.9752576442646734
Regressor for col: V26/upper reached 0.9852693942766759
Regressor for col: V27/lower reached 0.017033453031353896
Regressor for col: V27/median reached 0.9733940394844648
Regressor for col: V27/upper reached 0.9834338867887724
Regressor for col: V28/lower reache

Regressor for col: V70/upper reached 0.9854646648787057
Regressor for col: V71/lower reached 0.03026533011709065
Regressor for col: V71/median reached 0.9825507893974627
Regressor for col: V71/upper reached 0.9859324318617207
Regressor for col: V72/lower reached 0.031198973650862993
Regressor for col: V72/median reached 0.9762052603090956
Regressor for col: V72/upper reached 0.9835814316556017
Regressor for col: V73/lower reached 0.03947804602424304
Regressor for col: V73/median reached 0.9767354812989881
Regressor for col: V73/upper reached 0.9802197120552915
Regressor for col: V74/lower reached 0.0415243168682603
Regressor for col: V74/median reached 0.9720600484693804
Regressor for col: V74/upper reached 0.9863714983888501
Regressor for col: V75/lower reached 0.030272307990548142
Regressor for col: V75/median reached 0.9738658377044696
Regressor for col: V75/upper reached 0.981738418406638
Regressor for col: V76/lower reached 0.03366085231794125
Regressor for col: V76/median reached

Column V75 contained 0 nans before, now 116
Column V76 contained 0 nans before, now 112
Column V77 contained 0 nans before, now 113
Column V78 contained 0 nans before, now 106
Column V79 contained 0 nans before, now 89
Column V80 contained 0 nans before, now 104
Column V81 contained 0 nans before, now 103
Column V82 contained 0 nans before, now 91
Column V83 contained 0 nans before, now 105
Column V84 contained 0 nans before, now 104
Column V85 contained 0 nans before, now 108
Column V86 contained 0 nans before, now 92
Column V87 contained 0 nans before, now 101
Column V88 contained 0 nans before, now 100
Column V89 contained 0 nans before, now 100
Column V90 contained 0 nans before, now 104
Column V91 contained 0 nans before, now 109
Column V92 contained 0 nans before, now 97
Column V93 contained 0 nans before, now 112
Column V94 contained 0 nans before, now 116
Column V95 contained 0 nans before, now 104
Column V96 contained 0 nans before, now 116
Column V97 contained 0 nans before, 

Regressor for col: V37/lower reached 0.04871994006239899
Regressor for col: V37/median reached 0.9854343160389304
Regressor for col: V37/upper reached 0.9898216025341883
Regressor for col: V38/lower reached 0.06200323098093913
Regressor for col: V38/median reached 0.9827545914144336
Regressor for col: V38/upper reached 0.9882297962004559
Regressor for col: V39/lower reached 0.05811474225993901
Regressor for col: V39/median reached 0.9852164477472758
Regressor for col: V39/upper reached 0.9866780763741037
Regressor for col: V40/lower reached 0.05237223536754049
Regressor for col: V40/median reached 0.9818855026906499
Regressor for col: V40/upper reached 0.9895800130883043
Regressor for col: V41/lower reached 0.04747899021188806
Regressor for col: V41/median reached 0.98536565580433
Regressor for col: V41/upper reached 0.9867326525358362
Regressor for col: V42/lower reached 0.0597160481405662
Regressor for col: V42/median reached 0.9735817586867066
Regressor for col: V42/upper reached 0.

Regressor for col: V85/median reached 0.9795519275370174
Regressor for col: V85/upper reached 0.9801416378480057
Regressor for col: V86/lower reached 0.054496293274126806
Regressor for col: V86/median reached 0.9837666578732307
Regressor for col: V86/upper reached 0.9796533640100558
Regressor for col: V87/lower reached 0.051998175207499775
Regressor for col: V87/median reached 0.9845264880437984
Regressor for col: V87/upper reached 0.9857276259857636
Regressor for col: V88/lower reached 0.042376815176671634
Regressor for col: V88/median reached 0.9807777670642274
Regressor for col: V88/upper reached 0.9791550566141158
Regressor for col: V89/lower reached 0.046599595153776596
Regressor for col: V89/median reached 0.98944638019617
Regressor for col: V89/upper reached 0.9891408714189978
Regressor for col: V90/lower reached 0.05289759482520112
Regressor for col: V90/median reached 0.9874900196654428
Regressor for col: V90/upper reached 0.9882916227697216
Regressor for col: V91/lower reache

Imputed 110 values in column V69
Imputed 118 values in column V70
Imputed 112 values in column V71
Imputed 121 values in column V72
Imputed 102 values in column V73
Imputed 103 values in column V74
Imputed 116 values in column V75
Imputed 100 values in column V76
Imputed 110 values in column V77
Imputed 113 values in column V78
Imputed 110 values in column V79
Imputed 109 values in column V80
Imputed 100 values in column V81
Imputed 95 values in column V82
Imputed 121 values in column V83
Imputed 110 values in column V84
Imputed 90 values in column V85
Imputed 115 values in column V86
Imputed 109 values in column V87
Imputed 94 values in column V88
Imputed 116 values in column V89
Imputed 115 values in column V90
Imputed 116 values in column V91
Imputed 112 values in column V92
Imputed 107 values in column V93
Imputed 91 values in column V94
Imputed 121 values in column V95
Imputed 118 values in column V96
Imputed 123 values in column V97
Imputed 114 values in column V98
Imputed 122 va

Regressor for col: V37/median reached 0.9815575743776757
Regressor for col: V37/upper reached 0.9847616313258573
Regressor for col: V38/lower reached 0.025099261415552743
Regressor for col: V38/median reached 0.9830175896401803
Regressor for col: V38/upper reached 0.9849845826996888
Regressor for col: V39/lower reached 0.013617607666221776
Regressor for col: V39/median reached 0.9817520086982262
Regressor for col: V39/upper reached 0.9834440149732988
Regressor for col: V40/lower reached 0.027612262176409608
Regressor for col: V40/median reached 0.9797757401369798
Regressor for col: V40/upper reached 0.989256339850498
Regressor for col: V41/lower reached 0.014416843640056076
Regressor for col: V41/median reached 0.9861081809173478
Regressor for col: V41/upper reached 0.9875387721632285
Regressor for col: V42/lower reached 0.013919561355646037
Regressor for col: V42/median reached 0.9835463275752174
Regressor for col: V42/upper reached 0.9879121126436952
Regressor for col: V43/lower reac

Regressor for col: V85/upper reached 0.9821064019590046
Regressor for col: V86/lower reached 0.025250085452938276
Regressor for col: V86/median reached 0.9845726990896377
Regressor for col: V86/upper reached 0.9840862587824574
Regressor for col: V87/lower reached 0.027945549939794756
Regressor for col: V87/median reached 0.9835659122779985
Regressor for col: V87/upper reached 0.987495547467172
Regressor for col: V88/lower reached 0.016283826046669947
Regressor for col: V88/median reached 0.9831170895499448
Regressor for col: V88/upper reached 0.9850967677104507
Regressor for col: V89/lower reached 0.01142776936137957
Regressor for col: V89/median reached 0.9882084656098187
Regressor for col: V89/upper reached 0.9892684699886174
Regressor for col: V90/lower reached 0.02057897513297452
Regressor for col: V90/median reached 0.9862886331928047
Regressor for col: V90/upper reached 0.9887369589772336
Regressor for col: V91/lower reached 0.021273449649395426
Regressor for col: V91/median reac

Fitting model for column: V42
Fitting model for column: V43
Fitting model for column: V44
Fitting model for column: V45
Fitting model for column: V46
Fitting model for column: V47
Fitting model for column: V48
Fitting model for column: V49
Fitting model for column: V50
Fitting model for column: V51
Fitting model for column: V52
Fitting model for column: V53
Fitting model for column: V54
Fitting model for column: V55
Fitting model for column: V56
Fitting model for column: V57
Fitting model for column: V58
Fitting model for column: V59
Fitting model for column: V60
Fitting model for column: V61
Fitting model for column: V62
Fitting model for column: V63
Fitting model for column: V64
Fitting model for column: V65
Fitting model for column: V66
Fitting model for column: V67
Fitting model for column: V68
Fitting model for column: V69
Fitting model for column: V70
Fitting model for column: V71
Fitting model for column: V72
Fitting model for column: V73
Fitting model for column: V74
Fitting mo

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery



Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.6607142857142857, 'Recall': 0.8164251207729469, 'F1-score': 0.6308172077402847, 'Accuracy': 0.6872427983539094}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 6090202.724295788}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.4686101694915254, 'classification_report': {'0': {'precision': 0.5036496350364964, 'recall': 0.552, 'f1-score': 0.5267175572519084, 'support': 125}, '1': {'precision': 0.4716981132075472, 'recall': 0.423728813559322, 'f1-score': 0.4464285714285714, 'support': 118}, 'accuracy': 0.4897119341563786, 'macro avg': {'precision': 0.4876738741220218, 'recall': 0.48786440677966103, 'f1-score': 0.4865730643402399, 'support': 243}, 'weighted avg': {'precision': 0.488134081226554, 'recall': 0.4897119341563786, 'f1-score': 0.48772949006197513, 'support': 243}}}


INFO:root:0.2224972s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
INFO:openml.datasets.dataset:pickle load data thoracic_surgery
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   10.0s finished



Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.7556089743589743, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.829787234

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7395833333333333, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Classifier for col: DGN reached 0.7
Classifier for col: PRE6 reached 0.8500000000000001
Classifier for col: PRE7 reached 0.9366666666666666
Classifier for col: PRE8 reached 0.8433333333333334
Classifier for col: PRE9 reached 0.9266666666666666
Classifier for col: PRE10 reached 0.8833333333333333
Classifier for col: PRE11 reached 0.8466666666666667
Classifier for col: PRE14 reached 0.5266666666666666
Classifier for col: PRE17 reached 0.9

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:CategoricalEncoder for column DGN                                found only 39 occurrences of value DGN2
INFO:root:CategoricalEncoder for column DGN                                found only 30 occurrences of value DGN4
INFO:root:CategoricalEncoder for column DGN                                found only 11 occurrences of value DGN5
INFO:root:CategoricalEncoder for column DGN                                found only 3 occurrences of value DGN6
INFO:root:CategoricalEncoder for column DGN                                found only 2 occurrences of value DGN8


Regressor for col: AGE/upper reached -1.0294406256522235

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 181.8572205660556}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.7395833333333333, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Fitting model for column: DGN


INFO:root:
INFO:root:Epoch[0] Batch [0-11]	Speed: 1057.15 samples/sec	cross-entropy=1.283169	DGN-accuracy=0.677083
INFO:root:Epoch[0] Train-cross-entropy=1.148601
INFO:root:Epoch[0] Train-DGN-accuracy=0.715909
INFO:root:Epoch[0] Time cost=0.315
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=1.129829
INFO:root:Epoch[0] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[1] Batch [0-11]	Speed: 1223.22 samples/sec	cross-entropy=0.828543	DGN-accuracy=0.755208
INFO:root:Epoch[1] Train-cross-entropy=0.823789
INFO:root:Epoch[1] Train-DGN-accuracy=0.758523
INFO:root:Epoch[1] Time cost=0.295
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=1.012970
INFO:root:Epoch[1] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[2] Batch [0-11]	Speed: 1076.80 samples/sec	cross-entropy=0.804120	DGN-accuracy=0.755208
INFO:root:Epoch[2] Train-cross-entropy=0.789864
INFO:root:Epoch[2] Train-DGN-ac

INFO:root:Epoch[19] Validation-cross-entropy=0.978529
INFO:root:Epoch[19] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[20] Batch [0-11]	Speed: 1064.70 samples/sec	cross-entropy=0.715209	DGN-accuracy=0.760417
INFO:root:Epoch[20] Train-cross-entropy=0.718643
INFO:root:Epoch[20] Train-DGN-accuracy=0.764205
INFO:root:Epoch[20] Time cost=0.342
INFO:root:Saved checkpoint to "imputer_model/model-0020.params"
INFO:root:Epoch[20] Validation-cross-entropy=0.975914
INFO:root:Epoch[20] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[21] Batch [0-11]	Speed: 1167.15 samples/sec	cross-entropy=0.713335	DGN-accuracy=0.760417
INFO:root:Epoch[21] Train-cross-entropy=0.716928
INFO:root:Epoch[21] Train-DGN-accuracy=0.764205
INFO:root:Epoch[21] Time cost=0.335
INFO:root:Saved checkpoint to "imputer_model/model-0021.params"
INFO:root:Epoch[21] Validation-cross-entropy=0.974494
INFO:root:Epoch[21] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[22] Batch [0-11]	Speed: 1272.58 samples/sec	cross-entropy=0

INFO:root:Epoch[39] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[40] Batch [0-11]	Speed: 1000.34 samples/sec	cross-entropy=0.684154	DGN-accuracy=0.760417
INFO:root:Epoch[40] Train-cross-entropy=0.691789
INFO:root:Epoch[40] Train-DGN-accuracy=0.767045
INFO:root:Epoch[40] Time cost=0.358
INFO:root:Saved checkpoint to "imputer_model/model-0040.params"
INFO:root:Epoch[40] Validation-cross-entropy=0.945924
INFO:root:Epoch[40] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[41] Batch [0-11]	Speed: 1000.66 samples/sec	cross-entropy=0.682296	DGN-accuracy=0.760417
INFO:root:Epoch[41] Train-cross-entropy=0.690312
INFO:root:Epoch[41] Train-DGN-accuracy=0.767045
INFO:root:Epoch[41] Time cost=0.358
INFO:root:Saved checkpoint to "imputer_model/model-0041.params"
INFO:root:Epoch[41] Validation-cross-entropy=0.944039
INFO:root:Epoch[41] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[42] Batch [0-11]	Speed: 1232.07 samples/sec	cross-entropy=0.681326	DGN-accuracy=0.760417
INFO:root:Epoch[42] Trai

INFO:root:Epoch[60] Batch [0-11]	Speed: 1237.17 samples/sec	cross-entropy=0.659732	DGN-accuracy=0.760417
INFO:root:Epoch[60] Train-cross-entropy=0.670097
INFO:root:Epoch[60] Train-DGN-accuracy=0.767045
INFO:root:Epoch[60] Time cost=0.299
INFO:root:Saved checkpoint to "imputer_model/model-0060.params"
INFO:root:Epoch[60] Validation-cross-entropy=0.922758
INFO:root:Epoch[60] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[61] Batch [0-11]	Speed: 1249.20 samples/sec	cross-entropy=0.659177	DGN-accuracy=0.760417
INFO:root:Epoch[61] Train-cross-entropy=0.669355
INFO:root:Epoch[61] Train-DGN-accuracy=0.767045
INFO:root:Epoch[61] Time cost=0.292
INFO:root:Saved checkpoint to "imputer_model/model-0061.params"
INFO:root:Epoch[61] Validation-cross-entropy=0.921580
INFO:root:Epoch[61] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[62] Batch [0-11]	Speed: 1010.28 samples/sec	cross-entropy=0.657733	DGN-accuracy=0.760417
INFO:root:Epoch[62] Train-cross-entropy=0.668291
INFO:root:Epoch[62] Train-DG

INFO:root:Epoch[80] Train-cross-entropy=0.650495
INFO:root:Epoch[80] Train-DGN-accuracy=0.772727
INFO:root:Epoch[80] Time cost=0.325
INFO:root:Saved checkpoint to "imputer_model/model-0080.params"
INFO:root:Epoch[80] Validation-cross-entropy=0.913889
INFO:root:Epoch[80] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[81] Batch [0-11]	Speed: 1235.80 samples/sec	cross-entropy=0.637131	DGN-accuracy=0.770833
INFO:root:Epoch[81] Train-cross-entropy=0.649791
INFO:root:Epoch[81] Train-DGN-accuracy=0.772727
INFO:root:Epoch[81] Time cost=0.314
INFO:root:Saved checkpoint to "imputer_model/model-0081.params"
INFO:root:Epoch[81] Validation-cross-entropy=0.914433
INFO:root:Epoch[81] Validation-DGN-accuracy=0.562500
INFO:root:Epoch[82] Batch [0-11]	Speed: 980.36 samples/sec	cross-entropy=0.635739	DGN-accuracy=0.770833
INFO:root:Epoch[82] Train-cross-entropy=0.648626
INFO:root:Epoch[82] Train-DGN-accuracy=0.772727
INFO:root:Epoch[82] Time cost=0.346
INFO:root:Saved checkpoint to "imputer_model/model

Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 181.8572205660556}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7395833333333333, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.

  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.7
Classifier for col: PRE6 reached 0.8600000000000001
Classifier for col: PRE7 reached 0.9233333333333333
Classifier for col: PRE8 reached 0.84
Classifier for col: PRE9 reached 0.92
Classifier for col: PRE10 reached 0.8466666666666667
Classifier for col: PRE11 reached 0.8566666666666667
Classifier for col: PRE14 reached 0.53
Classifier for col: PRE17 reached 0.92
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.98
Classifier for col: PRE30 reached 0.8166666666666667
Classifier for col: PRE32 reached 0.9866666666666667
Regressor for col: PRE4/lower reached 0.25167235783933944
Regressor for col: PRE4/median reached 0.713105289087569
Regressor for col: PRE4/upper reached 0.40734657833757865
Regressor for col: PRE5/lower reached -0.05067128452052738
Regressor for col: PRE5/median reached 0.009460367356782073
Regressor for col: PRE5/upper reached -0.2113967258467788
Regressor for col: AGE/lower reached -0.9224868525915807
Regressor for col: AGE/median reached 0.04144619298898039
Regressor for col: AGE/upper reached -1.1064142487316617
Imputed 1 values in column DGN
Imputed 5 values in column PRE4
Imputed 25 values in column PRE5
Imputed 8 values in column AGE

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.78, 'Recall': 0.93125, 'F1-score': 0.8220616072965066, 'Accuracy': 0.8829787234042553}
Imputation method: Sklearn

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.78, 'Recall': 0.93125, 'F1-score': 0.8220616072965066, 'Accuracy': 0.8829787234042553}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 87.0727819160383}
Cleaner: (PyODKNNOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7548076923076924, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.7121212121212122, 'Recall': 0.88125, 'F1-score': 0.7304964539007092, 'Accuracy': 0.7978723404255319}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 89.58097027556062}
Cleaner: (PyODIsolationForestOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7740384615384615, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.7266666666666667
Classifier for col: PRE6 reached 0.8033333333333333
Classifier for col: PRE7 reached 0.9233333333333333
Classifier for col: PRE8 reached 0.8366666666666667
Classifier for col: PRE9 reached 0.92
Classifier for col: PRE10 reached 0.8933333333333333
Classifier for col: PRE11 reached 0.8600000000000001
Classifier for col: PRE14 reached 0.47333333333333333
Classifier for col: PRE17 reached 0.9066666666666667
Classifier for col: PRE19 reached 0.9833333333333334
Classifier for col: PRE25 reached 0.98
Classifier for col: PRE30 reached 0.8066666666666666
Classifier for col: PRE32 reached 0.99
Regressor for col: PRE4/lower reached 0.49508547394302493
Regressor for col: PRE4/median reached 0.7008716719480362
Regressor for col: PRE4/upper reached 0.4550452847165055
Regressor for col: PRE5/lower reached -0.049596678493639024
Regressor for col: PRE5/median reached -0.03708441606162616
Regressor for col: PRE5/upper reached -1.1858389830396843
Regress

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6842105263157895, 'Recall': 0.85, 'F1-score': 0.6809954751131222, 'Accuracy': 0.7446808510638298}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 88.74400961241318}
Cleaner: (PyODIsolationForestOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7692307692307692, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.5953800298062593, 'Recall': 0.6714285714285715, 'F1-score': 0.5886524822695036, 'Accuracy': 0.6914893617021277}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 97.51447374595362}
Cleaner: (PyODPCAOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.795673076923077, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.69
Classifier for col: PRE6 reached 0.8566666666666667
Classifier for col: PRE7 reached 0.9199999999999999
Classifier for col: PRE8 reached 0.85
Classifier for col: PRE9 reached 0.9266666666666667
Classifier for col: PRE10 reached 0.8666666666666667
Classifier for col: PRE11 reached 0.8600000000000001
Classifier for col: PRE14 reached 0.5
Classifier for col: PRE17 reached 0.9266666666666667




Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.9866666666666666
Classifier for col: PRE30 reached 0.8166666666666667
Classifier for col: PRE32 reached 0.9733333333333334
Regressor for col: PRE4/lower reached 0.25490135993297236
Regressor for col: PRE4/median reached 0.6336311748311902
Regressor for col: PRE4/upper reached 0.25081144778139314
Regressor for col: PRE5/lower reached -0.11552392335802986
Regressor for col: PRE5/median reached -0.20557257628666092
Regressor for col: PRE5/upper reached -0.364460900982859
Regressor for col: AGE/lower reached -0.9157106071939706
Regressor for col: AGE/median reached 0.10827206471127315
Regressor for col: AGE/upper reached -0.961645974449131
Imputed 1 values in column DGN
Imputed 27 values in column PRE4
Imputed 33 values in column PRE5
Imputed 24 values in column AGE

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.5953800298062593, 'Recall': 0.6714285714285715, 'F1-score': 

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.5953800298062593, 'Recall': 0.6714285714285715, 'F1-score': 0.5886524822695036, 'Accuracy': 0.6914893617021277}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 91.71328010468073}
Cleaner: (PyODPCAOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7860576923076923, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.453488372

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.6707317073170732, 'Recall': 0.83125, 'F1-score': 0.6530416951469583, 'Accuracy': 0.7127659574468085}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 90.24771683093611}
Cleaner: (PyODCBLOFOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7403846153846154, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}


  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.7066666666666667
Classifier for col: PRE6 reached 0.8633333333333333
Classifier for col: PRE7 reached 0.9333333333333333
Classifier for col: PRE8 reached 0.8400000000000001
Classifier for col: PRE9 reached 0.8999999999999999
Classifier for col: PRE10 reached 0.87
Classifier for col: PRE11 reached 0.8633333333333333
Classifier for col: PRE14 reached 0.48333333333333334
Classifier for col: PRE17 reached 0.9233333333333333
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.9866666666666667
Classifier for col: PRE30 reached 0.82
Classifier for col: PRE32 reached 0.99
Regressor for col: PRE4/lower reached 0.33591549519231073
Regressor for col: PRE4/median reached 0.7098112972469707
Regressor for col: PRE4/upper reached 0.3366863749417581
Regressor for col: PRE5/lower reached -0.042284395097528904
Regressor for col: PRE5/median reached 0.006179931747698153
Regressor for col: PRE5/upper reached -0.19252483616304583
Regressor for col: AGE/lower reached -1.001088177795452
Regressor for col: AGE/median reached 0.05860677226685068
Regressor for col: AGE/upper reached -0.9988129107586493
Imputed 1 values in column DGN
Imputed 25 values in column PRE4
Imputed 39 values in column PRE5
Imputed 19 values in column AGE

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.6794871794871795, 'Recall': 0.84375, 'F1-score': 0.6715583508036338, 'Accuracy': 0.7340425531914894}
Imputation method: Sk

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.6707317073170732, 'Recall': 0.83125, 'F1-score': 0.6530416951469583, 'Accuracy': 0.7127659574468085}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 89.9039266343805}
Cleaner: (PyODCBLOFOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7467948717948718, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 181.8572205660556}
Cleaner: (PyODSOSOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.7395833333333333, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}




Classifier for col: DGN reached 0.6966666666666667
Classifier for col: PRE6 reached 0.84
Classifier for col: PRE7 reached 0.9199999999999999
Classifier for col: PRE8 reached 0.83
Classifier for col: PRE9 reached 0.9266666666666666
Classifier for col: PRE10 reached 0.9033333333333333
Classifier for col: PRE11 reached 0.84
Classifier for col: PRE14 reached 0.5233333333333333
Classifier for col: PRE17 reached 0.9199999999999999
Classifier for col: PRE19 reached 0.99
Classifier for col: PRE25 reached 0.9866666666666667
Classifier for col: PRE30 reached 0.7866666666666666
Classifier for col: PRE32 reached 0.9933333333333333
Regressor for col: PRE4/lower reached 0.3190609082063066
Regressor for col: PRE4/median reached 0.6989486291972192
Regressor for col: PRE4/upper reached 0.3796973468088385
Regressor for col: PRE5/lower reached -0.047211367014822314
Regressor for col: PRE5/median reached -0.024724283334216723
Regressor for col: PRE5/upper reached -0.39228504387832586
Regressor for col: AG

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 181.8572205660556}
Cleaner: (PyODSOSOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7395833333333333, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Classifier for col: DGN reached 0.68
Classifier for col: PRE6 reached 0.85
Classifi



Regressor for col: PRE4/lower reached 0.410382186681751
Regressor for col: PRE4/median reached 0.7296156421469093
Regressor for col: PRE4/upper reached 0.42052818569816003
Regressor for col: PRE5/lower reached -0.04919971304098381
Regressor for col: PRE5/median reached -0.009992081233381408
Regressor for col: PRE5/upper reached -0.27372895011482234
Regressor for col: AGE/lower reached -0.9392971725937932
Regressor for col: AGE/median reached 0.05411572872773024
Regressor for col: AGE/upper reached -1.0220009634598946
Column DGN contained 0 nans before, now 28
Column PRE6 contained 0 nans before, now 12
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 9
Column PRE11 contained 0 nans before, now 0
Column PRE14 contained 0 nans before, now 0
Column PRE17 contained 0 nans before, now 0
Column PRE19 contained 0 nans before, now 0
Column PRE25 contained 0 nans before, now

  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.73
Classifier for col: PRE6 reached 0.8533333333333333
Classifier for col: PRE7 reached 0.9333333333333333
Classifier for col: PRE8 reached 0.8600000000000001
Classifier for col: PRE9 reached 0.9199999999999999
Classifier for col: PRE10 reached 0.8600000000000001
Classifier for col: PRE11 reached 0.8666666666666667
Classifier for col: PRE14 reached 0.5366666666666666
Classifier for col: PRE17 reached 0.9133333333333333
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.96
Classifier for col: PRE30 reached 0.7733333333333333
Classifier for col: PRE32 reached 0.9833333333333334
Regressor for col: PRE4/lower reached 0.15217185401345806
Regressor for col: PRE4/median reached 0.6799414043456633
Regressor for col: PRE4/upper reached 0.2795025936901631
Regressor for col: PRE5/lower reached -0.051945159051018375
Regressor for col: PRE5/median reached -0.10270837808757016
Regressor for col: PRE5/upper reached -0.6447756059263423
Regressor for col: AGE/lower reached -1.1264976592369116
Regressor for col: AGE/median reached 0.07875917065644311
Regressor for col: AGE/upper reached -0.8901286383543461
Column DGN contained 0 nans before, now 27
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 0
Column PRE11 contained 0 nans before, now 0
Column PRE14 contained 0 nans

  _warn_prf(average, modifier, msg_start, len(result))


Classifier for col: DGN reached 0.7
Classifier for col: PRE6 reached 0.8366666666666667
Classifier for col: PRE7 reached 0.9133333333333333
Classifier for col: PRE8 reached 0.8366666666666667
Classifier for col: PRE9 reached 0.9366666666666666
Classifier for col: PRE10 reached 0.8766666666666667
Classifier for col: PRE11 reached 0.8433333333333334
Classifier for col: PRE14 reached 0.5333333333333334
Classifier for col: PRE17 reached 0.9199999999999999




Classifier for col: PRE19 reached nan
Classifier for col: PRE25 reached 0.98
Classifier for col: PRE30 reached 0.8066666666666666
Classifier for col: PRE32 reached nan




Regressor for col: PRE4/lower reached 0.3058172693661791
Regressor for col: PRE4/median reached 0.7297936011245649
Regressor for col: PRE4/upper reached 0.38875269264542317
Regressor for col: PRE5/lower reached -0.05566354520749495
Regressor for col: PRE5/median reached -0.012051863969829268
Regressor for col: PRE5/upper reached -0.02962449818863655
Regressor for col: AGE/lower reached -0.9280874075601748
Regressor for col: AGE/median reached -0.003429618076411789
Regressor for col: AGE/upper reached -0.8701773258209586
Column DGN contained 0 nans before, now 26
Column PRE6 contained 0 nans before, now 0
Column PRE7 contained 0 nans before, now 0
Column PRE8 contained 0 nans before, now 0
Column PRE9 contained 0 nans before, now 0
Column PRE10 contained 0 nans before, now 10
Column PRE11 contained 0 nans before, now 0
Column PRE14 contained 0 nans before, now 0
Column PRE17 contained 0 nans before, now 0
Column PRE19 contained 0 nans before, now 0
Column PRE25 contained 0 nans before, 

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6
Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25
Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.6772397094430993, 'Recall': 0.8267857142857142, 'F1-score': 0.6825723094993392, 'Accuracy': 0.7553191489361702}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 87.50540963682887}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.7283653846153846, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.453488372

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/cleve



Best cleaning method:
Cleaning score: Cleaner: (PyODPCAOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.795673076923077, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}} 

Cleaning improved the overall score 





INFO:root:0.3449862s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/cleve
INFO:openml.datasets.dataset:pickle load data cleve
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: cleve
Found 8 categorical and 5 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    9.5s finished



Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Trestbps', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Cholesterol', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 61 rows... 

	perturbation: GaussianNoise: {'column': 'Age', 'fraction': 0.15, 'sampling': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7027027027027027, 'recall': 0.896551724137931, 'f1-score': 0.787878787878788, 'support': 29}, '1': {'precision': 0.875, 'recall': 0.65625, 'f1-score': 0.75

  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 'f1-score': 0.7848032564450473, 'support': 61}, 'weighted avg': {'precision': 0.8167648272498781, 'recall': 0.7868852459016393, 'f1-score': 0.7837622617167515, 'support': 61}}}
Classifier for col: Sex reached 0.7149914089347079
Classifier for col: Chest_pain_type reached 0.5131550687285223
Classifier for col: Fasting_blood_sugar_&lt;_120 reached 0.8808526632302405




Classifier for col: Resting_ecg reached 0.570178264604811
Classifier for col: Exercise_induced_angina reached 0.7668062714776632
Classifier for col: Slope reached 0.6994201030927836
Classifier for col: Number_of_vessels_colored reached 0.595897766323024




Classifier for col: Thal reached 0.6684922680412371
Regressor for col: Age/lower reached -0.4489461968537565
Regressor for col: Age/median reached 0.1498732837469387
Regressor for col: Age/upper reached -0.9605650601942951
Regressor for col: Trestbps/lower reached -0.8980087083476912
Regressor for col: Trestbps/median reached -0.018608428684469325
Regressor for col: Trestbps/upper reached -0.9163059461793146
Regressor for col: Cholesterol/lower reached -0.8828507347491286
Regressor for col: Cholesterol/median reached -0.06636172386172057
Regressor for col: Cholesterol/upper reached -1.0927866225293719
Regressor for col: Max_heart_rate/lower reached -0.8379885678984419
Regressor for col: Max_heart_rate/median reached 0.18093607550836277
Regressor for col: Max_heart_rate/upper reached -0.4292288009594287
Regressor for col: Oldpeak/lower reached -0.28814475547907537
Regressor for col: Oldpeak/median reached 0.14777038509167306


  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:CategoricalEncoder for column Sex                                found only 68 occurrences of value 0


Regressor for col: Oldpeak/upper reached -0.7431208414949148

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 240.51202267100246}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 'f1-score': 0.7848032564450473, 'support': 61}, 'weighted avg': {'precision': 0.8167648272498781, 'recall': 0.7868852459016393, 'f1-score': 0.

INFO:root:
INFO:root:Epoch[0] Batch [0-7]	Speed: 1415.37 samples/sec	cross-entropy=0.802156	Sex-accuracy=0.625000
INFO:root:Epoch[0] Train-cross-entropy=0.754468
INFO:root:Epoch[0] Train-Sex-accuracy=0.620536
INFO:root:Epoch[0] Time cost=0.177
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.999723
INFO:root:Epoch[0] Validation-Sex-accuracy=0.468750
INFO:root:Epoch[1] Batch [0-7]	Speed: 1432.04 samples/sec	cross-entropy=0.552122	Sex-accuracy=0.742188
INFO:root:Epoch[1] Train-cross-entropy=0.566636
INFO:root:Epoch[1] Train-Sex-accuracy=0.723214
INFO:root:Epoch[1] Time cost=0.175
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=1.098230
INFO:root:Epoch[1] Validation-Sex-accuracy=0.437500
INFO:root:Epoch[2] Batch [0-7]	Speed: 1341.95 samples/sec	cross-entropy=0.532192	Sex-accuracy=0.742188
INFO:root:Epoch[2] Train-cross-entropy=0.542108
INFO:root:Epoch[2] Train-Sex-accur

Fitting model for column: Chest_pain_type


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored
Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 240.51202267100246}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 'f1-score': 0.7848032564450473, 'support': 61}, 'weighted avg': {'precision': 0.8167648272498781, 'recall': 0.7868852459016393, 'f1-score': 0.7837622617167515, 'support': 61}}}

Outlier detection method:



Classifier for col: Thal reached 0.6477126288659794
Regressor for col: Age/lower reached -0.7213676557502594
Regressor for col: Age/median reached 0.2376822557378756
Regressor for col: Age/upper reached -1.1010809355255495
Regressor for col: Trestbps/lower reached -0.925575552208987
Regressor for col: Trestbps/median reached -0.05581137978479034
Regressor for col: Trestbps/upper reached -1.1545285086824744
Regressor for col: Cholesterol/lower reached -1.0601906747054284
Regressor for col: Cholesterol/median reached -0.03499168189203705
Regressor for col: Cholesterol/upper reached -1.30946021786481
Regressor for col: Max_heart_rate/lower reached -0.4358441115162951
Regressor for col: Max_heart_rate/median reached 0.22463712123853946
Regressor for col: Max_heart_rate/upper reached -0.5031342084521818
Regressor for col: Oldpeak/lower reached -0.33501165236458985
Regressor for col: Oldpeak/median reached 0.20488120458051834
Regressor for col: Oldpeak/upper reached -0.3681306876175209
Imput

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.8553571428571429, 'Recall': 0.7126068376068375, 'F1-score': 0.7579365079365079, 'Accuracy': 0.9016393442622951}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 25.466269064375837}
Cleaner: (PyODKNNOutlierDetection, DatawigImputation): {'roc_auc_score': 0.8922413793103449, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 



Classifier for col: Resting_ecg reached 0.6009450171821307
Classifier for col: Exercise_induced_angina reached 0.7408182989690721
Classifier for col: Slope reached 0.6994737972508591
Classifier for col: Number_of_vessels_colored reached 0.5541774054982818




Classifier for col: Thal reached 0.6786941580756014
Regressor for col: Age/lower reached -0.5947894156133896
Regressor for col: Age/median reached 0.05416972353506799
Regressor for col: Age/upper reached -0.7623191765007091
Regressor for col: Trestbps/lower reached -1.1067846874193266
Regressor for col: Trestbps/median reached -0.01387935835334575
Regressor for col: Trestbps/upper reached -0.7973860568250115
Regressor for col: Cholesterol/lower reached -1.099244994459577
Regressor for col: Cholesterol/median reached -0.07428004493645257
Regressor for col: Cholesterol/upper reached -0.819581305200832
Regressor for col: Max_heart_rate/lower reached -0.3417763630101154
Regressor for col: Max_heart_rate/median reached 0.09124630195991656
Regressor for col: Max_heart_rate/upper reached -0.538034295930859
Regressor for col: Oldpeak/lower reached -0.5505810393100579
Regressor for col: Oldpeak/median reached 0.13230387985072894
Regressor for col: Oldpeak/upper reached -0.32779454275773146
Impu

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.5979020979020979, 'Recall': 0.6794871794871795, 'F1-score': 0.5891527827011698, 'Accuracy': 0.6885245901639344}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 45.66663673744087}
Cleaner: (PyODIsolationForestOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9030172413793105, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall



Classifier for col: Resting_ecg reached 0.5389819587628866
Classifier for col: Exercise_induced_angina reached 0.7304553264604812
Classifier for col: Slope reached 0.6113079896907216
Classifier for col: Number_of_vessels_colored reached 0.5493449312714777




Classifier for col: Thal reached 0.6527598797250859
Regressor for col: Age/lower reached -0.8067443592399213
Regressor for col: Age/median reached 0.2778816903218435
Regressor for col: Age/upper reached -0.686727567441659
Regressor for col: Trestbps/lower reached -0.8036333650305246
Regressor for col: Trestbps/median reached -0.030724489801689008
Regressor for col: Trestbps/upper reached -0.7612436832796231
Regressor for col: Cholesterol/lower reached -0.9793548710212072
Regressor for col: Cholesterol/median reached 0.012390888977747205
Regressor for col: Cholesterol/upper reached -1.2842113444478467
Regressor for col: Max_heart_rate/lower reached -0.5632948942029192
Regressor for col: Max_heart_rate/median reached 0.24411799477371854
Regressor for col: Max_heart_rate/upper reached -0.579510957502309
Regressor for col: Oldpeak/lower reached -0.4876447538751295
Regressor for col: Oldpeak/median reached 0.14563338857550734
Regressor for col: Oldpeak/upper reached -0.5607990881772965
Impu

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored
Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.6221804511278195, 'Recall': 0.7083333333333333, 'F1-score': 0.6291793313069909, 'Accuracy': 0.7377049180327869}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 51.22223906372765}
Cleaner: (PyODPCAOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9051724137931034, 'classification_report': {'0': {'precision': 0.7428571428571429, 'recall': 0.896551724137931, 'f1-score': 0.8125, 'support': 29}, '1': {'precision': 0.8846153846153846, 'recall': 0.71875, 'f1-score': 0.7931034482758621, 'support': 32}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8137362637362637, 'recall': 0.8076508620689655, 'f1-score': 0.



Classifier for col: Resting_ecg reached 0.5235180412371134
Classifier for col: Exercise_induced_angina reached 0.7614905498281787
Classifier for col: Slope reached 0.6368664089347079
Classifier for col: Number_of_vessels_colored reached 0.5854810996563574
Classifier for col: Thal reached 0.6112006013745704
Regressor for col: Age/lower reached -0.8164288711603134
Regressor for col: Age/median reached 0.21113711297890508
Regressor for col: Age/upper reached -0.7282433648317094
Regressor for col: Trestbps/lower reached -0.7817659875711944
Regressor for col: Trestbps/median reached -0.038596186015152245
Regressor for col: Trestbps/upper reached -0.9791124783675859
Regressor for col: Cholesterol/lower reached -0.9603449706947667
Regressor for col: Cholesterol/median reached 0.003785664658643771
Regressor for col: Cholesterol/upper reached -1.3121673263715705
Regressor for col: Max_heart_rate/lower reached -0.3840590700389154
Regressor for col: Max_heart_rate/median reached 0.231364961753678

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Detection Score: {'Precision': 0.6053571428571429, 'Recall': 0.6891025641025641, 'F1-score': 0.6021739130434783, 'Accuracy': 0.7049180327868853}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 34.48704819059479}
Cleaner: (PyODCBLOFOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9008620689655172, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.793642241379310

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 240.51202267100246}
Cleaner: (PyODSOSOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 'f1-score': 0.7848032564450473, 'support': 61}, 'weighted avg': {'precision': 0.8167648272498781, 'recall': 0.7868852459016393, 'f1-score': 0.7837622617167515, 'support': 61}}}
Classifier for



Classifier for col: Thal reached 0.6581829896907216
Regressor for col: Age/lower reached -0.7724018460266779
Regressor for col: Age/median reached 0.06643033904200446
Regressor for col: Age/upper reached -0.9565356532648693
Regressor for col: Trestbps/lower reached -1.017871143603691
Regressor for col: Trestbps/median reached 0.018058682240952306
Regressor for col: Trestbps/upper reached -0.8782290128719775
Regressor for col: Cholesterol/lower reached -0.9916487820323737
Regressor for col: Cholesterol/median reached -0.0022938694666379553
Regressor for col: Cholesterol/upper reached -1.124129040226502
Regressor for col: Max_heart_rate/lower reached -0.6338288604695976
Regressor for col: Max_heart_rate/median reached 0.22090735436318604
Regressor for col: Max_heart_rate/upper reached -0.5008555415243203
Regressor for col: Oldpeak/lower reached -0.4630463526787607
Regressor for col: Oldpeak/median reached 0.11212159289198553
Regressor for col: Oldpeak/upper reached -0.6147565906836772

O

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Sex
Fitting model for column: Chest_pain_type


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4262295081967213, 'Recall': 0.5, 'F1-score': 0.4601769911504424, 'Accuracy': 0.8524590163934426}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 240.51202267100246}
Cleaner: (PyODSOSOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9030172413793103, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 'f1-score': 0.7848032564450473, 'support': 61}, 'weighted avg': {'precision': 0.8167648272498781, 'recall': 0.7868852459016393, 'f1-score': 0.7837622617167515, 'support': 61}}}
Classifier for c



Classifier for col: Resting_ecg reached 0.5595468213058419
Classifier for col: Exercise_induced_angina reached 0.72009235395189
Classifier for col: Slope reached 0.6683311855670103
Classifier for col: Number_of_vessels_colored reached 0.5440829037800687




Classifier for col: Thal reached 0.6476589347079038
Regressor for col: Age/lower reached -0.5766472131082458
Regressor for col: Age/median reached 0.2749029189572614
Regressor for col: Age/upper reached -0.512825592188012
Regressor for col: Trestbps/lower reached -0.9223629271426538
Regressor for col: Trestbps/median reached -0.012085155480302145
Regressor for col: Trestbps/upper reached -1.0490360503147484
Regressor for col: Cholesterol/lower reached -0.9464269803669836
Regressor for col: Cholesterol/median reached 0.00032338067124837444
Regressor for col: Cholesterol/upper reached -0.7694006641146126
Regressor for col: Max_heart_rate/lower reached -0.5328393205938144
Regressor for col: Max_heart_rate/median reached 0.1856940404374045
Regressor for col: Max_heart_rate/upper reached -0.42616688130682756
Regressor for col: Oldpeak/lower reached -0.4525390837167441
Regressor for col: Oldpeak/median reached 0.1427290625990193
Regressor for col: Oldpeak/upper reached -0.5907599518908768
Co



Classifier for col: Resting_ecg reached 0.5385524054982818
Classifier for col: Exercise_induced_angina reached 0.7356636597938144
Classifier for col: Slope reached 0.6476589347079038
Classifier for col: Number_of_vessels_colored reached 0.518041237113402




Classifier for col: Thal reached 0.6786941580756014
Regressor for col: Age/lower reached -0.8031182870516235
Regressor for col: Age/median reached 0.17288635378728356
Regressor for col: Age/upper reached -0.7995581914307084
Regressor for col: Trestbps/lower reached -0.8472589439676947
Regressor for col: Trestbps/median reached 0.02853981711463699
Regressor for col: Trestbps/upper reached -1.1686735703845992
Regressor for col: Cholesterol/lower reached -1.081096698188462
Regressor for col: Cholesterol/median reached -0.04006640552041463
Regressor for col: Cholesterol/upper reached -1.3044263440956727
Regressor for col: Max_heart_rate/lower reached -0.49336978066985004
Regressor for col: Max_heart_rate/median reached 0.33145568771188527
Regressor for col: Max_heart_rate/upper reached -0.5673686031932373
Regressor for col: Oldpeak/lower reached -0.36030780210395796
Regressor for col: Oldpeak/median reached 0.20432012468912453
Regressor for col: Oldpeak/upper reached -0.43329448019799177
C

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Fasting_blood_sugar_&lt;_120


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Resting_ecg
Fitting model for column: Exercise_induced_angina
Fitting model for column: Slope
Fitting model for column: Number_of_vessels_colored
Fitting model for column: Thal


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: Age
Fitting model for column: Trestbps
Fitting model for column: Cholesterol
Fitting model for column: Max_heart_rate
Fitting model for column: Oldpeak

Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.7239304812834224, 'Recall': 0.8579059829059829, 'F1-score': 0.7556089743589743, 'Accuracy': 0.8360655737704918}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 35.012183648524065}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9148706896551724, 'classification_report': {'0': {'precision': 0.7105263157894737, 'recall': 0.9310344827586207, 'f1-score': 0.8059701492537312, 'support': 29}, '1': {'precision': 0.9130434782608695, 'recall': 0.65625, 'f1-score': 0.7636363636363634, 'support': 32}, 'accuracy': 0.7868852459016393, 'macro avg': {'precision': 0.8117848970251715, 'recall': 0.7936422413793103, 

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/acute-inflammations


Imputed 24 values in column Age
Imputed 16 values in column Trestbps
Imputed 12 values in column Cholesterol
Imputed 25 values in column Max_heart_rate
Imputed 21 values in column Oldpeak

Best cleaning method:
Cleaning score: Cleaner: (PyODIsolationForestOutlierDetection, SklearnImputation): {'roc_auc_score': 0.915948275862069, 'classification_report': {'0': {'precision': 0.7297297297297297, 'recall': 0.9310344827586207, 'f1-score': 0.8181818181818181, 'support': 29}, '1': {'precision': 0.9166666666666666, 'recall': 0.6875, 'f1-score': 0.7857142857142857, 'support': 32}, 'accuracy': 0.8032786885245902, 'macro avg': {'precision': 0.8231981981981982, 'recall': 0.8092672413793103, 'f1-score': 0.801948051948052, 'support': 61}, 'weighted avg': {'precision': 0.8277950081228769, 'recall': 0.8032786885245902, 'f1-score': 0.8011496700021291, 'support': 61}}} 

Cleaning improved the overall score 





INFO:root:0.2079654s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/acute-inflammations
INFO:openml.datasets.dataset:pickle load data acute-inflammations
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: acute-inflammations
Found 5 categorical and 1 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    8.8s finished
  _warn_prf(average, modifier, msg_start, len(result))



Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 24 rows... 

	perturbation: GaussianNoise: {'column': 'V1', 'fraction': 0.15, 'sampling': 'MNAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-scor

  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:CategoricalEncoder for column V2                                found only 67 occurrences of value no
INFO:root:CategoricalEncoder for column V2                                found only 19 occurrences of value yes


Regressor for col: V1/upper reached 0.43926554051039735

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.120436716031803}
Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
Fitting model for column: V2


INFO:root:
INFO:root:Epoch[0] Batch [0-3]	Speed: 1385.72 samples/sec	cross-entropy=0.858397	V2-accuracy=0.671875
INFO:root:Epoch[0] Train-cross-entropy=0.819330
INFO:root:Epoch[0] Train-V2-accuracy=0.635417
INFO:root:Epoch[0] Time cost=0.080
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.646277
INFO:root:Epoch[0] Validation-V2-accuracy=0.875000
INFO:root:Epoch[1] Batch [0-3]	Speed: 1364.57 samples/sec	cross-entropy=0.424263	V2-accuracy=0.859375
INFO:root:Epoch[1] Train-cross-entropy=0.441194
INFO:root:Epoch[1] Train-V2-accuracy=0.843750
INFO:root:Epoch[1] Time cost=0.081
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=0.626115
INFO:root:Epoch[1] Validation-V2-accuracy=0.500000
INFO:root:Epoch[2] Batch [0-3]	Speed: 1173.78 samples/sec	cross-entropy=0.336970	V2-accuracy=0.796875
INFO:root:Epoch[2] Train-cross-entropy=0.361045
INFO:root:Epoch[2] Train-V2-accuracy=0.80

INFO:root:Epoch[20] Train-cross-entropy=0.199446
INFO:root:Epoch[20] Train-V2-accuracy=1.000000
INFO:root:Epoch[20] Time cost=0.094
INFO:root:Saved checkpoint to "imputer_model/model-0020.params"
INFO:root:Epoch[20] Validation-cross-entropy=0.333078
INFO:root:Epoch[20] Validation-V2-accuracy=1.000000
INFO:root:Epoch[21] Batch [0-3]	Speed: 1219.72 samples/sec	cross-entropy=0.158569	V2-accuracy=1.000000
INFO:root:Epoch[21] Train-cross-entropy=0.194850
INFO:root:Epoch[21] Train-V2-accuracy=1.000000
INFO:root:Epoch[21] Time cost=0.085
INFO:root:Saved checkpoint to "imputer_model/model-0021.params"
INFO:root:Epoch[21] Validation-cross-entropy=0.324085
INFO:root:Epoch[21] Validation-V2-accuracy=1.000000
INFO:root:Epoch[22] Batch [0-3]	Speed: 1110.20 samples/sec	cross-entropy=0.154730	V2-accuracy=1.000000
INFO:root:Epoch[22] Train-cross-entropy=0.190370
INFO:root:Epoch[22] Train-V2-accuracy=1.000000
INFO:root:Epoch[22] Time cost=0.089
INFO:root:Saved checkpoint to "imputer_model/model-0022.pa

INFO:root:Epoch[40] Time cost=0.094
INFO:root:Saved checkpoint to "imputer_model/model-0040.params"
INFO:root:Epoch[40] Validation-cross-entropy=0.194519
INFO:root:Epoch[40] Validation-V2-accuracy=1.000000
INFO:root:Epoch[41] Batch [0-3]	Speed: 1027.75 samples/sec	cross-entropy=0.099150	V2-accuracy=1.000000
INFO:root:Epoch[41] Train-cross-entropy=0.125557
INFO:root:Epoch[41] Train-V2-accuracy=1.000000
INFO:root:Epoch[41] Time cost=0.095
INFO:root:Saved checkpoint to "imputer_model/model-0041.params"
INFO:root:Epoch[41] Validation-cross-entropy=0.189627
INFO:root:Epoch[41] Validation-V2-accuracy=1.000000
INFO:root:Epoch[42] Batch [0-3]	Speed: 1178.25 samples/sec	cross-entropy=0.097052	V2-accuracy=1.000000
INFO:root:Epoch[42] Train-cross-entropy=0.123030
INFO:root:Epoch[42] Train-V2-accuracy=1.000000
INFO:root:Epoch[42] Time cost=0.087
INFO:root:Saved checkpoint to "imputer_model/model-0042.params"
INFO:root:Epoch[42] Validation-cross-entropy=0.184981
INFO:root:Epoch[42] Validation-V2-ac

INFO:root:Epoch[60] Validation-cross-entropy=0.121080
INFO:root:Epoch[60] Validation-V2-accuracy=1.000000
INFO:root:Epoch[61] Batch [0-3]	Speed: 989.17 samples/sec	cross-entropy=0.066893	V2-accuracy=1.000000
INFO:root:Epoch[61] Train-cross-entropy=0.086646
INFO:root:Epoch[61] Train-V2-accuracy=1.000000
INFO:root:Epoch[61] Time cost=0.096
INFO:root:Saved checkpoint to "imputer_model/model-0061.params"
INFO:root:Epoch[61] Validation-cross-entropy=0.118675
INFO:root:Epoch[61] Validation-V2-accuracy=1.000000
INFO:root:Epoch[62] Batch [0-3]	Speed: 1299.44 samples/sec	cross-entropy=0.065735	V2-accuracy=1.000000
INFO:root:Epoch[62] Train-cross-entropy=0.085141
INFO:root:Epoch[62] Train-V2-accuracy=1.000000
INFO:root:Epoch[62] Time cost=0.086
INFO:root:Saved checkpoint to "imputer_model/model-0062.params"
INFO:root:Epoch[62] Validation-cross-entropy=0.116318
INFO:root:Epoch[62] Validation-V2-accuracy=1.000000
INFO:root:Epoch[63] Batch [0-3]	Speed: 1104.13 samples/sec	cross-entropy=0.064629	V2-

INFO:root:Epoch[81] Batch [0-3]	Speed: 1004.94 samples/sec	cross-entropy=0.047444	V2-accuracy=1.000000
INFO:root:Epoch[81] Train-cross-entropy=0.063029
INFO:root:Epoch[81] Train-V2-accuracy=1.000000
INFO:root:Epoch[81] Time cost=0.095
INFO:root:Saved checkpoint to "imputer_model/model-0081.params"
INFO:root:Epoch[81] Validation-cross-entropy=0.078092
INFO:root:Epoch[81] Validation-V2-accuracy=1.000000
INFO:root:Epoch[82] Batch [0-3]	Speed: 1211.70 samples/sec	cross-entropy=0.046760	V2-accuracy=1.000000
INFO:root:Epoch[82] Train-cross-entropy=0.062083
INFO:root:Epoch[82] Train-V2-accuracy=1.000000
INFO:root:Epoch[82] Time cost=0.092
INFO:root:Saved checkpoint to "imputer_model/model-0082.params"
INFO:root:Epoch[82] Validation-cross-entropy=0.076678
INFO:root:Epoch[82] Validation-V2-accuracy=1.000000
INFO:root:Epoch[83] Batch [0-3]	Speed: 1046.58 samples/sec	cross-entropy=0.046029	V2-accuracy=1.000000
INFO:root:Epoch[83] Train-cross-entropy=0.061149
INFO:root:Epoch[83] Train-V2-accuracy=

Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V1


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.120436716031803}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.6190476190476191, 'Recall': 0.6190476190476191, 'F1-score': 0.6190476190476191, 'Accuracy': 0.8333333333333334}
Imputation method: MeanModeImputation, Impu

Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V1

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.42105263157894735, 'Recall': 0.38095238095238093, 'F1-score': 0.4, 'Accuracy': 0.6666666666666666}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.8742057500011853}
Cleaner: (PyODPCAOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}

Outlier detection method: PyODCBLOFOutlierDetection, Outlier Det

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.120436716031803}
Cleaner: (PyODSOSOutlierDetection, MeanModeImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
Classifier for col: V2 reached 0.986842105263158
Classifier for col: V3 reached 0.9210526315789473
Classifier for col: V4 reached 0.8947368421052632
Classifier for col: V5 reached 0.8421052631578947
Classifier for col: V6 reached 0.8421

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V1


  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: PyODSOSOutlierDetection, Outlier Detection Score: {'Precision': 0.4375, 'Recall': 0.5, 'F1-score': 0.4666666666666667, 'Accuracy': 0.875}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.120436716031803}
Cleaner: (PyODSOSOutlierDetection, DatawigImputation): {'roc_auc_score': 1.0, 'classification_report': {'no': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'yes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, 'accuracy': 1.0, 'macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}, 'weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 24}}}
Classifier for col: V2 reached 0.9473684210526316
Classifier for col: V3 reached 1.0
Classifier for col: V4 reached 0.9210526315789473
Classifier for col: V5 reached 0.9342105263157894
Classifier for col: V6 reached 0.7763157894736843
Reg

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/hill-valley



Outlier detection method: SklearnOutlierDetection, Outlier Detection Score: {'Precision': 0.6388888888888888, 'Recall': 0.7380952380952381, 'F1-score': 0.6581196581196581, 'Accuracy': 0.7916666666666666}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.6714850812927455}
Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.9166666666666667, 'classification_report': {'no': {'precision': 1.0, 'recall': 0.8333333333333334, 'f1-score': 0.9090909090909091, 'support': 12}, 'yes': {'precision': 0.8571428571428571, 'recall': 1.0, 'f1-score': 0.923076923076923, 'support': 12}, 'accuracy': 0.9166666666666666, 'macro avg': {'precision': 0.9285714285714286, 'recall': 0.9166666666666667, 'f1-score': 0.916083916083916, 'support': 24}, 'weighted avg': {'precision': 0.9285714285714285, 'recall': 0.9166666666666666, 'f1-score': 0.9160839160839161, 'support': 24}}}

Best cleaning meth

INFO:root:0.2458034s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/hill-valley
INFO:openml.datasets.dataset:pickle load data hill-valley
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Dataset: hill-valley
Found 0 categorical and 100 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   29.0s finished



Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V62', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V23', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V19', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V94', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 243 rows... 

	perturbation: GaussianNoise: {'column': 'V85', 'fraction': 0.15, 'sampling': 'MNAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_score': 0.5369692036358703, 'classification_report': {'0': {'precision': 0.5098039215686274, 'recall': 0.4444444444444444, 'f1-score': 0.4748858447488584, 'support': 117}, '1': {'precision': 0.5390070921985816, 'recall': 0.6031746031746031, 'f1-score'

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.42592592592592593, 'Recall': 0.5, 'F1-score': 0.46, 'Accuracy': 0.8518518518518519}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 856051217.1060634}
Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.5381223714557049, 'classification_report': {'0': {'precision': 0.5098039215686274, 'recall': 0.4444444444444444, 'f1-score': 0.4748858447488584, 'support': 117}, '1': {'precision': 0.5390070921985816, 'recall': 0.6031746031746031, 'f1-score': 0.5692883895131086, 'support': 126}, 'accuracy': 0.5267489711934157, 'macro avg': {'precision': 0.5244055068836044, 'recall': 0.5238095238095237, 'f1-score': 0.5220871171309835, 'support': 243}, 'weighted avg': {'precision': 0.5249463063397147, 'recall': 0.5267489711934157, 'f1-score': 0.5238353124043955, 'support': 243}}}
Regressor for col: V1/l

Regressor for col: V43/median reached 0.9819432292772436
Regressor for col: V43/upper reached 0.9840793831203847
Regressor for col: V44/lower reached 0.004240318318935232
Regressor for col: V44/median reached 0.9806965418481556
Regressor for col: V44/upper reached 0.9856065575953392
Regressor for col: V45/lower reached -0.0007592611456800884
Regressor for col: V45/median reached 0.9776498630014125
Regressor for col: V45/upper reached 0.982842298768926
Regressor for col: V46/lower reached 0.0030238935625656604
Regressor for col: V46/median reached 0.9839919095917453
Regressor for col: V46/upper reached 0.9888579500529207
Regressor for col: V47/lower reached -0.009727161179797328
Regressor for col: V47/median reached 0.9821829021861173
Regressor for col: V47/upper reached 0.9867338690958904
Regressor for col: V48/lower reached 0.0019981487759401246
Regressor for col: V48/median reached 0.976848567977659
Regressor for col: V48/upper reached 0.9867512366630407
Regressor for col: V49/lower 

Regressor for col: V91/median reached 0.9798305813821957
Regressor for col: V91/upper reached 0.9846522967974078
Regressor for col: V92/lower reached -0.018899662350772006
Regressor for col: V92/median reached 0.9802325219272481
Regressor for col: V92/upper reached 0.9847693325631727
Regressor for col: V93/lower reached -0.009699530267371859
Regressor for col: V93/median reached 0.984453012647541
Regressor for col: V93/upper reached 0.9838948471643078
Regressor for col: V94/lower reached -0.010236816829831685
Regressor for col: V94/median reached 0.9832578763205994
Regressor for col: V94/upper reached 0.9867963707143294
Regressor for col: V95/lower reached -0.021513675829908263
Regressor for col: V95/median reached 0.9770959498760721
Regressor for col: V95/upper reached 0.9860405347435076
Regressor for col: V96/lower reached -0.007160159145449596
Regressor for col: V96/median reached 0.9812462251725956
Regressor for col: V96/upper reached 0.9834064944182725
Regressor for col: V97/lower

  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:
INFO:root:Epoch[0] Batch [0-28]	Speed: 12989.50 samples/sec	cross-entropy=9.103411	V1-accuracy=0.000000
INFO:root:Epoch[0] Train-cross-entropy=5.059879
INFO:root:Epoch[0] Train-V1-accuracy=0.000000
INFO:root:Epoch[0] Time cost=0.070
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.301719
INFO:root:Epoch[0] Validation-V1-accuracy=0.000000
INFO:root:Epoch[1] Batch [0-28]	Speed: 11584.05 samples/sec	cross-entropy=0.073311	V1-accuracy=0.000000
INFO:root:Epoch[1] Train-cross-entropy=0.076012
INFO:root:Epoch[1] Train-V1-accuracy=0.000000
INFO:root:Epoch[1] Time cost=0.079
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=0.306025
INFO:root:Epoch[1] Validation-V1-accuracy=0.000000
INFO:root:Epoch[2] Batch [0-28]	Speed: 9427.20 samples/sec	cross-entropy=0.048466	V1-accuracy=0.000000
INFO:root:Epoch[2] Train-cros

INFO:root:Epoch[19] Train-V1-accuracy=0.000000
INFO:root:Epoch[19] Time cost=0.093
INFO:root:Saved checkpoint to "imputer_model/model-0019.params"
INFO:root:Epoch[19] Validation-cross-entropy=0.199708
INFO:root:Epoch[19] Validation-V1-accuracy=0.000000
INFO:root:Epoch[20] Batch [0-28]	Speed: 10345.70 samples/sec	cross-entropy=0.067078	V1-accuracy=0.000000
INFO:root:Epoch[20] Train-cross-entropy=0.072184
INFO:root:Epoch[20] Train-V1-accuracy=0.000000
INFO:root:Epoch[20] Time cost=0.092
INFO:root:Saved checkpoint to "imputer_model/model-0020.params"
INFO:root:Epoch[20] Validation-cross-entropy=0.189388
INFO:root:Epoch[20] Validation-V1-accuracy=0.000000
INFO:root:Epoch[21] Batch [0-28]	Speed: 12578.22 samples/sec	cross-entropy=0.064511	V1-accuracy=0.000000
INFO:root:Epoch[21] Train-cross-entropy=0.071216
INFO:root:Epoch[21] Train-V1-accuracy=0.000000
INFO:root:Epoch[21] Time cost=0.080
INFO:root:Saved checkpoint to "imputer_model/model-0021.params"
INFO:root:Epoch[21] Validation-cross-en

INFO:root:Saved checkpoint to "imputer_model/model-0039.params"
INFO:root:Epoch[39] Validation-cross-entropy=0.152251
INFO:root:Epoch[39] Validation-V1-accuracy=0.000000
INFO:root:Epoch[40] Batch [0-28]	Speed: 12567.46 samples/sec	cross-entropy=0.043380	V1-accuracy=0.000000
INFO:root:Epoch[40] Train-cross-entropy=0.043701
INFO:root:Epoch[40] Train-V1-accuracy=0.000000
INFO:root:Epoch[40] Time cost=0.078
INFO:root:Saved checkpoint to "imputer_model/model-0040.params"
INFO:root:Epoch[40] Validation-cross-entropy=0.146563
INFO:root:Epoch[40] Validation-V1-accuracy=0.000000
INFO:root:Epoch[41] Batch [0-28]	Speed: 11862.98 samples/sec	cross-entropy=0.041919	V1-accuracy=0.000000
INFO:root:Epoch[41] Train-cross-entropy=0.041856
INFO:root:Epoch[41] Train-V1-accuracy=0.000000
INFO:root:Epoch[41] Time cost=0.081
INFO:root:Saved checkpoint to "imputer_model/model-0041.params"
INFO:root:Epoch[41] Validation-cross-entropy=0.143526
INFO:root:Epoch[41] Validation-V1-accuracy=0.000000
INFO:root:Epoch[

Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for column: V34
Fitting model for 

  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.42592592592592593, 'Recall': 0.5, 'F1-score': 0.46, 'Accuracy': 0.8518518518518519}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 856051217.1060634}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5381223714557049, 'classification_report': {'0': {'precision': 0.5098039215686274, 'recall': 0.4444444444444444, 'f1-score': 0.4748858447488584, 'support': 117}, '1': {'precision': 0.5390070921985816, 'recall': 0.6031746031746031, 'f1-score': 0.5692883895131086, 'support': 126}, 'accuracy': 0.5267489711934157, 'macro avg': {'precision': 0.5244055068836044, 'recall': 0.5238095238095237, 'f1-score': 0.5220871171309835, 'support': 243}, 'weighted avg': {'precision': 0.5249463063397147, 'recall': 0.5267489711934157, 'f1-score': 0.5238353124043955, 'support': 243}}}

Outlier detection method

Regressor for col: V37/upper reached 0.9874627620661733
Regressor for col: V38/lower reached 0.018599936949284523
Regressor for col: V38/median reached 0.9792763630964259
Regressor for col: V38/upper reached 0.9871170417670911
Regressor for col: V39/lower reached 0.003472771222534754
Regressor for col: V39/median reached 0.9818332358256416
Regressor for col: V39/upper reached 0.9830188248044878
Regressor for col: V40/lower reached 0.00805131097784667
Regressor for col: V40/median reached 0.9862703986088992
Regressor for col: V40/upper reached 0.9881432979218918
Regressor for col: V41/lower reached 0.01161418328679753
Regressor for col: V41/median reached 0.9794868759607084
Regressor for col: V41/upper reached 0.9841122921146438
Regressor for col: V42/lower reached 0.007408879673761306
Regressor for col: V42/median reached 0.976157685510151
Regressor for col: V42/upper reached 0.9834928463191015
Regressor for col: V43/lower reached 0.005441431508807271
Regressor for col: V43/median reac

Regressor for col: V85/upper reached 0.9850902583505804
Regressor for col: V86/lower reached -0.0017265292472734561
Regressor for col: V86/median reached 0.983494047617425
Regressor for col: V86/upper reached 0.9776994437233726
Regressor for col: V87/lower reached 0.0001708838964943893
Regressor for col: V87/median reached 0.9790651182557012
Regressor for col: V87/upper reached 0.9854045631720934
Regressor for col: V88/lower reached -0.0025869074112889745
Regressor for col: V88/median reached 0.978134784033108
Regressor for col: V88/upper reached 0.984602368868047
Regressor for col: V89/lower reached 0.0017265635782626187
Regressor for col: V89/median reached 0.9857482149664556
Regressor for col: V89/upper reached 0.9922216548642575
Regressor for col: V90/lower reached -0.0009675925325359214
Regressor for col: V90/median reached 0.9839932044013154
Regressor for col: V90/upper reached 0.9842480362966246
Regressor for col: V91/lower reached -0.003925589265781959
Regressor for col: V91/me

Fitting model for column: V47
Fitting model for column: V48
Fitting model for column: V49
Fitting model for column: V50
Fitting model for column: V51
Fitting model for column: V52
Fitting model for column: V53
Fitting model for column: V54
Fitting model for column: V55
Fitting model for column: V56
Fitting model for column: V57
Fitting model for column: V58
Fitting model for column: V59
Fitting model for column: V60
Fitting model for column: V61
Fitting model for column: V62
Fitting model for column: V63
Fitting model for column: V64
Fitting model for column: V65
Fitting model for column: V66
Fitting model for column: V67
Fitting model for column: V68
Fitting model for column: V69
Fitting model for column: V70
Fitting model for column: V71
Fitting model for column: V72
Fitting model for column: V73
Fitting model for column: V74
Fitting model for column: V75
Fitting model for column: V76
Fitting model for column: V77
Fitting model for column: V78
Fitting model for column: V79
Fitting mo

Regressor for col: V27/upper reached 0.9876213879532867
Regressor for col: V28/lower reached 0.021218520080321594
Regressor for col: V28/median reached 0.9806492242340346
Regressor for col: V28/upper reached 0.9814391779604085
Regressor for col: V29/lower reached -0.0027957229605298783
Regressor for col: V29/median reached 0.987496987273435
Regressor for col: V29/upper reached 0.9899152899861124
Regressor for col: V30/lower reached -0.0017216037148469199
Regressor for col: V30/median reached 0.9834066781214392
Regressor for col: V30/upper reached 0.9873085020859563
Regressor for col: V31/lower reached 0.024868659145683203
Regressor for col: V31/median reached 0.9883940435320002
Regressor for col: V31/upper reached 0.9846479971157802
Regressor for col: V32/lower reached 0.02050740808356271
Regressor for col: V32/median reached 0.983078663153227
Regressor for col: V32/upper reached 0.9822359896003108
Regressor for col: V33/lower reached 0.013133329205253494
Regressor for col: V33/median 

Regressor for col: V76/lower reached 0.010587956148380762
Regressor for col: V76/median reached 0.988336487894105
Regressor for col: V76/upper reached 0.9890284512987185
Regressor for col: V77/lower reached 0.017030267793697262
Regressor for col: V77/median reached 0.9873326559303033
Regressor for col: V77/upper reached 0.9888678892595133
Regressor for col: V78/lower reached 0.02030966190351391
Regressor for col: V78/median reached 0.9886282817945091
Regressor for col: V78/upper reached 0.9898617232748363
Regressor for col: V79/lower reached 0.009230374894824145
Regressor for col: V79/median reached 0.9879237400011162
Regressor for col: V79/upper reached 0.987996858149112
Regressor for col: V80/lower reached 0.0113010393288751
Regressor for col: V80/median reached 0.9866907773894237
Regressor for col: V80/upper reached 0.9881282481893441
Regressor for col: V81/lower reached 0.01221390437463754
Regressor for col: V81/median reached 0.9883247845883905
Regressor for col: V81/upper reached

Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for c

Regressor for col: V19/upper reached 0.980602055854646
Regressor for col: V20/lower reached 0.020655074456957245
Regressor for col: V20/median reached 0.9621943561080614
Regressor for col: V20/upper reached 0.9698883465246334
Regressor for col: V21/lower reached 0.008577870115635822
Regressor for col: V21/median reached 0.9682786993930594
Regressor for col: V21/upper reached 0.9688148248844218
Regressor for col: V22/lower reached 0.02468699654107298
Regressor for col: V22/median reached 0.9645602634134927
Regressor for col: V22/upper reached 0.9796274756094081
Regressor for col: V23/lower reached 0.028802672883340097
Regressor for col: V23/median reached 0.9712739702643909
Regressor for col: V23/upper reached 0.9708103786017046
Regressor for col: V24/lower reached 0.012937693319478805
Regressor for col: V24/median reached 0.9696160795322755
Regressor for col: V24/upper reached 0.972112722747497
Regressor for col: V25/lower reached 0.021535708779076346
Regressor for col: V25/median reac

Regressor for col: V68/lower reached 0.003497112951586112
Regressor for col: V68/median reached 0.9772845255792575
Regressor for col: V68/upper reached 0.9874848242890186
Regressor for col: V69/lower reached 0.01076490925684892
Regressor for col: V69/median reached 0.9801633336467411
Regressor for col: V69/upper reached 0.9859559864438344
Regressor for col: V70/lower reached 0.0207668319482624
Regressor for col: V70/median reached 0.9725187204613476
Regressor for col: V70/upper reached 0.9853592541550504
Regressor for col: V71/lower reached 0.014076111991057572
Regressor for col: V71/median reached 0.9832917607704998
Regressor for col: V71/upper reached 0.9836210798906901
Regressor for col: V72/lower reached 0.025474056420029145
Regressor for col: V72/median reached 0.9772515497549532
Regressor for col: V72/upper reached 0.9867321776506454
Regressor for col: V73/lower reached 0.025028148791168603
Regressor for col: V73/median reached 0.9768387883311407
Regressor for col: V73/upper reac

Cleaner: (PyODPCAOutlierDetection, SklearnImputation): {'roc_auc_score': 0.5105820105820106, 'classification_report': {'0': {'precision': 0.5068493150684932, 'recall': 0.3162393162393162, 'f1-score': 0.3894736842105263, 'support': 117}, '1': {'precision': 0.5294117647058824, 'recall': 0.7142857142857143, 'f1-score': 0.6081081081081081, 'support': 126}, 'accuracy': 0.522633744855967, 'macro avg': {'precision': 0.5181305398871878, 'recall': 0.5152625152625152, 'f1-score': 0.4987908961593172, 'support': 243}, 'weighted avg': {'precision': 0.518548363028621, 'recall': 0.522633744855967, 'f1-score': 0.5028396817870502, 'support': 243}}}
Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting m

Regressor for col: V16/lower reached 0.0016511158556203887
Regressor for col: V16/median reached 0.970522479182863
Regressor for col: V16/upper reached 0.9805643830860505
Regressor for col: V17/lower reached 0.004074936054971612
Regressor for col: V17/median reached 0.9690721364215452
Regressor for col: V17/upper reached 0.9852330754813109
Regressor for col: V18/lower reached 0.0008960274047399835
Regressor for col: V18/median reached 0.966427638904438
Regressor for col: V18/upper reached 0.9861076984159332
Regressor for col: V19/lower reached 0.0025135993721275857
Regressor for col: V19/median reached 0.9813655372818257
Regressor for col: V19/upper reached 0.9862213806364746
Regressor for col: V20/lower reached 0.00122428157845772
Regressor for col: V20/median reached 0.98022407563755
Regressor for col: V20/upper reached 0.9825961914665255
Regressor for col: V21/lower reached -0.006499474670641101
Regressor for col: V21/median reached 0.9730600382135688
Regressor for col: V21/upper re

Regressor for col: V64/lower reached 0.007025053679675598
Regressor for col: V64/median reached 0.9780008059775851
Regressor for col: V64/upper reached 0.9875003974474438
Regressor for col: V65/lower reached -0.003909297496540232
Regressor for col: V65/median reached 0.9798507776299983
Regressor for col: V65/upper reached 0.9885210718569051
Regressor for col: V66/lower reached -0.0077382500283616484
Regressor for col: V66/median reached 0.9850342990400511
Regressor for col: V66/upper reached 0.9894195987383644
Regressor for col: V67/lower reached -0.006320963147132708
Regressor for col: V67/median reached 0.9855016483248231
Regressor for col: V67/upper reached 0.9886508520929567
Regressor for col: V68/lower reached -0.0034037463767571108
Regressor for col: V68/median reached 0.9847074707447566
Regressor for col: V68/upper reached 0.9888111790107978
Regressor for col: V69/lower reached -0.004486136480342906
Regressor for col: V69/median reached 0.9817959450425673
Regressor for col: V69/

Cleaner: (PyODCBLOFOutlierDetection, SklearnImputation): {'roc_auc_score': 0.5076312576312577, 'classification_report': {'0': {'precision': 0.488, 'recall': 0.5213675213675214, 'f1-score': 0.5041322314049588, 'support': 117}, '1': {'precision': 0.5254237288135594, 'recall': 0.49206349206349204, 'f1-score': 0.5081967213114755, 'support': 126}, 'accuracy': 0.5061728395061729, 'macro avg': {'precision': 0.5067118644067796, 'recall': 0.5067155067155067, 'f1-score': 0.5061644763582172, 'support': 243}, 'weighted avg': {'precision': 0.5074048964218456, 'recall': 0.5061728395061729, 'f1-score': 0.5062397446898192, 'support': 243}}}
Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model fo

Regressor for col: V16/lower reached 0.032319368090012124
Regressor for col: V16/median reached 0.9776837500516049
Regressor for col: V16/upper reached 0.9858766764014122
Regressor for col: V17/lower reached 0.03577245002593238
Regressor for col: V17/median reached 0.9749775658498683
Regressor for col: V17/upper reached 0.9866514201921854
Regressor for col: V18/lower reached 0.03147803433779295
Regressor for col: V18/median reached 0.9648714457764535
Regressor for col: V18/upper reached 0.9799772232733748
Regressor for col: V19/lower reached 0.03344310521196431
Regressor for col: V19/median reached 0.9806286595392071
Regressor for col: V19/upper reached 0.9842639584152658
Regressor for col: V20/lower reached 0.040491919986651304
Regressor for col: V20/median reached 0.9757587254057574
Regressor for col: V20/upper reached 0.9815870839349911
Regressor for col: V21/lower reached 0.03703592262620786
Regressor for col: V21/median reached 0.9725253080846021
Regressor for col: V21/upper reach

Regressor for col: V64/median reached 0.9828667403509883
Regressor for col: V64/upper reached 0.987421016173946
Regressor for col: V65/lower reached 0.03503092847946432
Regressor for col: V65/median reached 0.9826091537165412
Regressor for col: V65/upper reached 0.9900592913088724
Regressor for col: V66/lower reached 0.04161231528849363
Regressor for col: V66/median reached 0.9891882917648347
Regressor for col: V66/upper reached 0.9879717751657086
Regressor for col: V67/lower reached 0.031649618600863794
Regressor for col: V67/median reached 0.9893217718825469
Regressor for col: V67/upper reached 0.9911376793662321
Regressor for col: V68/lower reached 0.040620216805581644
Regressor for col: V68/median reached 0.9872716270381344
Regressor for col: V68/upper reached 0.9900610366670108
Regressor for col: V69/lower reached 0.0395131844973754
Regressor for col: V69/median reached 0.9813275276033657
Regressor for col: V69/upper reached 0.9881582064772962
Regressor for col: V70/lower reached 

Cleaner: (PyODSOSOutlierDetection, SklearnImputation): {'roc_auc_score': 0.5353412020078686, 'classification_report': {'0': {'precision': 0.5092592592592593, 'recall': 0.4700854700854701, 'f1-score': 0.4888888888888889, 'support': 117}, '1': {'precision': 0.5407407407407407, 'recall': 0.5793650793650794, 'f1-score': 0.5593869731800766, 'support': 126}, 'accuracy': 0.5267489711934157, 'macro avg': {'precision': 0.525, 'recall': 0.5247252747252747, 'f1-score': 0.5241379310344827, 'support': 243}, 'weighted avg': {'precision': 0.5255829903978052, 'recall': 0.5267489711934157, 'f1-score': 0.5254434511139492, 'support': 243}}}
Fitting model for column: V1
Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for c

Regressor for col: V22/median reached 0.9693921338630025
Regressor for col: V22/upper reached 0.9865972340522258
Regressor for col: V23/lower reached 0.026947925972370224
Regressor for col: V23/median reached 0.9669881743055239
Regressor for col: V23/upper reached 0.9871820709675212
Regressor for col: V24/lower reached 0.02580887824853617
Regressor for col: V24/median reached 0.9671763188970803
Regressor for col: V24/upper reached 0.9889950831720613
Regressor for col: V25/lower reached 0.03429498284751725
Regressor for col: V25/median reached 0.9688153850399062
Regressor for col: V25/upper reached 0.9841735481221139
Regressor for col: V26/lower reached 0.03240710722446083
Regressor for col: V26/median reached 0.9748459231636588
Regressor for col: V26/upper reached 0.9894077716344203
Regressor for col: V27/lower reached 0.022662529353156324
Regressor for col: V27/median reached 0.9768527797036751
Regressor for col: V27/upper reached 0.9882277116669854
Regressor for col: V28/lower reache

Regressor for col: V70/upper reached 0.9860320716003792
Regressor for col: V71/lower reached 0.017411285195424564
Regressor for col: V71/median reached 0.9769224623606589
Regressor for col: V71/upper reached 0.9895233836577673
Regressor for col: V72/lower reached 0.02051878719232264
Regressor for col: V72/median reached 0.9726978695797716
Regressor for col: V72/upper reached 0.9884176386703729
Regressor for col: V73/lower reached 0.03526850227914563
Regressor for col: V73/median reached 0.9734093774467214
Regressor for col: V73/upper reached 0.9833464893561878
Regressor for col: V74/lower reached 0.030071309258638435
Regressor for col: V74/median reached 0.9712138740140874
Regressor for col: V74/upper reached 0.9873848689255864
Regressor for col: V75/lower reached 0.019140755846003665
Regressor for col: V75/median reached 0.9785374116741155
Regressor for col: V75/upper reached 0.9872994173995864
Regressor for col: V76/lower reached 0.023573899475242244
Regressor for col: V76/median rea

Column V71 contained 0 nans before, now 106
Column V72 contained 0 nans before, now 110
Column V73 contained 0 nans before, now 111
Column V74 contained 0 nans before, now 122
Column V75 contained 0 nans before, now 114
Column V76 contained 0 nans before, now 108
Column V77 contained 0 nans before, now 114
Column V78 contained 0 nans before, now 127
Column V79 contained 0 nans before, now 113
Column V80 contained 0 nans before, now 118
Column V81 contained 0 nans before, now 117
Column V82 contained 0 nans before, now 89
Column V83 contained 0 nans before, now 107
Column V84 contained 0 nans before, now 107
Column V85 contained 0 nans before, now 133
Column V86 contained 0 nans before, now 106
Column V87 contained 0 nans before, now 120
Column V88 contained 0 nans before, now 106
Column V89 contained 0 nans before, now 107
Column V90 contained 0 nans before, now 120
Column V91 contained 0 nans before, now 104
Column V92 contained 0 nans before, now 133
Column V93 contained 0 nans befor

Regressor for col: V35/upper reached 0.9867072562558379
Regressor for col: V36/lower reached -0.018272832071544398
Regressor for col: V36/median reached 0.9827085920006748
Regressor for col: V36/upper reached 0.9853529502743441
Regressor for col: V37/lower reached -0.022217418131506572
Regressor for col: V37/median reached 0.9824482197751465
Regressor for col: V37/upper reached 0.9879872578281872
Regressor for col: V38/lower reached -0.01524379626928729
Regressor for col: V38/median reached 0.9826491504720309
Regressor for col: V38/upper reached 0.9881451652171278
Regressor for col: V39/lower reached -0.016038446888050673
Regressor for col: V39/median reached 0.9843046577644188
Regressor for col: V39/upper reached 0.9872108173475007
Regressor for col: V40/lower reached -0.014384288526097189
Regressor for col: V40/median reached 0.9819947527634543
Regressor for col: V40/upper reached 0.986655132125531
Regressor for col: V41/lower reached -0.010841695593630242
Regressor for col: V41/medi

Regressor for col: V83/upper reached 0.9860378536883903
Regressor for col: V84/lower reached -0.021796861040424664
Regressor for col: V84/median reached 0.9897386448629901
Regressor for col: V84/upper reached 0.9870302336521849
Regressor for col: V85/lower reached -0.013019379186933633
Regressor for col: V85/median reached 0.9775651604449287
Regressor for col: V85/upper reached 0.9822016288206892
Regressor for col: V86/lower reached -0.014317107787571226
Regressor for col: V86/median reached 0.9864095155391737
Regressor for col: V86/upper reached 0.98773847139138
Regressor for col: V87/lower reached -0.011768759745557467
Regressor for col: V87/median reached 0.9869358271049167
Regressor for col: V87/upper reached 0.9837530514589796
Regressor for col: V88/lower reached -0.022962105121627996
Regressor for col: V88/median reached 0.9769724325719762
Regressor for col: V88/upper reached 0.9699545610240083
Regressor for col: V89/lower reached -0.015659297288629692
Regressor for col: V89/medi

Imputed 104 values in column V59
Imputed 104 values in column V60
Imputed 93 values in column V61
Imputed 110 values in column V62
Imputed 94 values in column V63
Imputed 104 values in column V64
Imputed 99 values in column V65
Imputed 100 values in column V66
Imputed 112 values in column V67
Imputed 108 values in column V68
Imputed 105 values in column V69
Imputed 117 values in column V70
Imputed 112 values in column V71
Imputed 120 values in column V72
Imputed 116 values in column V73
Imputed 102 values in column V74
Imputed 100 values in column V75
Imputed 120 values in column V76
Imputed 122 values in column V77
Imputed 122 values in column V78
Imputed 122 values in column V79
Imputed 134 values in column V80
Imputed 97 values in column V81
Imputed 122 values in column V82
Imputed 101 values in column V83
Imputed 98 values in column V84
Imputed 137 values in column V85
Imputed 101 values in column V86
Imputed 100 values in column V87
Imputed 93 values in column V88
Imputed 120 valu

Regressor for col: V35/median reached 0.985934489965596
Regressor for col: V35/upper reached 0.9903740869439315
Regressor for col: V36/lower reached 0.008534679882763298
Regressor for col: V36/median reached 0.9755109032528277
Regressor for col: V36/upper reached 0.9803911300900651
Regressor for col: V37/lower reached 0.009121382215754315
Regressor for col: V37/median reached 0.9838318709912761
Regressor for col: V37/upper reached 0.9884512074514344
Regressor for col: V38/lower reached 0.030311316442663216
Regressor for col: V38/median reached 0.9800091026484813
Regressor for col: V38/upper reached 0.9865786175328761
Regressor for col: V39/lower reached 0.004444886230498657
Regressor for col: V39/median reached 0.9879252113188977
Regressor for col: V39/upper reached 0.9883986564843332
Regressor for col: V40/lower reached 0.011556935990730377
Regressor for col: V40/median reached 0.9868759612642092
Regressor for col: V40/upper reached 0.9863942795215641
Regressor for col: V41/lower reac

Regressor for col: V83/median reached 0.9813151905154734
Regressor for col: V83/upper reached 0.9892789239023576
Regressor for col: V84/lower reached 0.015310615569619601
Regressor for col: V84/median reached 0.9886190520684995
Regressor for col: V84/upper reached 0.9897027684853472
Regressor for col: V85/lower reached 0.0013440727925282392
Regressor for col: V85/median reached 0.9851645583543984
Regressor for col: V85/upper reached 0.9831403721447063
Regressor for col: V86/lower reached 0.013421147264203515
Regressor for col: V86/median reached 0.984266596315233
Regressor for col: V86/upper reached 0.9834251224770725
Regressor for col: V87/lower reached 0.012438656554407124
Regressor for col: V87/median reached 0.9826902074776702
Regressor for col: V87/upper reached 0.9811348253577512
Regressor for col: V88/lower reached 0.018435098482515733
Regressor for col: V88/median reached 0.9805308664181687
Regressor for col: V88/upper reached 0.9860230704692416
Regressor for col: V89/lower rea

Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for column: V34
Fitting model for column: V35
Fitting model for column: V36
Fitting model for column: V37
Fitting model for column: V38
Fitting model for column: V39
Fitting model for column: V40
Fitting model for column: V41
Fitting model for column: V42
Fitting model for column: V43
Fitting model for column: V44
Fitting model for column: V45
Fitting model for column: V46
Fitting model for column: V47
Fitting model for column: V48
Fitting model for column: V49
Fitting model for column: V50
Fitting model for column: V51
Fitting model for column: V52
Fitting model for column: V53
Fitting model for column: V54
Fitting model for column: V55
Fitting model for column: V56
Fitting model for column: V57
Fitting model for column: V58
Fitting model for column: V59
Fitting model for column: V60
Fitting mo

Regressor for col: V30/median reached 0.9822284426867648
Regressor for col: V30/upper reached 0.9903593159015125
Regressor for col: V31/lower reached 0.004760141798470341
Regressor for col: V31/median reached 0.9823532449616244
Regressor for col: V31/upper reached 0.9850335784757092
Regressor for col: V32/lower reached 0.00810175813748698
Regressor for col: V32/median reached 0.9811946426276941
Regressor for col: V32/upper reached 0.9855533444175364
Regressor for col: V33/lower reached 0.014571939335650153
Regressor for col: V33/median reached 0.98497081583878
Regressor for col: V33/upper reached 0.9888811277118265
Regressor for col: V34/lower reached 0.020127912639132273
Regressor for col: V34/median reached 0.9861399156001329
Regressor for col: V34/upper reached 0.9853622595931121
Regressor for col: V35/lower reached 0.015154609174677258
Regressor for col: V35/median reached 0.9893389619627541
Regressor for col: V35/upper reached 0.9903303731905071
Regressor for col: V36/lower reache

Regressor for col: V78/median reached 0.9874761532079722
Regressor for col: V78/upper reached 0.9878421349393576
Regressor for col: V79/lower reached 0.0111897990749808
Regressor for col: V79/median reached 0.9904962489559608
Regressor for col: V79/upper reached 0.9893794331590886
Regressor for col: V80/lower reached 0.016137643364590615
Regressor for col: V80/median reached 0.988960350644432
Regressor for col: V80/upper reached 0.9868493530000704
Regressor for col: V81/lower reached 0.011469737367351907
Regressor for col: V81/median reached 0.9911126621064915
Regressor for col: V81/upper reached 0.9878046228325421
Regressor for col: V82/lower reached 0.0035154293133102255
Regressor for col: V82/median reached 0.9854726477911571
Regressor for col: V82/upper reached 0.9845157588219924
Regressor for col: V83/lower reached 0.007708688681978659
Regressor for col: V83/median reached 0.9872022152859223
Regressor for col: V83/upper reached 0.984405513827181
Regressor for col: V84/lower reache

Fitting model for column: V2
Fitting model for column: V3
Fitting model for column: V4
Fitting model for column: V5
Fitting model for column: V6
Fitting model for column: V7
Fitting model for column: V8
Fitting model for column: V9
Fitting model for column: V10
Fitting model for column: V11
Fitting model for column: V12
Fitting model for column: V13
Fitting model for column: V14
Fitting model for column: V15
Fitting model for column: V16
Fitting model for column: V17
Fitting model for column: V18
Fitting model for column: V19
Fitting model for column: V20
Fitting model for column: V21
Fitting model for column: V22
Fitting model for column: V23
Fitting model for column: V24
Fitting model for column: V25
Fitting model for column: V26
Fitting model for column: V27
Fitting model for column: V28
Fitting model for column: V29
Fitting model for column: V30
Fitting model for column: V31
Fitting model for column: V32
Fitting model for column: V33
Fitting model for column: V34
Fitting model for 

INFO:root:Starting [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery



Best cleaning method:
Cleaning score: Cleaner: (SklearnOutlierDetection, DatawigImputation): {'roc_auc_score': 0.5698005698005698, 'classification_report': {'0': {'precision': 0.5182481751824818, 'recall': 0.6068376068376068, 'f1-score': 0.5590551181102362, 'support': 117}, '1': {'precision': 0.5660377358490566, 'recall': 0.47619047619047616, 'f1-score': 0.5172413793103449, 'support': 126}, 'accuracy': 0.5390946502057613, 'macro avg': {'precision': 0.5421429555157692, 'recall': 0.5415140415140415, 'f1-score': 0.5381482487102905, 'support': 243}, 'weighted avg': {'precision': 0.5430279473799651, 'recall': 0.5390946502057613, 'f1-score': 0.5373739202139962, 'support': 243}}} 

Cleaning improved the overall score 





INFO:root:0.3095865s taken for [get] request for the URL https://www.openml.org/api/v1/xml/data/list/limit/10000/offset/0/status/active/data_name/thoracic_surgery
INFO:openml.datasets.dataset:pickle load data thoracic_surgery
Level 25:autogluon.core.utils.utils:No path specified. Models will be saved in: "AutogluonModels/ag-20210419_050547/"
INFO:autogluon.tabular.learner.default_learner:Beginning AutoGluon training ...
INFO:autogluon.tabular.learner.default_learner:AutoGluon will save models to "AutogluonModels/ag-20210419_050547/"
INFO:autogluon.tabular.learner.default_learner:AutoGluon Version:  0.1.0
INFO:autogluon.tabular.learner.default_learner:Train Data Rows:    376
INFO:autogluon.tabular.learner.default_learner:Train Data Columns: 16
INFO:autogluon.tabular.learner.default_learner:Preprocessing data ...
Level 25:autogluon.core.utils.utils:AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
INFO:autogluon.core.utils.utils:	2 uni

Dataset: thoracic_surgery
Found 13 categorical and 3 numeric features 



INFO:autogluon.tabular.trainer.auto_trainer:Automatically generating train/validation split with holdout_frac=0.2, Train Rows: 300, Val Rows: 76
INFO:autogluon.tabular.trainer.abstract_trainer:Fitting model: RandomForestGini ...
INFO:autogluon.tabular.trainer.abstract_trainer:	0.8684	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_trainer:	0.67s	 = Training runtime
INFO:autogluon.tabular.trainer.abstract_trainer:	0.08s	 = Validation runtime
INFO:autogluon.tabular.trainer.abstract_trainer:Fitting model: RandomForestEntr ...
INFO:autogluon.tabular.trainer.abstract_trainer:	0.8684	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_trainer:	0.63s	 = Training runtime
INFO:autogluon.tabular.trainer.abstract_trainer:	0.07s	 = Validation runtime
INFO:autogluon.tabular.trainer.abstract_trainer:Fitting model: ExtraTreesGini ...
INFO:autogluon.tabular.trainer.abstract_trainer:	0.8684	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_traine

Epoch 27: early stopping
█

INFO:autogluon.tabular.trainer.abstract_trainer:	0.8553	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_trainer:	11.79s	 = Training runtime
INFO:autogluon.tabular.trainer.abstract_trainer:	0.18s	 = Validation runtime
INFO:autogluon.tabular.trainer.abstract_trainer:Fitting model: LightGBMLarge ...
INFO:autogluon.tabular.trainer.abstract_trainer:	0.8553	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_trainer:	0.94s	 = Training runtime
INFO:autogluon.tabular.trainer.abstract_trainer:	0.02s	 = Validation runtime


█

INFO:autogluon.tabular.trainer.abstract_trainer:Fitting model: WeightedEnsemble_L2 ...
INFO:autogluon.tabular.trainer.abstract_trainer:	0.8684	 = Validation accuracy score
INFO:autogluon.tabular.trainer.abstract_trainer:	0.32s	 = Training runtime
INFO:autogluon.tabular.trainer.abstract_trainer:	0.0s	 = Validation runtime
INFO:autogluon.tabular.learner.default_learner:AutoGluon training complete, total runtime = 19.32s ...
INFO:root:TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20210419_050547/")
INFO:autogluon.tabular.learner.abstract_learner:Evaluation: accuracy on test data: 0.8297872340425532
INFO:autogluon.tabular.learner.abstract_learner:Evaluations on test data:
INFO:autogluon.tabular.learner.abstract_learner:{
    "accuracy": 0.8297872340425532,
    "accuracy_score": 0.8297872340425532,
    "balanced_accuracy_score": 0.5,
    "matthews_corrcoef": 0.0,
    "f1_score": 0.8297872340425532
}
  _warn_prf(average, modifier, msg_start, len(


Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MNAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE5', 'fraction': 0.15, 'sampling': 'MAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'AGE', 'fraction': 0.15, 'sampling': 'MCAR'}

Generating corrupted training data on 94 rows... 

	perturbation: GaussianNoise: {'column': 'PRE4', 'fraction': 0.15, 'sampling': 'MCAR'}

Applying cleaners... 



  _warn_prf(average, modifier, msg_start, len(result))
INFO:autogluon.tabular.learner.abstract_learner:Evaluation: accuracy on test data: 0.8297872340425532
INFO:autogluon.tabular.learner.abstract_learner:Evaluations on test data:
INFO:autogluon.tabular.learner.abstract_learner:{
    "accuracy": 0.8297872340425532,
    "accuracy_score": 0.8297872340425532,
    "balanced_accuracy_score": 0.5,
    "matthews_corrcoef": 0.0,
    "f1_score": 0.8297872340425532
}
  _warn_prf(average, modifier, msg_start, len(result))
INFO:autogluon.tabular.learner.abstract_learner:Detailed (per-class) classification report:
INFO:autogluon.tabular.learner.abstract_learner:{
    "F": {
        "precision": 0.8297872340425532,
        "recall": 1.0,
        "f1-score": 0.9069767441860465,
        "support": 78
    },
    "T": {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 16
    },
    "accuracy": 0.8297872340425532,
    "macro avg": {
        "precision": 0.41489

PPP score no cleaning: {'roc_auc_score': 0.6298076923076923, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
PPP scores with cleaning: 

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.9415315722764932}




Cleaner: (NoOutlierDetection, MeanModeImputation): {'roc_auc_score': 0.6314102564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Classifier for col: DGN reached 0.6533333333333333
Classifier for col: PRE6 reached 0.8133333333333332
Classifier for col: PRE7 reached 0.9066666666666667
Classifier for col: PRE8 reached 0.8266666666666667
Classifier for col: PRE9 reached 0.9
Classifier for col: PRE10 reached 0.8533333333333333
Classifier for col: PRE11 reached 0.8466666666666667
Classifier for col: PRE14 reached 0.52
Classifier for col: PRE17 reached 0.929999999999999

  _warn_prf(average, modifier, msg_start, len(result))
INFO:autogluon.tabular.learner.abstract_learner:Evaluation: accuracy on test data: 0.8297872340425532
INFO:autogluon.tabular.learner.abstract_learner:Evaluations on test data:
INFO:autogluon.tabular.learner.abstract_learner:{
    "accuracy": 0.8297872340425532,
    "accuracy_score": 0.8297872340425532,
    "balanced_accuracy_score": 0.5,
    "matthews_corrcoef": 0.0,
    "f1_score": 0.8297872340425532
}
  _warn_prf(average, modifier, msg_start, len(result))
INFO:autogluon.tabular.learner.abstract_learner:Detailed (per-class) classification report:
INFO:autogluon.tabular.learner.abstract_learner:{
    "F": {
        "precision": 0.8297872340425532,
        "recall": 1.0,
        "f1-score": 0.9069767441860465,
        "support": 78
    },
    "T": {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 16
    },
    "accuracy": 0.8297872340425532,
    "macro avg": {
        "precision": 0.41489

Regressor for col: AGE/upper reached -0.7871854845171947

Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: SklearnImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.9415315722764932}


INFO:root:CategoricalEncoder for column DGN                                found only 38 occurrences of value DGN2
INFO:root:CategoricalEncoder for column DGN                                found only 36 occurrences of value DGN4
INFO:root:CategoricalEncoder for column DGN                                found only 9 occurrences of value DGN5
INFO:root:CategoricalEncoder for column DGN                                found only 3 occurrences of value DGN6
INFO:root:CategoricalEncoder for column DGN                                found only 1 occurrences of value DGN1
INFO:root:CategoricalEncoder for column DGN                                found only 1 occurrences of value DGN8


Cleaner: (NoOutlierDetection, SklearnImputation): {'roc_auc_score': 0.6314102564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Fitting model for column: DGN


INFO:root:
INFO:root:Epoch[0] Batch [0-11]	Speed: 1236.00 samples/sec	cross-entropy=1.338936	DGN-accuracy=0.692708
INFO:root:Epoch[0] Train-cross-entropy=1.171412
INFO:root:Epoch[0] Train-DGN-accuracy=0.713068
INFO:root:Epoch[0] Time cost=0.313
INFO:root:Saved checkpoint to "imputer_model/model-0000.params"
INFO:root:Epoch[0] Validation-cross-entropy=0.768192
INFO:root:Epoch[0] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[1] Batch [0-11]	Speed: 1339.39 samples/sec	cross-entropy=0.826072	DGN-accuracy=0.760417
INFO:root:Epoch[1] Train-cross-entropy=0.830458
INFO:root:Epoch[1] Train-DGN-accuracy=0.747159
INFO:root:Epoch[1] Time cost=0.277
INFO:root:Saved checkpoint to "imputer_model/model-0001.params"
INFO:root:Epoch[1] Validation-cross-entropy=0.770790
INFO:root:Epoch[1] Validation-DGN-accuracy=0.770833
INFO:root:Epoch[2] Batch [0-11]	Speed: 1303.36 samples/sec	cross-entropy=0.810278	DGN-accuracy=0.760417
INFO:root:Epoch[2] Train-cross-entropy=0.813691
INFO:root:Epoch[2] Train-DGN-ac

INFO:root:Epoch[19] Validation-cross-entropy=0.686205
INFO:root:Epoch[19] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[20] Batch [0-11]	Speed: 1298.11 samples/sec	cross-entropy=0.739805	DGN-accuracy=0.760417
INFO:root:Epoch[20] Train-cross-entropy=0.738772
INFO:root:Epoch[20] Train-DGN-accuracy=0.752841
INFO:root:Epoch[20] Time cost=0.323
INFO:root:Saved checkpoint to "imputer_model/model-0020.params"
INFO:root:Epoch[20] Validation-cross-entropy=0.685567
INFO:root:Epoch[20] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[21] Batch [0-11]	Speed: 1013.86 samples/sec	cross-entropy=0.737973	DGN-accuracy=0.760417
INFO:root:Epoch[21] Train-cross-entropy=0.736826
INFO:root:Epoch[21] Train-DGN-accuracy=0.752841
INFO:root:Epoch[21] Time cost=0.332
INFO:root:Saved checkpoint to "imputer_model/model-0021.params"
INFO:root:Epoch[21] Validation-cross-entropy=0.684625
INFO:root:Epoch[21] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[22] Batch [0-11]	Speed: 1038.76 samples/sec	cross-entropy=0

INFO:root:Epoch[39] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[40] Batch [0-11]	Speed: 1235.70 samples/sec	cross-entropy=0.708389	DGN-accuracy=0.760417
INFO:root:Epoch[40] Train-cross-entropy=0.704177
INFO:root:Epoch[40] Train-DGN-accuracy=0.752841
INFO:root:Epoch[40] Time cost=0.300
INFO:root:Saved checkpoint to "imputer_model/model-0040.params"
INFO:root:Epoch[40] Validation-cross-entropy=0.680767
INFO:root:Epoch[40] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[41] Batch [0-11]	Speed: 1107.72 samples/sec	cross-entropy=0.707197	DGN-accuracy=0.760417
INFO:root:Epoch[41] Train-cross-entropy=0.702860
INFO:root:Epoch[41] Train-DGN-accuracy=0.752841
INFO:root:Epoch[41] Time cost=0.308
INFO:root:Saved checkpoint to "imputer_model/model-0041.params"
INFO:root:Epoch[41] Validation-cross-entropy=0.682110
INFO:root:Epoch[41] Validation-DGN-accuracy=0.791667
INFO:root:Epoch[42] Batch [0-11]	Speed: 1064.50 samples/sec	cross-entropy=0.705791	DGN-accuracy=0.760417
INFO:root:Epoch[42] Trai

Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9
Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14
Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Outlier detection method: NoOutlierDetection, Outlier Detection Score: {'Precision': 0.425531914893617, 'Recall': 0.5, 'F1-score': 0.4597701149425288, 'Accuracy': 0.851063829787234}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 1.9415315722764932}
Cleaner: (NoOutlierDetection, DatawigImputation): {'roc_auc_score': 0.6314102564102564, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0



Classifier for col: DGN reached 0.6833333333333333
Classifier for col: PRE6 reached 0.82
Classifier for col: PRE7 reached 0.92
Classifier for col: PRE8 reached 0.8333333333333333
Classifier for col: PRE9 reached 0.9266666666666667
Classifier for col: PRE10 reached 0.8833333333333333
Classifier for col: PRE11 reached 0.8566666666666667
Classifier for col: PRE14 reached 0.5233333333333333
Classifier for col: PRE17 reached 0.9133333333333333
Classifier for col: PRE19 reached nan




Classifier for col: PRE25 reached 0.9766666666666667
Classifier for col: PRE30 reached 0.8033333333333332
Classifier for col: PRE32 reached 0.9833333333333334
Regressor for col: PRE4/lower reached 0.26749047884748195
Regressor for col: PRE4/median reached 0.646181648622075
Regressor for col: PRE4/upper reached 0.2714664242289103
Regressor for col: PRE5/lower reached -0.043374150943944145
Regressor for col: PRE5/median reached -0.13286548753213656
Regressor for col: PRE5/upper reached -0.15108923534716273
Regressor for col: AGE/lower reached -0.8602293575256841
Regressor for col: AGE/median reached 0.15309386025426885
Regressor for col: AGE/upper reached -0.9101255691976636
Imputed 18 values in column PRE4
Imputed 4 values in column PRE5
Imputed 3 values in column AGE

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.85453216374269, 'Recall': 0.9330357142857143, 'F1-score': 0.8870192307692308, 'Accuracy': 0.9361702127659575}
Imputation method: 

  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (PyODKNNOutlierDetection, SklearnImputation): {'roc_auc_score': 0.6109775641025641, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}
Fitting model for column: DGN


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9
Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14
Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODKNNOutlierDetection, Outlier Detection Score: {'Precision': 0.85453216374269, 'Recall': 0.9330357142857143, 'F1-score': 0.8870192307692308, 'Accuracy': 0.9361702127659575}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.0265080316018372}


  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (PyODKNNOutlierDetection, DatawigImputation): {'roc_auc_score': 0.6322115384615385, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.6719348659003831, 'Recall': 0.8205357142857144, 'F1-score': 0.6730434782608696, 'Accuracy': 0.7446808510638298}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.46485561323316577}
Cleaner: (PyODIsolationForestOutlierDetection, Mean



Classifier for col: DGN reached 0.6966666666666667
Classifier for col: PRE6 reached 0.8333333333333333
Classifier for col: PRE7 reached 0.9299999999999999
Classifier for col: PRE8 reached 0.8333333333333334
Classifier for col: PRE9 reached 0.9299999999999999
Classifier for col: PRE10 reached 0.9
Classifier for col: PRE11 reached 0.87
Classifier for col: PRE14 reached 0.5233333333333334
Classifier for col: PRE17 reached 0.9266666666666667
Classifier for col: PRE19 reached 0.9933333333333333
Classifier for col: PRE25 reached 0.9833333333333334
Classifier for col: PRE30 reached 0.8200000000000001
Classifier for col: PRE32 reached 0.9866666666666666
Regressor for col: PRE4/lower reached 0.2518246825968417
Regressor for col: PRE4/median reached 0.7090769088830099
Regressor for col: PRE4/upper reached 0.4700858585233926
Regressor for col: PRE5/lower reached -0.05617432911489073
Regressor for col: PRE5/median reached 0.04688510108324373
Regressor for col: PRE5/upper reached -0.050244415201896

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE6


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE7


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE8


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE9
Fitting model for column: PRE10
Fitting model for column: PRE11
Fitting model for column: PRE14
Fitting model for column: PRE17


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE19
Fitting model for column: PRE25


  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: PRE30
Fitting model for column: PRE32
Fitting model for column: PRE4
Fitting model for column: PRE5
Fitting model for column: AGE

Outlier detection method: PyODIsolationForestOutlierDetection, Outlier Detection Score: {'Precision': 0.675, 'Recall': 0.8375, 'F1-score': 0.6622443338861249, 'Accuracy': 0.723404255319149}
Imputation method: DatawigImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.36555904878949647}


  _warn_prf(average, modifier, msg_start, len(result))


Cleaner: (PyODIsolationForestOutlierDetection, DatawigImputation): {'roc_auc_score': 0.6294070512820512, 'classification_report': {'F': {'precision': 0.8297872340425532, 'recall': 1.0, 'f1-score': 0.9069767441860465, 'support': 78}, 'T': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 16}, 'accuracy': 0.8297872340425532, 'macro avg': {'precision': 0.4148936170212766, 'recall': 0.5, 'f1-score': 0.45348837209302323, 'support': 94}, 'weighted avg': {'precision': 0.688546853779991, 'recall': 0.8297872340425532, 'f1-score': 0.7525977238990599, 'support': 94}}}

Outlier detection method: PyODPCAOutlierDetection, Outlier Detection Score: {'Precision': 0.6575757575757576, 'Recall': 0.8017857142857143, 'F1-score': 0.6452830188679246, 'Accuracy': 0.7127659574468085}
Imputation method: MeanModeImputation, Imputation Score: {'Precision': nan, 'Recall': nan, 'F1-score': nan, 'Accuracy': nan, 'Mean Squared Error': 0.409574559420426}
Cleaner: (PyODPCAOutlierDetection, MeanModeImputation



Classifier for col: DGN reached 0.7266666666666666
Classifier for col: PRE6 reached 0.8733333333333333
Classifier for col: PRE7 reached 0.95
Classifier for col: PRE8 reached 0.85
Classifier for col: PRE9 reached 0.95
Classifier for col: PRE10 reached 0.8666666666666667
Classifier for col: PRE11 reached 0.8500000000000001


KeyboardInterrupt: 

In [None]:
# Persist the results collected so far under the name "gaussian/ind_results".
# NOTE(review): save_obj is defined outside this section; its storage format and
# base directory are not visible here — confirm the target folder exists.
save_obj(ind_results, "gaussian/ind_results")

## Scaling

In [None]:
# List of corruption combinations to evaluate: the outer list is iterated by the
# experiment loop, and each inner list is handed to run_experiment as one set of
# corruptions. Here there is a single combination containing only Scaling.
corruptions = [[Scaling]]

In [None]:
# Sweep every (fraction, learner, corruption set, dataset) combination and
# collect the value returned by run_experiment, grouped by corruption fraction.
# A failing run is logged and skipped so that one bad dataset does not abort
# the whole sweep.
ind_results = {}

for fraction in fractions:
    ind_results[fraction] = []
    for learner, param_grid in models.items():
        for corruption in corruptions:
            for dataset in datasets:
                try:
                    # The trailing 5 is run_experiment's num_repetitions argument.
                    ind_results[fraction].append(run_experiment(dataset, learner, param_grid, corruption, fraction, cleaners, 5))
                except ConnectionError:
                    print(f'Connection refused for dataset: {dataset}')
                except ValueError as error:
                    # Report which run failed and why, instead of discarding the
                    # exception details, so the failure can be diagnosed later.
                    print(f"Something went wrong with a value :( (dataset: {dataset}, fraction: {fraction}): {error}")

In [None]:
# Persist the collected results under the name "scaling/ind_results".
# NOTE(review): save_obj is defined outside this section; its storage format and
# base directory are not visible here — confirm the target folder exists.
save_obj(ind_results, "scaling/ind_results")

## Missing Values

In [None]:
# List of corruption combinations to evaluate: the outer list is iterated by the
# experiment loop, and each inner list is handed to run_experiment as one set of
# corruptions. Here there is a single combination containing only MissingValues.
corruptions = [[MissingValues]]

In [None]:
# Sweep every (fraction, learner, corruption set, dataset) combination and
# collect the value returned by run_experiment, grouped by corruption fraction.
# A failing run is logged and skipped so that one bad dataset does not abort
# the whole sweep.
ind_results = {}

for fraction in fractions:
    ind_results[fraction] = []
    for learner, param_grid in models.items():
        for corruption in corruptions:
            for dataset in datasets:
                try:
                    # The trailing 5 is run_experiment's num_repetitions argument.
                    ind_results[fraction].append(run_experiment(dataset, learner, param_grid, corruption, fraction, cleaners, 5))
                except ConnectionError:
                    print(f'Connection refused for dataset: {dataset}')
                except ValueError as error:
                    # Report which run failed and why, instead of discarding the
                    # exception details, so the failure can be diagnosed later.
                    print(f"Something went wrong with a value :( (dataset: {dataset}, fraction: {fraction}): {error}")

In [None]:
# Persist the collected results under the name "missing/ind_results".
# NOTE(review): save_obj is defined outside this section; its storage format and
# base directory are not visible here — confirm the target folder exists.
save_obj(ind_results, "missing/ind_results")

## Categorical Shift

In [None]:
# List of corruption combinations to evaluate: the outer list is iterated by the
# experiment loop, and each inner list is handed to run_experiment as one set of
# corruptions. Here there is a single combination containing only CategoricalShift.
corruptions = [[CategoricalShift]]

In [None]:
# Sweep every (fraction, learner, corruption set, dataset) combination and
# collect the value returned by run_experiment, grouped by corruption fraction.
# A failing run is logged and skipped so that one bad dataset does not abort
# the whole sweep.
ind_results = {}

for fraction in fractions:
    ind_results[fraction] = []
    for learner, param_grid in models.items():
        for corruption in corruptions:
            for dataset in datasets:
                try:
                    # The trailing 5 is run_experiment's num_repetitions argument.
                    ind_results[fraction].append(run_experiment(dataset, learner, param_grid, corruption, fraction, cleaners, 5))
                except ConnectionError:
                    print(f'Connection refused for dataset: {dataset}')
                except ValueError as error:
                    # Report which run failed and why, instead of discarding the
                    # exception details, so the failure can be diagnosed later.
                    print(f"Something went wrong with a value :( (dataset: {dataset}, fraction: {fraction}): {error}")

In [None]:
# Persist the collected results under the name "categorical_shift/ind_results".
# NOTE(review): save_obj is defined outside this section; its storage format and
# base directory are not visible here — confirm the target folder exists.
save_obj(ind_results, "categorical_shift/ind_results")