In [1]:
# mount drive for access to the files
from google.colab import drive

drive.mount("/content/drive")

# all the drive the files are present in "/content/drive/My Drive"
!ls "/content/drive/My Drive/Beuth Uni/Master Thesis"

import sys
sys.path.append('/content/drive/My Drive/Beuth Uni/Master Thesis/jenga')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 Data
'Data Quality in ML Production Systems.pdf'
'Datawig: Missing Value Imputation for Tables.pdf'
 Declaration
 Images
 jenga
 jenga.pdf
 MICE_Multivariate_Imputation_by_Chained_Equations_.pdf


In [2]:
!pip install openml
!pip install pyod

!pip install mxnet autogluon
!pip install mxnet-mkl --pre --upgrade

Requirement already up-to-date: mxnet-mkl in /usr/local/lib/python3.6/dist-packages (1.6.0)


In [3]:
import random
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

from jenga.basis import Dataset
from jenga.corruptions.generic import MissingValues, SwappedValues
from jenga.corruptions.numerical import Scaling, GaussianNoise
from jenga.cleaning.ppp import PipelinePerformancePrediction
from jenga.cleaning.outlier_detection import NoOutlierDetection, PyODKNN, PyODIsolationForest
from jenga.cleaning.imputation import NoImputation, MeanModeImputation, AutoGluonImputation
from jenga.cleaning.clean import Clean

In [4]:
seed = 10

In [5]:
def run_experiment(dataset_name, learner, param_grid, corruptions, fraction, cleaners, num_repetitions):
    
    ## dataset
    dataset = Dataset(seed, dataset_name)
    
    all_data = dataset.all_data
    attribute_names = dataset.attribute_names
    attribute_types = dataset.attribute_types
    
    ## categorical and numerical features
    categorical_columns = dataset.categorical_columns
    numerical_columns = dataset.numerical_columns
    print(f"Found {len(categorical_columns)} categorical and {len(numerical_columns)} numeric features \n")
    
    ## train and test data
    df_train, lab_train, df_test, lab_test = dataset.get_train_test_data()
    
    
    ## pipeline performance prediction (ppp)
    ppp = PipelinePerformancePrediction(seed, df_train, lab_train, df_test, lab_test, categorical_columns, numerical_columns, learner, param_grid)
    ppp_model = ppp.fit_ppp(df_train)
    
    ## generate corrpted data
    df_corrupted, perturbations, cols_perturbed, summary_col_corrupt = ppp.get_corrupted(df_test, corruptions, fraction, num_repetitions)
    
    ## cleaning
    clean = Clean(df_train, df_corrupted, categorical_columns, numerical_columns, ppp, ppp_model, cleaners)
    df_cleaned, corrupted_score_ppp, best_cleaning_score, cleaner_scores_ppp, summary_cleaners = clean(df_train, df_corrupted)
    
    ## results
    result = {
        'ppp_score_model': ppp.predict_score_ppp(ppp_model, df_test),
        'ppp_score_corrupted': corrupted_score_ppp,
        'ppp_score_cleaned': best_cleaning_score,
        'ppp_scores_cleaners': cleaner_scores_ppp
    }
#     print('\n'.join([f'{key}:{val}' for key, val in result.items()]))
    
    ## summary
    summary = {
        'dataset': dataset_name,
        'model': learner,
        'corruptions': summary_col_corrupt,
        'cleaners': summary_cleaners,
        'result': result
    }
#     print('\n\n\n\n'.join([f'{key}:{val}' for key, val in summary.items()]))
    
    return summary #summary_col_corrupt, result

In [6]:
datasets = [
    'parkinsons',
    'heart-statlog',
    'credit-g'
]

In [7]:
## model parameters
## models is a dict where key = leaner & value = param_grid
models = {SGDClassifier(loss='log'): {'learner__max_iter': [500, 1000, 5000], 
                                         'learner__penalty': ['l2', 'l1', 'elasticnet'], 
                                         'learner__alpha': [0.0001, 0.001, 0.01, 0.1]
                                        }, 
          RandomForestClassifier():{'learner__n_estimators': [100, 200, 500], 
                                    'learner__max_depth': [5, 10, 15]
                                   }
         }

## make dict of multiple leraners and corresponding param_grids

In [8]:
corruptions = [MissingValues, SwappedValues, Scaling, GaussianNoise]

In [9]:
fractions = np.random.uniform(0, 1, 1) # np.random.uniform(0, 1, 3)

In [10]:
cleaners = [
    (NoOutlierDetection, MeanModeImputation),
    (NoOutlierDetection, AutoGluonImputation),
    (PyODKNN, NoImputation),
    (PyODKNN, MeanModeImputation),
    (PyODKNN, AutoGluonImputation),
    (PyODIsolationForest, NoImputation),
    (PyODIsolationForest, MeanModeImputation),
    (PyODIsolationForest, AutoGluonImputation)
]

In [11]:
%%time
for _ in range(2):
  print("\n\n..................................ITERATION..................................\n")
  ind_results = []

  for dataset in datasets:
      for learner, param_grid in models.items():
          for fraction in fractions:
              ind_results.append(run_experiment(dataset, learner, param_grid, [MissingValues], fraction, [(PyODKNN, AutoGluonImputation)], 5))



..................................ITERATION..................................



Saved dataset 1488: parkinsons to file /root/.openml/cache/org/openml/www/datasets/1488/dataset.pkl.py3


Dataset: parkinsons
Found 0 categorical and 22 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    3.0s finished



Generating corrupted training data on 39 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V16', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V15', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'V9', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9837662337662337, 'classification_report': {'1': {'precision': 0.6875, 'recall': 1.0, 'f1-score': 0.8148148148148148, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.8214285714285714, 'f1-score': 0.9019607843137255, 'support': 28}, 'accuracy': 0.8717948717948718, 'macro avg': {'precisio

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102214/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102214/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
NumExpr defaulting to 2 threads.
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.1s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.472	 = Validation root_mean_squared_erro

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.21s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0009	 = Validation root_mean_squared_error score
	0.59s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 6.36s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102239/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102239/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.98s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.32s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102316/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102316/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performan

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.78s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.014	 = Validation root_mean_squared_error score
	3.78s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.39s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.014	 = Validation root_mean_squared_error score
	0.58s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 11.32s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102401/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102401/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float6

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.58s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.11s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0448	 = Validation root_mean_squared_error score
	1.45s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.86s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0434	 = Validation root_mean_squared_error score
	0.47s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.03s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102415/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102415/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.96s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0574	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.35s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102436/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102436/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 0.7857142857142857, 'recall': 1.0, 'f1-score': 0.88, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.8928571428571429, 'f1-score': 0.9433962264150945, 'support': 28}, 'accuracy': 0.9230769230769231, 'macro avg': {'precision': 0.8928571428571428, 'recall': 0.9464285714285714, 'f1-score': 0.9116981132075472, 'support': 39}, 'weighted avg': {'precision': 0.9395604395604394, 'recall': 0.9230769230769231, 'f1-score': 0.9255152394775038, 'support': 39}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102452/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102452/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.4135	 = Validation root_mean_squared_error score
	0.62s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.25s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0006	 = Validation root_mean_squared_error score
	0.48s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 8.57s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102519/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102519/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.97s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.56s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 10.9s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102558/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102558/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.81s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0184	 = Validation root_mean_squared_error score
	2.25s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.38s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0175	 = Validation root_mean_squared_error score
	0.46s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.66s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102645/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102645/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.59s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.11s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0502	 = Validation root_mean_squared_error score
	2.04s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.87s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0459	 = Validation root_mean_squared_error score
	0.54s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.72s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102701/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102701/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.99s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0593	 = Validation root_mean_squared_error score
	0.49s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.33s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102723/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102723/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 0.7857142857142857, 'recall': 1.0, 'f1-score': 0.88, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.8928571428571429, 'f1-score': 0.9433962264150945, 'support': 28}, 'accuracy': 0.9230769230769231, 'macro avg': {'precision': 0.8928571428571428, 'recall': 0.9464285714285714, 'f1-score': 0.9116981132075472, 'support': 39}, 'weighted avg': {'precision': 0.9395604395604394, 'recall': 0.9230769230769231, 'f1-score': 0.9255152394775038, 'support': 39}}} 

Cleaning improved the overall score 





Data pickle file already exists and is up to date.


Dataset: parkinsons
Found 0 categorical and 22 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   14.9s finished



Generating corrupted training data on 39 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V16', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V15', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'V9', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, '2': {'precision': 0.9655172413793104, 'recall': 1.0, 'f1-score': 0.9824561403508771, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg'

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102756/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102756/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.7821	 = Validation root_mean_squared_error score
	0.63s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.22s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0008	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.86s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102821/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102821/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.97s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.47s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.74s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102857/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102857/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.79s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0446	 = Validation root_mean_squared_error score
	3.83s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.39s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.018	 = Validation root_mean_squared_error score
	0.51s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 11.28s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102945/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102945/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.59s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.1s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0508	 = Validation root_mean_squared_error score
	1.64s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.86s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0472	 = Validation root_mean_squared_error score
	0.47s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.22s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_102959/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_102959/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.99s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0589	 = Validation root_mean_squared_error score
	0.48s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.26s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103021/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103021/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 0.9090909090909091, 'recall': 0.9090909090909091, 'f1-score': 0.9090909090909091, 'support': 11}, '2': {'precision': 0.9642857142857143, 'recall': 0.9642857142857143, 'f1-score': 0.9642857142857143, 'support': 28}, 'accuracy': 0.9487179487179487, 'macro avg': {'precision': 0.9366883116883117, 'recall': 0.9366883116883117, 'f1-score': 0.9366883116883117, 'support': 39}, 'weighted avg': {'precision': 0.9487179487179487, 'recall': 0.9487179487179487, 'f1-score': 0.9487179487179487, 'support': 39}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103038/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103038/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.4135	 = Validation root_mean_squared_error score
	0.62s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.21s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0009	 = Validation root_mean_squared_error score
	0.48s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 6.16s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103101/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103101/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.98s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0017	 = Validation root_mean_squared_error score
	0.55s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.16s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103139/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103139/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.78s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0123	 = Validation root_mean_squared_error score
	2.26s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.39s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0123	 = Validation root_mean_squared_error score
	0.49s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.69s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103225/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103225/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.61s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.13s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0511	 = Validation root_mean_squared_error score
	1.47s	 = Training runtime
	0.02s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.91s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0491	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.39s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103240/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103240/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.99s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0561	 = Validation root_mean_squared_error score
	0.52s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.41s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103302/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103302/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 0.9090909090909091, 'recall': 0.9090909090909091, 'f1-score': 0.9090909090909091, 'support': 11}, '2': {'precision': 0.9642857142857143, 'recall': 0.9642857142857143, 'f1-score': 0.9642857142857143, 'support': 28}, 'accuracy': 0.9487179487179487, 'macro avg': {'precision': 0.9366883116883117, 'recall': 0.9366883116883117, 'f1-score': 0.9366883116883117, 'support': 39}, 'weighted avg': {'precision': 0.9487179487179487, 'recall': 0.9487179487179487, 'f1-score': 0.9487179487179487, 'support': 39}}} 

Cleaning didnt't improve the overall score 





Saved dataset 53: heart-statlog to file /root/.openml/cache/org/openml/www/datasets/53/dataset.pkl.py3


Dataset: heart-statlog
Found 0 categorical and 13 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.8s finished



Generating corrupted training data on 54 rows... 

	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'serum_cholestoral', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.7878787878787878, 'recall': 1.0, 'f1-score': 0.8813559322033898, 'support': 26}, 'present': {'precision': 1.0, 'recall': 0.75, 'f1-score': 0.857142857142857

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103323/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103323/
AutoGluon Version:  0.0.12
Train Data Rows:    216
Train Data Columns: 13
Preprocessing data ...
Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.05s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-9.0409	 = Validation root_mean_squared_error score
	0.62s	 = Training runti

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.8620689655172413, 'recall': 0.9615384615384616, 'f1-score': 0.9090909090909091, 'support': 26}, 'present': {'precision': 0.96, 'recall': 0.8571428571428571, 'f1-score': 0.9056603773584904, 'support': 28}, 'accuracy': 0.9074074074074074, 'macro avg': {'precision': 0.9110344827586206, 'recall': 0.9093406593406593, 'f1-score': 0.9073756432246998, 'support': 54}, 'weighted avg': {'precision': 0.9128480204342273, 'recall': 0.9074074074074074, 'f1-score': 0.9073121148592846, 'support': 54}}}


Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-8.9114	 = Validation root_mean_squared_error score
	0.62s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: ExtraTreesRegressorMSE ...
	-8.7067	 = Validation root_mean_squared_error score
	0.4s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: KNeighborsRegressorUnif ...
	-9.6402	 = Validation root_mean_squared_error score
	0.01s	 = 


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.8620689655172413, 'recall': 0.9615384615384616, 'f1-score': 0.9090909090909091, 'support': 26}, 'present': {'precision': 0.96, 'recall': 0.8571428571428571, 'f1-score': 0.9056603773584904, 'support': 28}, 'accuracy': 0.9074074074074074, 'macro avg': {'precision': 0.9110344827586206, 'recall': 0.9093406593406593, 'f1-score': 0.9073756432246998, 'support': 54}, 'weighted avg': {'precision': 0.9128480204342273, 'recall': 0.9074074074074074, 'f1-score': 0.9073121148592846, 'support': 54}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Dataset: heart-statlog
Found 0 categorical and 13 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   13.8s finished



Generating corrupted training data on 54 rows... 

	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'serum_cholestoral', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9340659340659341, 'classification_report': {'absent': {'precision': 0.8, 'recall': 0.9230769230769231, 'f1-score': 0.8571428571428571, 'support': 26}, 'present': {'precision': 0.9166666666666666, 'recall': 0.7857142857142857, 

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103533/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103533/
AutoGluon Version:  0.0.12
Train Data Rows:    216
Train Data Columns: 13
Preprocessing data ...
Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-8.9864	 = Validation root_mean_squared_error score
	0.61s	 = Training runti

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9258241758241758, 'classification_report': {'absent': {'precision': 0.8214285714285714, 'recall': 0.8846153846153846, 'f1-score': 0.8518518518518519, 'support': 26}, 'present': {'precision': 0.8846153846153846, 'recall': 0.8214285714285714, 'f1-score': 0.8518518518518519, 'support': 28}, 'accuracy': 0.8518518518518519, 'macro avg': {'precision': 0.853021978021978, 'recall': 0.853021978021978, 'f1-score': 0.8518518518518519, 'support': 54}, 'weighted avg': {'precision': 0.8541921041921041, 'recall': 0.8518518518518519, 'f1-score': 0.8518518518518519, 'support': 54}}}


Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.05s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-8.9114	 = Validation root_mean_squared_error score
	0.62s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: ExtraTreesRegressorMSE ...
	-8.7067	 = Validation root_mean_squared_error score
	0.41s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: KNeighborsRegressorUnif ...
	-9.6402	 = Validation root_mean_squared_error score
	0.01s	 =


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9258241758241758, 'classification_report': {'absent': {'precision': 0.8214285714285714, 'recall': 0.8846153846153846, 'f1-score': 0.8518518518518519, 'support': 26}, 'present': {'precision': 0.8846153846153846, 'recall': 0.8214285714285714, 'f1-score': 0.8518518518518519, 'support': 28}, 'accuracy': 0.8518518518518519, 'macro avg': {'precision': 0.853021978021978, 'recall': 0.853021978021978, 'f1-score': 0.8518518518518519, 'support': 54}, 'weighted avg': {'precision': 0.8541921041921041, 'recall': 0.8518518518518519, 'f1-score': 0.8518518518518519, 'support': 54}}} 

Cleaning didnt't improve the overall score 





Saved dataset 31: credit-g to file /root/.openml/cache/org/openml/www/datasets/31/dataset.pkl.py3
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Dataset: credit-g
Found 13 categorical and 7 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    6.2s finished



Generating corrupted training data on 200 rows... 

	perturbation: MissingValues: {'column': 'credit_amount', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'other_payment_plans', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'duration', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'property_magnitude', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'credit_history', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.86231884057971

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_103738/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_103738/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.8623188405797102, 'f1-score': 0.8350877192982457, 'support': 138}, 'accuracy': 0.765, 'macro avg': {'precision': 0.7255166217430369, 'recall': 0.7053529686769519, 'f1-score': 0.7131960335621663, 'support': 200}, 'weighted avg': {'precision': 0.7574393530997305, 'recall': 0.765, 'f1-score': 0.7595148741418766, 'support': 200}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_104454/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_104454/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.8623188405797102, 'f1-score': 0.8350877192982457, 'support': 138}, 'accuracy': 0.765, 'macro avg': {'precision': 0.7255166217430369, 'recall': 0.7053529686769519, 'f1-score': 0.7131960335621663, 'support': 200}, 'weighted avg': {'precision': 0.7574393530997305, 'recall': 0.765, 'f1-score': 0.7595148741418766, 'support': 200}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.


Dataset: credit-g
Found 13 categorical and 7 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   18.8s finished



Generating corrupted training data on 200 rows... 

	perturbation: MissingValues: {'column': 'credit_amount', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'other_payment_plans', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'duration', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'property_magnitude', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'credit_history', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_105309/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_105309/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956522, 'f1-score': 0.8403908794788274, 'support': 138}, 'accuracy': 0.755, 'macro avg': {'precision': 0.7364955144111471, 'recall': 0.6448106591865358, 'f1-score': 0.65675457952436, 'support': 200}, 'weighted avg': {'precision': 0.7466863905325444, 'recall': 0.755, 'f1-score': 0.7265363735070577, 'support': 200}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_105959/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_105959/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.12s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956522, 'f1-score': 0.8403908794788274, 'support': 138}, 'accuracy': 0.755, 'macro avg': {'precision': 0.7364955144111471, 'recall': 0.6448106591865358, 'f1-score': 0.65675457952436, 'support': 200}, 'weighted avg': {'precision': 0.7466863905325444, 'recall': 0.755, 'f1-score': 0.7265363735070577, 'support': 200}}} 

Cleaning didnt't improve the overall score 





..................................ITERATION..................................



Data pickle file already exists and is up to date.


Dataset: parkinsons
Found 0 categorical and 22 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.9s finished



Generating corrupted training data on 39 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V16', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V15', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'V9', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9870129870129869, 'classification_report': {'1': {'precision': 0.7857142857142857, 'recall': 1.0, 'f1-score': 0.88, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.8928571428571429, 'f1-score': 0.9433962264150945, 'support': 28}, 'accuracy': 0.9230769230769231, 'macro avg': {'precision'

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110704/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110704/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.472	 = Validation root_mean_squared_error score
	0.62s	 = Training runti

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.22s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0007	 = Validation root_mean_squared_error score
	0.48s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 8.02s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110730/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110730/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.99s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.58s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.38s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110805/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110805/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.09s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.81s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0449	 = Validation root_mean_squared_error score
	3.8s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.4s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.018	 = Validation root_mean_squared_error score
	0.46s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 11.25s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110853/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110853/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.59s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.1s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0461	 = Validation root_mean_squared_error score
	1.57s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.84s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0442	 = Validation root_mean_squared_error score
	0.53s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.18s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110908/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110908/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.98s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0589	 = Validation root_mean_squared_error score
	0.57s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.39s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110930/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110930/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive perform

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9902597402597403, 'classification_report': {'1': {'precision': 0.9166666666666666, 'recall': 1.0, 'f1-score': 0.9565217391304348, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.9642857142857143, 'f1-score': 0.9818181818181818, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg': {'precision': 0.9583333333333333, 'recall': 0.9821428571428572, 'f1-score': 0.9691699604743083, 'support': 39}, 'weighted avg': {'precision': 0.9764957264957264, 'recall': 0.9743589743589743, 'f1-score': 0.9746832877267659, 'support': 39}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_110946/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_110946/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.4135	 = Validation root_mean_squared_error score
	0.63s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.21s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0008	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.43s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111011/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111011/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.98s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.58s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.43s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111050/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111050/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.74s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0131	 = Validation root_mean_squared_error score
	2.48s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.38s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0131	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.85s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111136/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111136/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float6

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.58s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.11s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0506	 = Validation root_mean_squared_error score
	1.78s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.85s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0462	 = Validation root_mean_squared_error score
	0.43s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.3s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111150/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111150/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.98s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0602	 = Validation root_mean_squared_error score
	0.56s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 6.98s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111212/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111212/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9902597402597403, 'classification_report': {'1': {'precision': 0.9166666666666666, 'recall': 1.0, 'f1-score': 0.9565217391304348, 'support': 11}, '2': {'precision': 1.0, 'recall': 0.9642857142857143, 'f1-score': 0.9818181818181818, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg': {'precision': 0.9583333333333333, 'recall': 0.9821428571428572, 'f1-score': 0.9691699604743083, 'support': 39}, 'weighted avg': {'precision': 0.9764957264957264, 'recall': 0.9743589743589743, 'f1-score': 0.9746832877267659, 'support': 39}}} 

Cleaning improved the overall score 





Data pickle file already exists and is up to date.


Dataset: parkinsons
Found 0 categorical and 22 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   14.8s finished



Generating corrupted training data on 39 rows... 

	perturbation: MissingValues: {'column': 'V2', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V16', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'V1', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'V15', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'V9', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, '2': {'precision': 0.9655172413793104, 'recall': 1.0, 'f1-score': 0.9824561403508771, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg'

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111243/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111243/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.6076	 = Validation root_mean_squared_error score
	0.62s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.23s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0007	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.42s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111308/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111308/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.97s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0017	 = Validation root_mean_squared_error score
	0.49s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.19s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111342/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111342/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.72s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0499	 = Validation root_mean_squared_error score
	2.47s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.37s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.018	 = Validation root_mean_squared_error score
	0.53s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.92s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111427/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111427/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float6

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.57s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.07s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0476	 = Validation root_mean_squared_error score
	1.52s	 = Training runtime
	0.02s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.85s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0465	 = Validation root_mean_squared_error score
	0.46s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.0s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111441/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111441/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.96s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0592	 = Validation root_mean_squared_error score
	0.5s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.54s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111502/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111502/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive performa

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, '2': {'precision': 0.9655172413793104, 'recall': 1.0, 'f1-score': 0.9824561403508771, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg': {'precision': 0.9827586206896552, 'recall': 0.9545454545454546, 'f1-score': 0.9674185463659147, 'support': 39}, 'weighted avg': {'precision': 0.9752431476569408, 'recall': 0.9743589743589743, 'f1-score': 0.9739733950260265, 'support': 39}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111518/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111518/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-21.4135	 = Validation root_mean_squared_error score
	0.62s	 = Training runt

[1000]	train_set's rmse: 1.20743e-05	valid_set's rmse: 0.0014217


	-0.0014	 = Validation root_mean_squared_error score
	1.19s	 = Training runtime
	0.02s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0008	 = Validation root_mean_squared_error score
	0.47s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.89s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111543/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111543/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 0.000129447	valid_set's rmse: 0.0030499


	-0.003	 = Validation root_mean_squared_error score
	0.95s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0018	 = Validation root_mean_squared_error score
	0.54s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9.13s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111618/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111618/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive performa

[1000]	train_set's rmse: 0.00139469	valid_set's rmse: 0.0302898


	-0.0214	 = Validation root_mean_squared_error score
	3.68s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.0448	 = Validation root_mean_squared_error score
	2.96s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...
	-0.028	 = Validation root_mean_squared_error score
	0.39s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.018	 = Validation root_mean_squared_error score
	0.54s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 10.35s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111704/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111704/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float

[1000]	train_set's rmse: 0.00455953	valid_set's rmse: 0.0585091
[2000]	train_set's rmse: 0.00165458	valid_set's rmse: 0.0582908


	-0.0583	 = Validation root_mean_squared_error score
	0.59s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: CatboostRegressor ...
	-0.0522	 = Validation root_mean_squared_error score
	1.07s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetRegressor ...
	-0.051	 = Validation root_mean_squared_error score
	1.81s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMRegressorCustom ...


[1000]	train_set's rmse: 0.00027099	valid_set's rmse: 0.0578018


	-0.0578	 = Validation root_mean_squared_error score
	0.86s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0471	 = Validation root_mean_squared_error score
	0.51s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 7.36s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111718/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111718/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.07s ...
AutoGluon will gauge predictive perform

[1000]	train_set's rmse: 4.10915e-05	valid_set's rmse: 0.0639365


	-0.0639	 = Validation root_mean_squared_error score
	0.94s	 = Training runtime
	0.01s	 = Validation runtime
Fitting model: weighted_ensemble_k0_l1 ...
	-0.0605	 = Validation root_mean_squared_error score
	0.47s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 6.77s ...
No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111739/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111739/
AutoGluon Version:  0.0.12
Train Data Rows:    156
Train Data Columns: 22
Preprocessing data ...
Feature Generator processed 156 data points with 21 features
Original Features (raw dtypes):
	float64 features: 21
Original Features (inferred dtypes):
	float features: 21
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 21
Final Features:
	float features: 21
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive perform


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9967532467532467, 'classification_report': {'1': {'precision': 1.0, 'recall': 0.9090909090909091, 'f1-score': 0.9523809523809523, 'support': 11}, '2': {'precision': 0.9655172413793104, 'recall': 1.0, 'f1-score': 0.9824561403508771, 'support': 28}, 'accuracy': 0.9743589743589743, 'macro avg': {'precision': 0.9827586206896552, 'recall': 0.9545454545454546, 'f1-score': 0.9674185463659147, 'support': 39}, 'weighted avg': {'precision': 0.9752431476569408, 'recall': 0.9743589743589743, 'f1-score': 0.9739733950260265, 'support': 39}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.


Dataset: heart-statlog
Found 0 categorical and 13 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    2.7s finished



Generating corrupted training data on 54 rows... 

	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'serum_cholestoral', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.7878787878787878, 'recall': 1.0, 'f1-score': 0.8813559322033898, 'support': 26}, 'present': {'precision': 1.0, 'recall': 0.75, 'f1-score': 0.857142857142857

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_111758/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_111758/
AutoGluon Version:  0.0.12
Train Data Rows:    216
Train Data Columns: 13
Preprocessing data ...
Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.05s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-9.0409	 = Validation root_mean_squared_error score
	0.63s	 = Training runti

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.8620689655172413, 'recall': 0.9615384615384616, 'f1-score': 0.9090909090909091, 'support': 26}, 'present': {'precision': 0.96, 'recall': 0.8571428571428571, 'f1-score': 0.9056603773584904, 'support': 28}, 'accuracy': 0.9074074074074074, 'macro avg': {'precision': 0.9110344827586206, 'recall': 0.9093406593406593, 'f1-score': 0.9073756432246998, 'support': 54}, 'weighted avg': {'precision': 0.9128480204342273, 'recall': 0.9074074074074074, 'f1-score': 0.9073121148592846, 'support': 54}}}


Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.05s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-8.9114	 = Validation root_mean_squared_error score
	0.63s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: ExtraTreesRegressorMSE ...
	-8.7067	 = Validation root_mean_squared_error score
	0.4s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: KNeighborsRegressorUnif ...
	-9.6402	 = Validation root_mean_squared_error score
	0.01s	 = 


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9354395604395604, 'classification_report': {'absent': {'precision': 0.8620689655172413, 'recall': 0.9615384615384616, 'f1-score': 0.9090909090909091, 'support': 26}, 'present': {'precision': 0.96, 'recall': 0.8571428571428571, 'f1-score': 0.9056603773584904, 'support': 28}, 'accuracy': 0.9074074074074074, 'macro avg': {'precision': 0.9110344827586206, 'recall': 0.9093406593406593, 'f1-score': 0.9073756432246998, 'support': 54}, 'weighted avg': {'precision': 0.9128480204342273, 'recall': 0.9074074074074074, 'f1-score': 0.9073121148592846, 'support': 54}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Dataset: heart-statlog
Found 0 categorical and 13 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   13.3s finished



Generating corrupted training data on 54 rows... 

	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'age', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'maximum_heart_rate_achieved', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'serum_cholestoral', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.9381868131868132, 'classification_report': {'absent': {'precision': 0.8125, 'recall': 1.0, 'f1-score': 0.896551724137931, 'support': 26}, 'present': {'precision': 1.0, 'recall': 0.7857142857142857, 'f1-score': 0.88, 'support':

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_112004/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_112004/
AutoGluon Version:  0.0.12
Train Data Rows:    216
Train Data Columns: 13
Preprocessing data ...
Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.05s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-9.0894	 = Validation root_mean_squared_error score
	0.52s	 = Training runti

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9326923076923077, 'classification_report': {'absent': {'precision': 0.8064516129032258, 'recall': 0.9615384615384616, 'f1-score': 0.8771929824561403, 'support': 26}, 'present': {'precision': 0.9565217391304348, 'recall': 0.7857142857142857, 'f1-score': 0.8627450980392156, 'support': 28}, 'accuracy': 0.8703703703703703, 'macro avg': {'precision': 0.8814866760168303, 'recall': 0.8736263736263736, 'f1-score': 0.869969040247678, 'support': 54}, 'weighted avg': {'precision': 0.8842657524284453, 'recall': 0.8703703703703703, 'f1-score': 0.8697014868325497, 'support': 54}}}


Feature Generator processed 216 data points with 12 features
Original Features (raw dtypes):
	float64 features: 12
Original Features (inferred dtypes):
	float features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 12
Final Features:
	float features: 12
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: root_mean_squared_error
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: root_mean_squared_error
Fitting model: RandomForestRegressorMSE ...
	-8.9114	 = Validation root_mean_squared_error score
	0.62s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: ExtraTreesRegressorMSE ...
	-8.7067	 = Validation root_mean_squared_error score
	0.4s	 = Training runtime
	0.11s	 = Validation runtime
Fitting model: KNeighborsRegressorUnif ...
	-9.6402	 = Validation root_mean_squared_error score
	0.01s	 = 


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.9326923076923077, 'classification_report': {'absent': {'precision': 0.8064516129032258, 'recall': 0.9615384615384616, 'f1-score': 0.8771929824561403, 'support': 26}, 'present': {'precision': 0.9565217391304348, 'recall': 0.7857142857142857, 'f1-score': 0.8627450980392156, 'support': 28}, 'accuracy': 0.8703703703703703, 'macro avg': {'precision': 0.8814866760168303, 'recall': 0.8736263736263736, 'f1-score': 0.869969040247678, 'support': 54}, 'weighted avg': {'precision': 0.8842657524284453, 'recall': 0.8703703703703703, 'f1-score': 0.8697014868325497, 'support': 54}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Dataset: credit-g
Found 13 categorical and 7 numeric features 

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    6.0s finished



Generating corrupted training data on 200 rows... 

	perturbation: MissingValues: {'column': 'credit_amount', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'other_payment_plans', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'duration', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'property_magnitude', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'credit_history', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.86231884057971

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_112201/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_112201/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.8623188405797102, 'f1-score': 0.8350877192982457, 'support': 138}, 'accuracy': 0.765, 'macro avg': {'precision': 0.7255166217430369, 'recall': 0.7053529686769519, 'f1-score': 0.7131960335621663, 'support': 200}, 'weighted avg': {'precision': 0.7574393530997305, 'recall': 0.765, 'f1-score': 0.7595148741418766, 'support': 200}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_112849/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_112849/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.8080878915381019, 'classification_report': {'bad': {'precision': 0.6415094339622641, 'recall': 0.5483870967741935, 'f1-score': 0.591304347826087, 'support': 62}, 'good': {'precision': 0.8095238095238095, 'recall': 0.8623188405797102, 'f1-score': 0.8350877192982457, 'support': 138}, 'accuracy': 0.765, 'macro avg': {'precision': 0.7255166217430369, 'recall': 0.7053529686769519, 'f1-score': 0.7131960335621663, 'support': 200}, 'weighted avg': {'precision': 0.7574393530997305, 'recall': 0.765, 'f1-score': 0.7595148741418766, 'support': 200}}} 

Cleaning didnt't improve the overall score 





Data pickle file already exists and is up to date.


Dataset: credit-g
Found 13 categorical and 7 numeric features 

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   19.0s finished



Generating corrupted training data on 200 rows... 

	perturbation: MissingValues: {'column': 'credit_amount', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'other_payment_plans', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}
	perturbation: MissingValues: {'column': 'duration', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MNAR'}
	perturbation: MissingValues: {'column': 'property_magnitude', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MCAR'}
	perturbation: MissingValues: {'column': 'credit_history', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}

Applying cleaners... 

PPP score no cleaning: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956

No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_113613/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_113613/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.11s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini

Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956522, 'f1-score': 0.8403908794788274, 'support': 138}, 'accuracy': 0.755, 'macro avg': {'precision': 0.7364955144111471, 'recall': 0.6448106591865358, 'f1-score': 0.65675457952436, 'support': 200}, 'weighted avg': {'precision': 0.7466863905325444, 'recall': 0.755, 'f1-score': 0.7265363735070577, 'support': 200}}}


No output_directory specified. Models will be saved in: AutogluonModels/ag-20200728_114306/
Beginning AutoGluon training ...
AutoGluon will save models to AutogluonModels/ag-20200728_114306/
AutoGluon Version:  0.0.12
Train Data Rows:    800
Train Data Columns: 20
Preprocessing data ...
Train Data Class Count: 4
Feature Generator processed 800 data points with 19 features
Original Features (raw dtypes):
	float64 features: 7
	object features: 12
Original Features (inferred dtypes):
	float features: 7
	object features: 12
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 7
	category features: 12
Final Features:
	float features: 7
	category features: 12
	Data preprocessing and feature engineering runtime = 0.1s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini 


Best cleaning method:
Cleaning score: Cleaner: {'outlier_detection': PyODKNN, 'imputation': AutoGluonImputation}: {'roc_auc_acore': 0.7823749415614774, 'classification_report': {'bad': {'precision': 0.7096774193548387, 'recall': 0.3548387096774194, 'f1-score': 0.4731182795698926, 'support': 62}, 'good': {'precision': 0.7633136094674556, 'recall': 0.9347826086956522, 'f1-score': 0.8403908794788274, 'support': 138}, 'accuracy': 0.755, 'macro avg': {'precision': 0.7364955144111471, 'recall': 0.6448106591865358, 'f1-score': 0.65675457952436, 'support': 200}, 'weighted avg': {'precision': 0.7466863905325444, 'recall': 0.755, 'f1-score': 0.7265363735070577, 'support': 200}}} 

Cleaning didnt't improve the overall score 



CPU times: user 1h 48min 14s, sys: 5min 43s, total: 1h 53min 58s
Wall time: 1h 27min 53s


In [12]:
ind_results

[{'cleaners': [{'Imputation method': AutoGluonImputation,
    'Outlier detection method': PyODKNN,
    'PPP score with cleaning': {'classification_report': {'1': {'f1-score': 0.9565217391304348,
       'precision': 0.9166666666666666,
       'recall': 1.0,
       'support': 11},
      '2': {'f1-score': 0.9818181818181818,
       'precision': 1.0,
       'recall': 0.9642857142857143,
       'support': 28},
      'accuracy': 0.9743589743589743,
      'macro avg': {'f1-score': 0.9691699604743083,
       'precision': 0.9583333333333333,
       'recall': 0.9821428571428572,
       'support': 39},
      'weighted avg': {'f1-score': 0.9746832877267659,
       'precision': 0.9764957264957264,
       'recall': 0.9743589743589743,
       'support': 39}},
     'roc_auc_acore': 0.9902597402597403}}],
  'corruptions': defaultdict(list,
              {('V9',): [MissingValues: {'column': 'V9', 'fraction': 0.426920161499486, 'na_value': nan, 'missingness': 'MAR'}]}),
  'dataset': 'parkinsons',
  'mode

In [13]:
# %%time
# for _ in range(1):
#   print("\n\n..................................ITERATION..................................\n")
#   results = []

#   for dataset in datasets:
#       for learner, param_grid in models.items():
#           for fraction in fractions:
#               results.append(run_experiment(dataset, learner, param_grid, corruptions, fraction, cleaners, 5))

In [14]:
# results