## Adjustment for Google Colab

In [None]:
# mount drive for access to the
from google.colab import drive

In [None]:
drive.mount("/content/drive")

In [None]:
# all the drive the files are present in "/content/drive/My Drive"
!ls "/content/drive/My Drive/Beuth Uni/Master Thesis"

In [None]:
import sys
sys.path.append('/content/drive/My Drive/Beuth Uni/Master Thesis/jenga')

In [None]:
#! pip install openml

In [28]:
import numpy as np
from sklearn.linear_model import SGDClassifier

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

from jenga.basis import Dataset
from jenga.cleaning.ppp import PipelinePerformancePrediction
from jenga.cleaning.clean import Clean

In [2]:
seed = 10

## Dataset

In [3]:
dataset = Dataset(seed, "credit-g")

Dataset 'credit-g', target: 'class'
**Author**: Dr. Hans Hofmann  

**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)) - 1994    

**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)



**German Credit data**  

This dataset classifies people described by a set of attributes as good or bad credit risks.



This dataset comes with a cost matrix: 

``` 

      Good  Bad (predicted)  

Good   0    1   (actual)  

Bad    5    0  

```



It is worse to class a customer as go

Attribute types: 


Unnamed: 0,attribute_names,categorical_indicator
0,checking_status,True
1,duration,False
2,credit_history,True
3,purpose,True
4,credit_amount,False
5,savings_status,True
6,employment,True
7,installment_commitment,False
8,personal_status,True
9,other_parties,True


In [4]:
all_data = dataset.all_data
all_data

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,12.0,existing paid,furniture/equipment,1736.0,<100,4<=X<7,3.0,female div/dep/mar,none,4.0,real estate,31.0,none,own,1.0,unskilled resident,1.0,none,yes,good
996,<0,30.0,existing paid,used car,3857.0,<100,1<=X<4,4.0,male div/sep,none,4.0,life insurance,40.0,none,own,1.0,high qualif/self emp/mgmt,1.0,yes,yes,good
997,no checking,12.0,existing paid,radio/tv,804.0,<100,>=7,4.0,male single,none,4.0,car,38.0,none,own,1.0,skilled,1.0,none,yes,good
998,<0,45.0,existing paid,radio/tv,1845.0,<100,1<=X<4,4.0,male single,none,4.0,no known property,23.0,none,for free,1.0,skilled,1.0,yes,yes,bad


In [5]:
attribute_names = dataset.attribute_names
attribute_names

['checking_status',
 'duration',
 'credit_history',
 'purpose',
 'credit_amount',
 'savings_status',
 'employment',
 'installment_commitment',
 'personal_status',
 'other_parties',
 'residence_since',
 'property_magnitude',
 'age',
 'other_payment_plans',
 'housing',
 'existing_credits',
 'job',
 'num_dependents',
 'own_telephone',
 'foreign_worker']

In [6]:
attribute_types = dataset.attribute_types
attribute_types

Unnamed: 0,attribute_names,categorical_indicator
0,checking_status,True
1,duration,False
2,credit_history,True
3,purpose,True
4,credit_amount,False
5,savings_status,True
6,employment,True
7,installment_commitment,False
8,personal_status,True
9,other_parties,True


### Categorical and Numerical Features

In [7]:
categorical_columns = dataset.categorical_columns
categorical_columns

['checking_status',
 'credit_history',
 'purpose',
 'savings_status',
 'employment',
 'personal_status',
 'other_parties',
 'property_magnitude',
 'other_payment_plans',
 'housing',
 'job',
 'own_telephone',
 'foreign_worker']

In [8]:
numerical_columns = dataset.numerical_columns
numerical_columns

['duration',
 'credit_amount',
 'installment_commitment',
 'residence_since',
 'age',
 'existing_credits',
 'num_dependents']

In [9]:
print(f"Found {len(categorical_columns)} categorical and {len(numerical_columns)} numeric features")

Found 13 categorical and 7 numeric features


## Model

### Model parameters

In [10]:
## model parameters
learner = SGDClassifier(max_iter=1000)
param_grid = {
    'learner__loss': ['log'],
    'learner__penalty': ['l2', 'l1', 'elasticnet'],
    'learner__alpha': [0.0001, 0.001, 0.01, 0.1]
}

### Preprocessing Pipeline

In [11]:
## preprocessing pipeline for both numerical and categorical columns

# preprocessing pipeline for numerical columns
transformer_numeric = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value=0)),
    ('standard_scale', StandardScaler())
])

# preprocessing pipeline for categorical columns
transformer_categorical = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='__NA__')),
    ('one_hot_encode', OneHotEncoder(handle_unknown='ignore'))
])

# preprocessor
feature_transform = ColumnTransformer(transformers=[
    ('categorical_features', transformer_categorical, categorical_columns),
    ('numerical_features', transformer_numeric, numerical_columns)
])

### Prediction Pipeline

In [12]:
## prediction pipeline: append classifier (learner) to the preprocessing pipeline
pipeline = Pipeline([
    ('features', feature_transform),
    ('learner', learner)
])

### Train and Test Data

In [13]:
train_data, train_labels, test_data, test_labels = dataset.get_train_test_data()

display(train_data.head())
print(train_labels[0:5])

display(test_data.head())
print(test_labels[0:5])

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
188,<0,12.0,existing paid,radio/tv,674.0,100<=X<500,4<=X<7,4.0,male mar/wid,none,1.0,life insurance,20.0,none,own,1.0,skilled,1.0,none,yes
194,0<=X<200,45.0,existing paid,radio/tv,3031.0,100<=X<500,1<=X<4,4.0,male single,guarantor,4.0,life insurance,21.0,none,rent,1.0,skilled,1.0,none,yes
225,no checking,36.0,no credits/all paid,repairs,2613.0,<100,1<=X<4,4.0,male single,none,2.0,car,27.0,none,own,2.0,skilled,1.0,none,yes
580,0<=X<200,18.0,critical/other existing credit,new car,1056.0,<100,>=7,3.0,male single,guarantor,3.0,real estate,30.0,bank,own,2.0,skilled,1.0,none,yes
428,no checking,9.0,existing paid,furniture/equipment,1313.0,<100,>=7,1.0,male single,none,4.0,car,20.0,none,own,1.0,skilled,1.0,none,yes


['bad' 'bad' 'good' 'bad' 'good']


Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
841,no checking,21.0,delayed previously,used car,2993.0,<100,1<=X<4,3.0,male single,none,2.0,real estate,28.0,stores,own,2.0,unskilled resident,1.0,none,yes
956,>=200,30.0,critical/other existing credit,radio/tv,3656.0,no known savings,>=7,4.0,male single,none,4.0,life insurance,49.0,stores,own,2.0,unskilled resident,1.0,none,yes
544,no checking,12.0,critical/other existing credit,new car,1255.0,<100,>=7,4.0,male single,none,4.0,real estate,61.0,none,own,2.0,unskilled resident,1.0,none,yes
173,0<=X<200,8.0,existing paid,radio/tv,1414.0,<100,1<=X<4,4.0,male single,guarantor,2.0,real estate,33.0,none,own,1.0,skilled,1.0,none,no
759,<0,12.0,critical/other existing credit,new car,691.0,<100,>=7,4.0,male single,none,3.0,life insurance,35.0,none,own,2.0,skilled,1.0,none,yes


['good' 'good' 'good' 'good' 'bad']


## PPP: Pipeline Performance Prediction

In [14]:
ppp = PipelinePerformancePrediction(seed, train_data, train_labels, test_data, test_labels, categorical_columns, numerical_columns, learner, param_grid, pipeline)

In [15]:
# model trained on original train data
ppp_model = ppp.fit_ppp(train_data)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    7.4s remaining:    0.9s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    7.5s finished


In [16]:
# score on original test data
ppp_model_score = ppp.predict_score_ppp(ppp_model, test_data)
ppp_model_score

0.8093735390369332

## Corruptions

In [17]:
# generate corrpted test data
test_data_corrupted, perturbations, cols_perturbed = ppp.get_corrupted(test_data)

Generating corrupted training data on 200 rows...
Applying perturbations...
GaussianNoise: {'column': 'residence_since', 'fraction': 0.25} on column ['residence_since']
MissingValues: {'column': 'credit_history', 'fraction': 0.5, 'na_value': nan, 'missingness': 'MAR'} on column ['credit_history']
GaussianNoise: {'column': 'duration', 'fraction': 0.25} on column ['duration']
MissingValues: {'column': 'credit_amount', 'fraction': 0.75, 'na_value': nan, 'missingness': 'MAR'} on column ['credit_amount']
SwappedValues: {'column_a': 'housing', 'column_b': 'personal_status', 'fraction': 0.25} on column ['housing', 'personal_status']


## Cleaning

In [18]:
clean = Clean(train_data, test_data_corrupted, categorical_columns, numerical_columns, ppp, ppp_model)

In [19]:
test_data_cleaned, score_no_cleaning, cleaner_scores_ppp = clean(train_data, test_data_corrupted)

2020-06-23 23:46:39,593 [INFO]  CategoricalEncoder for column checking_status                                found only 44 occurrences of value >=200


PPP score no cleaning: 0.7850631136044881
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A68EEF0>: 0.7850631136044881
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A6A7C50>: 0.7911407199625993
Fitting model for column: checking_status


2020-06-23 23:46:40,439 [INFO]  
2020-06-23 23:46:40,812 [INFO]  Epoch[0] Batch [0-23]	Speed: 1095.01 samples/sec	cross-entropy=1.273606	checking_status-accuracy=0.364583
2020-06-23 23:46:41,197 [INFO]  Epoch[0] Train-cross-entropy=1.233387
2020-06-23 23:46:41,199 [INFO]  Epoch[0] Train-checking_status-accuracy=0.368056
2020-06-23 23:46:41,200 [INFO]  Epoch[0] Time cost=0.753
2020-06-23 23:46:41,210 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:46:41,271 [INFO]  Epoch[0] Validation-cross-entropy=1.030261
2020-06-23 23:46:41,272 [INFO]  Epoch[0] Validation-checking_status-accuracy=0.550000
2020-06-23 23:46:41,632 [INFO]  Epoch[1] Batch [0-23]	Speed: 1080.29 samples/sec	cross-entropy=1.132693	checking_status-accuracy=0.442708
2020-06-23 23:46:41,933 [INFO]  Epoch[1] Train-cross-entropy=1.134775
2020-06-23 23:46:41,934 [INFO]  Epoch[1] Train-checking_status-accuracy=0.441667
2020-06-23 23:46:41,937 [INFO]  Epoch[1] Time cost=0.664
2020-06-23 23:46:41,944 [INF

Fitting model for column: credit_history


2020-06-23 23:46:51,172 [INFO]  
2020-06-23 23:46:51,602 [INFO]  Epoch[0] Batch [0-23]	Speed: 978.72 samples/sec	cross-entropy=1.097994	credit_history-accuracy=0.572917
2020-06-23 23:46:51,903 [INFO]  Epoch[0] Train-cross-entropy=1.055214
2020-06-23 23:46:51,905 [INFO]  Epoch[0] Train-credit_history-accuracy=0.619444
2020-06-23 23:46:51,906 [INFO]  Epoch[0] Time cost=0.727
2020-06-23 23:46:51,916 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:46:51,976 [INFO]  Epoch[0] Validation-cross-entropy=0.863597
2020-06-23 23:46:51,977 [INFO]  Epoch[0] Validation-credit_history-accuracy=0.700000
2020-06-23 23:46:52,376 [INFO]  Epoch[1] Batch [0-23]	Speed: 973.51 samples/sec	cross-entropy=0.869200	credit_history-accuracy=0.705729
2020-06-23 23:46:52,729 [INFO]  Epoch[1] Train-cross-entropy=0.898504
2020-06-23 23:46:52,731 [INFO]  Epoch[1] Train-credit_history-accuracy=0.701389
2020-06-23 23:46:52,733 [INFO]  Epoch[1] Time cost=0.755
2020-06-23 23:46:52,745 [INFO]  Sav

Fitting model for column: purpose


2020-06-23 23:46:57,864 [INFO]  
2020-06-23 23:46:58,387 [INFO]  Epoch[0] Batch [0-23]	Speed: 780.89 samples/sec	cross-entropy=1.862968	purpose-accuracy=0.255208
2020-06-23 23:46:58,779 [INFO]  Epoch[0] Train-cross-entropy=1.784728
2020-06-23 23:46:58,780 [INFO]  Epoch[0] Train-purpose-accuracy=0.287500
2020-06-23 23:46:58,782 [INFO]  Epoch[0] Time cost=0.907
2020-06-23 23:46:58,793 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:46:58,852 [INFO]  Epoch[0] Validation-cross-entropy=1.736570
2020-06-23 23:46:58,854 [INFO]  Epoch[0] Validation-purpose-accuracy=0.362500
2020-06-23 23:46:59,300 [INFO]  Epoch[1] Batch [0-23]	Speed: 866.16 samples/sec	cross-entropy=1.605714	purpose-accuracy=0.294271
2020-06-23 23:46:59,664 [INFO]  Epoch[1] Train-cross-entropy=1.607384
2020-06-23 23:46:59,666 [INFO]  Epoch[1] Train-purpose-accuracy=0.326389
2020-06-23 23:46:59,667 [INFO]  Epoch[1] Time cost=0.811
2020-06-23 23:46:59,681 [INFO]  Saved checkpoint to "imputer_model\mod

Fitting model for column: savings_status


2020-06-23 23:47:11,280 [INFO]  
2020-06-23 23:47:11,669 [INFO]  Epoch[0] Batch [0-23]	Speed: 1039.39 samples/sec	cross-entropy=1.113137	savings_status-accuracy=0.635417
2020-06-23 23:47:12,012 [INFO]  Epoch[0] Train-cross-entropy=1.168631
2020-06-23 23:47:12,014 [INFO]  Epoch[0] Train-savings_status-accuracy=0.601389
2020-06-23 23:47:12,016 [INFO]  Epoch[0] Time cost=0.731
2020-06-23 23:47:12,024 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:47:12,089 [INFO]  Epoch[0] Validation-cross-entropy=1.262121
2020-06-23 23:47:12,091 [INFO]  Epoch[0] Validation-savings_status-accuracy=0.487500
2020-06-23 23:47:12,476 [INFO]  Epoch[1] Batch [0-23]	Speed: 1065.88 samples/sec	cross-entropy=0.976565	savings_status-accuracy=0.658854
2020-06-23 23:47:12,790 [INFO]  Epoch[1] Train-cross-entropy=1.068405
2020-06-23 23:47:12,791 [INFO]  Epoch[1] Train-savings_status-accuracy=0.613889
2020-06-23 23:47:12,793 [INFO]  Epoch[1] Time cost=0.700
2020-06-23 23:47:12,802 [INFO]  S

Fitting model for column: employment


2020-06-23 23:47:19,176 [INFO]  
2020-06-23 23:47:19,552 [INFO]  Epoch[0] Batch [0-23]	Speed: 1100.31 samples/sec	cross-entropy=1.455901	employment-accuracy=0.348958
2020-06-23 23:47:19,871 [INFO]  Epoch[0] Train-cross-entropy=1.389834
2020-06-23 23:47:19,873 [INFO]  Epoch[0] Train-employment-accuracy=0.386111
2020-06-23 23:47:19,876 [INFO]  Epoch[0] Time cost=0.693
2020-06-23 23:47:19,885 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:47:19,950 [INFO]  Epoch[0] Validation-cross-entropy=1.207753
2020-06-23 23:47:19,952 [INFO]  Epoch[0] Validation-employment-accuracy=0.475000
2020-06-23 23:47:20,337 [INFO]  Epoch[1] Batch [0-23]	Speed: 1002.67 samples/sec	cross-entropy=1.273658	employment-accuracy=0.419271
2020-06-23 23:47:20,650 [INFO]  Epoch[1] Train-cross-entropy=1.271087
2020-06-23 23:47:20,652 [INFO]  Epoch[1] Train-employment-accuracy=0.434722
2020-06-23 23:47:20,655 [INFO]  Epoch[1] Time cost=0.700
2020-06-23 23:47:20,664 [INFO]  Saved checkpoint to "

Fitting model for column: personal_status


2020-06-23 23:47:28,561 [INFO]  
2020-06-23 23:47:29,113 [INFO]  Epoch[0] Batch [0-23]	Speed: 949.74 samples/sec	cross-entropy=1.077722	personal_status-accuracy=0.570312
2020-06-23 23:47:29,524 [INFO]  Epoch[0] Train-cross-entropy=0.997937
2020-06-23 23:47:29,527 [INFO]  Epoch[0] Train-personal_status-accuracy=0.588889
2020-06-23 23:47:29,529 [INFO]  Epoch[0] Time cost=0.960
2020-06-23 23:47:29,550 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:47:29,652 [INFO]  Epoch[0] Validation-cross-entropy=0.904025
2020-06-23 23:47:29,655 [INFO]  Epoch[0] Validation-personal_status-accuracy=0.587500
2020-06-23 23:47:30,139 [INFO]  Epoch[1] Batch [0-23]	Speed: 803.89 samples/sec	cross-entropy=0.897072	personal_status-accuracy=0.614583
2020-06-23 23:47:30,466 [INFO]  Epoch[1] Train-cross-entropy=0.883001
2020-06-23 23:47:30,468 [INFO]  Epoch[1] Train-personal_status-accuracy=0.629167
2020-06-23 23:47:30,469 [INFO]  Epoch[1] Time cost=0.813
2020-06-23 23:47:30,478 [INFO]

Fitting model for column: other_parties


2020-06-23 23:47:37,916 [INFO]  
2020-06-23 23:47:38,331 [INFO]  Epoch[0] Batch [0-23]	Speed: 978.68 samples/sec	cross-entropy=0.558261	other_parties-accuracy=0.867188
2020-06-23 23:47:38,631 [INFO]  Epoch[0] Train-cross-entropy=0.474525
2020-06-23 23:47:38,634 [INFO]  Epoch[0] Train-other_parties-accuracy=0.884722
2020-06-23 23:47:38,637 [INFO]  Epoch[0] Time cost=0.714
2020-06-23 23:47:38,647 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:47:38,708 [INFO]  Epoch[0] Validation-cross-entropy=0.339532
2020-06-23 23:47:38,709 [INFO]  Epoch[0] Validation-other_parties-accuracy=0.912500
2020-06-23 23:47:39,038 [INFO]  Epoch[1] Batch [0-23]	Speed: 1182.64 samples/sec	cross-entropy=0.355308	other_parties-accuracy=0.898438
2020-06-23 23:47:39,346 [INFO]  Epoch[1] Train-cross-entropy=0.351977
2020-06-23 23:47:39,348 [INFO]  Epoch[1] Train-other_parties-accuracy=0.901389
2020-06-23 23:47:39,350 [INFO]  Epoch[1] Time cost=0.639
2020-06-23 23:47:39,360 [INFO]  Saved c

Fitting model for column: property_magnitude


2020-06-23 23:47:45,329 [INFO]  
2020-06-23 23:47:45,741 [INFO]  Epoch[0] Batch [0-23]	Speed: 970.94 samples/sec	cross-entropy=1.286015	property_magnitude-accuracy=0.367188
2020-06-23 23:47:46,064 [INFO]  Epoch[0] Train-cross-entropy=1.257152
2020-06-23 23:47:46,066 [INFO]  Epoch[0] Train-property_magnitude-accuracy=0.388889
2020-06-23 23:47:46,068 [INFO]  Epoch[0] Time cost=0.733
2020-06-23 23:47:46,077 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:47:46,139 [INFO]  Epoch[0] Validation-cross-entropy=1.115152
2020-06-23 23:47:46,142 [INFO]  Epoch[0] Validation-property_magnitude-accuracy=0.475000
2020-06-23 23:47:46,608 [INFO]  Epoch[1] Batch [0-23]	Speed: 843.36 samples/sec	cross-entropy=1.137664	property_magnitude-accuracy=0.429688
2020-06-23 23:47:46,931 [INFO]  Epoch[1] Train-cross-entropy=1.151788
2020-06-23 23:47:46,933 [INFO]  Epoch[1] Train-property_magnitude-accuracy=0.444444
2020-06-23 23:47:46,935 [INFO]  Epoch[1] Time cost=0.789
2020-06-23 23:4

2020-06-23 23:47:57,091 [INFO]  Epoch[13] Time cost=0.757
2020-06-23 23:47:57,100 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:47:57,176 [INFO]  Epoch[13] Validation-cross-entropy=1.048917
2020-06-23 23:47:57,179 [INFO]  Epoch[13] Validation-property_magnitude-accuracy=0.525000
2020-06-23 23:47:57,538 [INFO]  Epoch[14] Batch [0-23]	Speed: 1108.05 samples/sec	cross-entropy=0.917490	property_magnitude-accuracy=0.591146
2020-06-23 23:47:57,853 [INFO]  Epoch[14] Train-cross-entropy=0.923863
2020-06-23 23:47:57,856 [INFO]  Epoch[14] Train-property_magnitude-accuracy=0.600000
2020-06-23 23:47:57,859 [INFO]  Epoch[14] Time cost=0.678
2020-06-23 23:47:57,867 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:47:57,931 [INFO]  Epoch[14] Validation-cross-entropy=1.047743
2020-06-23 23:47:57,933 [INFO]  Epoch[14] Validation-property_magnitude-accuracy=0.525000
2020-06-23 23:47:58,340 [INFO]  Epoch[15] Batch [0-23]	Speed: 947.23 samples/sec	c

Fitting model for column: other_payment_plans


2020-06-23 23:48:08,065 [INFO]  
2020-06-23 23:48:08,461 [INFO]  Epoch[0] Batch [0-23]	Speed: 1020.60 samples/sec	cross-entropy=0.732956	other_payment_plans-accuracy=0.786458
2020-06-23 23:48:08,790 [INFO]  Epoch[0] Train-cross-entropy=0.671067
2020-06-23 23:48:08,792 [INFO]  Epoch[0] Train-other_payment_plans-accuracy=0.794444
2020-06-23 23:48:08,794 [INFO]  Epoch[0] Time cost=0.722
2020-06-23 23:48:08,802 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:08,869 [INFO]  Epoch[0] Validation-cross-entropy=0.513591
2020-06-23 23:48:08,872 [INFO]  Epoch[0] Validation-other_payment_plans-accuracy=0.850000
2020-06-23 23:48:09,284 [INFO]  Epoch[1] Batch [0-23]	Speed: 934.13 samples/sec	cross-entropy=0.540247	other_payment_plans-accuracy=0.817708
2020-06-23 23:48:09,570 [INFO]  Epoch[1] Train-cross-entropy=0.549400
2020-06-23 23:48:09,572 [INFO]  Epoch[1] Train-other_payment_plans-accuracy=0.811111
2020-06-23 23:48:09,576 [INFO]  Epoch[1] Time cost=0.701
2020-06-2

Fitting model for column: housing


2020-06-23 23:48:15,730 [INFO]  
2020-06-23 23:48:16,134 [INFO]  Epoch[0] Batch [0-23]	Speed: 990.52 samples/sec	cross-entropy=0.808385	housing-accuracy=0.690104
2020-06-23 23:48:16,422 [INFO]  Epoch[0] Train-cross-entropy=0.731997
2020-06-23 23:48:16,425 [INFO]  Epoch[0] Train-housing-accuracy=0.713889
2020-06-23 23:48:16,427 [INFO]  Epoch[0] Time cost=0.690
2020-06-23 23:48:16,435 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:16,492 [INFO]  Epoch[0] Validation-cross-entropy=0.558736
2020-06-23 23:48:16,495 [INFO]  Epoch[0] Validation-housing-accuracy=0.750000
2020-06-23 23:48:16,850 [INFO]  Epoch[1] Batch [0-23]	Speed: 1101.45 samples/sec	cross-entropy=0.630810	housing-accuracy=0.752604
2020-06-23 23:48:17,140 [INFO]  Epoch[1] Train-cross-entropy=0.609902
2020-06-23 23:48:17,143 [INFO]  Epoch[1] Train-housing-accuracy=0.761111
2020-06-23 23:48:17,145 [INFO]  Epoch[1] Time cost=0.647
2020-06-23 23:48:17,153 [INFO]  Saved checkpoint to "imputer_model\mo

Fitting model for column: job


2020-06-23 23:48:25,437 [INFO]  
2020-06-23 23:48:25,867 [INFO]  Epoch[0] Batch [0-23]	Speed: 953.87 samples/sec	cross-entropy=1.073862	job-accuracy=0.580729
2020-06-23 23:48:26,176 [INFO]  Epoch[0] Train-cross-entropy=0.953120
2020-06-23 23:48:26,178 [INFO]  Epoch[0] Train-job-accuracy=0.629167
2020-06-23 23:48:26,181 [INFO]  Epoch[0] Time cost=0.738
2020-06-23 23:48:26,190 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:26,256 [INFO]  Epoch[0] Validation-cross-entropy=0.747497
2020-06-23 23:48:26,259 [INFO]  Epoch[0] Validation-job-accuracy=0.700000
2020-06-23 23:48:26,885 [INFO]  Epoch[1] Batch [0-23]	Speed: 606.88 samples/sec	cross-entropy=0.874068	job-accuracy=0.627604
2020-06-23 23:48:27,269 [INFO]  Epoch[1] Train-cross-entropy=0.820803
2020-06-23 23:48:27,271 [INFO]  Epoch[1] Train-job-accuracy=0.656944
2020-06-23 23:48:27,273 [INFO]  Epoch[1] Time cost=1.012
2020-06-23 23:48:27,284 [INFO]  Saved checkpoint to "imputer_model\model-0001.params"
2020

Fitting model for column: own_telephone


2020-06-23 23:48:33,946 [INFO]  
2020-06-23 23:48:34,334 [INFO]  Epoch[0] Batch [0-23]	Speed: 1027.81 samples/sec	cross-entropy=0.677253	own_telephone-accuracy=0.627604
2020-06-23 23:48:34,673 [INFO]  Epoch[0] Train-cross-entropy=0.630811
2020-06-23 23:48:34,676 [INFO]  Epoch[0] Train-own_telephone-accuracy=0.645833
2020-06-23 23:48:34,678 [INFO]  Epoch[0] Time cost=0.724
2020-06-23 23:48:34,686 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:34,745 [INFO]  Epoch[0] Validation-cross-entropy=0.550049
2020-06-23 23:48:34,748 [INFO]  Epoch[0] Validation-own_telephone-accuracy=0.700000
2020-06-23 23:48:35,194 [INFO]  Epoch[1] Batch [0-23]	Speed: 864.13 samples/sec	cross-entropy=0.548234	own_telephone-accuracy=0.713542
2020-06-23 23:48:35,592 [INFO]  Epoch[1] Train-cross-entropy=0.550855
2020-06-23 23:48:35,597 [INFO]  Epoch[1] Train-own_telephone-accuracy=0.698611
2020-06-23 23:48:35,601 [INFO]  Epoch[1] Time cost=0.849
2020-06-23 23:48:35,615 [INFO]  Saved c

Fitting model for column: foreign_worker


2020-06-23 23:48:43,438 [INFO]  
2020-06-23 23:48:43,817 [INFO]  Epoch[0] Batch [0-23]	Speed: 1051.24 samples/sec	cross-entropy=0.353857	foreign_worker-accuracy=0.934896
2020-06-23 23:48:44,113 [INFO]  Epoch[0] Train-cross-entropy=0.243133
2020-06-23 23:48:44,116 [INFO]  Epoch[0] Train-foreign_worker-accuracy=0.952778
2020-06-23 23:48:44,119 [INFO]  Epoch[0] Time cost=0.674
2020-06-23 23:48:44,186 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:44,246 [INFO]  Epoch[0] Validation-cross-entropy=0.110697
2020-06-23 23:48:44,248 [INFO]  Epoch[0] Validation-foreign_worker-accuracy=0.975000
2020-06-23 23:48:44,580 [INFO]  Epoch[1] Batch [0-23]	Speed: 1178.84 samples/sec	cross-entropy=0.172081	foreign_worker-accuracy=0.950521
2020-06-23 23:48:44,865 [INFO]  Epoch[1] Train-cross-entropy=0.148957
2020-06-23 23:48:44,868 [INFO]  Epoch[1] Train-foreign_worker-accuracy=0.961111
2020-06-23 23:48:44,871 [INFO]  Epoch[1] Time cost=0.621
2020-06-23 23:48:44,907 [INFO]  S

2020-06-23 23:48:54,020 [INFO]  No improvement detected for 5 epochs compared to 0.09458904415369034 last error obtained: 0.09840793516486883, stopping here
2020-06-23 23:48:54,023 [INFO]  
  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: duration


2020-06-23 23:48:55,942 [INFO]  
2020-06-23 23:48:56,276 [INFO]  Epoch[0] Batch [0-23]	Speed: 1205.83 samples/sec	cross-entropy=12.460812	duration-accuracy=0.000000
2020-06-23 23:48:56,552 [INFO]  Epoch[0] Train-cross-entropy=10.290644
2020-06-23 23:48:56,555 [INFO]  Epoch[0] Train-duration-accuracy=0.000000
2020-06-23 23:48:56,558 [INFO]  Epoch[0] Time cost=0.606
2020-06-23 23:48:56,565 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:48:56,625 [INFO]  Epoch[0] Validation-cross-entropy=8.455888
2020-06-23 23:48:56,627 [INFO]  Epoch[0] Validation-duration-accuracy=0.000000
2020-06-23 23:48:56,927 [INFO]  Epoch[1] Batch [0-23]	Speed: 1307.71 samples/sec	cross-entropy=8.202595	duration-accuracy=0.000000
2020-06-23 23:48:57,192 [INFO]  Epoch[1] Train-cross-entropy=7.889361
2020-06-23 23:48:57,195 [INFO]  Epoch[1] Train-duration-accuracy=0.000000
2020-06-23 23:48:57,198 [INFO]  Epoch[1] Time cost=0.568
2020-06-23 23:48:57,207 [INFO]  Saved checkpoint to "imputer_

Fitting model for column: credit_amount


2020-06-23 23:49:04,306 [INFO]  
2020-06-23 23:49:04,711 [INFO]  Epoch[0] Batch [0-23]	Speed: 1026.33 samples/sec	cross-entropy=11.334109	credit_amount-accuracy=0.000000
2020-06-23 23:49:04,998 [INFO]  Epoch[0] Train-cross-entropy=9.346551
2020-06-23 23:49:05,001 [INFO]  Epoch[0] Train-credit_amount-accuracy=0.000000
2020-06-23 23:49:05,004 [INFO]  Epoch[0] Time cost=0.686
2020-06-23 23:49:05,011 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:49:05,068 [INFO]  Epoch[0] Validation-cross-entropy=7.911843
2020-06-23 23:49:05,071 [INFO]  Epoch[0] Validation-credit_amount-accuracy=0.000000
2020-06-23 23:49:05,397 [INFO]  Epoch[1] Batch [0-23]	Speed: 1194.12 samples/sec	cross-entropy=7.507112	credit_amount-accuracy=0.000000
2020-06-23 23:49:05,688 [INFO]  Epoch[1] Train-cross-entropy=6.808864
2020-06-23 23:49:05,691 [INFO]  Epoch[1] Train-credit_amount-accuracy=0.000000
2020-06-23 23:49:05,694 [INFO]  Epoch[1] Time cost=0.620
2020-06-23 23:49:05,701 [INFO]  Saved

Fitting model for column: installment_commitment


2020-06-23 23:49:11,824 [INFO]  
2020-06-23 23:49:12,177 [INFO]  Epoch[0] Batch [0-23]	Speed: 1123.20 samples/sec	cross-entropy=13.983709	installment_commitment-accuracy=0.000000
2020-06-23 23:49:12,536 [INFO]  Epoch[0] Train-cross-entropy=14.161158
2020-06-23 23:49:12,539 [INFO]  Epoch[0] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:49:12,542 [INFO]  Epoch[0] Time cost=0.710
2020-06-23 23:49:12,552 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:49:12,649 [INFO]  Epoch[0] Validation-cross-entropy=12.532399
2020-06-23 23:49:12,652 [INFO]  Epoch[0] Validation-installment_commitment-accuracy=0.000000
2020-06-23 23:49:13,046 [INFO]  Epoch[1] Batch [0-23]	Speed: 1031.04 samples/sec	cross-entropy=12.136464	installment_commitment-accuracy=0.000000
2020-06-23 23:49:13,311 [INFO]  Epoch[1] Train-cross-entropy=12.638519
2020-06-23 23:49:13,314 [INFO]  Epoch[1] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:49:13,317 [INFO]  Epoch[1] Time

2020-06-23 23:49:21,487 [INFO]  Epoch[13] Train-cross-entropy=10.216516
2020-06-23 23:49:21,493 [INFO]  Epoch[13] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:49:21,496 [INFO]  Epoch[13] Time cost=0.623
2020-06-23 23:49:21,505 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:49:21,571 [INFO]  Epoch[13] Validation-cross-entropy=10.845759
2020-06-23 23:49:21,575 [INFO]  Epoch[13] Validation-installment_commitment-accuracy=0.000000
2020-06-23 23:49:21,918 [INFO]  Epoch[14] Batch [0-23]	Speed: 1135.34 samples/sec	cross-entropy=9.764279	installment_commitment-accuracy=0.000000
2020-06-23 23:49:22,180 [INFO]  Epoch[14] Train-cross-entropy=10.158093
2020-06-23 23:49:22,183 [INFO]  Epoch[14] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:49:22,188 [INFO]  Epoch[14] Time cost=0.610
2020-06-23 23:49:22,210 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:49:22,294 [INFO]  Epoch[14] Validation-cross-entropy=10.847

Fitting model for column: residence_since


2020-06-23 23:49:31,651 [INFO]  
2020-06-23 23:49:31,967 [INFO]  Epoch[0] Batch [0-23]	Speed: 1265.74 samples/sec	cross-entropy=15.772583	residence_since-accuracy=0.000000
2020-06-23 23:49:32,228 [INFO]  Epoch[0] Train-cross-entropy=15.872130
2020-06-23 23:49:32,231 [INFO]  Epoch[0] Train-residence_since-accuracy=0.000000
2020-06-23 23:49:32,233 [INFO]  Epoch[0] Time cost=0.572
2020-06-23 23:49:32,241 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:49:32,297 [INFO]  Epoch[0] Validation-cross-entropy=13.442101
2020-06-23 23:49:32,299 [INFO]  Epoch[0] Validation-residence_since-accuracy=0.000000
2020-06-23 23:49:32,590 [INFO]  Epoch[1] Batch [0-23]	Speed: 1366.01 samples/sec	cross-entropy=14.111906	residence_since-accuracy=0.000000
2020-06-23 23:49:32,858 [INFO]  Epoch[1] Train-cross-entropy=14.454210
2020-06-23 23:49:32,860 [INFO]  Epoch[1] Train-residence_since-accuracy=0.000000
2020-06-23 23:49:32,863 [INFO]  Epoch[1] Time cost=0.560
2020-06-23 23:49:32,872

Fitting model for column: age


2020-06-23 23:49:37,444 [INFO]  
2020-06-23 23:49:37,760 [INFO]  Epoch[0] Batch [0-23]	Speed: 1259.33 samples/sec	cross-entropy=14.743043	age-accuracy=0.000000
2020-06-23 23:49:38,045 [INFO]  Epoch[0] Train-cross-entropy=14.527403
2020-06-23 23:49:38,048 [INFO]  Epoch[0] Train-age-accuracy=0.000000
2020-06-23 23:49:38,051 [INFO]  Epoch[0] Time cost=0.599
2020-06-23 23:49:38,064 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:49:38,142 [INFO]  Epoch[0] Validation-cross-entropy=18.740842
2020-06-23 23:49:38,145 [INFO]  Epoch[0] Validation-age-accuracy=0.000000
2020-06-23 23:49:38,451 [INFO]  Epoch[1] Batch [0-23]	Speed: 1267.98 samples/sec	cross-entropy=13.188428	age-accuracy=0.000000
2020-06-23 23:49:38,709 [INFO]  Epoch[1] Train-cross-entropy=13.191465
2020-06-23 23:49:38,711 [INFO]  Epoch[1] Train-age-accuracy=0.000000
2020-06-23 23:49:38,716 [INFO]  Epoch[1] Time cost=0.568
2020-06-23 23:49:38,724 [INFO]  Saved checkpoint to "imputer_model\model-0001.param

2020-06-23 23:49:48,555 [INFO]  Epoch[14] Time cost=0.610
2020-06-23 23:49:48,565 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:49:48,628 [INFO]  Epoch[14] Validation-cross-entropy=15.500333
2020-06-23 23:49:48,631 [INFO]  Epoch[14] Validation-age-accuracy=0.000000
2020-06-23 23:49:48,910 [INFO]  Epoch[15] Batch [0-23]	Speed: 1405.61 samples/sec	cross-entropy=10.293360	age-accuracy=0.000000
2020-06-23 23:49:49,216 [INFO]  Epoch[15] Train-cross-entropy=10.283104
2020-06-23 23:49:49,219 [INFO]  Epoch[15] Train-age-accuracy=0.000000
2020-06-23 23:49:49,222 [INFO]  Epoch[15] Time cost=0.588
2020-06-23 23:49:49,231 [INFO]  Saved checkpoint to "imputer_model\model-0015.params"
2020-06-23 23:49:49,291 [INFO]  Epoch[15] Validation-cross-entropy=15.553640
2020-06-23 23:49:49,295 [INFO]  Epoch[15] Validation-age-accuracy=0.000000
2020-06-23 23:49:49,609 [INFO]  Epoch[16] Batch [0-23]	Speed: 1238.20 samples/sec	cross-entropy=10.222141	age-accuracy=0.000000
2020-06-23

Fitting model for column: existing_credits


2020-06-23 23:49:53,255 [INFO]  
2020-06-23 23:49:53,616 [INFO]  Epoch[0] Batch [0-23]	Speed: 1121.46 samples/sec	cross-entropy=15.613809	existing_credits-accuracy=0.000000
2020-06-23 23:49:53,906 [INFO]  Epoch[0] Train-cross-entropy=15.558377
2020-06-23 23:49:53,908 [INFO]  Epoch[0] Train-existing_credits-accuracy=0.000000
2020-06-23 23:49:53,911 [INFO]  Epoch[0] Time cost=0.648
2020-06-23 23:49:53,954 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:49:54,013 [INFO]  Epoch[0] Validation-cross-entropy=12.436997
2020-06-23 23:49:54,017 [INFO]  Epoch[0] Validation-existing_credits-accuracy=0.000000
2020-06-23 23:49:54,332 [INFO]  Epoch[1] Batch [0-23]	Speed: 1236.09 samples/sec	cross-entropy=13.220725	existing_credits-accuracy=0.000000
2020-06-23 23:49:54,630 [INFO]  Epoch[1] Train-cross-entropy=13.562427
2020-06-23 23:49:54,634 [INFO]  Epoch[1] Train-existing_credits-accuracy=0.000000
2020-06-23 23:49:54,637 [INFO]  Epoch[1] Time cost=0.618
2020-06-23 23:49:5

Fitting model for column: num_dependents


2020-06-23 23:50:03,277 [INFO]  
2020-06-23 23:50:03,642 [INFO]  Epoch[0] Batch [0-23]	Speed: 1070.98 samples/sec	cross-entropy=14.737461	num_dependents-accuracy=0.000000
2020-06-23 23:50:03,903 [INFO]  Epoch[0] Train-cross-entropy=15.592755
2020-06-23 23:50:03,905 [INFO]  Epoch[0] Train-num_dependents-accuracy=0.000000
2020-06-23 23:50:03,909 [INFO]  Epoch[0] Time cost=0.625
2020-06-23 23:50:03,917 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:03,979 [INFO]  Epoch[0] Validation-cross-entropy=14.082316
2020-06-23 23:50:03,982 [INFO]  Epoch[0] Validation-num_dependents-accuracy=0.000000
2020-06-23 23:50:04,298 [INFO]  Epoch[1] Batch [0-23]	Speed: 1234.06 samples/sec	cross-entropy=13.418814	num_dependents-accuracy=0.000000
2020-06-23 23:50:04,578 [INFO]  Epoch[1] Train-cross-entropy=14.410141
2020-06-23 23:50:04,582 [INFO]  Epoch[1] Train-num_dependents-accuracy=0.000000
2020-06-23 23:50:04,585 [INFO]  Epoch[1] Time cost=0.598
2020-06-23 23:50:04,599 [INF

PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A95BD68>: 0.7898550724637682
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A95BEF0>: 0.7850631136044881
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A6C5EB8>: 0.7911407199625993


2020-06-23 23:50:09,787 [INFO]  CategoricalEncoder for column checking_status                                found only 44 occurrences of value >=200


Fitting model for column: checking_status


2020-06-23 23:50:10,746 [INFO]  
2020-06-23 23:50:11,135 [INFO]  Epoch[0] Batch [0-23]	Speed: 1045.28 samples/sec	cross-entropy=1.272596	checking_status-accuracy=0.369792
2020-06-23 23:50:11,465 [INFO]  Epoch[0] Train-cross-entropy=1.231952
2020-06-23 23:50:11,467 [INFO]  Epoch[0] Train-checking_status-accuracy=0.373611
2020-06-23 23:50:11,470 [INFO]  Epoch[0] Time cost=0.717
2020-06-23 23:50:11,481 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:11,543 [INFO]  Epoch[0] Validation-cross-entropy=1.035290
2020-06-23 23:50:11,545 [INFO]  Epoch[0] Validation-checking_status-accuracy=0.550000
2020-06-23 23:50:11,951 [INFO]  Epoch[1] Batch [0-23]	Speed: 950.94 samples/sec	cross-entropy=1.128232	checking_status-accuracy=0.424479
2020-06-23 23:50:12,539 [INFO]  Epoch[1] Train-cross-entropy=1.129626
2020-06-23 23:50:12,542 [INFO]  Epoch[1] Train-checking_status-accuracy=0.437500
2020-06-23 23:50:12,547 [INFO]  Epoch[1] Time cost=0.998
2020-06-23 23:50:12,561 [INFO

Fitting model for column: credit_history


2020-06-23 23:50:23,295 [INFO]  
2020-06-23 23:50:23,721 [INFO]  Epoch[0] Batch [0-23]	Speed: 962.12 samples/sec	cross-entropy=1.097143	credit_history-accuracy=0.562500
2020-06-23 23:50:24,048 [INFO]  Epoch[0] Train-cross-entropy=1.055306
2020-06-23 23:50:24,051 [INFO]  Epoch[0] Train-credit_history-accuracy=0.616667
2020-06-23 23:50:24,056 [INFO]  Epoch[0] Time cost=0.753
2020-06-23 23:50:24,068 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:24,136 [INFO]  Epoch[0] Validation-cross-entropy=0.863893
2020-06-23 23:50:24,139 [INFO]  Epoch[0] Validation-credit_history-accuracy=0.700000
2020-06-23 23:50:24,527 [INFO]  Epoch[1] Batch [0-23]	Speed: 999.93 samples/sec	cross-entropy=0.867855	credit_history-accuracy=0.713542
2020-06-23 23:50:24,875 [INFO]  Epoch[1] Train-cross-entropy=0.893284
2020-06-23 23:50:24,878 [INFO]  Epoch[1] Train-credit_history-accuracy=0.704167
2020-06-23 23:50:24,881 [INFO]  Epoch[1] Time cost=0.739
2020-06-23 23:50:24,891 [INFO]  Sav

Fitting model for column: purpose


2020-06-23 23:50:31,835 [INFO]  
2020-06-23 23:50:32,543 [INFO]  Epoch[0] Batch [0-23]	Speed: 552.37 samples/sec	cross-entropy=1.859061	purpose-accuracy=0.234375
2020-06-23 23:50:33,025 [INFO]  Epoch[0] Train-cross-entropy=1.785752
2020-06-23 23:50:33,028 [INFO]  Epoch[0] Train-purpose-accuracy=0.270833
2020-06-23 23:50:33,032 [INFO]  Epoch[0] Time cost=1.190
2020-06-23 23:50:33,045 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:33,107 [INFO]  Epoch[0] Validation-cross-entropy=1.741100
2020-06-23 23:50:33,111 [INFO]  Epoch[0] Validation-purpose-accuracy=0.312500
2020-06-23 23:50:33,557 [INFO]  Epoch[1] Batch [0-23]	Speed: 872.76 samples/sec	cross-entropy=1.612840	purpose-accuracy=0.302083
2020-06-23 23:50:33,951 [INFO]  Epoch[1] Train-cross-entropy=1.610436
2020-06-23 23:50:33,953 [INFO]  Epoch[1] Train-purpose-accuracy=0.340278
2020-06-23 23:50:33,958 [INFO]  Epoch[1] Time cost=0.843
2020-06-23 23:50:33,975 [INFO]  Saved checkpoint to "imputer_model\mod

Fitting model for column: savings_status


2020-06-23 23:50:43,889 [INFO]  
2020-06-23 23:50:44,400 [INFO]  Epoch[0] Batch [0-23]	Speed: 800.46 samples/sec	cross-entropy=1.114269	savings_status-accuracy=0.630208
2020-06-23 23:50:44,746 [INFO]  Epoch[0] Train-cross-entropy=1.165122
2020-06-23 23:50:44,749 [INFO]  Epoch[0] Train-savings_status-accuracy=0.598611
2020-06-23 23:50:44,752 [INFO]  Epoch[0] Time cost=0.853
2020-06-23 23:50:44,764 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:44,832 [INFO]  Epoch[0] Validation-cross-entropy=1.256334
2020-06-23 23:50:44,834 [INFO]  Epoch[0] Validation-savings_status-accuracy=0.487500
2020-06-23 23:50:45,206 [INFO]  Epoch[1] Batch [0-23]	Speed: 1042.25 samples/sec	cross-entropy=0.977119	savings_status-accuracy=0.653646
2020-06-23 23:50:45,532 [INFO]  Epoch[1] Train-cross-entropy=1.066509
2020-06-23 23:50:45,536 [INFO]  Epoch[1] Train-savings_status-accuracy=0.606944
2020-06-23 23:50:45,539 [INFO]  Epoch[1] Time cost=0.702
2020-06-23 23:50:45,549 [INFO]  Sa

Fitting model for column: employment


2020-06-23 23:50:51,657 [INFO]  
2020-06-23 23:50:52,124 [INFO]  Epoch[0] Batch [0-23]	Speed: 859.03 samples/sec	cross-entropy=1.451804	employment-accuracy=0.343750
2020-06-23 23:50:52,463 [INFO]  Epoch[0] Train-cross-entropy=1.389062
2020-06-23 23:50:52,467 [INFO]  Epoch[0] Train-employment-accuracy=0.393056
2020-06-23 23:50:52,470 [INFO]  Epoch[0] Time cost=0.803
2020-06-23 23:50:52,480 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:50:52,548 [INFO]  Epoch[0] Validation-cross-entropy=1.206459
2020-06-23 23:50:52,551 [INFO]  Epoch[0] Validation-employment-accuracy=0.512500
2020-06-23 23:50:52,909 [INFO]  Epoch[1] Batch [0-23]	Speed: 1098.17 samples/sec	cross-entropy=1.266448	employment-accuracy=0.434896
2020-06-23 23:50:53,247 [INFO]  Epoch[1] Train-cross-entropy=1.267117
2020-06-23 23:50:53,251 [INFO]  Epoch[1] Train-employment-accuracy=0.448611
2020-06-23 23:50:53,253 [INFO]  Epoch[1] Time cost=0.699
2020-06-23 23:50:53,262 [INFO]  Saved checkpoint to "i

Fitting model for column: personal_status


2020-06-23 23:51:00,983 [INFO]  
2020-06-23 23:51:01,750 [INFO]  Epoch[0] Batch [0-23]	Speed: 538.00 samples/sec	cross-entropy=1.085453	personal_status-accuracy=0.578125
2020-06-23 23:51:02,175 [INFO]  Epoch[0] Train-cross-entropy=1.003259
2020-06-23 23:51:02,178 [INFO]  Epoch[0] Train-personal_status-accuracy=0.600000
2020-06-23 23:51:02,181 [INFO]  Epoch[0] Time cost=1.188
2020-06-23 23:51:02,191 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:51:02,257 [INFO]  Epoch[0] Validation-cross-entropy=0.909149
2020-06-23 23:51:02,260 [INFO]  Epoch[0] Validation-personal_status-accuracy=0.587500
2020-06-23 23:51:02,624 [INFO]  Epoch[1] Batch [0-23]	Speed: 1070.56 samples/sec	cross-entropy=0.897463	personal_status-accuracy=0.617188
2020-06-23 23:51:02,947 [INFO]  Epoch[1] Train-cross-entropy=0.885884
2020-06-23 23:51:02,950 [INFO]  Epoch[1] Train-personal_status-accuracy=0.629167
2020-06-23 23:51:02,954 [INFO]  Epoch[1] Time cost=0.690
2020-06-23 23:51:02,962 [INFO

Fitting model for column: other_parties


2020-06-23 23:51:10,268 [INFO]  
2020-06-23 23:51:10,641 [INFO]  Epoch[0] Batch [0-23]	Speed: 1083.30 samples/sec	cross-entropy=0.554831	other_parties-accuracy=0.869792
2020-06-23 23:51:11,000 [INFO]  Epoch[0] Train-cross-entropy=0.474616
2020-06-23 23:51:11,003 [INFO]  Epoch[0] Train-other_parties-accuracy=0.886111
2020-06-23 23:51:11,008 [INFO]  Epoch[0] Time cost=0.731
2020-06-23 23:51:11,017 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:51:11,083 [INFO]  Epoch[0] Validation-cross-entropy=0.347015
2020-06-23 23:51:11,086 [INFO]  Epoch[0] Validation-other_parties-accuracy=0.912500
2020-06-23 23:51:11,432 [INFO]  Epoch[1] Batch [0-23]	Speed: 1131.85 samples/sec	cross-entropy=0.356582	other_parties-accuracy=0.898438
2020-06-23 23:51:11,710 [INFO]  Epoch[1] Train-cross-entropy=0.354121
2020-06-23 23:51:11,714 [INFO]  Epoch[1] Train-other_parties-accuracy=0.901389
2020-06-23 23:51:11,719 [INFO]  Epoch[1] Time cost=0.628
2020-06-23 23:51:11,739 [INFO]  Saved 

Fitting model for column: property_magnitude


2020-06-23 23:51:18,301 [INFO]  
2020-06-23 23:51:18,691 [INFO]  Epoch[0] Batch [0-23]	Speed: 1033.52 samples/sec	cross-entropy=1.285270	property_magnitude-accuracy=0.343750
2020-06-23 23:51:19,039 [INFO]  Epoch[0] Train-cross-entropy=1.262011
2020-06-23 23:51:19,042 [INFO]  Epoch[0] Train-property_magnitude-accuracy=0.370833
2020-06-23 23:51:19,046 [INFO]  Epoch[0] Time cost=0.736
2020-06-23 23:51:19,055 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:51:19,123 [INFO]  Epoch[0] Validation-cross-entropy=1.106049
2020-06-23 23:51:19,256 [INFO]  Epoch[0] Validation-property_magnitude-accuracy=0.450000
2020-06-23 23:51:19,653 [INFO]  Epoch[1] Batch [0-23]	Speed: 1040.82 samples/sec	cross-entropy=1.136674	property_magnitude-accuracy=0.429688
2020-06-23 23:51:19,974 [INFO]  Epoch[1] Train-cross-entropy=1.153811
2020-06-23 23:51:19,978 [INFO]  Epoch[1] Train-property_magnitude-accuracy=0.443056
2020-06-23 23:51:19,981 [INFO]  Epoch[1] Time cost=0.721
2020-06-23 23

2020-06-23 23:51:30,252 [INFO]  Epoch[13] Time cost=0.771
2020-06-23 23:51:30,263 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:51:30,355 [INFO]  Epoch[13] Validation-cross-entropy=1.027115
2020-06-23 23:51:30,360 [INFO]  Epoch[13] Validation-property_magnitude-accuracy=0.525000
2020-06-23 23:51:30,806 [INFO]  Epoch[14] Batch [0-23]	Speed: 886.94 samples/sec	cross-entropy=0.920656	property_magnitude-accuracy=0.583333
2020-06-23 23:51:31,242 [INFO]  Epoch[14] Train-cross-entropy=0.929050
2020-06-23 23:51:31,246 [INFO]  Epoch[14] Train-property_magnitude-accuracy=0.591667
2020-06-23 23:51:31,250 [INFO]  Epoch[14] Time cost=0.886
2020-06-23 23:51:31,262 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:51:31,353 [INFO]  Epoch[14] Validation-cross-entropy=1.027367
2020-06-23 23:51:31,358 [INFO]  Epoch[14] Validation-property_magnitude-accuracy=0.512500
2020-06-23 23:51:31,800 [INFO]  Epoch[15] Batch [0-23]	Speed: 894.44 samples/sec	cr

Fitting model for column: other_payment_plans


2020-06-23 23:51:36,691 [INFO]  
2020-06-23 23:51:37,101 [INFO]  Epoch[0] Batch [0-23]	Speed: 1001.83 samples/sec	cross-entropy=0.725826	other_payment_plans-accuracy=0.796875
2020-06-23 23:51:37,393 [INFO]  Epoch[0] Train-cross-entropy=0.667361
2020-06-23 23:51:37,396 [INFO]  Epoch[0] Train-other_payment_plans-accuracy=0.800000
2020-06-23 23:51:37,399 [INFO]  Epoch[0] Time cost=0.700
2020-06-23 23:51:37,409 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:51:37,506 [INFO]  Epoch[0] Validation-cross-entropy=0.504431
2020-06-23 23:51:37,510 [INFO]  Epoch[0] Validation-other_payment_plans-accuracy=0.850000
2020-06-23 23:51:37,969 [INFO]  Epoch[1] Batch [0-23]	Speed: 852.39 samples/sec	cross-entropy=0.539095	other_payment_plans-accuracy=0.817708
2020-06-23 23:51:38,303 [INFO]  Epoch[1] Train-cross-entropy=0.548536
2020-06-23 23:51:38,307 [INFO]  Epoch[1] Train-other_payment_plans-accuracy=0.811111
2020-06-23 23:51:38,312 [INFO]  Epoch[1] Time cost=0.796
2020-06-2

Fitting model for column: housing


2020-06-23 23:51:45,386 [INFO]  
2020-06-23 23:51:45,736 [INFO]  Epoch[0] Batch [0-23]	Speed: 1169.32 samples/sec	cross-entropy=0.803784	housing-accuracy=0.684896
2020-06-23 23:51:46,052 [INFO]  Epoch[0] Train-cross-entropy=0.727185
2020-06-23 23:51:46,056 [INFO]  Epoch[0] Train-housing-accuracy=0.709722
2020-06-23 23:51:46,060 [INFO]  Epoch[0] Time cost=0.665
2020-06-23 23:51:46,070 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:51:46,132 [INFO]  Epoch[0] Validation-cross-entropy=0.563342
2020-06-23 23:51:46,137 [INFO]  Epoch[0] Validation-housing-accuracy=0.750000
2020-06-23 23:51:46,519 [INFO]  Epoch[1] Batch [0-23]	Speed: 1030.69 samples/sec	cross-entropy=0.626798	housing-accuracy=0.752604
2020-06-23 23:51:46,822 [INFO]  Epoch[1] Train-cross-entropy=0.604234
2020-06-23 23:51:46,826 [INFO]  Epoch[1] Train-housing-accuracy=0.758333
2020-06-23 23:51:46,830 [INFO]  Epoch[1] Time cost=0.689
2020-06-23 23:51:46,839 [INFO]  Saved checkpoint to "imputer_model\m

2020-06-23 23:51:56,464 [INFO]  Epoch[14] Train-cross-entropy=0.419790
2020-06-23 23:51:56,470 [INFO]  Epoch[14] Train-housing-accuracy=0.840278
2020-06-23 23:51:56,475 [INFO]  Epoch[14] Time cost=0.738
2020-06-23 23:51:56,487 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:51:56,564 [INFO]  Epoch[14] Validation-cross-entropy=0.537889
2020-06-23 23:51:56,568 [INFO]  Epoch[14] Validation-housing-accuracy=0.737500
2020-06-23 23:51:56,927 [INFO]  Epoch[15] Batch [0-23]	Speed: 1104.74 samples/sec	cross-entropy=0.420076	housing-accuracy=0.838542
2020-06-23 23:51:57,238 [INFO]  Epoch[15] Train-cross-entropy=0.414244
2020-06-23 23:51:57,241 [INFO]  Epoch[15] Train-housing-accuracy=0.840278
2020-06-23 23:51:57,245 [INFO]  Epoch[15] Time cost=0.673
2020-06-23 23:51:57,257 [INFO]  Saved checkpoint to "imputer_model\model-0015.params"
2020-06-23 23:51:57,327 [INFO]  Epoch[15] Validation-cross-entropy=0.538654
2020-06-23 23:51:57,331 [INFO]  Epoch[15] Validation-housing

Fitting model for column: job


2020-06-23 23:52:00,329 [INFO]  
2020-06-23 23:52:01,190 [INFO]  Epoch[0] Batch [0-23]	Speed: 538.65 samples/sec	cross-entropy=1.065688	job-accuracy=0.585938
2020-06-23 23:52:01,969 [INFO]  Epoch[0] Train-cross-entropy=0.943950
2020-06-23 23:52:01,973 [INFO]  Epoch[0] Train-job-accuracy=0.629167
2020-06-23 23:52:01,977 [INFO]  Epoch[0] Time cost=1.632
2020-06-23 23:52:02,000 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:02,100 [INFO]  Epoch[0] Validation-cross-entropy=0.761282
2020-06-23 23:52:02,104 [INFO]  Epoch[0] Validation-job-accuracy=0.712500
2020-06-23 23:52:02,715 [INFO]  Epoch[1] Batch [0-23]	Speed: 647.72 samples/sec	cross-entropy=0.865200	job-accuracy=0.619792
2020-06-23 23:52:03,032 [INFO]  Epoch[1] Train-cross-entropy=0.811324
2020-06-23 23:52:03,037 [INFO]  Epoch[1] Train-job-accuracy=0.651389
2020-06-23 23:52:03,041 [INFO]  Epoch[1] Time cost=0.933
2020-06-23 23:52:03,052 [INFO]  Saved checkpoint to "imputer_model\model-0001.params"
2020

Fitting model for column: own_telephone


2020-06-23 23:52:09,971 [INFO]  
2020-06-23 23:52:10,339 [INFO]  Epoch[0] Batch [0-23]	Speed: 1085.25 samples/sec	cross-entropy=0.677173	own_telephone-accuracy=0.627604
2020-06-23 23:52:10,671 [INFO]  Epoch[0] Train-cross-entropy=0.629852
2020-06-23 23:52:10,677 [INFO]  Epoch[0] Train-own_telephone-accuracy=0.644444
2020-06-23 23:52:10,681 [INFO]  Epoch[0] Time cost=0.699
2020-06-23 23:52:10,691 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:10,750 [INFO]  Epoch[0] Validation-cross-entropy=0.563255
2020-06-23 23:52:10,755 [INFO]  Epoch[0] Validation-own_telephone-accuracy=0.737500
2020-06-23 23:52:11,106 [INFO]  Epoch[1] Batch [0-23]	Speed: 1104.74 samples/sec	cross-entropy=0.549081	own_telephone-accuracy=0.721354
2020-06-23 23:52:11,392 [INFO]  Epoch[1] Train-cross-entropy=0.550538
2020-06-23 23:52:11,396 [INFO]  Epoch[1] Train-own_telephone-accuracy=0.705556
2020-06-23 23:52:11,400 [INFO]  Epoch[1] Time cost=0.643
2020-06-23 23:52:11,410 [INFO]  Saved 

Fitting model for column: foreign_worker


2020-06-23 23:52:18,979 [INFO]  
2020-06-23 23:52:19,377 [INFO]  Epoch[0] Batch [0-23]	Speed: 999.89 samples/sec	cross-entropy=0.355868	foreign_worker-accuracy=0.914062
2020-06-23 23:52:19,695 [INFO]  Epoch[0] Train-cross-entropy=0.242124
2020-06-23 23:52:19,698 [INFO]  Epoch[0] Train-foreign_worker-accuracy=0.941667
2020-06-23 23:52:19,701 [INFO]  Epoch[0] Time cost=0.711
2020-06-23 23:52:19,710 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:19,766 [INFO]  Epoch[0] Validation-cross-entropy=0.109380
2020-06-23 23:52:19,771 [INFO]  Epoch[0] Validation-foreign_worker-accuracy=0.975000
2020-06-23 23:52:20,097 [INFO]  Epoch[1] Batch [0-23]	Speed: 1204.68 samples/sec	cross-entropy=0.171919	foreign_worker-accuracy=0.950521
2020-06-23 23:52:20,412 [INFO]  Epoch[1] Train-cross-entropy=0.148340
2020-06-23 23:52:20,416 [INFO]  Epoch[1] Train-foreign_worker-accuracy=0.961111
2020-06-23 23:52:20,419 [INFO]  Epoch[1] Time cost=0.645
2020-06-23 23:52:20,443 [INFO]  Sa

2020-06-23 23:52:30,936 [INFO]  No improvement detected for 5 epochs compared to 0.08920075036585332 last error obtained: 0.09405812760815024, stopping here
2020-06-23 23:52:30,940 [INFO]  
  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: duration


2020-06-23 23:52:32,971 [INFO]  
2020-06-23 23:52:33,286 [INFO]  Epoch[0] Batch [0-23]	Speed: 1272.36 samples/sec	cross-entropy=12.304196	duration-accuracy=0.000000
2020-06-23 23:52:33,657 [INFO]  Epoch[0] Train-cross-entropy=10.155735
2020-06-23 23:52:33,663 [INFO]  Epoch[0] Train-duration-accuracy=0.000000
2020-06-23 23:52:33,669 [INFO]  Epoch[0] Time cost=0.687
2020-06-23 23:52:33,681 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:33,776 [INFO]  Epoch[0] Validation-cross-entropy=8.343112
2020-06-23 23:52:33,781 [INFO]  Epoch[0] Validation-duration-accuracy=0.000000
2020-06-23 23:52:34,247 [INFO]  Epoch[1] Batch [0-23]	Speed: 849.19 samples/sec	cross-entropy=8.080444	duration-accuracy=0.000000
2020-06-23 23:52:34,521 [INFO]  Epoch[1] Train-cross-entropy=7.710044
2020-06-23 23:52:34,524 [INFO]  Epoch[1] Train-duration-accuracy=0.000000
2020-06-23 23:52:34,528 [INFO]  Epoch[1] Time cost=0.740
2020-06-23 23:52:34,537 [INFO]  Saved checkpoint to "imputer_m

Fitting model for column: credit_amount


2020-06-23 23:52:40,687 [INFO]  
2020-06-23 23:52:41,026 [INFO]  Epoch[0] Batch [0-23]	Speed: 1176.87 samples/sec	cross-entropy=11.386904	credit_amount-accuracy=0.000000
2020-06-23 23:52:41,293 [INFO]  Epoch[0] Train-cross-entropy=9.348560
2020-06-23 23:52:41,297 [INFO]  Epoch[0] Train-credit_amount-accuracy=0.000000
2020-06-23 23:52:41,300 [INFO]  Epoch[0] Time cost=0.605
2020-06-23 23:52:41,449 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:41,531 [INFO]  Epoch[0] Validation-cross-entropy=7.902932
2020-06-23 23:52:41,536 [INFO]  Epoch[0] Validation-credit_amount-accuracy=0.000000
2020-06-23 23:52:41,819 [INFO]  Epoch[1] Batch [0-23]	Speed: 1381.96 samples/sec	cross-entropy=7.437895	credit_amount-accuracy=0.000000
2020-06-23 23:52:42,087 [INFO]  Epoch[1] Train-cross-entropy=6.775810
2020-06-23 23:52:42,090 [INFO]  Epoch[1] Train-credit_amount-accuracy=0.000000
2020-06-23 23:52:42,094 [INFO]  Epoch[1] Time cost=0.554
2020-06-23 23:52:42,232 [INFO]  Saved

Fitting model for column: installment_commitment


2020-06-23 23:52:48,309 [INFO]  
2020-06-23 23:52:48,647 [INFO]  Epoch[0] Batch [0-23]	Speed: 1198.00 samples/sec	cross-entropy=14.126879	installment_commitment-accuracy=0.000000
2020-06-23 23:52:48,912 [INFO]  Epoch[0] Train-cross-entropy=14.218525
2020-06-23 23:52:48,917 [INFO]  Epoch[0] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:52:49,226 [INFO]  Epoch[0] Time cost=0.906
2020-06-23 23:52:49,238 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:52:49,309 [INFO]  Epoch[0] Validation-cross-entropy=12.467763
2020-06-23 23:52:49,314 [INFO]  Epoch[0] Validation-installment_commitment-accuracy=0.000000
2020-06-23 23:52:49,628 [INFO]  Epoch[1] Batch [0-23]	Speed: 1267.99 samples/sec	cross-entropy=11.996369	installment_commitment-accuracy=0.000000
2020-06-23 23:52:49,908 [INFO]  Epoch[1] Train-cross-entropy=12.466674
2020-06-23 23:52:49,912 [INFO]  Epoch[1] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:52:49,916 [INFO]  Epoch[1] Time

2020-06-23 23:52:58,301 [INFO]  Epoch[13] Train-cross-entropy=10.153947
2020-06-23 23:52:58,304 [INFO]  Epoch[13] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:52:58,309 [INFO]  Epoch[13] Time cost=0.572
2020-06-23 23:52:58,318 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:52:58,380 [INFO]  No improvement detected for 5 epochs compared to 10.84937572479248 last error obtained: 10.889868545532227, stopping here
2020-06-23 23:52:58,384 [INFO]  


Fitting model for column: residence_since


2020-06-23 23:52:59,891 [INFO]  
2020-06-23 23:53:00,659 [INFO]  Epoch[0] Batch [0-23]	Speed: 496.93 samples/sec	cross-entropy=15.337657	residence_since-accuracy=0.000000
2020-06-23 23:53:01,185 [INFO]  Epoch[0] Train-cross-entropy=15.664172
2020-06-23 23:53:01,189 [INFO]  Epoch[0] Train-residence_since-accuracy=0.000000
2020-06-23 23:53:01,198 [INFO]  Epoch[0] Time cost=1.298
2020-06-23 23:53:01,234 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:01,356 [INFO]  Epoch[0] Validation-cross-entropy=13.463180
2020-06-23 23:53:01,360 [INFO]  Epoch[0] Validation-residence_since-accuracy=0.000000
2020-06-23 23:53:01,785 [INFO]  Epoch[1] Batch [0-23]	Speed: 909.30 samples/sec	cross-entropy=14.000938	residence_since-accuracy=0.000000
2020-06-23 23:53:02,052 [INFO]  Epoch[1] Train-cross-entropy=14.373775
2020-06-23 23:53:02,054 [INFO]  Epoch[1] Train-residence_since-accuracy=0.000000
2020-06-23 23:53:02,058 [INFO]  Epoch[1] Time cost=0.693
2020-06-23 23:53:02,068 [

Fitting model for column: age


2020-06-23 23:53:06,886 [INFO]  
2020-06-23 23:53:07,432 [INFO]  Epoch[0] Batch [0-23]	Speed: 710.91 samples/sec	cross-entropy=14.887749	age-accuracy=0.000000
2020-06-23 23:53:07,707 [INFO]  Epoch[0] Train-cross-entropy=14.556079
2020-06-23 23:53:07,710 [INFO]  Epoch[0] Train-age-accuracy=0.000000
2020-06-23 23:53:07,715 [INFO]  Epoch[0] Time cost=0.818
2020-06-23 23:53:07,722 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:07,780 [INFO]  Epoch[0] Validation-cross-entropy=18.617782
2020-06-23 23:53:07,784 [INFO]  Epoch[0] Validation-age-accuracy=0.000000
2020-06-23 23:53:08,097 [INFO]  Epoch[1] Batch [0-23]	Speed: 1272.36 samples/sec	cross-entropy=13.249361	age-accuracy=0.000000
2020-06-23 23:53:08,427 [INFO]  Epoch[1] Train-cross-entropy=13.257826
2020-06-23 23:53:08,431 [INFO]  Epoch[1] Train-age-accuracy=0.000000
2020-06-23 23:53:08,435 [INFO]  Epoch[1] Time cost=0.645
2020-06-23 23:53:08,445 [INFO]  Saved checkpoint to "imputer_model\model-0001.params

2020-06-23 23:53:17,355 [INFO]  Epoch[14] Time cost=0.593
2020-06-23 23:53:17,364 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:53:17,443 [INFO]  Epoch[14] Validation-cross-entropy=15.289049
2020-06-23 23:53:17,447 [INFO]  Epoch[14] Validation-age-accuracy=0.000000
2020-06-23 23:53:17,777 [INFO]  Epoch[15] Batch [0-23]	Speed: 1209.79 samples/sec	cross-entropy=10.219643	age-accuracy=0.000000
2020-06-23 23:53:18,048 [INFO]  Epoch[15] Train-cross-entropy=10.232442
2020-06-23 23:53:18,053 [INFO]  Epoch[15] Train-age-accuracy=0.000000
2020-06-23 23:53:18,057 [INFO]  Epoch[15] Time cost=0.606
2020-06-23 23:53:18,065 [INFO]  Saved checkpoint to "imputer_model\model-0015.params"
2020-06-23 23:53:18,135 [INFO]  Epoch[15] Validation-cross-entropy=15.280020
2020-06-23 23:53:18,140 [INFO]  Epoch[15] Validation-age-accuracy=0.000000
2020-06-23 23:53:18,456 [INFO]  Epoch[16] Batch [0-23]	Speed: 1262.56 samples/sec	cross-entropy=10.147742	age-accuracy=0.000000
2020-06-23

Fitting model for column: existing_credits


2020-06-23 23:53:27,336 [INFO]  
2020-06-23 23:53:27,701 [INFO]  Epoch[0] Batch [0-23]	Speed: 1186.16 samples/sec	cross-entropy=15.370970	existing_credits-accuracy=0.000000
2020-06-23 23:53:27,989 [INFO]  Epoch[0] Train-cross-entropy=15.468412
2020-06-23 23:53:27,993 [INFO]  Epoch[0] Train-existing_credits-accuracy=0.000000
2020-06-23 23:53:27,999 [INFO]  Epoch[0] Time cost=0.649
2020-06-23 23:53:28,019 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:28,085 [INFO]  Epoch[0] Validation-cross-entropy=12.327077
2020-06-23 23:53:28,089 [INFO]  Epoch[0] Validation-existing_credits-accuracy=0.000000
2020-06-23 23:53:28,539 [INFO]  Epoch[1] Batch [0-23]	Speed: 882.69 samples/sec	cross-entropy=13.228872	existing_credits-accuracy=0.000000
2020-06-23 23:53:28,842 [INFO]  Epoch[1] Train-cross-entropy=13.579072
2020-06-23 23:53:28,848 [INFO]  Epoch[1] Train-existing_credits-accuracy=0.000000
2020-06-23 23:53:28,852 [INFO]  Epoch[1] Time cost=0.759
2020-06-23 23:53:28

Fitting model for column: num_dependents


2020-06-23 23:53:37,844 [INFO]  
2020-06-23 23:53:38,158 [INFO]  Epoch[0] Batch [0-23]	Speed: 1281.09 samples/sec	cross-entropy=14.917808	num_dependents-accuracy=0.000000
2020-06-23 23:53:38,428 [INFO]  Epoch[0] Train-cross-entropy=15.656465
2020-06-23 23:53:38,432 [INFO]  Epoch[0] Train-num_dependents-accuracy=0.000000
2020-06-23 23:53:38,436 [INFO]  Epoch[0] Time cost=0.582
2020-06-23 23:53:38,445 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:38,503 [INFO]  Epoch[0] Validation-cross-entropy=14.461855
2020-06-23 23:53:38,507 [INFO]  Epoch[0] Validation-num_dependents-accuracy=0.000000
2020-06-23 23:53:38,807 [INFO]  Epoch[1] Batch [0-23]	Speed: 1308.44 samples/sec	cross-entropy=13.459229	num_dependents-accuracy=0.000000
2020-06-23 23:53:39,053 [INFO]  Epoch[1] Train-cross-entropy=14.406663
2020-06-23 23:53:39,057 [INFO]  Epoch[1] Train-num_dependents-accuracy=0.000000
2020-06-23 23:53:39,061 [INFO]  Epoch[1] Time cost=0.551
2020-06-23 23:53:39,072 [INF

PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A6C5780>: 0.7897381954184197
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A970208>: 0.7850631136044881
PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A970438>: 0.7911407199625993


2020-06-23 23:53:45,304 [INFO]  CategoricalEncoder for column checking_status                                found only 44 occurrences of value >=200


Fitting model for column: checking_status


2020-06-23 23:53:46,179 [INFO]  
2020-06-23 23:53:46,560 [INFO]  Epoch[0] Batch [0-23]	Speed: 1108.03 samples/sec	cross-entropy=1.269060	checking_status-accuracy=0.367188
2020-06-23 23:53:46,921 [INFO]  Epoch[0] Train-cross-entropy=1.230553
2020-06-23 23:53:46,925 [INFO]  Epoch[0] Train-checking_status-accuracy=0.373611
2020-06-23 23:53:46,929 [INFO]  Epoch[0] Time cost=0.738
2020-06-23 23:53:46,938 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:47,010 [INFO]  Epoch[0] Validation-cross-entropy=1.033660
2020-06-23 23:53:47,015 [INFO]  Epoch[0] Validation-checking_status-accuracy=0.537500
2020-06-23 23:53:47,379 [INFO]  Epoch[1] Batch [0-23]	Speed: 1075.75 samples/sec	cross-entropy=1.130952	checking_status-accuracy=0.440104
2020-06-23 23:53:47,698 [INFO]  Epoch[1] Train-cross-entropy=1.133254
2020-06-23 23:53:47,702 [INFO]  Epoch[1] Train-checking_status-accuracy=0.440278
2020-06-23 23:53:47,706 [INFO]  Epoch[1] Time cost=0.687
2020-06-23 23:53:47,715 [INF

Fitting model for column: credit_history


2020-06-23 23:53:56,730 [INFO]  
2020-06-23 23:53:57,152 [INFO]  Epoch[0] Batch [0-23]	Speed: 952.19 samples/sec	cross-entropy=1.101764	credit_history-accuracy=0.580729
2020-06-23 23:53:57,531 [INFO]  Epoch[0] Train-cross-entropy=1.059231
2020-06-23 23:53:57,535 [INFO]  Epoch[0] Train-credit_history-accuracy=0.627778
2020-06-23 23:53:57,539 [INFO]  Epoch[0] Time cost=0.798
2020-06-23 23:53:57,548 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:53:57,616 [INFO]  Epoch[0] Validation-cross-entropy=0.865247
2020-06-23 23:53:57,620 [INFO]  Epoch[0] Validation-credit_history-accuracy=0.700000
2020-06-23 23:53:57,989 [INFO]  Epoch[1] Batch [0-23]	Speed: 1069.87 samples/sec	cross-entropy=0.870805	credit_history-accuracy=0.710938
2020-06-23 23:53:58,355 [INFO]  Epoch[1] Train-cross-entropy=0.899350
2020-06-23 23:53:58,360 [INFO]  Epoch[1] Train-credit_history-accuracy=0.705556
2020-06-23 23:53:58,365 [INFO]  Epoch[1] Time cost=0.740
2020-06-23 23:53:58,390 [INFO]  Sa

Fitting model for column: purpose


2020-06-23 23:54:04,381 [INFO]  
2020-06-23 23:54:04,907 [INFO]  Epoch[0] Batch [0-23]	Speed: 778.45 samples/sec	cross-entropy=1.856849	purpose-accuracy=0.250000
2020-06-23 23:54:05,288 [INFO]  Epoch[0] Train-cross-entropy=1.778873
2020-06-23 23:54:05,294 [INFO]  Epoch[0] Train-purpose-accuracy=0.288889
2020-06-23 23:54:05,300 [INFO]  Epoch[0] Time cost=0.908
2020-06-23 23:54:05,321 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:05,406 [INFO]  Epoch[0] Validation-cross-entropy=1.739591
2020-06-23 23:54:05,409 [INFO]  Epoch[0] Validation-purpose-accuracy=0.325000
2020-06-23 23:54:05,946 [INFO]  Epoch[1] Batch [0-23]	Speed: 719.51 samples/sec	cross-entropy=1.608621	purpose-accuracy=0.294271
2020-06-23 23:54:06,420 [INFO]  Epoch[1] Train-cross-entropy=1.605954
2020-06-23 23:54:06,426 [INFO]  Epoch[1] Train-purpose-accuracy=0.333333
2020-06-23 23:54:06,430 [INFO]  Epoch[1] Time cost=1.016
2020-06-23 23:54:06,447 [INFO]  Saved checkpoint to "imputer_model\mod

Fitting model for column: savings_status


2020-06-23 23:54:15,764 [INFO]  
2020-06-23 23:54:16,196 [INFO]  Epoch[0] Batch [0-23]	Speed: 981.34 samples/sec	cross-entropy=1.110756	savings_status-accuracy=0.627604
2020-06-23 23:54:16,572 [INFO]  Epoch[0] Train-cross-entropy=1.167470
2020-06-23 23:54:16,577 [INFO]  Epoch[0] Train-savings_status-accuracy=0.591667
2020-06-23 23:54:16,581 [INFO]  Epoch[0] Time cost=0.807
2020-06-23 23:54:16,591 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:16,651 [INFO]  Epoch[0] Validation-cross-entropy=1.266267
2020-06-23 23:54:16,656 [INFO]  Epoch[0] Validation-savings_status-accuracy=0.487500
2020-06-23 23:54:17,159 [INFO]  Epoch[1] Batch [0-23]	Speed: 804.73 samples/sec	cross-entropy=0.971913	savings_status-accuracy=0.658854
2020-06-23 23:54:17,538 [INFO]  Epoch[1] Train-cross-entropy=1.066156
2020-06-23 23:54:17,541 [INFO]  Epoch[1] Train-savings_status-accuracy=0.612500
2020-06-23 23:54:17,546 [INFO]  Epoch[1] Time cost=0.882
2020-06-23 23:54:17,557 [INFO]  Sav

Fitting model for column: employment


2020-06-23 23:54:23,163 [INFO]  
2020-06-23 23:54:23,656 [INFO]  Epoch[0] Batch [0-23]	Speed: 827.32 samples/sec	cross-entropy=1.457832	employment-accuracy=0.341146
2020-06-23 23:54:23,994 [INFO]  Epoch[0] Train-cross-entropy=1.389292
2020-06-23 23:54:24,000 [INFO]  Epoch[0] Train-employment-accuracy=0.387500
2020-06-23 23:54:24,005 [INFO]  Epoch[0] Time cost=0.832
2020-06-23 23:54:24,017 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:24,078 [INFO]  Epoch[0] Validation-cross-entropy=1.204195
2020-06-23 23:54:24,083 [INFO]  Epoch[0] Validation-employment-accuracy=0.462500
2020-06-23 23:54:24,464 [INFO]  Epoch[1] Batch [0-23]	Speed: 1024.96 samples/sec	cross-entropy=1.274735	employment-accuracy=0.424479
2020-06-23 23:54:24,930 [INFO]  Epoch[1] Train-cross-entropy=1.268565
2020-06-23 23:54:24,935 [INFO]  Epoch[1] Train-employment-accuracy=0.433333
2020-06-23 23:54:24,941 [INFO]  Epoch[1] Time cost=0.853
2020-06-23 23:54:24,957 [INFO]  Saved checkpoint to "i

Fitting model for column: personal_status


2020-06-23 23:54:32,492 [INFO]  
2020-06-23 23:54:32,904 [INFO]  Epoch[0] Batch [0-23]	Speed: 986.59 samples/sec	cross-entropy=1.086510	personal_status-accuracy=0.559896
2020-06-23 23:54:33,248 [INFO]  Epoch[0] Train-cross-entropy=1.006520
2020-06-23 23:54:33,252 [INFO]  Epoch[0] Train-personal_status-accuracy=0.588889
2020-06-23 23:54:33,259 [INFO]  Epoch[0] Time cost=0.755
2020-06-23 23:54:33,268 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:33,327 [INFO]  Epoch[0] Validation-cross-entropy=0.904718
2020-06-23 23:54:33,330 [INFO]  Epoch[0] Validation-personal_status-accuracy=0.625000
2020-06-23 23:54:33,737 [INFO]  Epoch[1] Batch [0-23]	Speed: 1003.97 samples/sec	cross-entropy=0.898684	personal_status-accuracy=0.619792
2020-06-23 23:54:34,048 [INFO]  Epoch[1] Train-cross-entropy=0.887209
2020-06-23 23:54:34,053 [INFO]  Epoch[1] Train-personal_status-accuracy=0.637500
2020-06-23 23:54:34,057 [INFO]  Epoch[1] Time cost=0.721
2020-06-23 23:54:34,069 [INFO

Fitting model for column: other_parties


2020-06-23 23:54:39,835 [INFO]  
2020-06-23 23:54:40,249 [INFO]  Epoch[0] Batch [0-23]	Speed: 1042.33 samples/sec	cross-entropy=0.559025	other_parties-accuracy=0.864583
2020-06-23 23:54:40,548 [INFO]  Epoch[0] Train-cross-entropy=0.473106
2020-06-23 23:54:40,553 [INFO]  Epoch[0] Train-other_parties-accuracy=0.883333
2020-06-23 23:54:40,557 [INFO]  Epoch[0] Time cost=0.708
2020-06-23 23:54:40,567 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:40,625 [INFO]  Epoch[0] Validation-cross-entropy=0.344084
2020-06-23 23:54:40,630 [INFO]  Epoch[0] Validation-other_parties-accuracy=0.912500
2020-06-23 23:54:40,962 [INFO]  Epoch[1] Batch [0-23]	Speed: 1203.83 samples/sec	cross-entropy=0.352839	other_parties-accuracy=0.898438
2020-06-23 23:54:41,271 [INFO]  Epoch[1] Train-cross-entropy=0.352027
2020-06-23 23:54:41,276 [INFO]  Epoch[1] Train-other_parties-accuracy=0.901389
2020-06-23 23:54:41,282 [INFO]  Epoch[1] Time cost=0.646
2020-06-23 23:54:41,323 [INFO]  Saved 

Fitting model for column: property_magnitude


2020-06-23 23:54:49,230 [INFO]  
2020-06-23 23:54:49,626 [INFO]  Epoch[0] Batch [0-23]	Speed: 1036.47 samples/sec	cross-entropy=1.284189	property_magnitude-accuracy=0.375000
2020-06-23 23:54:49,928 [INFO]  Epoch[0] Train-cross-entropy=1.262701
2020-06-23 23:54:49,933 [INFO]  Epoch[0] Train-property_magnitude-accuracy=0.390278
2020-06-23 23:54:49,937 [INFO]  Epoch[0] Time cost=0.697
2020-06-23 23:54:49,947 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:54:50,007 [INFO]  Epoch[0] Validation-cross-entropy=1.110885
2020-06-23 23:54:50,011 [INFO]  Epoch[0] Validation-property_magnitude-accuracy=0.500000
2020-06-23 23:54:50,363 [INFO]  Epoch[1] Batch [0-23]	Speed: 1103.03 samples/sec	cross-entropy=1.133572	property_magnitude-accuracy=0.437500
2020-06-23 23:54:50,663 [INFO]  Epoch[1] Train-cross-entropy=1.153508
2020-06-23 23:54:50,667 [INFO]  Epoch[1] Train-property_magnitude-accuracy=0.452778
2020-06-23 23:54:50,672 [INFO]  Epoch[1] Time cost=0.656
2020-06-23 23

2020-06-23 23:55:01,119 [INFO]  Epoch[13] Time cost=1.260
2020-06-23 23:55:01,139 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:55:01,227 [INFO]  Epoch[13] Validation-cross-entropy=1.045725
2020-06-23 23:55:01,233 [INFO]  Epoch[13] Validation-property_magnitude-accuracy=0.537500
2020-06-23 23:55:01,691 [INFO]  Epoch[14] Batch [0-23]	Speed: 878.43 samples/sec	cross-entropy=0.920862	property_magnitude-accuracy=0.585938
2020-06-23 23:55:01,999 [INFO]  Epoch[14] Train-cross-entropy=0.928417
2020-06-23 23:55:02,004 [INFO]  Epoch[14] Train-property_magnitude-accuracy=0.590278
2020-06-23 23:55:02,008 [INFO]  Epoch[14] Time cost=0.770
2020-06-23 23:55:02,018 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:55:02,074 [INFO]  Epoch[14] Validation-cross-entropy=1.045268
2020-06-23 23:55:02,079 [INFO]  Epoch[14] Validation-property_magnitude-accuracy=0.537500
2020-06-23 23:55:02,472 [INFO]  Epoch[15] Batch [0-23]	Speed: 1008.15 samples/sec	c

Fitting model for column: other_payment_plans


2020-06-23 23:55:08,332 [INFO]  
2020-06-23 23:55:08,712 [INFO]  Epoch[0] Batch [0-23]	Speed: 1075.75 samples/sec	cross-entropy=0.721867	other_payment_plans-accuracy=0.786458
2020-06-23 23:55:09,080 [INFO]  Epoch[0] Train-cross-entropy=0.664412
2020-06-23 23:55:09,085 [INFO]  Epoch[0] Train-other_payment_plans-accuracy=0.794444
2020-06-23 23:55:09,091 [INFO]  Epoch[0] Time cost=0.747
2020-06-23 23:55:09,105 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:55:09,203 [INFO]  Epoch[0] Validation-cross-entropy=0.511950
2020-06-23 23:55:09,212 [INFO]  Epoch[0] Validation-other_payment_plans-accuracy=0.850000
2020-06-23 23:55:09,588 [INFO]  Epoch[1] Batch [0-23]	Speed: 1110.32 samples/sec	cross-entropy=0.538499	other_payment_plans-accuracy=0.817708
2020-06-23 23:55:09,888 [INFO]  Epoch[1] Train-cross-entropy=0.548103
2020-06-23 23:55:09,895 [INFO]  Epoch[1] Train-other_payment_plans-accuracy=0.811111
2020-06-23 23:55:09,899 [INFO]  Epoch[1] Time cost=0.682
2020-06-

Fitting model for column: housing


2020-06-23 23:55:18,284 [INFO]  
2020-06-23 23:55:18,681 [INFO]  Epoch[0] Batch [0-23]	Speed: 1036.47 samples/sec	cross-entropy=0.809463	housing-accuracy=0.690104
2020-06-23 23:55:18,988 [INFO]  Epoch[0] Train-cross-entropy=0.727844
2020-06-23 23:55:18,992 [INFO]  Epoch[0] Train-housing-accuracy=0.716667
2020-06-23 23:55:19,009 [INFO]  Epoch[0] Time cost=0.713
2020-06-23 23:55:19,021 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:55:19,088 [INFO]  Epoch[0] Validation-cross-entropy=0.572505
2020-06-23 23:55:19,092 [INFO]  Epoch[0] Validation-housing-accuracy=0.737500
2020-06-23 23:55:19,426 [INFO]  Epoch[1] Batch [0-23]	Speed: 1169.58 samples/sec	cross-entropy=0.632027	housing-accuracy=0.739583
2020-06-23 23:55:19,718 [INFO]  Epoch[1] Train-cross-entropy=0.606030
2020-06-23 23:55:19,722 [INFO]  Epoch[1] Train-housing-accuracy=0.754167
2020-06-23 23:55:19,726 [INFO]  Epoch[1] Time cost=0.630
2020-06-23 23:55:19,736 [INFO]  Saved checkpoint to "imputer_model\m

Fitting model for column: job


2020-06-23 23:55:29,384 [INFO]  
2020-06-23 23:55:30,032 [INFO]  Epoch[0] Batch [0-23]	Speed: 690.99 samples/sec	cross-entropy=1.061599	job-accuracy=0.588542
2020-06-23 23:55:30,420 [INFO]  Epoch[0] Train-cross-entropy=0.940452
2020-06-23 23:55:30,424 [INFO]  Epoch[0] Train-job-accuracy=0.634722
2020-06-23 23:55:30,428 [INFO]  Epoch[0] Time cost=1.017
2020-06-23 23:55:30,448 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:55:30,519 [INFO]  Epoch[0] Validation-cross-entropy=0.747117
2020-06-23 23:55:30,524 [INFO]  Epoch[0] Validation-job-accuracy=0.712500
2020-06-23 23:55:30,876 [INFO]  Epoch[1] Batch [0-23]	Speed: 1106.70 samples/sec	cross-entropy=0.863160	job-accuracy=0.619792
2020-06-23 23:55:31,193 [INFO]  Epoch[1] Train-cross-entropy=0.808506
2020-06-23 23:55:31,197 [INFO]  Epoch[1] Train-job-accuracy=0.656944
2020-06-23 23:55:31,202 [INFO]  Epoch[1] Time cost=0.672
2020-06-23 23:55:31,215 [INFO]  Saved checkpoint to "imputer_model\model-0001.params"
202

Fitting model for column: own_telephone


2020-06-23 23:55:37,750 [INFO]  
2020-06-23 23:55:38,169 [INFO]  Epoch[0] Batch [0-23]	Speed: 962.85 samples/sec	cross-entropy=0.672225	own_telephone-accuracy=0.658854
2020-06-23 23:55:38,478 [INFO]  Epoch[0] Train-cross-entropy=0.629434
2020-06-23 23:55:38,483 [INFO]  Epoch[0] Train-own_telephone-accuracy=0.665278
2020-06-23 23:55:38,488 [INFO]  Epoch[0] Time cost=0.725
2020-06-23 23:55:38,500 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:55:38,563 [INFO]  Epoch[0] Validation-cross-entropy=0.563801
2020-06-23 23:55:38,569 [INFO]  Epoch[0] Validation-own_telephone-accuracy=0.712500
2020-06-23 23:55:38,906 [INFO]  Epoch[1] Batch [0-23]	Speed: 1163.98 samples/sec	cross-entropy=0.550000	own_telephone-accuracy=0.721354
2020-06-23 23:55:39,265 [INFO]  Epoch[1] Train-cross-entropy=0.551348
2020-06-23 23:55:39,269 [INFO]  Epoch[1] Train-own_telephone-accuracy=0.712500
2020-06-23 23:55:39,274 [INFO]  Epoch[1] Time cost=0.700
2020-06-23 23:55:39,284 [INFO]  Saved c

Fitting model for column: foreign_worker


2020-06-23 23:55:47,778 [INFO]  
2020-06-23 23:55:48,307 [INFO]  Epoch[0] Batch [0-23]	Speed: 740.93 samples/sec	cross-entropy=0.340132	foreign_worker-accuracy=0.927083
2020-06-23 23:55:48,624 [INFO]  Epoch[0] Train-cross-entropy=0.233669
2020-06-23 23:55:48,629 [INFO]  Epoch[0] Train-foreign_worker-accuracy=0.948611
2020-06-23 23:55:48,633 [INFO]  Epoch[0] Time cost=0.843
2020-06-23 23:55:48,643 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:55:48,701 [INFO]  Epoch[0] Validation-cross-entropy=0.111131
2020-06-23 23:55:48,706 [INFO]  Epoch[0] Validation-foreign_worker-accuracy=0.975000
2020-06-23 23:55:49,017 [INFO]  Epoch[1] Batch [0-23]	Speed: 1265.72 samples/sec	cross-entropy=0.174161	foreign_worker-accuracy=0.950521
2020-06-23 23:55:49,306 [INFO]  Epoch[1] Train-cross-entropy=0.148877
2020-06-23 23:55:49,311 [INFO]  Epoch[1] Train-foreign_worker-accuracy=0.961111
2020-06-23 23:55:49,317 [INFO]  Epoch[1] Time cost=0.604
2020-06-23 23:55:49,334 [INFO]  Sa

2020-06-23 23:55:58,768 [INFO]  No improvement detected for 5 epochs compared to 0.09165458530187606 last error obtained: 0.0954352891072631, stopping here
2020-06-23 23:55:58,772 [INFO]  
  _warn_prf(average, modifier, msg_start, len(result))


Fitting model for column: duration


2020-06-23 23:56:01,281 [INFO]  
2020-06-23 23:56:01,659 [INFO]  Epoch[0] Batch [0-23]	Speed: 1075.69 samples/sec	cross-entropy=12.703345	duration-accuracy=0.000000
2020-06-23 23:56:02,043 [INFO]  Epoch[0] Train-cross-entropy=10.368585
2020-06-23 23:56:02,048 [INFO]  Epoch[0] Train-duration-accuracy=0.000000
2020-06-23 23:56:02,054 [INFO]  Epoch[0] Time cost=0.761
2020-06-23 23:56:02,064 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:56:02,126 [INFO]  Epoch[0] Validation-cross-entropy=8.888605
2020-06-23 23:56:02,131 [INFO]  Epoch[0] Validation-duration-accuracy=0.000000
2020-06-23 23:56:02,426 [INFO]  Epoch[1] Batch [0-23]	Speed: 1315.37 samples/sec	cross-entropy=8.120125	duration-accuracy=0.000000
2020-06-23 23:56:02,687 [INFO]  Epoch[1] Train-cross-entropy=7.788915
2020-06-23 23:56:02,690 [INFO]  Epoch[1] Train-duration-accuracy=0.000000
2020-06-23 23:56:02,694 [INFO]  Epoch[1] Time cost=0.559
2020-06-23 23:56:02,702 [INFO]  Saved checkpoint to "imputer_

Fitting model for column: credit_amount


2020-06-23 23:56:10,377 [INFO]  
2020-06-23 23:56:10,700 [INFO]  Epoch[0] Batch [0-23]	Speed: 1295.75 samples/sec	cross-entropy=11.537270	credit_amount-accuracy=0.000000
2020-06-23 23:56:10,981 [INFO]  Epoch[0] Train-cross-entropy=9.301062
2020-06-23 23:56:10,987 [INFO]  Epoch[0] Train-credit_amount-accuracy=0.000000
2020-06-23 23:56:10,992 [INFO]  Epoch[0] Time cost=0.601
2020-06-23 23:56:11,004 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:56:11,089 [INFO]  Epoch[0] Validation-cross-entropy=7.968016
2020-06-23 23:56:11,098 [INFO]  Epoch[0] Validation-credit_amount-accuracy=0.000000
2020-06-23 23:56:11,510 [INFO]  Epoch[1] Batch [0-23]	Speed: 976.15 samples/sec	cross-entropy=7.463742	credit_amount-accuracy=0.000000
2020-06-23 23:56:11,868 [INFO]  Epoch[1] Train-cross-entropy=6.744511
2020-06-23 23:56:11,876 [INFO]  Epoch[1] Train-credit_amount-accuracy=0.000000
2020-06-23 23:56:11,881 [INFO]  Epoch[1] Time cost=0.774
2020-06-23 23:56:11,911 [INFO]  Saved 

Fitting model for column: installment_commitment


2020-06-23 23:56:18,801 [INFO]  
2020-06-23 23:56:19,192 [INFO]  Epoch[0] Batch [0-23]	Speed: 1041.97 samples/sec	cross-entropy=14.128041	installment_commitment-accuracy=0.000000
2020-06-23 23:56:19,736 [INFO]  Epoch[0] Train-cross-entropy=14.292359
2020-06-23 23:56:19,741 [INFO]  Epoch[0] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:56:19,745 [INFO]  Epoch[0] Time cost=0.929
2020-06-23 23:56:19,758 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:56:19,817 [INFO]  Epoch[0] Validation-cross-entropy=12.551503
2020-06-23 23:56:19,821 [INFO]  Epoch[0] Validation-installment_commitment-accuracy=0.000000
2020-06-23 23:56:20,176 [INFO]  Epoch[1] Batch [0-23]	Speed: 1109.70 samples/sec	cross-entropy=12.282364	installment_commitment-accuracy=0.000000
2020-06-23 23:56:20,455 [INFO]  Epoch[1] Train-cross-entropy=12.750998
2020-06-23 23:56:20,460 [INFO]  Epoch[1] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:56:20,465 [INFO]  Epoch[1] Time

2020-06-23 23:56:31,508 [INFO]  Epoch[13] Train-cross-entropy=10.136193
2020-06-23 23:56:31,515 [INFO]  Epoch[13] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:56:31,522 [INFO]  Epoch[13] Time cost=0.918
2020-06-23 23:56:31,544 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-23 23:56:31,650 [INFO]  Epoch[13] Validation-cross-entropy=10.684056
2020-06-23 23:56:31,657 [INFO]  Epoch[13] Validation-installment_commitment-accuracy=0.000000
2020-06-23 23:56:32,155 [INFO]  Epoch[14] Batch [0-23]	Speed: 785.07 samples/sec	cross-entropy=9.716877	installment_commitment-accuracy=0.000000
2020-06-23 23:56:32,461 [INFO]  Epoch[14] Train-cross-entropy=10.074372
2020-06-23 23:56:32,467 [INFO]  Epoch[14] Train-installment_commitment-accuracy=0.000000
2020-06-23 23:56:32,472 [INFO]  Epoch[14] Time cost=0.806
2020-06-23 23:56:32,482 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:56:32,554 [INFO]  Epoch[14] Validation-cross-entropy=10.6685

Fitting model for column: residence_since


2020-06-23 23:56:41,095 [INFO]  
2020-06-23 23:56:41,412 [INFO]  Epoch[0] Batch [0-23]	Speed: 1286.83 samples/sec	cross-entropy=15.473120	residence_since-accuracy=0.000000
2020-06-23 23:56:41,696 [INFO]  Epoch[0] Train-cross-entropy=15.691096
2020-06-23 23:56:41,700 [INFO]  Epoch[0] Train-residence_since-accuracy=0.000000
2020-06-23 23:56:41,707 [INFO]  Epoch[0] Time cost=0.596
2020-06-23 23:56:41,716 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:56:41,768 [INFO]  Epoch[0] Validation-cross-entropy=13.573388
2020-06-23 23:56:41,775 [INFO]  Epoch[0] Validation-residence_since-accuracy=0.000000
2020-06-23 23:56:42,185 [INFO]  Epoch[1] Batch [0-23]	Speed: 960.90 samples/sec	cross-entropy=13.994703	residence_since-accuracy=0.000000
2020-06-23 23:56:42,506 [INFO]  Epoch[1] Train-cross-entropy=14.332313
2020-06-23 23:56:42,511 [INFO]  Epoch[1] Train-residence_since-accuracy=0.000000
2020-06-23 23:56:42,517 [INFO]  Epoch[1] Time cost=0.737
2020-06-23 23:56:42,528 

Fitting model for column: age


2020-06-23 23:56:46,894 [INFO]  
2020-06-23 23:56:47,219 [INFO]  Epoch[0] Batch [0-23]	Speed: 1225.77 samples/sec	cross-entropy=14.677479	age-accuracy=0.000000
2020-06-23 23:56:47,520 [INFO]  Epoch[0] Train-cross-entropy=14.555230
2020-06-23 23:56:47,525 [INFO]  Epoch[0] Train-age-accuracy=0.000000
2020-06-23 23:56:47,529 [INFO]  Epoch[0] Time cost=0.622
2020-06-23 23:56:47,538 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:56:47,605 [INFO]  Epoch[0] Validation-cross-entropy=18.909968
2020-06-23 23:56:47,610 [INFO]  Epoch[0] Validation-age-accuracy=0.000000
2020-06-23 23:56:47,905 [INFO]  Epoch[1] Batch [0-23]	Speed: 1332.03 samples/sec	cross-entropy=13.260581	age-accuracy=0.000000
2020-06-23 23:56:48,142 [INFO]  Epoch[1] Train-cross-entropy=13.301010
2020-06-23 23:56:48,146 [INFO]  Epoch[1] Train-age-accuracy=0.000000
2020-06-23 23:56:48,151 [INFO]  Epoch[1] Time cost=0.536
2020-06-23 23:56:48,160 [INFO]  Saved checkpoint to "imputer_model\model-0001.param

2020-06-23 23:56:56,537 [INFO]  Epoch[14] Time cost=0.582
2020-06-23 23:56:56,546 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-23 23:56:56,596 [INFO]  Epoch[14] Validation-cross-entropy=15.281016
2020-06-23 23:56:56,601 [INFO]  Epoch[14] Validation-age-accuracy=0.000000
2020-06-23 23:56:56,921 [INFO]  Epoch[15] Batch [0-23]	Speed: 1205.86 samples/sec	cross-entropy=10.306528	age-accuracy=0.000000
2020-06-23 23:56:57,178 [INFO]  Epoch[15] Train-cross-entropy=10.274636
2020-06-23 23:56:57,184 [INFO]  Epoch[15] Train-age-accuracy=0.000000
2020-06-23 23:56:57,189 [INFO]  Epoch[15] Time cost=0.583
2020-06-23 23:56:57,199 [INFO]  Saved checkpoint to "imputer_model\model-0015.params"
2020-06-23 23:56:57,266 [INFO]  Epoch[15] Validation-cross-entropy=15.266189
2020-06-23 23:56:57,271 [INFO]  Epoch[15] Validation-age-accuracy=0.000000
2020-06-23 23:56:57,581 [INFO]  Epoch[16] Batch [0-23]	Speed: 1250.79 samples/sec	cross-entropy=10.240177	age-accuracy=0.000000
2020-06-23

Fitting model for column: existing_credits


2020-06-23 23:57:04,883 [INFO]  
2020-06-23 23:57:05,218 [INFO]  Epoch[0] Batch [0-23]	Speed: 1182.63 samples/sec	cross-entropy=15.320334	existing_credits-accuracy=0.000000
2020-06-23 23:57:05,493 [INFO]  Epoch[0] Train-cross-entropy=15.455334
2020-06-23 23:57:05,499 [INFO]  Epoch[0] Train-existing_credits-accuracy=0.000000
2020-06-23 23:57:05,504 [INFO]  Epoch[0] Time cost=0.610
2020-06-23 23:57:05,514 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:57:05,568 [INFO]  Epoch[0] Validation-cross-entropy=12.308671
2020-06-23 23:57:05,573 [INFO]  Epoch[0] Validation-existing_credits-accuracy=0.000000
2020-06-23 23:57:05,856 [INFO]  Epoch[1] Batch [0-23]	Speed: 1376.44 samples/sec	cross-entropy=13.204112	existing_credits-accuracy=0.000000
2020-06-23 23:57:06,111 [INFO]  Epoch[1] Train-cross-entropy=13.589545
2020-06-23 23:57:06,116 [INFO]  Epoch[1] Train-existing_credits-accuracy=0.000000
2020-06-23 23:57:06,121 [INFO]  Epoch[1] Time cost=0.543
2020-06-23 23:57:0

Fitting model for column: num_dependents


2020-06-23 23:57:13,275 [INFO]  
2020-06-23 23:57:13,591 [INFO]  Epoch[0] Batch [0-23]	Speed: 1290.15 samples/sec	cross-entropy=14.863863	num_dependents-accuracy=0.000000
2020-06-23 23:57:13,859 [INFO]  Epoch[0] Train-cross-entropy=15.618117
2020-06-23 23:57:13,864 [INFO]  Epoch[0] Train-num_dependents-accuracy=0.000000
2020-06-23 23:57:13,870 [INFO]  Epoch[0] Time cost=0.578
2020-06-23 23:57:13,881 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-23 23:57:13,936 [INFO]  Epoch[0] Validation-cross-entropy=14.198002
2020-06-23 23:57:13,941 [INFO]  Epoch[0] Validation-num_dependents-accuracy=0.000000
2020-06-23 23:57:14,240 [INFO]  Epoch[1] Batch [0-23]	Speed: 1315.07 samples/sec	cross-entropy=13.507775	num_dependents-accuracy=0.000000
2020-06-23 23:57:14,561 [INFO]  Epoch[1] Train-cross-entropy=14.474562
2020-06-23 23:57:14,569 [INFO]  Epoch[1] Train-num_dependents-accuracy=0.000000
2020-06-23 23:57:14,576 [INFO]  Epoch[1] Time cost=0.631
2020-06-23 23:57:14,589 [INF

PPP score with cleaning: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A970668>: 0.78903693314633
Best cleaning method: <jenga.cleaning.cleaner.Cleaner object at 0x000002454A6A7C50>: 0.7911407199625993


## Results

### Model Results

In [21]:
# model 
pipeline.fit(train_data, train_labels)

Pipeline(memory=None,
         steps=[('features',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('categorical_features',
                                                  Pipeline(memory=None,
                                                           steps=[('imputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value='__NA__',
                                                                                 missing_values=nan,
                                                                                 strategy='constant',
                                          

In [22]:
# original data test score
pipeline.score(test_data, test_labels)

0.7

In [23]:
# corrupted data test score
pipeline.score(test_data_corrupted, test_labels)

0.69

In [24]:
# cleaned data test score
pipeline.score(test_data_cleaned, test_labels)

0.665

### PPP Results

In [25]:
# ppp model score
ppp_model_score

0.8093735390369332

In [26]:
# ppp score corrupted
score_no_cleaning

0.7850631136044881

In [29]:
# ppp score cleaned
np.array(cleaner_scores_ppp).max()

0.7911407199625993

In [30]:
# ppp cleaner scores
cleaner_scores_ppp

[0.7850631136044881,
 0.7911407199625993,
 0.7898550724637682,
 0.7850631136044881,
 0.7911407199625993,
 0.7897381954184197,
 0.7850631136044881,
 0.7911407199625993,
 0.78903693314633]