## Dataset

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDClassifier

from jenga.basis import Dataset
from jenga.models.simple_model import SimpleModel
from jenga.corruptions.perturbations import Perturbation
from jenga.cleaning.imputation import MeanModeImputation, DatawigImputation

In [2]:
# Single seed for the notebook; it is passed to both Dataset and SimpleModel below.
seed = 10

In [3]:
# Load the "credit-g" dataset through jenga's Dataset wrapper, handing it the notebook seed.
dataset = Dataset(seed, "credit-g")

Dataset 'credit-g', target: 'class'
**Author**: Dr. Hans Hofmann  

**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)) - 1994    

**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)



**German Credit data**  

This dataset classifies people described by a set of attributes as good or bad credit risks.



This dataset comes with a cost matrix: 

``` 

      Good  Bad (predicted)  

Good   0    1   (actual)  

Bad    5    0  

```



It is worse to class a customer as go

Attribute types: 


Unnamed: 0,attribute_names,categorical_indicator
0,checking_status,True
1,duration,False
2,credit_history,True
3,purpose,True
4,credit_amount,False
5,savings_status,True
6,employment,True
7,installment_commitment,False
8,personal_status,True
9,other_parties,True


In [4]:
# Complete table exposed by the Dataset object: the feature columns plus the 'class' target.
all_data = dataset.all_data
all_data

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,12.0,existing paid,furniture/equipment,1736.0,<100,4<=X<7,3.0,female div/dep/mar,none,...,real estate,31.0,none,own,1.0,unskilled resident,1.0,none,yes,good
996,<0,30.0,existing paid,used car,3857.0,<100,1<=X<4,4.0,male div/sep,none,...,life insurance,40.0,none,own,1.0,high qualif/self emp/mgmt,1.0,yes,yes,good
997,no checking,12.0,existing paid,radio/tv,804.0,<100,>=7,4.0,male single,none,...,car,38.0,none,own,1.0,skilled,1.0,none,yes,good
998,<0,45.0,existing paid,radio/tv,1845.0,<100,1<=X<4,4.0,male single,none,...,no known property,23.0,none,for free,1.0,skilled,1.0,yes,yes,bad


In [5]:
# Names of the feature columns (the 'class' target is not part of this list).
attribute_names = dataset.attribute_names
attribute_names

['checking_status',
 'duration',
 'credit_history',
 'purpose',
 'credit_amount',
 'savings_status',
 'employment',
 'installment_commitment',
 'personal_status',
 'other_parties',
 'residence_since',
 'property_magnitude',
 'age',
 'other_payment_plans',
 'housing',
 'existing_credits',
 'job',
 'num_dependents',
 'own_telephone',
 'foreign_worker']

In [6]:
# Table pairing each attribute name with a categorical_indicator flag;
# it is handed to SimpleModel further down.
attribute_types = dataset.attribute_types
attribute_types

Unnamed: 0,attribute_names,categorical_indicator
0,checking_status,True
1,duration,False
2,credit_history,True
3,purpose,True
4,credit_amount,False
5,savings_status,True
6,employment,True
7,installment_commitment,False
8,personal_status,True
9,other_parties,True


### Visualize the dataset

In [None]:
## plot the original dataset
def hide_current_axis(*args, **kwds):
    """Make the currently active matplotlib axes invisible.

    All positional and keyword arguments are accepted and ignored, so the
    function can be used as a plotting callback (``plot_data`` passes it to
    ``map_upper``).
    """
    current_axes = plt.gca()
    current_axes.set_visible(False)
        
def plot_data(data, hue="class"):
    """Show a seaborn pair plot of ``data`` with the upper-triangle panels hidden.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to plot; it must contain the column named by ``hue``.
    hue : str, default "class"
        Column used to colour the observations. The default is the label
        column name used in this notebook, so ``plot_data(df)`` behaves
        exactly as before.
    """
    sns.set_style("white")  # grid/no grid style: darkgrid, whitegrid, dark, white, ticks

    grid = sns.pairplot(data, hue=hue)
    # The upper triangle mirrors the lower one, so blank those axes out.
    grid.map_upper(hide_current_axis)
    plt.show()

In [None]:
# Pair plot of the complete dataset, coloured by the 'class' column.
plot_data(all_data)

### Get training and test sets

In [7]:
# Fetch the train/test partitions prepared by the Dataset object.
train_data, train_labels, test_data, test_labels = dataset.get_train_test_data()

# Preview each partition: first rows of the features, then the first five labels.
for features, labels in ((train_data, train_labels), (test_data, test_labels)):
    display(features.head())
    print(labels[:5])

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
188,<0,12.0,existing paid,radio/tv,674.0,100<=X<500,4<=X<7,4.0,male mar/wid,none,1.0,life insurance,20.0,none,own,1.0,skilled,1.0,none,yes
194,0<=X<200,45.0,existing paid,radio/tv,3031.0,100<=X<500,1<=X<4,4.0,male single,guarantor,4.0,life insurance,21.0,none,rent,1.0,skilled,1.0,none,yes
225,no checking,36.0,no credits/all paid,repairs,2613.0,<100,1<=X<4,4.0,male single,none,2.0,car,27.0,none,own,2.0,skilled,1.0,none,yes
580,0<=X<200,18.0,critical/other existing credit,new car,1056.0,<100,>=7,3.0,male single,guarantor,3.0,real estate,30.0,bank,own,2.0,skilled,1.0,none,yes
428,no checking,9.0,existing paid,furniture/equipment,1313.0,<100,>=7,1.0,male single,none,4.0,car,20.0,none,own,1.0,skilled,1.0,none,yes


['bad' 'bad' 'good' 'bad' 'good']


Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
841,no checking,21.0,delayed previously,used car,2993.0,<100,1<=X<4,3.0,male single,none,2.0,real estate,28.0,stores,own,2.0,unskilled resident,1.0,none,yes
956,>=200,30.0,critical/other existing credit,radio/tv,3656.0,no known savings,>=7,4.0,male single,none,4.0,life insurance,49.0,stores,own,2.0,unskilled resident,1.0,none,yes
544,no checking,12.0,critical/other existing credit,new car,1255.0,<100,>=7,4.0,male single,none,4.0,real estate,61.0,none,own,2.0,unskilled resident,1.0,none,yes
173,0<=X<200,8.0,existing paid,radio/tv,1414.0,<100,1<=X<4,4.0,male single,guarantor,2.0,real estate,33.0,none,own,1.0,skilled,1.0,none,no
759,<0,12.0,critical/other existing credit,new car,691.0,<100,>=7,4.0,male single,none,3.0,life insurance,35.0,none,own,2.0,skilled,1.0,none,yes


['good' 'good' 'good' 'good' 'bad']


## Model

In [8]:
# SGD-trained linear classifier. random_state is tied to the notebook seed so that
# the learner's internal shuffling is reproducible across runs.
learner = SGDClassifier(max_iter=1000, random_state=seed)

# Hyper-parameter grid: 1 loss x 3 penalties x 4 alphas = 12 candidates.
# The 'learner__' prefix presumably addresses a pipeline step named 'learner'
# inside SimpleModel -- confirm against jenga.models.simple_model.
# NOTE(review): scikit-learn 1.1 renamed the 'log' loss to 'log_loss' (and 'log'
# was removed in 1.3); switch the value when running on a newer release.
param_grid = {
    'learner__loss': ['log'],
    'learner__penalty': ['l2', 'l1', 'elasticnet'],
    'learner__alpha': [0.0001, 0.001, 0.01, 0.1]
}

In [9]:
# Bundle the seed, the train/test split, the attribute types, the learner and its
# parameter grid into jenga's SimpleModel wrapper.
model_obj = SimpleModel(seed, train_data, train_labels, test_data, test_labels, attribute_types, learner, param_grid)

In [10]:
# Categorical feature names as exposed by SimpleModel; reused by the corruption
# and imputation steps below.
categorical_columns = model_obj.categorical_columns
categorical_columns

['checking_status',
 'credit_history',
 'purpose',
 'savings_status',
 'employment',
 'personal_status',
 'other_parties',
 'property_magnitude',
 'other_payment_plans',
 'housing',
 'job',
 'own_telephone',
 'foreign_worker']

In [11]:
# Numerical feature names as exposed by SimpleModel. The 'columms' spelling matches
# the attribute name on SimpleModel and is relied on by later cells, so keep it.
numerical_columms = model_obj.numerical_columms
numerical_columms

['duration',
 'credit_amount',
 'installment_commitment',
 'residence_since',
 'age',
 'existing_credits',
 'num_dependents']

In [12]:
# Fit the baseline model on the clean training data. In the recorded run this
# performed a 5-fold search over the 12 grid candidates (60 fits).
model = model_obj.fit_baseline_model(train_data, train_labels)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   12.4s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   13.2s finished


## Corruptions

In [13]:
# Perturbation helper built from the categorical and numerical column lists;
# it is used in the next cell to corrupt the test set.
corr_perturbations = Perturbation(categorical_columns, numerical_columms)

In [14]:
# Corrupt the test features. The second argument is presumably the number of
# perturbations to apply (the recorded run logged five) -- confirm against jenga.
# Returns the corrupted frame, the perturbation objects and the affected column names.
test_data_corrupted, perturbations, cols_perturbed = corr_perturbations.apply_perturbation(test_data, 5)
test_data_corrupted

Scaling: {'column': 'residence_since', 'fraction': 0.25}
['residence_since']
SwappedValues: {'column_a': 'checking_status', 'column_b': 'savings_status', 'fraction': 0.75}
['checking_status', 'savings_status']
SwappedValues: {'column_a': 'housing', 'column_b': 'purpose', 'fraction': 0.5}
['housing', 'purpose']
MissingValues: {'column': 'existing_credits', 'fraction': 0.5, 'na_value': nan, 'missingness': 'MNAR'}
['existing_credits']
SwappedValues: {'column_a': 'job', 'column_b': 'property_magnitude', 'fraction': 0.75}
['job', 'property_magnitude']


Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
841,<100,21.0,delayed previously,own,2993.0,no checking,1<=X<4,3.0,male single,none,2.0,unskilled resident,28.0,stores,used car,2.0,real estate,1.0,none,yes
956,no known savings,30.0,critical/other existing credit,own,3656.0,>=200,>=7,4.0,male single,none,4.0,unskilled resident,49.0,stores,radio/tv,2.0,life insurance,1.0,none,yes
544,<100,12.0,critical/other existing credit,new car,1255.0,no checking,>=7,4.0,male single,none,4.0,unskilled resident,61.0,none,own,2.0,real estate,1.0,none,yes
173,0<=X<200,8.0,existing paid,radio/tv,1414.0,<100,1<=X<4,4.0,male single,guarantor,2.0,skilled,33.0,none,own,1.0,real estate,1.0,none,no
759,<100,12.0,critical/other existing credit,own,691.0,<0,>=7,4.0,male single,none,30.0,skilled,35.0,none,new car,2.0,life insurance,1.0,none,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,<100,30.0,existing paid,own,11998.0,<0,<1,1.0,male div/sep,none,1.0,unskilled resident,34.0,none,repairs,1.0,no known property,1.0,yes,yes
192,<100,27.0,existing paid,own,3915.0,0<=X<200,1<=X<4,4.0,male single,none,2.0,skilled,36.0,none,business,1.0,car,2.0,yes,yes
398,<100,12.0,existing paid,new car,1223.0,0<=X<200,>=7,1.0,male div/sep,none,1.0,skilled,46.0,none,rent,2.0,real estate,1.0,none,yes
450,no known savings,36.0,critical/other existing credit,own,11054.0,no checking,1<=X<4,4.0,male single,none,20.0,high qualif/self emp/mgmt,30.0,none,used car,,car,1.0,yes,yes


In [15]:
# Perturbation objects returned by apply_perturbation.
perturbations

[<jenga.corruptions.numerical.Scaling at 0x13e1d25dbe0>,
 <jenga.corruptions.generic.SwappedValues at 0x13e0fd3f630>,
 <jenga.corruptions.generic.SwappedValues at 0x13e1d25dc18>,
 <jenga.corruptions.generic.MissingValues at 0x13e1d25d470>,
 <jenga.corruptions.generic.SwappedValues at 0x13e1d25d7f0>]

In [16]:
# Names of the columns touched by the applied perturbations.
cols_perturbed

['residence_since',
 'checking_status',
 'savings_status',
 'housing',
 'purpose',
 'existing_credits',
 'job',
 'property_magnitude']

### Visualize the original and corrupted test set

In [None]:
## original test data
# test_data keeps its original row labels, whereas a bare pd.Series(test_labels)
# is indexed 0..n-1. pd.concat(axis=1) aligns on the index, so the labels must be
# built on test_data's index; otherwise they are attached to the wrong rows and
# NaN-filled rows appear.
plot_data(pd.concat(
    [test_data, pd.Series(np.asarray(test_labels), index=test_data.index, name='class')],
    axis=1,
))

In [None]:
## corrupted test data
# The corrupted frame keeps the original row labels, whereas a bare
# pd.Series(test_labels) is indexed 0..n-1. pd.concat(axis=1) aligns on the index,
# so the labels must be built on the frame's own index; otherwise they are attached
# to the wrong rows and NaN-filled rows appear.
plot_data(pd.concat(
    [
        test_data_corrupted,
        pd.Series(np.asarray(test_labels), index=test_data_corrupted.index, name='class'),
    ],
    axis=1,
))

## Cleaning

In [17]:
# Mean/mode imputation baseline: constructed with the train frame, the corrupted test
# frame and the two column-type lists.
mean_mode_imputer = MeanModeImputation(train_data, test_data_corrupted, categorical_columns, numerical_columms)

# Fit on the clean training data only, then fill the corrupted test set.
# NOTE: in the recorded output only the missing entries change (e.g. the empty
# existing_credits cell becomes 1.40625); scaled and swapped values are left as-is.
mean_mode_imputer.fit(train_data)
test_data_mm_imputed = mean_mode_imputer.transform(test_data_corrupted)
test_data_mm_imputed

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
841,<100,21.0,delayed previously,own,2993.0,no checking,1<=X<4,3.0,male single,none,2.0,unskilled resident,28.0,stores,used car,2.00000,real estate,1.0,none,yes
956,no known savings,30.0,critical/other existing credit,own,3656.0,>=200,>=7,4.0,male single,none,4.0,unskilled resident,49.0,stores,radio/tv,2.00000,life insurance,1.0,none,yes
544,<100,12.0,critical/other existing credit,new car,1255.0,no checking,>=7,4.0,male single,none,4.0,unskilled resident,61.0,none,own,2.00000,real estate,1.0,none,yes
173,0<=X<200,8.0,existing paid,radio/tv,1414.0,<100,1<=X<4,4.0,male single,guarantor,2.0,skilled,33.0,none,own,1.00000,real estate,1.0,none,no
759,<100,12.0,critical/other existing credit,own,691.0,<0,>=7,4.0,male single,none,30.0,skilled,35.0,none,new car,2.00000,life insurance,1.0,none,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,<100,30.0,existing paid,own,11998.0,<0,<1,1.0,male div/sep,none,1.0,unskilled resident,34.0,none,repairs,1.00000,no known property,1.0,yes,yes
192,<100,27.0,existing paid,own,3915.0,0<=X<200,1<=X<4,4.0,male single,none,2.0,skilled,36.0,none,business,1.00000,car,2.0,yes,yes
398,<100,12.0,existing paid,new car,1223.0,0<=X<200,>=7,1.0,male div/sep,none,1.0,skilled,46.0,none,rent,2.00000,real estate,1.0,none,yes
450,no known savings,36.0,critical/other existing credit,own,11054.0,no checking,1<=X<4,4.0,male single,none,20.0,high qualif/self emp/mgmt,30.0,none,used car,1.40625,car,1.0,yes,yes


In [19]:
# DataWig-based imputation, constructed with the same arguments as the mean/mode imputer.
datawig_imputer = DatawigImputation(train_data, test_data_corrupted, categorical_columns, numerical_columms)

# Train on train_data and impute test_data_corrupted in a single call.
# NOTE: slow and very chatty -- the recorded log shows a separate model being fitted
# per column, with timestamps spanning more than six minutes.
test_data_dw_imputed = datawig_imputer.fit_transform(train_data, test_data_corrupted)
test_data_dw_imputed

2020-06-19 00:59:14,607 [INFO]  CategoricalEncoder for column checking_status                                found only 44 occurrences of value >=200


Fitting model for column: checking_status


2020-06-19 00:59:16,469 [INFO]  
2020-06-19 00:59:17,199 [INFO]  Epoch[0] Batch [0-23]	Speed: 552.35 samples/sec	cross-entropy=1.268521	checking_status-accuracy=0.380208
2020-06-19 00:59:17,816 [INFO]  Epoch[0] Train-cross-entropy=1.226915
2020-06-19 00:59:17,819 [INFO]  Epoch[0] Train-checking_status-accuracy=0.387500
2020-06-19 00:59:17,821 [INFO]  Epoch[0] Time cost=1.342
2020-06-19 00:59:17,838 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 00:59:17,973 [INFO]  Epoch[0] Validation-cross-entropy=1.032174
2020-06-19 00:59:17,978 [INFO]  Epoch[0] Validation-checking_status-accuracy=0.550000
2020-06-19 00:59:18,797 [INFO]  Epoch[1] Batch [0-23]	Speed: 473.35 samples/sec	cross-entropy=1.127402	checking_status-accuracy=0.427083
2020-06-19 00:59:19,446 [INFO]  Epoch[1] Train-cross-entropy=1.128148
2020-06-19 00:59:19,448 [INFO]  Epoch[1] Train-checking_status-accuracy=0.438889
2020-06-19 00:59:19,450 [INFO]  Epoch[1] Time cost=1.465
2020-06-19 00:59:19,466 [INFO]

2020-06-19 00:59:44,921 [INFO]  CategoricalEncoder for column credit_history                                found only 63 occurrences of value delayed previously
2020-06-19 00:59:44,925 [INFO]  CategoricalEncoder for column credit_history                                found only 36 occurrences of value all paid
2020-06-19 00:59:44,928 [INFO]  CategoricalEncoder for column credit_history                                found only 31 occurrences of value no credits/all paid


Fitting model for column: credit_history


2020-06-19 00:59:47,562 [INFO]  
2020-06-19 00:59:48,572 [INFO]  Epoch[0] Batch [0-23]	Speed: 421.43 samples/sec	cross-entropy=1.087346	credit_history-accuracy=0.583333
2020-06-19 00:59:49,329 [INFO]  Epoch[0] Train-cross-entropy=1.052565
2020-06-19 00:59:49,336 [INFO]  Epoch[0] Train-credit_history-accuracy=0.629167
2020-06-19 00:59:49,344 [INFO]  Epoch[0] Time cost=1.770
2020-06-19 00:59:49,386 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 00:59:49,564 [INFO]  Epoch[0] Validation-cross-entropy=0.870367
2020-06-19 00:59:49,567 [INFO]  Epoch[0] Validation-credit_history-accuracy=0.700000
2020-06-19 00:59:50,464 [INFO]  Epoch[1] Batch [0-23]	Speed: 439.79 samples/sec	cross-entropy=0.869627	credit_history-accuracy=0.713542
2020-06-19 00:59:51,286 [INFO]  Epoch[1] Train-cross-entropy=0.899130
2020-06-19 00:59:51,290 [INFO]  Epoch[1] Train-credit_history-accuracy=0.705556
2020-06-19 00:59:51,294 [INFO]  Epoch[1] Time cost=1.720
2020-06-19 00:59:51,315 [INFO]  Sav

Fitting model for column: purpose


2020-06-19 01:00:06,516 [INFO]  
2020-06-19 01:00:07,901 [INFO]  Epoch[0] Batch [0-23]	Speed: 287.14 samples/sec	cross-entropy=1.849403	purpose-accuracy=0.242188
2020-06-19 01:00:09,400 [INFO]  Epoch[0] Train-cross-entropy=1.777741
2020-06-19 01:00:09,405 [INFO]  Epoch[0] Train-purpose-accuracy=0.287500
2020-06-19 01:00:09,409 [INFO]  Epoch[0] Time cost=2.881
2020-06-19 01:00:09,489 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:00:09,769 [INFO]  Epoch[0] Validation-cross-entropy=1.747401
2020-06-19 01:00:09,775 [INFO]  Epoch[0] Validation-purpose-accuracy=0.312500
2020-06-19 01:00:10,925 [INFO]  Epoch[1] Batch [0-23]	Speed: 336.94 samples/sec	cross-entropy=1.607665	purpose-accuracy=0.291667
2020-06-19 01:00:11,893 [INFO]  Epoch[1] Train-cross-entropy=1.607178
2020-06-19 01:00:11,898 [INFO]  Epoch[1] Train-purpose-accuracy=0.329167
2020-06-19 01:00:11,902 [INFO]  Epoch[1] Time cost=2.124
2020-06-19 01:00:11,940 [INFO]  Saved checkpoint to "imputer_model\mod

Fitting model for column: savings_status


2020-06-19 01:00:36,031 [INFO]  
2020-06-19 01:00:36,843 [INFO]  Epoch[0] Batch [0-23]	Speed: 515.34 samples/sec	cross-entropy=1.111819	savings_status-accuracy=0.627604
2020-06-19 01:00:37,534 [INFO]  Epoch[0] Train-cross-entropy=1.164974
2020-06-19 01:00:37,538 [INFO]  Epoch[0] Train-savings_status-accuracy=0.593056
2020-06-19 01:00:37,543 [INFO]  Epoch[0] Time cost=1.496
2020-06-19 01:00:37,584 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:00:37,704 [INFO]  Epoch[0] Validation-cross-entropy=1.248488
2020-06-19 01:00:37,706 [INFO]  Epoch[0] Validation-savings_status-accuracy=0.475000
2020-06-19 01:00:38,525 [INFO]  Epoch[1] Batch [0-23]	Speed: 473.06 samples/sec	cross-entropy=0.970144	savings_status-accuracy=0.656250
2020-06-19 01:00:39,156 [INFO]  Epoch[1] Train-cross-entropy=1.064525
2020-06-19 01:00:39,159 [INFO]  Epoch[1] Train-savings_status-accuracy=0.611111
2020-06-19 01:00:39,163 [INFO]  Epoch[1] Time cost=1.453
2020-06-19 01:00:39,178 [INFO]  Sav

Fitting model for column: employment


2020-06-19 01:00:50,835 [INFO]  
2020-06-19 01:00:52,661 [INFO]  Epoch[0] Batch [0-23]	Speed: 210.43 samples/sec	cross-entropy=1.466208	employment-accuracy=0.341146
2020-06-19 01:00:54,510 [INFO]  Epoch[0] Train-cross-entropy=1.398919
2020-06-19 01:00:54,519 [INFO]  Epoch[0] Train-employment-accuracy=0.384722
2020-06-19 01:00:54,524 [INFO]  Epoch[0] Time cost=3.676
2020-06-19 01:00:54,558 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:00:54,740 [INFO]  Epoch[0] Validation-cross-entropy=1.198217
2020-06-19 01:00:54,745 [INFO]  Epoch[0] Validation-employment-accuracy=0.500000
2020-06-19 01:00:55,641 [INFO]  Epoch[1] Batch [0-23]	Speed: 431.55 samples/sec	cross-entropy=1.274813	employment-accuracy=0.421875
2020-06-19 01:00:56,586 [INFO]  Epoch[1] Train-cross-entropy=1.272563
2020-06-19 01:00:56,591 [INFO]  Epoch[1] Train-employment-accuracy=0.437500
2020-06-19 01:00:56,597 [INFO]  Epoch[1] Time cost=1.847
2020-06-19 01:00:56,614 [INFO]  Saved checkpoint to "im

Fitting model for column: personal_status


2020-06-19 01:01:12,318 [INFO]  
2020-06-19 01:01:13,130 [INFO]  Epoch[0] Batch [0-23]	Speed: 498.61 samples/sec	cross-entropy=1.093136	personal_status-accuracy=0.557292
2020-06-19 01:01:13,985 [INFO]  Epoch[0] Train-cross-entropy=1.008956
2020-06-19 01:01:13,990 [INFO]  Epoch[0] Train-personal_status-accuracy=0.587500
2020-06-19 01:01:13,995 [INFO]  Epoch[0] Time cost=1.664
2020-06-19 01:01:14,020 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:01:14,157 [INFO]  Epoch[0] Validation-cross-entropy=0.886126
2020-06-19 01:01:14,161 [INFO]  Epoch[0] Validation-personal_status-accuracy=0.587500
2020-06-19 01:01:15,510 [INFO]  Epoch[1] Batch [0-23]	Speed: 281.19 samples/sec	cross-entropy=0.904175	personal_status-accuracy=0.619792
2020-06-19 01:01:16,716 [INFO]  Epoch[1] Train-cross-entropy=0.889470
2020-06-19 01:01:16,724 [INFO]  Epoch[1] Train-personal_status-accuracy=0.634722
2020-06-19 01:01:16,733 [INFO]  Epoch[1] Time cost=2.567
2020-06-19 01:01:16,855 [INFO]

Fitting model for column: other_parties


2020-06-19 01:01:32,808 [INFO]  
2020-06-19 01:01:33,721 [INFO]  Epoch[0] Batch [0-23]	Speed: 483.91 samples/sec	cross-entropy=0.557104	other_parties-accuracy=0.872396
2020-06-19 01:01:34,540 [INFO]  Epoch[0] Train-cross-entropy=0.472395
2020-06-19 01:01:34,545 [INFO]  Epoch[0] Train-other_parties-accuracy=0.887500
2020-06-19 01:01:34,552 [INFO]  Epoch[0] Time cost=1.729
2020-06-19 01:01:34,571 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:01:34,725 [INFO]  Epoch[0] Validation-cross-entropy=0.344561
2020-06-19 01:01:34,730 [INFO]  Epoch[0] Validation-other_parties-accuracy=0.912500
2020-06-19 01:01:35,494 [INFO]  Epoch[1] Batch [0-23]	Speed: 512.11 samples/sec	cross-entropy=0.355369	other_parties-accuracy=0.898438
2020-06-19 01:01:36,096 [INFO]  Epoch[1] Train-cross-entropy=0.351783
2020-06-19 01:01:36,100 [INFO]  Epoch[1] Train-other_parties-accuracy=0.901389
2020-06-19 01:01:36,108 [INFO]  Epoch[1] Time cost=1.369
2020-06-19 01:01:36,129 [INFO]  Saved ch

Fitting model for column: property_magnitude


2020-06-19 01:01:52,028 [INFO]  
2020-06-19 01:01:54,217 [INFO]  Epoch[0] Batch [0-23]	Speed: 176.38 samples/sec	cross-entropy=1.283039	property_magnitude-accuracy=0.348958
2020-06-19 01:01:54,913 [INFO]  Epoch[0] Train-cross-entropy=1.260430
2020-06-19 01:01:54,917 [INFO]  Epoch[0] Train-property_magnitude-accuracy=0.368056
2020-06-19 01:01:54,921 [INFO]  Epoch[0] Time cost=2.881
2020-06-19 01:01:54,966 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:01:55,127 [INFO]  Epoch[0] Validation-cross-entropy=1.108693
2020-06-19 01:01:55,132 [INFO]  Epoch[0] Validation-property_magnitude-accuracy=0.450000
2020-06-19 01:01:55,922 [INFO]  Epoch[1] Batch [0-23]	Speed: 497.26 samples/sec	cross-entropy=1.133650	property_magnitude-accuracy=0.437500
2020-06-19 01:01:56,810 [INFO]  Epoch[1] Train-cross-entropy=1.150920
2020-06-19 01:01:56,816 [INFO]  Epoch[1] Train-property_magnitude-accuracy=0.444444
2020-06-19 01:01:56,820 [INFO]  Epoch[1] Time cost=1.682
2020-06-19 01:0

2020-06-19 01:02:21,379 [INFO]  Epoch[13] Time cost=1.472
2020-06-19 01:02:21,415 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-19 01:02:21,546 [INFO]  Epoch[13] Validation-cross-entropy=1.044101
2020-06-19 01:02:21,551 [INFO]  Epoch[13] Validation-property_magnitude-accuracy=0.512500
2020-06-19 01:02:22,317 [INFO]  Epoch[14] Batch [0-23]	Speed: 515.69 samples/sec	cross-entropy=0.920906	property_magnitude-accuracy=0.578125
2020-06-19 01:02:23,063 [INFO]  Epoch[14] Train-cross-entropy=0.925665
2020-06-19 01:02:23,067 [INFO]  Epoch[14] Train-property_magnitude-accuracy=0.588889
2020-06-19 01:02:23,073 [INFO]  Epoch[14] Time cost=1.517
2020-06-19 01:02:23,087 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-19 01:02:23,222 [INFO]  Epoch[14] Validation-cross-entropy=1.044534
2020-06-19 01:02:23,226 [INFO]  Epoch[14] Validation-property_magnitude-accuracy=0.537500
2020-06-19 01:02:23,998 [INFO]  Epoch[15] Batch [0-23]	Speed: 520.78 samples/sec	cr

Fitting model for column: other_payment_plans


2020-06-19 01:02:29,976 [INFO]  
2020-06-19 01:02:30,883 [INFO]  Epoch[0] Batch [0-23]	Speed: 454.68 samples/sec	cross-entropy=0.728644	other_payment_plans-accuracy=0.781250
2020-06-19 01:02:31,760 [INFO]  Epoch[0] Train-cross-entropy=0.664575
2020-06-19 01:02:31,764 [INFO]  Epoch[0] Train-other_payment_plans-accuracy=0.791667
2020-06-19 01:02:31,769 [INFO]  Epoch[0] Time cost=1.777
2020-06-19 01:02:31,832 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:02:31,978 [INFO]  Epoch[0] Validation-cross-entropy=0.505560
2020-06-19 01:02:31,983 [INFO]  Epoch[0] Validation-other_payment_plans-accuracy=0.850000
2020-06-19 01:02:33,232 [INFO]  Epoch[1] Batch [0-23]	Speed: 318.08 samples/sec	cross-entropy=0.542365	other_payment_plans-accuracy=0.817708
2020-06-19 01:02:33,840 [INFO]  Epoch[1] Train-cross-entropy=0.550279
2020-06-19 01:02:33,845 [INFO]  Epoch[1] Train-other_payment_plans-accuracy=0.811111
2020-06-19 01:02:33,849 [INFO]  Epoch[1] Time cost=1.860
2020-06-19

Fitting model for column: housing


2020-06-19 01:02:51,853 [INFO]  
2020-06-19 01:02:53,170 [INFO]  Epoch[0] Batch [0-23]	Speed: 404.09 samples/sec	cross-entropy=0.805051	housing-accuracy=0.687500
2020-06-19 01:02:53,895 [INFO]  Epoch[0] Train-cross-entropy=0.726160
2020-06-19 01:02:53,898 [INFO]  Epoch[0] Train-housing-accuracy=0.713889
2020-06-19 01:02:53,906 [INFO]  Epoch[0] Time cost=1.950
2020-06-19 01:02:53,920 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:02:54,091 [INFO]  Epoch[0] Validation-cross-entropy=0.554947
2020-06-19 01:02:54,105 [INFO]  Epoch[0] Validation-housing-accuracy=0.762500
2020-06-19 01:02:55,991 [INFO]  Epoch[1] Batch [0-23]	Speed: 212.43 samples/sec	cross-entropy=0.623890	housing-accuracy=0.744792
2020-06-19 01:02:56,727 [INFO]  Epoch[1] Train-cross-entropy=0.601633
2020-06-19 01:02:56,734 [INFO]  Epoch[1] Train-housing-accuracy=0.759722
2020-06-19 01:02:56,739 [INFO]  Epoch[1] Time cost=2.626
2020-06-19 01:02:56,756 [INFO]  Saved checkpoint to "imputer_model\mod

Fitting model for column: job


2020-06-19 01:03:19,835 [INFO]  
2020-06-19 01:03:20,744 [INFO]  Epoch[0] Batch [0-23]	Speed: 447.83 samples/sec	cross-entropy=1.083442	job-accuracy=0.578125
2020-06-19 01:03:21,416 [INFO]  Epoch[0] Train-cross-entropy=0.957778
2020-06-19 01:03:21,421 [INFO]  Epoch[0] Train-job-accuracy=0.622222
2020-06-19 01:03:21,427 [INFO]  Epoch[0] Time cost=1.573
2020-06-19 01:03:21,443 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:03:21,565 [INFO]  Epoch[0] Validation-cross-entropy=0.773694
2020-06-19 01:03:21,570 [INFO]  Epoch[0] Validation-job-accuracy=0.712500
2020-06-19 01:03:22,456 [INFO]  Epoch[1] Batch [0-23]	Speed: 439.68 samples/sec	cross-entropy=0.867148	job-accuracy=0.617188
2020-06-19 01:03:23,399 [INFO]  Epoch[1] Train-cross-entropy=0.815123
2020-06-19 01:03:23,407 [INFO]  Epoch[1] Train-job-accuracy=0.650000
2020-06-19 01:03:23,412 [INFO]  Epoch[1] Time cost=1.836
2020-06-19 01:03:23,453 [INFO]  Saved checkpoint to "imputer_model\model-0001.params"
2020

Fitting model for column: own_telephone


2020-06-19 01:03:38,857 [INFO]  
2020-06-19 01:03:39,615 [INFO]  Epoch[0] Batch [0-23]	Speed: 535.50 samples/sec	cross-entropy=0.676522	own_telephone-accuracy=0.627604
2020-06-19 01:03:40,246 [INFO]  Epoch[0] Train-cross-entropy=0.631166
2020-06-19 01:03:40,251 [INFO]  Epoch[0] Train-own_telephone-accuracy=0.638889
2020-06-19 01:03:40,258 [INFO]  Epoch[0] Time cost=1.386
2020-06-19 01:03:40,272 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:03:40,391 [INFO]  Epoch[0] Validation-cross-entropy=0.559734
2020-06-19 01:03:40,397 [INFO]  Epoch[0] Validation-own_telephone-accuracy=0.737500
2020-06-19 01:03:41,091 [INFO]  Epoch[1] Batch [0-23]	Speed: 565.93 samples/sec	cross-entropy=0.554567	own_telephone-accuracy=0.710938
2020-06-19 01:03:41,709 [INFO]  Epoch[1] Train-cross-entropy=0.553761
2020-06-19 01:03:41,713 [INFO]  Epoch[1] Train-own_telephone-accuracy=0.702778
2020-06-19 01:03:41,718 [INFO]  Epoch[1] Time cost=1.314
2020-06-19 01:03:41,736 [INFO]  Saved ch

Fitting model for column: foreign_worker


2020-06-19 01:04:01,246 [INFO]  
2020-06-19 01:04:01,964 [INFO]  Epoch[0] Batch [0-23]	Speed: 561.60 samples/sec	cross-entropy=0.348110	foreign_worker-accuracy=0.927083
2020-06-19 01:04:02,557 [INFO]  Epoch[0] Train-cross-entropy=0.239639
2020-06-19 01:04:02,562 [INFO]  Epoch[0] Train-foreign_worker-accuracy=0.948611
2020-06-19 01:04:02,567 [INFO]  Epoch[0] Time cost=1.306
2020-06-19 01:04:02,581 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:04:02,702 [INFO]  Epoch[0] Validation-cross-entropy=0.103928
2020-06-19 01:04:02,706 [INFO]  Epoch[0] Validation-foreign_worker-accuracy=0.975000
2020-06-19 01:04:03,420 [INFO]  Epoch[1] Batch [0-23]	Speed: 549.06 samples/sec	cross-entropy=0.170049	foreign_worker-accuracy=0.950521
2020-06-19 01:04:03,997 [INFO]  Epoch[1] Train-cross-entropy=0.147075
2020-06-19 01:04:04,002 [INFO]  Epoch[1] Train-foreign_worker-accuracy=0.961111
2020-06-19 01:04:04,007 [INFO]  Epoch[1] Time cost=1.296
2020-06-19 01:04:04,023 [INFO]  Sav

2020-06-19 01:04:23,585 [INFO]  No improvement detected for 5 epochs compared to 0.08504480309784412 last error obtained: 0.08781395442783832, stopping here
2020-06-19 01:04:23,589 [INFO]  
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return np.log(probas)


Fitting model for column: duration


2020-06-19 01:04:27,887 [INFO]  
2020-06-19 01:04:28,504 [INFO]  Epoch[0] Batch [0-23]	Speed: 646.49 samples/sec	cross-entropy=12.380113	duration-accuracy=0.000000
2020-06-19 01:04:29,030 [INFO]  Epoch[0] Train-cross-entropy=10.235032
2020-06-19 01:04:29,036 [INFO]  Epoch[0] Train-duration-accuracy=0.000000
2020-06-19 01:04:29,040 [INFO]  Epoch[0] Time cost=1.140
2020-06-19 01:04:29,053 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:04:29,164 [INFO]  Epoch[0] Validation-cross-entropy=8.206207
2020-06-19 01:04:29,169 [INFO]  Epoch[0] Validation-duration-accuracy=0.000000
2020-06-19 01:04:29,757 [INFO]  Epoch[1] Batch [0-23]	Speed: 658.98 samples/sec	cross-entropy=8.078433	duration-accuracy=0.000000
2020-06-19 01:04:30,323 [INFO]  Epoch[1] Train-cross-entropy=7.714536
2020-06-19 01:04:30,329 [INFO]  Epoch[1] Train-duration-accuracy=0.000000
2020-06-19 01:04:30,336 [INFO]  Epoch[1] Time cost=1.161
2020-06-19 01:04:30,356 [INFO]  Saved checkpoint to "imputer_mo

Fitting model for column: credit_amount


2020-06-19 01:04:42,816 [INFO]  
2020-06-19 01:04:43,567 [INFO]  Epoch[0] Batch [0-23]	Speed: 524.13 samples/sec	cross-entropy=11.448753	credit_amount-accuracy=0.000000
2020-06-19 01:04:44,260 [INFO]  Epoch[0] Train-cross-entropy=9.343541
2020-06-19 01:04:44,266 [INFO]  Epoch[0] Train-credit_amount-accuracy=0.000000
2020-06-19 01:04:44,272 [INFO]  Epoch[0] Time cost=1.442
2020-06-19 01:04:44,300 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:04:44,447 [INFO]  Epoch[0] Validation-cross-entropy=8.030649
2020-06-19 01:04:44,455 [INFO]  Epoch[0] Validation-credit_amount-accuracy=0.000000
2020-06-19 01:04:45,143 [INFO]  Epoch[1] Batch [0-23]	Speed: 582.91 samples/sec	cross-entropy=7.501861	credit_amount-accuracy=0.000000
2020-06-19 01:04:45,743 [INFO]  Epoch[1] Train-cross-entropy=6.751903
2020-06-19 01:04:45,747 [INFO]  Epoch[1] Train-credit_amount-accuracy=0.000000
2020-06-19 01:04:45,753 [INFO]  Epoch[1] Time cost=1.291
2020-06-19 01:04:45,777 [INFO]  Saved c

Fitting model for column: installment_commitment


2020-06-19 01:05:00,636 [INFO]  
2020-06-19 01:05:01,500 [INFO]  Epoch[0] Batch [0-23]	Speed: 530.89 samples/sec	cross-entropy=14.335889	installment_commitment-accuracy=0.000000
2020-06-19 01:05:02,008 [INFO]  Epoch[0] Train-cross-entropy=14.267057
2020-06-19 01:05:02,014 [INFO]  Epoch[0] Train-installment_commitment-accuracy=0.000000
2020-06-19 01:05:02,022 [INFO]  Epoch[0] Time cost=1.373
2020-06-19 01:05:02,050 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:05:02,169 [INFO]  Epoch[0] Validation-cross-entropy=12.553059
2020-06-19 01:05:02,174 [INFO]  Epoch[0] Validation-installment_commitment-accuracy=0.000000
2020-06-19 01:05:02,768 [INFO]  Epoch[1] Batch [0-23]	Speed: 651.32 samples/sec	cross-entropy=12.081643	installment_commitment-accuracy=0.000000
2020-06-19 01:05:03,283 [INFO]  Epoch[1] Train-cross-entropy=12.523088
2020-06-19 01:05:03,288 [INFO]  Epoch[1] Train-installment_commitment-accuracy=0.000000
2020-06-19 01:05:03,293 [INFO]  Epoch[1] Time c

2020-06-19 01:05:21,501 [INFO]  Epoch[13] Train-cross-entropy=10.247522
2020-06-19 01:05:21,508 [INFO]  Epoch[13] Train-installment_commitment-accuracy=0.000000
2020-06-19 01:05:21,515 [INFO]  Epoch[13] Time cost=1.285
2020-06-19 01:05:21,557 [INFO]  Saved checkpoint to "imputer_model\model-0013.params"
2020-06-19 01:05:21,736 [INFO]  Epoch[13] Validation-cross-entropy=10.587593
2020-06-19 01:05:21,744 [INFO]  Epoch[13] Validation-installment_commitment-accuracy=0.000000
2020-06-19 01:05:22,402 [INFO]  Epoch[14] Batch [0-23]	Speed: 603.38 samples/sec	cross-entropy=9.835303	installment_commitment-accuracy=0.000000
2020-06-19 01:05:22,953 [INFO]  Epoch[14] Train-cross-entropy=10.204435
2020-06-19 01:05:22,958 [INFO]  Epoch[14] Train-installment_commitment-accuracy=0.000000
2020-06-19 01:05:22,966 [INFO]  Epoch[14] Time cost=1.216
2020-06-19 01:05:22,981 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-19 01:05:23,120 [INFO]  Epoch[14] Validation-cross-entropy=10.5686

2020-06-19 01:05:42,090 [INFO]  Epoch[26] Time cost=1.549
2020-06-19 01:05:42,116 [INFO]  Saved checkpoint to "imputer_model\model-0026.params"
2020-06-19 01:05:42,325 [INFO]  Epoch[26] Validation-cross-entropy=10.451776
2020-06-19 01:05:42,331 [INFO]  Epoch[26] Validation-installment_commitment-accuracy=0.000000
2020-06-19 01:05:42,963 [INFO]  Epoch[27] Batch [0-23]	Speed: 623.28 samples/sec	cross-entropy=9.498618	installment_commitment-accuracy=0.000000
2020-06-19 01:05:43,800 [INFO]  Epoch[27] Train-cross-entropy=9.743312
2020-06-19 01:05:43,812 [INFO]  Epoch[27] Train-installment_commitment-accuracy=0.000000
2020-06-19 01:05:43,820 [INFO]  Epoch[27] Time cost=1.481
2020-06-19 01:05:43,862 [INFO]  Saved checkpoint to "imputer_model\model-0027.params"
2020-06-19 01:05:44,092 [INFO]  Epoch[27] Validation-cross-entropy=10.450372
2020-06-19 01:05:44,145 [INFO]  Epoch[27] Validation-installment_commitment-accuracy=0.000000
2020-06-19 01:05:44,820 [INFO]  Epoch[28] Batch [0-23]	Speed: 581

Fitting model for column: residence_since


2020-06-19 01:06:04,980 [INFO]  
2020-06-19 01:06:05,681 [INFO]  Epoch[0] Batch [0-23]	Speed: 571.18 samples/sec	cross-entropy=15.560597	residence_since-accuracy=0.000000
2020-06-19 01:06:06,189 [INFO]  Epoch[0] Train-cross-entropy=15.804039
2020-06-19 01:06:06,196 [INFO]  Epoch[0] Train-residence_since-accuracy=0.000000
2020-06-19 01:06:06,201 [INFO]  Epoch[0] Time cost=1.204
2020-06-19 01:06:06,218 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:06:06,332 [INFO]  Epoch[0] Validation-cross-entropy=13.719762
2020-06-19 01:06:06,337 [INFO]  Epoch[0] Validation-residence_since-accuracy=0.000000
2020-06-19 01:06:06,961 [INFO]  Epoch[1] Batch [0-23]	Speed: 627.79 samples/sec	cross-entropy=14.061304	residence_since-accuracy=0.000000
2020-06-19 01:06:07,476 [INFO]  Epoch[1] Train-cross-entropy=14.413785
2020-06-19 01:06:07,481 [INFO]  Epoch[1] Train-residence_since-accuracy=0.000000
2020-06-19 01:06:07,487 [INFO]  Epoch[1] Time cost=1.143
2020-06-19 01:06:07,502 [

Fitting model for column: age


2020-06-19 01:06:18,830 [INFO]  
2020-06-19 01:06:19,566 [INFO]  Epoch[0] Batch [0-23]	Speed: 552.74 samples/sec	cross-entropy=14.818392	age-accuracy=0.000000
2020-06-19 01:06:20,295 [INFO]  Epoch[0] Train-cross-entropy=14.438365
2020-06-19 01:06:20,300 [INFO]  Epoch[0] Train-age-accuracy=0.000000
2020-06-19 01:06:20,307 [INFO]  Epoch[0] Time cost=1.457
2020-06-19 01:06:20,326 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:06:20,443 [INFO]  Epoch[0] Validation-cross-entropy=18.757967
2020-06-19 01:06:20,449 [INFO]  Epoch[0] Validation-age-accuracy=0.000000
2020-06-19 01:06:21,232 [INFO]  Epoch[1] Batch [0-23]	Speed: 494.43 samples/sec	cross-entropy=13.290354	age-accuracy=0.000000
2020-06-19 01:06:21,952 [INFO]  Epoch[1] Train-cross-entropy=13.198677
2020-06-19 01:06:21,962 [INFO]  Epoch[1] Train-age-accuracy=0.000000
2020-06-19 01:06:21,967 [INFO]  Epoch[1] Time cost=1.511
2020-06-19 01:06:21,980 [INFO]  Saved checkpoint to "imputer_model\model-0001.params"

2020-06-19 01:06:42,421 [INFO]  Epoch[14] Time cost=1.377
2020-06-19 01:06:42,440 [INFO]  Saved checkpoint to "imputer_model\model-0014.params"
2020-06-19 01:06:42,556 [INFO]  Epoch[14] Validation-cross-entropy=15.225529
2020-06-19 01:06:42,561 [INFO]  Epoch[14] Validation-age-accuracy=0.000000
2020-06-19 01:06:43,280 [INFO]  Epoch[15] Batch [0-23]	Speed: 531.65 samples/sec	cross-entropy=10.293681	age-accuracy=0.000000
2020-06-19 01:06:43,838 [INFO]  Epoch[15] Train-cross-entropy=10.248815
2020-06-19 01:06:43,844 [INFO]  Epoch[15] Train-age-accuracy=0.000000
2020-06-19 01:06:43,851 [INFO]  Epoch[15] Time cost=1.284
2020-06-19 01:06:43,868 [INFO]  Saved checkpoint to "imputer_model\model-0015.params"
2020-06-19 01:06:43,973 [INFO]  Epoch[15] Validation-cross-entropy=15.155844
2020-06-19 01:06:43,979 [INFO]  Epoch[15] Validation-age-accuracy=0.000000
2020-06-19 01:06:44,715 [INFO]  Epoch[16] Batch [0-23]	Speed: 549.09 samples/sec	cross-entropy=10.224519	age-accuracy=0.000000
2020-06-19 0

Fitting model for column: existing_credits


2020-06-19 01:07:02,572 [INFO]  
2020-06-19 01:07:03,218 [INFO]  Epoch[0] Batch [0-23]	Speed: 608.37 samples/sec	cross-entropy=15.569044	existing_credits-accuracy=0.000000
2020-06-19 01:07:03,819 [INFO]  Epoch[0] Train-cross-entropy=15.555038
2020-06-19 01:07:03,826 [INFO]  Epoch[0] Train-existing_credits-accuracy=0.000000
2020-06-19 01:07:03,832 [INFO]  Epoch[0] Time cost=1.244
2020-06-19 01:07:03,847 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:07:03,960 [INFO]  Epoch[0] Validation-cross-entropy=12.383319
2020-06-19 01:07:03,966 [INFO]  Epoch[0] Validation-existing_credits-accuracy=0.000000
2020-06-19 01:07:04,759 [INFO]  Epoch[1] Batch [0-23]	Speed: 486.13 samples/sec	cross-entropy=13.275037	existing_credits-accuracy=0.000000
2020-06-19 01:07:05,341 [INFO]  Epoch[1] Train-cross-entropy=13.602152
2020-06-19 01:07:05,347 [INFO]  Epoch[1] Train-existing_credits-accuracy=0.000000
2020-06-19 01:07:05,355 [INFO]  Epoch[1] Time cost=1.379
2020-06-19 01:07:05,

Fitting model for column: num_dependents


2020-06-19 01:07:27,174 [INFO]  
2020-06-19 01:07:28,002 [INFO]  Epoch[0] Batch [0-23]	Speed: 475.64 samples/sec	cross-entropy=14.758187	num_dependents-accuracy=0.000000
2020-06-19 01:07:28,557 [INFO]  Epoch[0] Train-cross-entropy=15.566025
2020-06-19 01:07:28,562 [INFO]  Epoch[0] Train-num_dependents-accuracy=0.000000
2020-06-19 01:07:28,568 [INFO]  Epoch[0] Time cost=1.379
2020-06-19 01:07:28,583 [INFO]  Saved checkpoint to "imputer_model\model-0000.params"
2020-06-19 01:07:28,696 [INFO]  Epoch[0] Validation-cross-entropy=14.110274
2020-06-19 01:07:28,702 [INFO]  Epoch[0] Validation-num_dependents-accuracy=0.000000
2020-06-19 01:07:29,399 [INFO]  Epoch[1] Batch [0-23]	Speed: 553.20 samples/sec	cross-entropy=13.411580	num_dependents-accuracy=0.000000
2020-06-19 01:07:29,966 [INFO]  Epoch[1] Train-cross-entropy=14.406336
2020-06-19 01:07:29,971 [INFO]  Epoch[1] Train-num_dependents-accuracy=0.000000
2020-06-19 01:07:29,977 [INFO]  Epoch[1] Time cost=1.268
2020-06-19 01:07:29,991 [INFO]

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,own_telephone_imputed_proba,foreign_worker_imputed,foreign_worker_imputed_proba,duration_imputed,credit_amount_imputed,installment_commitment_imputed,residence_since_imputed,age_imputed,existing_credits_imputed,num_dependents_imputed
841,<100,21.0,delayed previously,own,2993.0,no checking,1<=X<4,3.0,male single,none,...,0.758034,yes,0.989650,23.517211,2817.887040,2.575374,2.668881,35.223627,1.589354,1.357592
956,no known savings,30.0,critical/other existing credit,own,3656.0,>=200,>=7,4.0,male single,none,...,0.585065,yes,0.999707,29.309393,2678.277674,3.381428,3.415969,39.451595,1.554431,1.329838
544,<100,12.0,critical/other existing credit,new car,1255.0,no checking,>=7,4.0,male single,none,...,0.683606,yes,0.997025,15.719845,160.939228,3.395884,3.837262,45.340511,1.964947,1.190203
173,0<=X<200,8.0,existing paid,radio/tv,1414.0,<100,1<=X<4,4.0,male single,guarantor,...,0.866257,yes,0.912659,18.468459,951.483456,2.505351,2.332071,34.029363,1.046940,1.227906
759,<100,12.0,critical/other existing credit,own,691.0,<0,>=7,4.0,male single,none,...,0.983981,yes,1.000000,15.648046,1298.983786,6.664571,3.138410,143.950246,-0.046298,1.442130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,<100,30.0,existing paid,own,11998.0,<0,<1,1.0,male div/sep,none,...,0.718092,yes,0.768896,38.831637,4860.883380,1.861352,3.178385,38.112424,0.937650,1.366352
192,<100,27.0,existing paid,own,3915.0,0<=X<200,1<=X<4,4.0,male single,none,...,0.635739,yes,0.997503,31.885491,2867.356168,2.415288,3.162871,30.940767,1.348355,1.157633
398,<100,12.0,existing paid,new car,1223.0,0<=X<200,>=7,1.0,male div/sep,none,...,0.735327,yes,0.970277,16.161895,842.911359,2.901005,3.180902,31.041716,1.127733,1.595912
450,no known savings,36.0,critical/other existing credit,own,11054.0,no checking,1<=X<4,4.0,male single,none,...,0.999273,yes,1.000000,55.089678,6455.938450,3.592754,2.884257,96.984016,0.771767,1.339269


In [20]:
# Inspect the column labels of the DataWig-imputed test frame.
# Per the recorded output, the original feature columns are kept and the
# imputer appends `<col>_imputed` (plus `<col>_imputed_proba` for the
# categorical columns shown) alongside them.
# NOTE(review): the frame preview recorded just before this cell (presumably
# this same frame -- confirm) contains implausible numeric imputations, e.g.
# age_imputed ~ 143.95 and existing_credits_imputed < 0; verify before use.
test_data_dw_imputed.columns

Index(['checking_status', 'duration', 'credit_history', 'purpose',
       'credit_amount', 'savings_status', 'employment',
       'installment_commitment', 'personal_status', 'other_parties',
       'residence_since', 'property_magnitude', 'age', 'other_payment_plans',
       'housing', 'existing_credits', 'job', 'num_dependents', 'own_telephone',
       'foreign_worker', 'checking_status_imputed',
       'checking_status_imputed_proba', 'credit_history_imputed',
       'credit_history_imputed_proba', 'purpose_imputed',
       'purpose_imputed_proba', 'savings_status_imputed',
       'savings_status_imputed_proba', 'employment_imputed',
       'employment_imputed_proba', 'personal_status_imputed',
       'personal_status_imputed_proba', 'other_parties_imputed',
       'other_parties_imputed_proba', 'property_magnitude_imputed',
       'property_magnitude_imputed_proba', 'other_payment_plans_imputed',
       'other_payment_plans_imputed_proba', 'housing_imputed',
       'housing_imputed