In [1]:
import os
import pickle
import numpy as np

In [2]:
# Experiment grid. The reporting cell combines these values into file names of
# the form '<dataset>_<mode>_<rate>_m<method>' and looks them up under 'imputations/'.

# Cap on how many stored imputations are read from each result file.
n_imputations = 20

# Dataset name tags (first component of the result file name).
datasets = ['IRIS', 'AUSTRALIAN', 'WINE', 'PIMA', 'ABALONE']

# Imputation method tags (last component of the result file name).
imputation_methods = ['MISSFOREST', 'SOFTIMPUTE', 'GAIN', 'MIDA', 'SINKHORN']

# Missing-data mode tags and missing rates; each rate is rendered with two
# decimals in the file name (0.10 -> '10').
missing_modes = ['MCAR', 'MAR', 'MNAR']
missing_rates = [0.10, 0.15, 0.25]

In [3]:
def _mean_nonzero_std_percent(imputations):
    """Return the mean of the non-zero per-entry standard deviations, times 100.

    The standard deviation is taken along axis 0 of ``imputations`` (one slice
    per stored imputation). Entries whose standard deviation is exactly zero
    are excluded from the denominator -- presumably these are the entries that
    were never missing and are therefore identical in every imputation
    (TODO confirm against the code that writes the pickles).

    Returns 0.0 when no entry varies at all, instead of the NaN (and
    RuntimeWarning) that a 0/0 division would produce.
    """
    stds = np.std(imputations, axis=0)
    n_varying = (stds != 0).sum()
    if n_varying == 0:
        return 0.0
    return np.true_divide(stds.sum(), n_varying) * 100.


# For every dataset, print one line per imputation method: the incertitude
# averaged over all (missing mode, missing rate) result files found on disk.
for dataset in datasets:
    title = f'Dataset: {dataset}'
    print('\n## ' + '-' * len(title) + ' ##')
    print(f'## {title} ##\n')
    for imputation_method in imputation_methods:
        incertitudes = []
        for missing_mode in missing_modes:
            for missing_rate in missing_rates:
                # Two-decimal rate without the leading "0.", e.g. 0.1 -> '10'.
                rate_tag = f'{missing_rate:.02f}'.split('.')[-1]
                imp_name = f'{dataset}_{missing_mode}_{rate_tag}_m{imputation_method}'
                pickle_path = f'imputations/{imp_name}.pickle'
                # Configurations without a result file are skipped.
                if not os.path.isfile(pickle_path):
                    continue
                # NOTE(security): pickle.load can execute arbitrary code; only
                # load result files produced by a trusted run of this project.
                with open(pickle_path, 'rb') as pickle_file:
                    imputations = pickle.load(pickle_file)[:n_imputations]
                incertitudes.append(_mean_nonzero_std_percent(imputations))
        # Pad so that the 'incertitude' column lines up across methods.
        label = f'{imputation_method}: {" " * (12 - len(imputation_method))}'
        if incertitudes:
            print(f'{label}incertitude: {np.mean(incertitudes):.04f}%')
        else:
            # np.mean([]) would print 'nan%' and emit a RuntimeWarning.
            print(f'{label}incertitude: n/a (no imputation files found)')
print()


## ------------- ##
## Dataset: IRIS ##

MISSFOREST:   incertitude: 1.1965%
SOFTIMPUTE:   incertitude: 10.8808%
GAIN:         incertitude: 1.6804%
MIDA:         incertitude: 1.1673%
SINKHORN:     incertitude: 1.1053%

## ------------------- ##
## Dataset: AUSTRALIAN ##

MISSFOREST:   incertitude: 2.8167%
SOFTIMPUTE:   incertitude: 9.9198%
GAIN:         incertitude: 9.3976%
MIDA:         incertitude: 1.0666%
SINKHORN:     incertitude: 4.3111%

## ------------- ##
## Dataset: WINE ##

MISSFOREST:   incertitude: 1.6140%
SOFTIMPUTE:   incertitude: 7.5060%
GAIN:         incertitude: 1.3125%
MIDA:         incertitude: 0.9283%
SINKHORN:     incertitude: 0.7496%

## ------------- ##
## Dataset: PIMA ##

MISSFOREST:   incertitude: 1.8177%
SOFTIMPUTE:   incertitude: 7.7407%
GAIN:         incertitude: 1.3669%
MIDA:         incertitude: 0.6446%
SINKHORN:     incertitude: 1.2742%

## ---------------- ##
## Dataset: ABALONE ##

MISSFOREST:   incertitude: 0.9456%
SOFTIMPUTE:   incertitude: 5.3656%
G