In [1]:
import time
import utils
from utils_na import inject_missing
import numpy as np

In [2]:
# Per-dataset hyper-parameters read by the timing loop (n_batches, epochs and
# alpha are forwarded to utils.run_approach).
params = {
    name: {'n_batches': n_batches, 'epochs': epochs, 'alpha': alpha}
    for name, n_batches, epochs, alpha in (
        ('IRIS', 10, 50, 1.0),
        ('AUSTRALIAN', 10, 20, 1.0),
        ('WINE', 10, 50, 1.0),
        ('PIMA', 20, 50, 1.0),
        ('ABALONE', 100, 50, 0.3),
    )
}

# Datasets to benchmark, in the order their rows are printed.
datasets = ['IRIS', 'AUSTRALIAN', 'WINE', 'ABALONE', 'PIMA']

# Short labels used for each dataset in the generated LaTeX table.
clean_datasets = dict(zip(datasets, ['IRIS', 'STAT', 'WINE', 'ABAL', 'PIMA']))

# Approaches whose running time is compared.
approaches = ['SINGLE', 'MULTIPLE', 'SINGLE-HOT-PATCH', 'HOT-PATCH']

# Imputation methods whose running time is compared.
imputation_methods = ['MISSFOREST', 'SOFTIMPUTE', 'GAIN', 'MIDA', 'SINKHORN']

# Missing-data injection settings and experiment sizes.
missing_mode = 'MNAR'
missing_rate = 0.15
n_runs = 10
n_imputations = 20

In [3]:
%%capture

def _zero_timing_table():
    """Return a fresh ``{dataset: {imputation_method: {approach: 0}}}`` table.

    Every call builds new nested dicts, so the tables it returns never share
    state with each other.
    """
    return {
        dataset: {
            imputation_method: {approach: 0 for approach in approaches}
            for imputation_method in imputation_methods
        }
        for dataset in datasets
    }


# Seconds spent on imputation, per dataset / imputation method / approach.
imp_times = _zero_timing_table()
# Mean seconds per run of each approach, per dataset / imputation method / approach.
running_times = _zero_timing_table()

# Two-digit tag of the missing rate (e.g. 0.15 -> '15'); loop-invariant, so computed once.
missing_rate_tag = f'{missing_rate:.02f}'.split('.')[-1]

for dataset in datasets:
    # Load dataset
    x, y = utils.load_ds(dataset, shuffle_seed=0)
    # Inject missing values given missing mode and rate
    data_missing, missing_mask = inject_missing(x, missing_rate, missing_mode, random_seed=0)
    # Keep data_missing where the mask is truthy and put NaN everywhere else.
    # NOTE(review): presumably the mask is truthy for observed entries — confirm against inject_missing.
    data_missing_nans = np.where(missing_mask, data_missing, np.nan)

    # Execute 1 imputation with each imputation method without saving results just to get runtime required
    for imputation_method in imputation_methods:
        # perf_counter is monotonic and high-resolution, unlike the wall clock time.time().
        start = time.perf_counter()
        utils.run_imputations(data_missing_nans, missing_mask, data_missing, imputation_method, 1)
        total_time = time.perf_counter() - start
        for approach in approaches:
            # 'SINGLE' is charged one imputation; every other approach is charged
            # n_imputations times the measured single-imputation time.
            imp_times[dataset][imputation_method][approach] = total_time * (1 if approach == 'SINGLE' else n_imputations)

    # get n imputations results (only the first imputation method is used for the approach timings)
    imp_name = f'{dataset}_{missing_mode}_{missing_rate_tag}_m{imputation_methods[0]}'
    imputations = utils.run_imputations(data_missing_nans, missing_mask, data_missing, imputation_methods[0],
                                        n_imputations, imp_name)
    n_batches = params[dataset]['n_batches']
    epochs = params[dataset]['epochs']
    alpha = params[dataset]['alpha']

    for approach in approaches:
        approach_times = []
        # Re-seed NumPy's global RNG before each approach's block of runs.
        np.random.seed(0)
        for run in range(n_runs):
            start = time.perf_counter()
            y_probas, y_test = utils.run_approach(approach, imputations, y, run, n_runs, epochs, n_batches,
                                                  [32, 32], alpha)
            approach_times.append(time.perf_counter() - start)
        # The same mean is stored under every imputation method, because the runs
        # above only ever use the imputations of imputation_methods[0].
        mean_time = np.mean(approach_times)
        for imputation_method in imputation_methods:
            running_times[dataset][imputation_method][approach] = mean_time

In [9]:
# Print the body of the LaTeX timing table: one group of rows per dataset,
# one row per imputation method, and two columns (imputation time, approach
# running time) per approach.
n_methods = len(imputation_methods)  # number of rows each \multirow cell must span

for dataset in datasets:
    # The dataset label spans every method row of its group.
    line = f'\\multirow{{{n_methods}}}{{*}}{{{clean_datasets[dataset]}}} '
    for i, imputation_method in enumerate(imputation_methods):
        # Methods are labelled by the first four characters of their name.
        line += f'& {imputation_method[:4]}'
        for approach in approaches:
            line += f' & ${imp_times[dataset][imputation_method][approach]:.02f}$'
            if i == 0:
                # The approach running time is written once, on the first row,
                # as a cell spanning all method rows.
                line += f' & \\multirow{{{n_methods}}}{{*}}{{${running_times[dataset][imputation_method][approach]:.02f}$}}'
            else:
                # Later rows leave that column empty (covered by the \multirow above).
                line += ' &'
        line += ' \\\\'
        print(line)
        line = ''
    # Escaped backslash: '\h' is not a valid escape sequence in a normal string.
    print('\\hline')

\multirow{5}{*}{IRIS} & MISS & $10.61$ & \multirow{5}{*}{$0.62$} & $212.30$ & \multirow{5}{*}{$12.13$} & $212.30$ & \multirow{5}{*}{$0.60$} & $212.30$ & \multirow{5}{*}{$12.60$} \\
& SOFT & $0.02$ & & $0.34$ & & $0.34$ & & $0.34$ & \\
& GAIN & $17.76$ & & $355.12$ & & $355.12$ & & $355.12$ & \\
& MIDA & $21.02$ & & $420.40$ & & $420.40$ & & $420.40$ & \\
& SINK & $43.27$ & & $865.39$ & & $865.39$ & & $865.39$ & \\
\hline
\multirow{5}{*}{STAT} & MISS & $41.09$ & \multirow{5}{*}{$0.29$} & $821.84$ & \multirow{5}{*}{$4.82$} & $821.84$ & \multirow{5}{*}{$0.31$} & $821.84$ & \multirow{5}{*}{$5.56$} \\
& SOFT & $0.01$ & & $0.14$ & & $0.14$ & & $0.14$ & \\
& GAIN & $25.58$ & & $511.67$ & & $511.67$ & & $511.67$ & \\
& MIDA & $16.71$ & & $334.11$ & & $334.11$ & & $334.11$ & \\
& SINK & $46.62$ & & $932.40$ & & $932.40$ & & $932.40$ & \\
\hline
\multirow{5}{*}{WINE} & MISS & $35.74$ & \multirow{5}{*}{$0.58$} & $714.80$ & \multirow{5}{*}{$12.30$} & $714.80$ & \multirow{5}{*}{$0.60$} & $714.80$ &