In [None]:
%%capture
# Notebook setup: make the local braindecode checkout importable, configure
# plotting and logging, and import the result-loading helpers.
import os
import site
# Prepend the local code folders so they take precedence on the import path.
# NOTE(review): absolute, user-specific paths; the notebook only runs on a
# machine with this directory layout.
os.sys.path.insert(0, '/home/schirrmr/braindecode/code/')
os.sys.path.insert(0, '/home/schirrmr/braindecode/code/braindecode/')
# All relative 'data/...' paths used further down are resolved from this directory.
%cd /home/schirrmr/
# switch to cpu
os.environ['THEANO_FLAGS'] = 'floatX=float32,device=cpu,nvcc.fastmath=True'

# Reload edited modules automatically before executing a cell.
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import cm
%matplotlib inline
%config InlineBackend.figure_format = 'svg' 
# Wide, flat default figures with a small font.
matplotlib.rcParams['figure.figsize'] = (12.0, 3.0)
matplotlib.rcParams['font.size'] = 7

import matplotlib.lines as mlines
import seaborn
seaborn.set_style('darkgrid')
import logging
# Root logger at DEBUG level, writing to stdout so messages show up in cell output.
log = logging.getLogger()
log.setLevel('DEBUG')
import sys
logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s',
                     level=logging.DEBUG, stream=sys.stdout)
from hyperoptim.results import load_data_frame, remove_columns_with_same_value, dataset_averaged_frame
import pandas as pd

In [None]:
from scipy.stats.morestats import wilcoxon
import random

def to_numeric_where_possible(df):
    """Return a copy of ``df`` with every numeric-parsable column made numeric.

    A column is converted only if *all* of its entries can be parsed by
    ``pd.to_numeric``; otherwise it is left exactly as it was.

    Parameters
    ----------
    df: pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    converted: pandas.DataFrame
        Deep copy of ``df`` with converted columns where possible.
    """
    converted = df.copy(deep=True)
    for col in converted.columns:
        try:
            numeric_col = pd.to_numeric(converted[col])
        except (ValueError, TypeError):
            # At least one entry is not a number: keep the column untouched
            # (this is what the deprecated errors='ignore' used to do).
            continue
        # Replace the whole column. Assigning through .loc[:, col] writes into
        # the existing (object) column and would keep the old dtype.
        converted[col] = numeric_col
    return converted

def round_numeric_columns(df, decimals):
    """Return a copy of ``df`` whose numeric columns are rounded.

    Columns are picked with ``select_dtypes``: everything numeric except
    timedeltas. All other columns are carried over unchanged and the input
    frame itself is not modified.

    Parameters
    ----------
    df: pandas.DataFrame
    decimals: int
        Number of decimals to round to.

    Returns
    -------
    rounded: pandas.DataFrame
    """
    rounded = df.copy(deep=True)
    numeric_part = rounded.select_dtypes(include=[np.number],
                                         exclude=[np.timedelta64])
    numeric_names = numeric_part.columns
    rounded.loc[:, numeric_names] = np.round(numeric_part, decimals)
    return rounded

def perm_mean_diffs_sampled(a, b, n_diffs=None):
    """Compute mean differences for (a sample of) all label permutations.

    ``a`` and ``b`` are paired values. Every permutation keeps the pairs
    together and only decides, per pair, whether its two labels are swapped,
    i.e. whether the pair contributes ``a[i] - b[i]`` or ``b[i] - a[i]``.

    Parameters
    --------------
    a: list or numpy array
    b: list or numpy array
        Same length as ``a``.
    n_diffs: int
        How many diffs/samples to compute. If None, all ``2 ** len(a)``
        permutations are enumerated. Otherwise ``n_diffs - 1`` distinct
        permutations are drawn with a fixed seed and the unpermuted
        (identity) labelling is appended as the last entry.

    Returns
    -------
    all_diffs: 1d-array of float
        Mean differences, one per permutation (float64).
    """
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    assert len(a) == len(b)
    n_exps = len(a)
    # Bit k (most significant first) of a mask index decides the sign of pair k.
    all_bit_masks = np.array([2 ** n for n in range(n_exps - 1, -1, -1)])
    if n_diffs is None:
        n_diffs = 2 ** n_exps
        i_all_masks = range(n_diffs)
    else:
        # Private generator: same draws as seeding the module-level generator
        # with this value, but without clobbering global random state.
        rng = random.Random(39483948)
        # take samples of all masks, always add identity mask
        i_all_masks = rng.sample(range(0, 2 ** n_exps - 1), n_diffs - 1)
        i_all_masks.append(2 ** n_exps - 1)
        # verification that the last index really decodes to the identity mask
        identity_mask = (np.bitwise_and(i_all_masks[-1], all_bit_masks) > 0) * 2 - 1
        assert np.all(identity_mask == 1)

    # mask * a - mask * b == mask * (a - b) exactly for masks of +1/-1,
    # so the pairwise differences can be computed once up front.
    paired_diffs = a - b
    # float64 on purpose: callers compare these values against a float64
    # observed difference, and float32 rounding could push the identity
    # permutation below it.
    all_diffs = np.full(n_diffs, np.nan, dtype=np.float64)
    for i_diff, i_mask in enumerate(i_all_masks):
        # masks has -1s and 1s,
        # 1 interpretable as correct value selected,
        # -1 as flipped/"incorrect" value selected
        mask = (np.bitwise_and(i_mask, all_bit_masks) > 0) * 2 - 1
        # only sum here, the division by n_exps happens once after the loop
        all_diffs[i_diff] = np.sum(mask * paired_diffs)
    return all_diffs / float(n_exps)

def perm_mean_diffs(a,b):
    """Compute mean differences for all permutations of labels.

    Assumes a and b are paired values,
    a are values with label 0 and b with label 1.
    Computes mean differences for all possible
    switches of 0 and 1 (but keeping pairs together, i.e.
    2 ^ len(a) switches).

    Parameters
    --------------
    a: list or numpy array
    b: list or numpy array
        Same length as ``a``.

    Returns
    -------
    diffs: 1d-numpy array
        Differences between means of labelled values
        for all label-switched values.

    Notes
    -----
    http://www.stat.ncsu.edu/people/lu/courses/ST505/Ch4.pdf#page=10
    http://stats.stackexchange.com/a/64215/56289
    http://www.jarrodmillman.com/publications/millman2015thesis.pdf ->
    https://github.com/statlab/permute python package
    (probably, didn't read: http://finzi.psych.upenn.edu/R/library/EnvStats/html/twoSamplePermutationTestLocation.html)
    """
    # Coerce to float up front: the helper accumulates in place in the dtype
    # of ``a``, so integer input combined with float input would fail there.
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    assert len(a) == len(b)
    all_masks = _create_masks(len(a))
    return _compute_diffs(a, b, all_masks)

def perm_mean_diff_test(a,b, n_diffs=None):
    """Return two-sided p-value of the paired permutation test on mean differences.

    Parameters
    ----------
    a: list or numpy array
    b: list or numpy array
        Paired with ``a`` (same length).
    n_diffs: int
        If None, all ``2 ** len(a)`` label permutations are evaluated via
        ``perm_mean_diffs``; otherwise ``n_diffs`` permutations are evaluated
        via ``perm_mean_diffs_sampled``.

    Returns
    -------
    p_val: float
        Fraction of evaluated permutations whose absolute mean difference is
        at least as large as the observed one. No +1 correction is applied.
    """
    # Accept plain lists as well: ``a - b`` below needs arrays.
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    if n_diffs is None:
        diffs = perm_mean_diffs(a, b)
    else:
        diffs = perm_mean_diffs_sampled(a, b, n_diffs)

    actual_diff = np.mean(a - b)
    n_samples_as_large_diff = np.sum(np.abs(diffs) >= np.abs(actual_diff))
    return n_samples_as_large_diff / float(len(diffs))


def _create_masks(n_exps):
    """ Create all (2^n_exps) binary selection masks for this number of experiments.
    E.g. for 3 experiments:
    False, False, False
    False, False, True
    False, True, False
    False, True, True
    True, False, False
    True, False, True
    True, True, False
    True, True, True""" 
    all_masks = np.array([[False] * n_exps] * (2 ** n_exps))
    i_block_size = all_masks.shape[0] // 2 
    for i_col in range(0,all_masks.shape[1]):
        for i_row in range(0,all_masks.shape[0], i_block_size * 2):
            all_masks[i_row:i_row+i_block_size,i_col] = [[True]] * i_block_size
        i_block_size //= 2
    return all_masks
    
def _compute_diffs(a, b, all_masks):
    # first add "first set" part
    # positive labels from a
    # and negative labels from b
    diffs = all_masks * a
    diffs += (1 - all_masks) * b
    # subtract "second set" part
    # negative labels from a
    # positive labels from b
    diffs -= (1 - all_masks) * a
    diffs -= all_masks * b
    return np.mean(diffs, axis=1)

def pairwise_compare_frame(df, with_p_vals=False, result_cols=('test', 'time', 'train',
        'test_sample', 'train_sample'), compare_col='test'):
    """Compare all pairs of values of every parameter column on paired experiments.

    Every column of ``df`` not listed in ``result_cols`` is treated as a
    parameter. For each parameter and each pair of its values, the rows with
    value A are joined with the rows with value B on all *other* parameters,
    so that each joined row is one pair of experiments differing only in the
    compared parameter. ``compare_col`` is then compared across those pairs.

    Parameters
    ----------
    df: pandas.DataFrame
        One row per experiment.
    with_p_vals: bool
        If True, add a ``p_val`` column (in percent). Up to 18 pairs use the
        exact permutation test, up to 62 pairs a permutation test with
        2 ** 17 sampled permutations, more pairs the Wilcoxon signed-rank test.
    result_cols: sequence of str
        Columns holding results, i.e. columns that are not parameters.
    compare_col: str
        Result column to compare.

    Returns
    -------
    compare_frame: pandas.DataFrame or None
        One row per compared value pair, indexed by parameter name, with
        columns ``n_exp`` (number of pairs), ``val_1``/``val_2`` (parameter
        values as strings, value with the larger mean first),
        ``acc_1``/``acc_2`` (means of ``compare_col``), ``diff``/``std``
        (mean and standard deviation of ``acc_2 - acc_1`` per pair) and
        optionally ``p_val``. Statistics are rounded to one decimal.
        None if there is nothing to compare.
    """
    if with_p_vals:
        # Public location of the test; scipy.stats.morestats is a deprecated
        # private namespace.
        from scipy.stats import wilcoxon
    # Columns identifying the dataset are never compared themselves, but are
    # still used as join keys.
    ignored_keys = ('dataset_filename', 'test_filename', 'subject_id')
    result_col_set = set(result_cols)
    # A list in column order (instead of a set) keeps the row order of the
    # resulting table deterministic.
    param_keys = [key for key in df.keys() if key not in result_col_set]
    table_vals = []
    table_indices = []
    for key in param_keys:
        if key in ignored_keys:
            continue
        other_param_keys = [other for other in param_keys if other != key]
        possible_vals = df[key].unique()
        for i_value_a in range(0, len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                # Pair up experiments that agree on all other parameters;
                # the merge suffixes _x/_y mark value a / value b.
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if len(joined_frame) == 0:
                    continue
                accuracies_a = np.array(joined_frame[compare_col + '_x'],
                    dtype=np.float64)
                accuracies_b = np.array(joined_frame[compare_col + '_y'],
                    dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put the value with the larger mean first in table
                if mean_a >= mean_b:
                    accuracies_1, accuracies_2 = accuracies_a, accuracies_b
                    mean_1, mean_2 = mean_a, mean_b
                    val_1, val_2 = val_a, val_b
                else:
                    accuracies_1, accuracies_2 = accuracies_b, accuracies_a
                    mean_1, mean_2 = mean_b, mean_a
                    val_1, val_2 = val_b, val_a

                diffs = accuracies_2 - accuracies_1
                this_vals = [len(accuracies_1), str(val_1), str(val_2),
                    mean_1, mean_2, np.mean(diffs), np.std(diffs)]
                if with_p_vals:
                    n_pairs = len(accuracies_1)
                    if n_pairs <= 18:
                        # few pairs: enumerate all 2 ** n_pairs permutations
                        p_val = perm_mean_diff_test(accuracies_1,
                            accuracies_2)
                    elif n_pairs <= 62:
                        # too many to enumerate: sample permutations
                        p_val = perm_mean_diff_test(accuracies_1,
                            accuracies_2, n_diffs=2**17)
                    else:
                        _, p_val = wilcoxon(accuracies_1,
                            accuracies_2)
                    # report p-values in percent
                    this_vals.append(p_val * 100)
                table_vals.append(this_vals)
                table_indices.append(key)

    if len(table_vals) == 0:
        return None
    compare_headers = ['n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2',
                       'diff', 'std']
    if with_p_vals:
        compare_headers.append('p_val')
    # Build the frame directly from the records so every column keeps its
    # natural dtype (a numpy array of mixed records would turn everything
    # into strings).
    compare_frame = pd.DataFrame(table_vals, columns=compare_headers,
                                 index=table_indices)
    # Round only the statistics; parameter values stay untouched.
    stat_cols = compare_headers[3:]
    compare_frame[stat_cols] = compare_frame[stat_cols].round(1)
    return compare_frame

In [None]:
# Load the results of the 'dirty-data' experiment series and tidy them up.
df = load_data_frame('data/models/pytorch/auto-diag/dirty-data/')
# Keep only runs marked as finished.
df = df[df.finished == 1]
# Use '-' as the placeholder for missing values; later cells filter on it.
df = df.fillna('-')
df = df.drop('seed', axis=1)
# Presumably drops columns that hold a single value for all runs -- confirm
# against hyperoptim.results.
df = remove_columns_with_same_value(df)
# Round the runtime and turn it into a timedelta, reading the numbers as seconds.
df.runtime = pd.to_timedelta(np.round(df.runtime), unit='s')
df = df.drop('max_min_expected', axis=1)
# Discard runs with max_epochs <= 1.
df = df[df.max_epochs > 1]
df = remove_columns_with_same_value(df)
print(len(df))
# Shorten the column names and drop the loss columns and valid_misclass.
# NOTE(review): 'max_min_expected' was already dropped above, so its entry in
# the rename mapping has no effect.
df = df.rename(columns=dict(shrink_the_spikes='shrink',
                            channel_standardize='chan_std',
                            low_cut_hz='low_hz',
                            max_abs_val='max_val',
                            channel_demean='chan_mean',
                            exp_demean='e_mean',
                            exp_standardize='e_std',
                            max_min_expected='max_expd',
                            max_threshold='max_min_threshold',
                            high_cut_hz='high_hz',
                            moving_demean='m_mean',
                            moving_standardize='m_std')).drop(['train_loss', 'valid_loss',
                                                       'test_loss', 'valid_misclass',
                                                       ], axis=1)
# Scale the two test misclassification columns by 100 (the other misclass
# columns are left as they are).
df.test_misclass = df.test_misclass * 100
df.test_sample_misclass = df.test_sample_misclass * 100

# Display the runs ordered by test_sample_misclass, smallest first.
df.sort_values(by='test_sample_misclass')

In [None]:
# Replace the result values of every run by the values of the last row of its
# 'before_stop_df.pkl' file, then average test and valid misclassification.
# NOTE(review): unpickling executes arbitrary code -- only load result files
# from a trusted source.
# NOTE(review): the values copied from before_stop_df are not scaled by 100
# here, unlike df.test_misclass / df.test_sample_misclass -- confirm units.
df_before_stop = df.copy()
for exp_nr in df.index:
    # The file is a pickled DataFrame: read it with pandas (np.load refuses
    # pickles unless allow_pickle=True is passed).
    before_stop_df = pd.read_pickle('data/models/pytorch/auto-diag/dirty-data/{:d}/before_stop_df.pkl'.format(
        exp_nr))
    final_vals = before_stop_df.iloc[-1]
    for key, val in final_vals.items():
        # valid_misclass was dropped from df, but is needed for the average
        # below, so it is (re-)added here.
        if key in df_before_stop.columns or key == 'valid_misclass':
            # label-based access; the removed .ix indexer behaved like this
            # for the integer experiment numbers in the index
            df_before_stop.loc[exp_nr, key] = val

df_before_stop.test_sample_misclass = (df_before_stop.test_sample_misclass + df_before_stop.valid_sample_misclass) / 2.0
df_before_stop.test_misclass = (df_before_stop.test_misclass + df_before_stop.valid_misclass) / 2.0
df_before_stop = df_before_stop.drop(['valid_misclass', 'valid_sample_misclass'], axis=1)

In [None]:
# Summarise the before-stop results of the runs with n_recordings == 500.
# 'i_test_fold' and the list of result columns are handed to
# dataset_averaged_frame; presumably it aggregates the result columns over the
# test folds -- confirm against hyperoptim.results. The summary is sorted by
# the ('test_sample_misclass', 'mean') column.
df_avg = dataset_averaged_frame(df_before_stop[df_before_stop.n_recordings == 500].drop('n_recordings', axis=1),
                                'i_test_fold',['runtime',
 'test_sample_misclass',
 'test_misclass',
 'train_sample_misclass',
 'train_misclass',]).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:
# Show the 'deep' runs with batch_set_zero_test enabled and batch_set_zero_val == 500.
df_before_stop[(df_before_stop.batch_set_zero_test == True)
              & (df_before_stop.batch_set_zero_val == 500) &
              (df_before_stop.model_name == 'deep')]

In [None]:
# Same selection for the 'shallow' runs.
df_before_stop[(df_before_stop.batch_set_zero_test == True)
              & (df_before_stop.batch_set_zero_val == 500) &
              (df_before_stop.model_name == 'shallow')]

In [None]:
# Mean test_sample_misclass over all runs ...
np.mean(df_before_stop.test_sample_misclass)

In [None]:
# ... and over the runs with i_test_fold == 1 only.
np.mean(df_before_stop[df_before_stop.i_test_fold == 1].test_sample_misclass)

In [None]:
# Summary of the before-stop results restricted to the 'deep' model
# (runs with n_recordings == 500), sorted by ('test_sample_misclass', 'mean').
df_avg = dataset_averaged_frame(df_before_stop[(df_before_stop.n_recordings == 500)
                                              & (df_before_stop.model_name == 'deep')].drop('n_recordings', axis=1),
                                'i_test_fold',['runtime',
 'test_sample_misclass',
 'test_misclass',
 'train_sample_misclass',
 'train_misclass',]).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:
# Same summary restricted to the 'shallow' model.
df_avg = dataset_averaged_frame(df_before_stop[(df_before_stop.n_recordings == 500)
                                              & (df_before_stop.model_name == 'shallow')].drop('n_recordings', axis=1),
                                'i_test_fold',['runtime',
 'test_sample_misclass',
 'test_misclass',
 'train_sample_misclass',
 'train_misclass',]).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:

# Columns holding results (everything else is treated as a parameter).
# Defined here because this is the first cell that uses it; without this the
# notebook fails with a NameError when run top to bottom on a fresh kernel.
result_cols = ['runtime', 'test_sample_misclass', 'train_misclass', 'valid_sample_misclass', 'test_misclass',
               'train_sample_misclass']

# Summary of the final results of the runs with n_recordings == 500,
# sorted by ('test_sample_misclass', 'mean').
df_avg = dataset_averaged_frame(df[df.n_recordings == 500].drop('n_recordings', axis=1),
                                'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:

# Summary of the final results of the runs with n_recordings == 150,
# sorted by ('test_sample_misclass', 'mean').
df_avg = dataset_averaged_frame(df[df.n_recordings == 150].drop('n_recordings', axis=1),
                                'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:
# Load the results of the 'preprocs-with-divide' experiment series.
# Note that this replaces the `df` of the cells above.
df = load_data_frame('data/models/pytorch/auto-diag/preprocs-with-divide/')
# Keep only runs marked as finished.
df = df[df.finished == 1]
# Use '-' as the placeholder for missing values.
df = df.fillna('-')
df = df.drop('seed', axis=1)
df = remove_columns_with_same_value(df)
# Round the runtime and turn it into a timedelta, reading the numbers as seconds.
df.runtime = pd.to_timedelta(np.round(df.runtime), unit='s')
df = df.drop('max_min_expected', axis=1)
print(len(df))
# Shorten the column names and drop the loss columns and valid_misclass.
# NOTE(review): 'max_min_expected' was already dropped above, so its entry in
# the rename mapping has no effect.
df = df.rename(columns=dict(shrink_the_spikes='shrink',
                            channel_standardize='chan_std',
                            low_cut_hz='low_hz',
                            max_abs_val='max_val',
                            channel_demean='chan_mean',
                            exp_demean='e_mean',
                            exp_standardize='e_std',
                            max_min_expected='max_expd',
                            max_threshold='max_min_threshold',
                            high_cut_hz='high_hz',
                            moving_demean='m_mean',
                            moving_standardize='m_std')).drop(['train_loss', 'valid_loss',
                                                       'test_loss', 'valid_misclass',
                                                       ], axis=1)
# Scale the two test misclassification columns by 100.
df.test_misclass = df.test_misclass * 100
df.test_sample_misclass = df.test_sample_misclass * 100

# Display the runs ordered by test_sample_misclass, smallest first.
df.sort_values(by='test_sample_misclass')

In [None]:
# `restrict` is used by the filtering cells further down.
from hyperoptim.results import restrict

# NOTE(review): dataset_averaged_frame is already imported in the first cell.
from hyperoptim.results import dataset_averaged_frame

# Summary of all loaded runs, sorted by ('test_sample_misclass', 'mean').
# NOTE(review): `result_cols` must already exist in the kernel when this runs.
df_avg = dataset_averaged_frame(df,'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:
from IPython.display import display
# For every run: show its parameters/results and plot the misclassification
# curves stored in its 'before_stop_df.pkl' file.
# NOTE(review): unpickling executes arbitrary code -- only load result files
# from a trusted source.
for nr in np.array(df.index):
    # The file is a pickled DataFrame: read it with pandas (np.load refuses
    # pickles unless allow_pickle=True is passed).
    epoch_df = pd.read_pickle('data/models/pytorch/auto-diag/preprocs-with-divide/{:d}/before_stop_df.pkl'.format(
        nr))
    # label-based access; the removed .ix indexer behaved like this for the
    # integer experiment numbers in the index
    display(df.loc[nr])

    fig = plt.figure(figsize=(12,5))
    plt.plot(epoch_df.train_misclass * 100, label='train')
    plt.plot(epoch_df.valid_misclass * 100, label='valid')
    plt.plot(epoch_df.test_misclass * 100, label='test')
    plt.ylabel('misclass [%]')
    plt.legend()
    plt.title(nr)

    display(fig)
    # close explicitly so figures do not pile up in memory over the loop
    plt.close(fig)
        

In [None]:
df.ix[[3,6,13]]

In [None]:
df.ix[[3,6,9,10,13,15]]

In [None]:
# Shell command: count the entries in the 'preprocs-more-data' result folder.
! ls data/models/pytorch/auto-diag/preprocs-more-data/ | wc -l

In [None]:
# Load the results of the 'preprocs-more-data' experiment series.
# Note that this replaces the `df` of the cells above.
df = load_data_frame('data/models/pytorch/auto-diag/preprocs-more-data///')
# Keep only runs marked as finished.
df = df[df.finished == 1]
# Use '-' as the placeholder for missing values; the filtering cells below
# compare against it.
df = df.fillna('-')
df = df.drop('seed', axis=1)
df = remove_columns_with_same_value(df)
# Round the runtime and turn it into a timedelta, reading the numbers as seconds.
df.runtime = pd.to_timedelta(np.round(df.runtime), unit='s')
df = df.drop('max_min_expected', axis=1)
print(len(df))
# Shorten the column names and drop the loss columns and valid_misclass.
# NOTE(review): 'max_min_expected' was already dropped above, so its entry in
# the rename mapping has no effect.
df = df.rename(columns=dict(shrink_the_spikes='shrink',
                            channel_standardize='chan_std',
                            low_cut_hz='low_hz',
                            max_abs_val='max_val',
                            channel_demean='chan_mean',
                            exp_demean='e_mean',
                            exp_standardize='e_std',
                            max_min_expected='max_expd',
                            max_threshold='max_min_threshold',
                            high_cut_hz='high_hz',
                            moving_demean='m_mean',
                            moving_standardize='m_std')).drop(['train_loss', 'valid_loss',
                                                       'test_loss', 'valid_misclass',
                                                       ], axis=1)
# Scale the two test misclassification columns by 100.
df.test_misclass = df.test_misclass * 100
df.test_sample_misclass = df.test_sample_misclass * 100

# Display the runs ordered by test_misclass, smallest first.
df.sort_values(by='test_misclass')

In [None]:
# `restrict` is used by the filtering cells further down.
from hyperoptim.results import restrict

# NOTE(review): dataset_averaged_frame is already imported in the first cell.
from hyperoptim.results import dataset_averaged_frame

# Summary of all loaded runs, sorted by ('test_sample_misclass', 'mean').
# NOTE(review): `result_cols` must already exist in the kernel when this runs.
df_avg = dataset_averaged_frame(df,'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))
df_avg

In [None]:
# Rows of the summary whose ('runtime', 'len') entry equals 2.
df_avg[df_avg[('runtime', 'len')] == 2]

In [None]:
# Summary for the 'shallow' model only.
dataset_averaged_frame(df[df.model_name == 'shallow'],
                       'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))

In [None]:
# Summary for the 'deep' model only.
dataset_averaged_frame(df[df.model_name == 'deep'],
                       'i_test_fold',result_cols).sort_values(by=('test_sample_misclass', 'mean'))

In [None]:
df_no_std = restrict(df, e_std=False, m_std=False, chan_std=False)
df_std = df[(df.e_std == True) | (df.m_std == True) |
            (df.chan_std == True)]


In [None]:
from numpy.random import RandomState
rng = RandomState(39847348)
accs =  (100 - df_std.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no_std.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_no_mean = restrict(df, e_mean=False, m_mean=False, chan_mean=False)
df_mean = df[(df.e_mean == True) | (df.m_mean == True) |
            (df.chan_mean == True)]

rng = RandomState(39847348)
accs =  (100 - df_mean.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no_mean.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_no_high = restrict(df, high_hz='-')
df_high = df[(df.high_hz == 45)]

rng = RandomState(39847348)
accs =  (100 - df_high.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no_high.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_no = restrict(df, shrink=False)
df_yes = df[(df.shrink == True)]

rng = RandomState(39847348)
accs =  (100 - df_yes.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_no = restrict(df, max_min_threshold='-')
df_yes = df[(df.max_min_threshold == 600)]

rng = RandomState(39847348)
accs =  (100 - df_yes.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_no = restrict(df, max_val='-')
df_yes = df[(df.max_val == 800)]

rng = RandomState(39847348)
accs =  (100 - df_yes.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_no.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,2)

In [None]:
df_shrink = df[(df.shrink == True) & (df.max_val == '-')]
df_max_min = df[(df.max_min_threshold == 600) & (df.max_val == '-')]
df_max_abs = df[(df.max_min_threshold == '-')& (df.max_val == 800) & (df.shrink == False)]

rng = RandomState(39847348)
accs =  (100 - df_shrink.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_max_min.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_max_abs.test_sample_misclass)
plt.plot(2 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,3)

In [None]:
df_shrink = df_mean[(df_mean.shrink == True) & (df_mean.max_val == '-')]
df_max_min = df_mean[(df_mean.max_min_threshold == 600) & (df_mean.max_val == '-')]
df_max_abs = df_mean[(df_mean.max_min_threshold == '-')& (df_mean.max_val == 800) & (df_mean.shrink == False)]

rng = RandomState(39847348)
accs =  (100 - df_shrink.test_sample_misclass)
plt.plot(rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_max_min.test_sample_misclass)
plt.plot(1 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
accs =  (100 - df_max_abs.test_sample_misclass)
plt.plot(2 + rng.randn(len(accs)) * 0.1, accs, ls='', marker='o', alpha=0.5)
plt.xlim(-0.5,3)

In [None]:
# Columns holding results; all remaining columns are treated as parameters
# by pairwise_compare_frame.
result_cols = ['runtime', 'test_sample_misclass', 'train_misclass', 'valid_sample_misclass', 'test_misclass', 
               'train_sample_misclass']

# Pairwise comparison of parameter values on test_misclass, smallest p-value first.
pairwise_compare_frame(df,result_cols=result_cols,compare_col='test_misclass', with_p_vals=True).sort_values(by='p_val')

In [None]:
# Same comparison on test_sample_misclass.
pairwise_compare_frame(df,result_cols=result_cols,compare_col='test_sample_misclass', with_p_vals=True).sort_values(by='p_val')

In [None]:
# Same list as defined two cells above, repeated so this cell can run on its own.
result_cols = ['runtime', 'test_sample_misclass', 'train_misclass', 'valid_sample_misclass', 'test_misclass', 
               'train_sample_misclass']

# Same comparison on the runtime.
pairwise_compare_frame(df,result_cols=result_cols,compare_col='runtime', with_p_vals=True).sort_values(by='p_val')

In [None]:
# Number of runs with test_sample_misclass below 18.
len(df[df.test_sample_misclass < 18])

In [None]:
# Pairwise comparison restricted to those runs.
pairwise_compare_frame(df[df.test_sample_misclass < 18],
                       result_cols=result_cols,compare_col='test_sample_misclass', with_p_vals=True).sort_values(by='p_val')

In [None]:
# Load the results of the 'preprocs' experiment series.
# Note that this replaces the `df` of the cells above.
df = load_data_frame('data/models/pytorch/auto-diag/preprocs//')
# Keep only runs marked as finished.
df = df[df.finished == 1]
# Use '-' as the placeholder for missing values.
df = df.fillna('-')
df = df.drop('seed', axis=1)
df = remove_columns_with_same_value(df)
# Round the runtime and turn it into a timedelta, reading the numbers as seconds.
df.runtime = pd.to_timedelta(np.round(df.runtime), unit='s')
df = df.drop('max_min_expected', axis=1)
print(len(df))
# Shorten the column names and drop the loss columns and valid_misclass.
df = df.rename(columns=dict(shrink_the_spikes='shrink',
                      channel_standardize='chan_std',
                      low_cut_hz='low_hz',
                      max_abs_val='max_val',
                      channel_demean='chan_mean',
                      exp_demean='e_mean',
                      exp_standardize='e_std',
                      max_threshold='max_min_threshold',
                      high_cut_hz='high_hz',
                      moving_demean='m_mean',
                      moving_standardize='m_std')).drop(['train_loss', 'valid_loss',
                                                   'test_loss', 'valid_misclass',
                                                   ], axis=1)
# Scale the two test misclassification columns by 100.
df.test_misclass = df.test_misclass * 100
df.test_sample_misclass = df.test_sample_misclass * 100

# Display the runs ordered by test_misclass, smallest first.
df.sort_values(by='test_misclass')

In [None]:
# Same table ordered by test_sample_misclass.
df.sort_values(by='test_sample_misclass')

In [None]:
# Columns holding results; all remaining columns are treated as parameters
# by pairwise_compare_frame.
result_cols = ['runtime', 'test_sample_misclass', 'train_misclass', 'valid_sample_misclass', 'test_misclass', 
               'train_sample_misclass']

# Pairwise comparison of parameter values on test_misclass, smallest p-value first.
pairwise_compare_frame(df,result_cols=result_cols,compare_col='test_misclass', with_p_vals=True).sort_values(by='p_val')

In [None]:

# Same comparison on test_sample_misclass.
pairwise_compare_frame(df,result_cols=result_cols,compare_col='test_sample_misclass', with_p_vals=True).sort_values(by='p_val')


In [None]:
# Load the results of the 'first-try' experiment series.
# Note that this replaces the `df` of the cells above.
df = load_data_frame('data/models/pytorch/auto-diag/first-try/')
# Keep only runs marked as finished.
df = df[df.finished == 1]
# Use '-' as the placeholder for missing values.
df = df.fillna('-')
# Keep only the runs with max_epochs == 35.
df = df[df.max_epochs == 35]
df = remove_columns_with_same_value(df)
# Round the runtime and turn it into a timedelta, reading the numbers as seconds.
df.runtime = pd.to_timedelta(np.round(df.runtime), unit='s')

#df = df.sort_values(by='alpha')
#df = df[df.batch_norm == True]
df