In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Architectures whose result CSVs are read by the analysis functions below.
model_names = [
    'alexnet',
    'densenet',
    'inception',
    'resnet18',
    'resnet50',
    'squeezenet',
    'vgg',
]
# Accuracy weights kept as strings: they serve as table labels and are turned
# into numbers with float(alpha) where needed. Yields '0', '0.1', ..., '0.9', '1.0'.
alphas = ['0'] + ['{:.1f}'.format(step / 10) for step in range(1, 11)]

In [3]:
def highlight_max(s, color='yellow'):
    '''
    Build CSS styles that highlight the maximum of a Series.

    Intended as a callback for ``DataFrame.style.apply``, which calls it once
    per column/row and expects one CSS string per element.

    Parameters
    ----------
    s : pandas.Series
        Values to inspect.
    color : str, optional
        CSS colour used as the background of the highlighted entries
        (default ``'yellow'``, the previously hard-coded value).

    Returns
    -------
    list of str
        ``'background-color: <color>'`` for every entry equal to ``s.max()``
        (ties are all highlighted) and ``''`` for every other entry.
    '''
    is_max = s == s.max()
    css = 'background-color: ' + color
    return [css if v else '' for v in is_max]

def highlight_min(s, color='yellow'):
    '''
    Build CSS styles that highlight the minimum of a Series.

    Intended as a callback for ``DataFrame.style.apply``, which calls it once
    per column/row and expects one CSS string per element.

    Parameters
    ----------
    s : pandas.Series
        Values to inspect.
    color : str, optional
        CSS colour used as the background of the highlighted entries
        (default ``'yellow'``, the previously hard-coded value).

    Returns
    -------
    list of str
        ``'background-color: <color>'`` for every entry equal to ``s.min()``
        (ties are all highlighted) and ``''`` for every other entry.
    '''
    is_min = s == s.min()
    css = 'background-color: ' + color
    return [css if v else '' for v in is_min]

In [4]:
#calculate the results of model selection
def calcStatFold(file_name, fold, train_ratio=0.3, val_ratio=0.33):
    '''
    Evaluate score-based model selection on one fold, for every model and alpha.

    For each name in the module-level ``model_names`` the results are read from
    ``<file_name><model>_guidedBackProp_<fold>_<train_ratio>_<val_ratio>.csv``,
    which must provide the columns ``val_acc``, ``val_auc``, ``test_acc`` and
    ``test_auc``. For each entry of the module-level ``alphas`` the selection
    score of a row is ``alpha * val_acc + (1 - alpha) * val_auc``.

    Parameters
    ----------
    file_name : str
        Prefix prepended verbatim to the CSV name (include any trailing
        path separator).
    fold : int
        Fold identifier embedded in the CSV name.
    train_ratio, val_ratio : float, optional
        Ratios embedded in the CSV name. The defaults (0.3, 0.33) reproduce
        the suffix that used to be hard-coded.

    Returns
    -------
    stats : pandas.DataFrame
        Correlation between ``test_acc`` and the selection score
        (rows: alphas, columns: models).
    best : pandas.DataFrame
        ``test_acc`` of the row with the highest selection score.
    best_expl : pandas.DataFrame
        ``test_auc`` of the row with the highest selection score.
    '''
    suffix = '_guidedBackProp_' + str(fold) + '_' + str(train_ratio) + '_' + str(val_ratio) + '.csv'
    #correlations between selection scores and test accuracy
    stats = pd.DataFrame()
    #test accuracy of model selected with selection scores
    best = pd.DataFrame()
    #test explanation quality of model selected with selection scores
    best_expl = pd.DataFrame()
    for model_name in model_names:
        df = pd.read_csv(file_name + model_name + suffix)
        ts = pd.Series(dtype='float64', index=alphas)
        bs = pd.Series(dtype='float64', index=alphas)
        es = pd.Series(dtype='float64', index=alphas)
        for alpha in alphas:
            weight = float(alpha)
            # Compute the score and its arg-max once and reuse them for all
            # three statistics.
            score = df['val_acc'] * weight + df['val_auc'] * (1 - weight)
            selected = score.idxmax()
            ts[alpha] = df['test_acc'].corr(score)
            bs[alpha] = df['test_acc'].get(selected)
            es[alpha] = df['test_auc'].get(selected)
        stats[model_name] = ts
        best[model_name] = bs
        best_expl[model_name] = es
    return stats, best, best_expl

In [12]:
# Sum the per-fold tables returned by calcStatFold; the display cells divide
# the sums by num_folds to obtain the average over folds.
running_stats = None
running_best = None
running_best_expl = None
num_folds = 10
for fold in range(num_folds):
    # 'stats/' resolves on Windows, Linux and macOS alike, whereas the former
    # 'stats\\' prefix only named a directory on Windows.
    stats, best, best_es = calcStatFold('stats/', fold)
    if running_stats is None:
        # First fold: start the running sums from this fold's tables.
        running_stats = stats
        running_best = best
        running_best_expl = best_es
    else:
        running_stats += stats
        running_best += best
        running_best_expl += best_es

In [None]:
# Fold-averaged correlation table; the largest value of each column is highlighted.
running_stats.div(num_folds).style.apply(highlight_max, axis=0)

In [None]:
# Fold-averaged test accuracy of the selected rows; the largest value of each column is highlighted.
running_best.div(num_folds).style.apply(highlight_max, axis=0)

In [None]:
# Fold-averaged test_auc of the selected rows; the largest value of each column is highlighted.
running_best_expl.div(num_folds).style.apply(highlight_max, axis=0)

In [9]:
from sklearn.model_selection import train_test_split as train_test_split

In [99]:
#analyze the impact of expert-explanation availability on model selection
def calcStatFixedTrainFold(file_name, model_name, train_ratio, val_ratio, fold,
                           expl_ratios=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)):
    '''
    Evaluate score-based model selection for one model and fold, per explanation ratio.

    Results are read from
    ``<file_name><model_name>_guidedBackProp_<fold>_<train_ratio>_<val_ratio>.csv``,
    which must provide ``val_acc``, ``test_acc`` and one ``val_auc_<ratio>``
    column for every entry of ``expl_ratios`` (the column name uses
    ``str(ratio)``, so the ratio ``1`` maps to ``val_auc_1``). For each entry
    of the module-level ``alphas`` the selection score of a row is
    ``alpha * val_acc + (1 - alpha) * val_auc_<ratio>``.

    Parameters
    ----------
    file_name : str
        Prefix prepended verbatim to the CSV name (include any trailing
        path separator).
    model_name : str
        Model identifier embedded in the CSV name.
    train_ratio, val_ratio : float
        Ratios embedded in the CSV name.
    fold : int
        Fold identifier embedded in the CSV name.
    expl_ratios : iterable, optional
        Ratios selecting the ``val_auc_<ratio>`` columns to evaluate.
        The default reproduces the list that used to be hard-coded.

    Returns
    -------
    stats : pandas.DataFrame
        Correlation between ``test_acc`` and the selection score
        (rows: ``alpha=<alpha>``, columns: ``ratio=<ratio>``).
    best : pandas.DataFrame
        ``test_acc`` of the row with the highest selection score, same layout.
    '''
    stats = pd.DataFrame()
    best = pd.DataFrame()
    df = pd.read_csv(file_name + model_name + '_guidedBackProp_' + str(fold) + '_' + str(train_ratio) + '_' + str(val_ratio) + '.csv')
    for expl_ratio in expl_ratios:
        auc_column = 'val_auc_' + str(expl_ratio)
        column_label = 'ratio=' + str(expl_ratio)
        ts = pd.Series(dtype='float64')
        bs = pd.Series(dtype='float64')
        for alpha in alphas:
            weight = float(alpha)
            # Compute the score once and reuse it for both statistics.
            score = df['val_acc'] * weight + df[auc_column] * (1 - weight)
            ts['alpha=' + alpha] = df['test_acc'].corr(score)
            bs['alpha=' + alpha] = df['test_acc'].get(score.idxmax())
        stats[column_label] = ts
        best[column_label] = bs
    return stats, best

In [126]:
# Sum the per-fold tables returned by calcStatFixedTrainFold for densenet.
# NOTE: this rebinds running_stats / running_best / num_folds, which the
# model-selection cell earlier in the notebook also assigns.
running_stats = None
running_best = None
num_folds = 10
for fold in range(num_folds):
    # 'stats/' resolves on Windows, Linux and macOS alike, whereas the former
    # 'stats\\' prefix only named a directory on Windows.
    stats, best = calcStatFixedTrainFold('stats/', 'densenet', 0.3, 0.33, fold)
    if running_stats is None:
        # First fold: start the running sums from this fold's tables.
        running_stats = stats
        running_best = best
    else:
        running_stats += stats
        running_best += best

In [None]:
# Fold-averaged test accuracy of the selected rows; the largest value of each column is highlighted.
running_best.div(num_folds).style.apply(highlight_max, axis=0)