In [None]:
# Shell escape: print the NVIDIA driver's view of the GPUs on this machine before TensorFlow is configured.
!nvidia-smi

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
import pickle
import pandas as pd
import numpy as np
import warnings
from evaluation import *

# Record the TensorFlow version used to produce the figures.
print(tf.__version__)
# Suppress every Python warning for the remainder of the notebook.
warnings.filterwarnings("ignore")

# Restrict TensorFlow to the first GPU and let it allocate memory on demand.
# Guard against machines without a GPU: indexing an empty device list would
# raise IndexError, and the cells below only read pickled results and plot.
gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
else:
    print("No GPU detected; continuing on CPU.")

In [None]:
# Single seed shared by every random number source used in this notebook.
seed = 2021

# Seed NumPy's legacy global generator and TensorFlow's global generator.
np.random.seed(seed)
tf.random.set_seed(seed)

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so setting it here
# is only picked up by processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)

# Compute disparity

## 3D MRI disparity results

In [None]:
# --- What to plot -----------------------------------------------------------
metrics = ['BCE', 'ECE', "Error rate", "Precision"]
# One colour per model; the last model is drawn in black.
color_list = ['C{}'.format(n) for n in range(7)] + ['black']
group = 'gender'
testdata = 'original'
group_type = {'race': [0, 1], 'gender': [0, 1], 'age': [0, 1]}
group_name = {'race': ['white', 'others'], 'gender': ['female', 'male'], 'age': ['young', 'old']}

# --- Result files -----------------------------------------------------------
# Every result file shares one naming template; only the training-variant tag
# differs (the baseline has no tag). Order must match `model_list` below.
_variant_tags = ['', '_balanced', '_stratified', '_Adv',
                 '_DistMatchMMD', '_DistMatchMean', '_FairALM', '_proposed']
results_list = [
    'results/3D_CNN_AD_CN{tag}_on_{testdata}_{group}_results'.format(
        tag=tag, testdata=testdata, group=group)
    for tag in _variant_tags
]

# Legend labels, index-aligned with `results_list`.
model_list = [
    'Baseline',
    'Balanced',
    'Stratified',
    'Adversarial learning',
    'DistMatchMMD',
    'DistMatchMean',
    'FairALM',
    'Proposed augmentation',
]

# --- Vertical layout (in y-axis data units) ---------------------------------
gap_result = 5               # distance between two models within one metric band
gap_between_metrics = 20     # extra distance separating two metric bands
n_models = len(results_list)
gap_metrics = gap_result * (n_models - 1) + gap_between_metrics

# y coordinate of the first model of the first metric; later rows are drawn below it.
top = int(gap_metrics * (len(metrics) - 1) + gap_result * (n_models / 2))

# Draw one figure with a horizontal band per metric; inside each band there is
# one row per model showing the between-group disparity of that metric.

# Load every result file once up front (the original re-opened each file for
# every metric). Infinite scores are turned into NaN so the nan-aware
# reductions below skip them.
# NOTE: pickle.load can execute arbitrary code; only open result files you trust.
n_groups = len(group_type[group])
loaded_results = []
for result_name in results_list:
    with open(result_name, "rb") as fp:   # Unpickling
        dfs = pickle.load(fp)
    loaded_results.append(
        [dfs[k].replace([np.inf, -np.inf], np.nan) for k in range(n_groups)]
    )

plt.figure(figsize=(9, 6), dpi=400)
plt.title('{group} disparity on {testdata} data'.format(testdata=testdata, group=group.capitalize()), fontsize=10)
# Metric labels sit at the vertical centre of each band (first metric on top).
plt.yticks([(i*gap_metrics)+2.5 for i in range(len(metrics))][::-1], metrics, fontsize=12)

for i, metric in enumerate(metrics):
    band_top_y = top - (i*gap_metrics)

    for idx, group_frames in enumerate(loaded_results):
        # Mean score of this metric within each demographic group.
        mean_scores = [frame[metric].mean(skipna=True) for frame in group_frames]

        # Disparity = summed absolute deviation of the group means from their median.
        median = np.nanmedian(mean_scores)
        disparity = sum(np.abs(score - median) for score in mean_scores)

        # `all_disparity` holds a single value, so the standard deviation (and
        # therefore the interval drawn below) collapses to zero width.
        all_disparity = [disparity]
        all_mean_disparity = np.nanmean(all_disparity)
        std_dev = np.nanstd(all_disparity)
        # np.sqrt replaces np.math.sqrt: the `np.math` alias is gone in NumPy 2.0.
        std_error = std_dev / np.sqrt(1)
        # NOTE(review): 2.262 looks like a Student-t critical value (95 %, 9 dof) - confirm.
        ci = 2.262 * std_error
        all_lower = all_mean_disparity - ci
        all_upper = all_mean_disparity + ci

        color = color_list[idx]
        row_y = top - (i*gap_metrics + idx*gap_result)

        plt.plot([all_upper, all_lower], [row_y, row_y], color=color, markersize=1)
        plt.plot(all_mean_disparity, row_y, 'o', color=color, label=model_list[idx], markersize=2)

    # Dashed vertical guide through the band at the disparity of the LAST model
    # in `results_list`, running from that model's row up to the band's first row.
    plt.plot([all_mean_disparity, all_mean_disparity], [row_y, band_top_y],
             linestyle='--', color='black', linewidth=0.5)

# Thin horizontal separators half-way between consecutive metric bands.
for i in range(len(metrics)-1):
    plt.axhline(top-((i+1)*gap_metrics)+int(gap_between_metrics/2), linestyle='--', color='black', linewidth=0.3)

legend_without_duplicate_labels(plt)



In [None]:
# One figure per metric: for every model, draw each demographic group's mean
# score with an interval around it, so the groups can be compared side by side.
metrics = ['AUC', 'BCE', 'ECE', 'Error rate', 'Precision']

# Vertical layout (y-axis data units): groups of one model are `gap_group`
# apart, and consecutive models are separated by an extra `gap_between_result`.
gap_group = 10
gap_between_result = 30
gap_result = gap_group*(len(group_type[group])-1) + gap_between_result

# y coordinate of the first group of the first model; later rows are drawn below it.
top = int(gap_result*(len(results_list)-1) + gap_group*(len(group_type[group])/2))

# Load every result file once (the original re-opened each file for every
# metric). Infinite scores become NaN so the nan-aware reductions skip them.
# NOTE: pickle.load can execute arbitrary code; only open result files you trust.
n_groups = len(group_type[group])
loaded_results = []
for result_name in results_list:
    with open(result_name, "rb") as fp:   # Unpickling
        dfs = pickle.load(fp)
    loaded_results.append(
        [dfs[k].replace([np.inf, -np.inf], np.nan) for k in range(n_groups)]
    )

for metric in metrics:
    plt.figure(figsize=(9, 6), dpi=400)
    plt.title(metric, fontsize=18)
    plt.yticks([(pos*gap_result) for pos in range(len(results_list))][::-1], model_list, fontsize=12)

    for idx, group_frames in enumerate(loaded_results):
        for k, frame in enumerate(group_frames):
            scores = frame[metric]

            mean_score = np.nanmean(scores)
            std_dev = np.nanstd(scores)
            # np.sqrt replaces np.math.sqrt: the `np.math` alias is gone in NumPy 2.0.
            # NOTE(review): the divisor is sqrt(1), so the interval is 2.262 x std
            # rather than a standard error of the mean - confirm this is intended.
            std_error = std_dev / np.sqrt(1)
            ci = 2.262 * std_error

            color = color_list[k]
            row_y = top - (k*gap_group + idx*gap_result)

            plt.plot([mean_score + ci, mean_score - ci], [row_y, row_y], color=color)
            plt.plot(mean_score, row_y, 'o', color=color, label=group_name[group][k], markersize=3)

    # Thin horizontal separators between consecutive models. A dedicated loop
    # name avoids shadowing any outer loop variable.
    for sep in range(len(results_list)-1):
        plt.axhline((top-((sep+1)*gap_result)+int(gap_between_result/2)), linestyle='--', color='k', linewidth=0.3)

    legend_without_duplicate_labels(plt)

## Task transfer 

In [None]:
# Compare the original and the proposed model on each transfer task.
color_list = ['C0', 'C1']

# Files are ordered task by task, original first and proposed second.
results_list = [
    'results/task_transfer_3D_{task}_{variant}_results'.format(task=task, variant=variant)
    for task in ('gender', 'age')
    for variant in ('original', 'proposed')
]

model_list = ['Gender', 'Age']

# y coordinate of the first row; each following row is drawn 2 units lower.
top = 5

plt.figure(figsize=(5, 2), dpi=400)
plt.title('Task transfer', fontsize=10)
tick_positions = [4 * pos for pos in range(len(model_list))]
plt.yticks(tick_positions[::-1], model_list, fontsize=12)
plt.xticks(fontsize=9)

# (legend label, colour) for the two rows drawn per task, in drawing order.
variants = [('Original', color_list[0]), ('Proposed', color_list[1])]

for k in range(len(model_list)):
    for offset, (label, color) in enumerate(variants):
        slot = 2*k + offset          # index into results_list and row number
        row_y = top - slot*2

        # Each file stores a (mean, lower bound, upper bound) triple.
        # NOTE: pickle.load can execute arbitrary code; only open files you trust.
        with open(results_list[slot], "rb") as fp:
            all_mean_score, all_lower, all_upper = pickle.load(fp)

        plt.plot([all_upper, all_lower], [row_y, row_y], color=color)
        plt.plot(all_mean_score, row_y, 'o', color=color, label=label, markersize=3)

legend_without_duplicate_labels(plt)