In [96]:
import numpy as np
import os 
import cv2
import random
import sys 
import csv
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import bz2
from collections import defaultdict
from bz2 import BZ2File
from datetime import datetime
from PIL import Image
from sklearn.metrics import roc_curve, accuracy_score
from sklearn.metrics import auc, confusion_matrix
import sys
from scikit_posthocs import posthoc_ttest
sys.path.append('.')
from src.modules import *

In [128]:
def statistics(results_all, bs_num=50):
    
    np.random.seed(10)
    
    num_sample = len(results_all[0][0])
    
    class0idx = np.where(results_all[0][0]==0)[0]
    class1idx = np.where(results_all[0][0]==1)[0]
    
    perf_models = {}
    for j in range(len(results_all)):
        perf_models[f'perf_bs{j}'] = {'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]}
    
    for k in range(bs_num):
        idx0 = np.random.choice(class0idx, len(class0idx))
        idx1 = np.random.choice(class1idx, len(class1idx))
        idx = list(idx0) + list(idx1)
        for j in range(len(results_all)):
            _gt = results_all[j][0][idx]
            _pred = results_all[j][1][idx]
            _attr = results_all[j][2][:, idx]

            val_es_acc, val_es_auc, val_aucs_by_attrs, val_dpds, val_eods, val_between_group_disparity = evalute_comprehensive_perf(_pred,
                                                                                                                                    _gt,
                                                                                                                                    _attr)
            for i in range(7):
                perfi = [val_es_acc[i], val_es_auc[i]]
                perfi.extend(val_aucs_by_attrs[i])
                perfi.append(val_dpds[i])
                perfi.append(val_eods[i])
                perfi.extend(val_between_group_disparity[i])
#                 perf_bs_proposed[f'attr{i}'].append(perfi)

                perf_models[f'perf_bs{j}'][f'attr{i}'].append(perfi)
    
    attr_pvalues = {'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]}
    
    for i in range(7):
        num_metrics = len(perf_models['perf_bs0'][f'attr{i}'][0])
        for k in range(num_metrics):
            vpaired = []
            for j in range(len(perf_models.keys())):
                vpaired.append(np.array(perf_models[f'perf_bs{j}'][f'attr{i}'])[:, k])
            
            vpaired = np.array(vpaired)
            vpaired = vpaired[:, ~np.isnan(vpaired).any(axis=0)]
            
            pvalues = posthoc_ttest(vpaired)
            
            attr_pvalues[f'attr{i}'].append(pvalues)
            
    return attr_pvalues

In [130]:
modality = 'oct_bscans' #'slo_fundus or oct_bscans'
folder = '/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results'


modality_folder = f'{folder}/dr_{modality}_race'

attrmaps = {'attr0':'Race', 'attr1':'Gender', 'attr2':'Ethnicity', 'attr3':'Maritalstatus', 'attr4':'Language', 
            'attr5':'Gender+Race', 'attr6':'Gender+Ethnicity'}

bestresults = []
epochresults = []
for f in os.listdir(modality_folder):
    if f.endswith('.csv'):
        paths = pd.read_csv(os.path.join(modality_folder, f))['  path']
        bestresults.append([f, paths[0].split('/')[-1].split('auc')[0]])
    else:
        epochresults.append(f)
        
pairedresults = {}
for i, v in enumerate(bestresults):
    for p in epochresults:
        if v[1] in p:
            pairedresults[i] = [v[0], p]

best_results_all = {'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]}
column_names = {'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]}
rownames = []
pred_results_all = []

num_attrs = 7

for k in [0, 2, 6, 8, 4, 3, 9, 1, 7, 5]:
    v = pairedresults[k]
    print(v[0])
    resultpd = pd.read_csv(os.path.join(modality_folder, v[0]))
    results = {}
    attrgroupnums = {}
    for name, data in resultpd.items():
        results[name.strip()] = data.values[-1]
        if 'group' in name:
            attri = name.split('_')[1]
            if attri not in attrgroupnums.keys():
                attrgroupnums[attri] = 1
            else:
                attrgroupnums[attri] += 1
    modelname = v[0].split('_')[1]
    if 'oversample' in v[0]:
        modelname = modelname + '_oversample'
    
    rownames.append(modelname)
    
    for i in range(num_attrs):
        vs = [results[f'esacc_attr{i}'], results['acc'], results[f'esauc_attr{i}'], results['auc']]
        vs = vs + [results[f'auc_attr{i}_group{j}'] for j in range(attrgroupnums[f'attr{i}'])]
        vs = vs + [results[f'dpd_attr{i}'], results[f'eod_attr{i}'], 
                   results[f'std_group_disparity_attr{i}'],
                   results[f'max_group_disparity_attr{i}']]
        best_results_all[f'attr{i}'].append(vs)
        
        vsname = ['esacc', 'acc', 'esauc', 'auc']
        vsname = vsname + [f'auc_group{j}' for j in range(attrgroupnums[f'attr{i}'])]
        vsname = vsname + ['dpd', 'eod', 'std_group_disparity', 'max_group_disparity']
        column_names[f'attr{i}'] = vsname

    npzdata = np.load(os.path.join(modality_folder, v[1], 'pred_gt_best_epoch.npz'))
    test_gt = npzdata['test_gt']
    test_pred = npzdata['test_pred']
    test_attr = npzdata['test_attr']
    pred_results_all.append([test_gt, test_pred, test_attr])


# append adversarial outcome
for modelname in outcome_adv.keys():
    modelname_subfix = f'{modelname}_adv'
    rownames.append(modelname_subfix)
    
    for attri in outcome_adv[modelname]:
        best_results_all[attri].append(attri[0])

    
# write best outcome    
writer = pd.ExcelWriter(f"{folder}/dr_{modality}_race_combined_outcome.xlsx", engine="xlsxwriter")

for k in attrmaps.keys():
    attridx = int(k[-1])
    pdattri = pd.DataFrame(best_results_all[f'attr{attridx}'], 
                 index=rownames, 
                 columns=column_names[f'attr{attridx}'])
    pdattri.to_excel(writer, sheet_name=attrmaps[k])

    
attr_pvalues = statistics(pred_results_all, )

for i in range(num_attrs):
    rowidx = 0
    vsname = ['esacc', 'acc', 'esauc', 'auc']
    vsname = vsname + [f'auc_group{j}' for j in range(len(attr_pvalues[f'attr{i}']))]
    vsname = vsname + ['dpd', 'eod', 'std_group_disparity', 'max_group_disparity']
    
    for idx, pvalues in enumerate(attr_pvalues[f'attr{i}']):
        
        rownames_prefix = [str(v)+'@'+vsname[idx] for v in rownames]
        
        pvdf = pd.DataFrame(np.array(pvalues), index=rownames_prefix, columns=rownames_prefix)
        
        pvdf.to_excel(writer, sheet_name=attrmaps[f'attr{i}']+'_statistics', startrow=rowidx)
        rowidx += len(pvalues) + 2
    
writer.close()
# pairedresults

best_swin_oct_bscans_race_oversample.csv
best_vit_oct_bscans_race.csv
best_resnet_oct_bscans_race_oversample.csv
best_convnext_oct_bscans_race.csv
best_efficientnet_oct_bscans_race.csv
best_efficientnet_oct_bscans_race_oversample.csv
best_swin_oct_bscans_race.csv
best_vit_oct_bscans_race_oversample.csv
best_resnet_oct_bscans_race.csv
best_convnext_oct_bscans_race_oversample.csv








  vs[i, j] = ss.ttest_ind(xg.get_group(groups[i]), xg.get_group(


In [178]:
attridx = 0
pd.DataFrame(best_results_all[f'attr{attridx}'], index=rownames, columns=column_names[f'attr{attridx}'])

Unnamed: 0,esacc,acc,esauc,auc,auc_group0,auc_group1,auc_group2,dpd,eod,std_group_disparity,max_group_disparity
swin,0.8474,0.9137,0.6889,0.7743,0.7431,0.6924,0.7852,0.0275,0.1902,0.049,0.1198
convnext,0.8528,0.9067,0.6484,0.7626,0.6745,0.691,0.7791,0.0059,0.127,0.0602,0.1372
resnet,0.8604,0.917,0.6464,0.7902,0.6601,0.715,0.8074,0.0048,0.0914,0.0769,0.1863
vit,0.845,0.9073,0.6459,0.7582,0.6737,0.6848,0.7741,0.0253,0.1653,0.0593,0.1324
efficientnet,0.8603,0.9183,0.6461,0.7904,0.6656,0.7108,0.8093,0.0033,0.0683,0.0759,0.1818
swin_oversample,0.823,0.9023,0.6635,0.7837,0.6933,0.7148,0.8056,0.046,0.078,0.0621,0.1434
convnext_oversample,0.8561,0.9083,0.7046,0.781,0.7592,0.7112,0.7979,0.0237,0.0813,0.0454,0.1109
resnet_oversample,0.8346,0.9043,0.6615,0.7864,0.71,0.6967,0.809,0.0568,0.2762,0.0637,0.1429
vit_oversample,0.8359,0.905,0.6354,0.7729,0.6702,0.686,0.7998,0.0431,0.1848,0.0747,0.1676
efficientnet_oversample,0.8135,0.8947,0.6864,0.7903,0.6967,0.747,0.8048,0.0331,0.3085,0.0559,0.1367


In [None]:
# transform adversarial results

In [146]:
folder = '/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv'

attrs = ['race', 'gender', 'ethnicity', 'maritalstatus', 'language', 'gender_race', 'gender_ethnicity']
attrmaps = {'attr0':'Race', 'attr1':'Gender', 'attr2':'Ethnicity', 'attr3':'Maritalstatus', 'attr4':'Language', 
                'attr5':'Gender+Race', 'attr6':'Gender+Ethnicity'}


outcome_adv = {'efficientnet':{'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]},
              'vit':{'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]},
              'resnet':{'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]},
              'swin':{'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]},
              'convnext':{'attr0':[], 'attr1':[], 'attr2':[], 'attr3':[], 'attr4':[], 'attr5':[], 'attr6':[]}}

for idx, attr in enumerate(attrs):
    modality = f'dr_slo_fundus_{attr}'

    modality_folder = f'{folder}/{modality}'

    bestresults = []
    epochresults = []
    for f in os.listdir(modality_folder):
        if f.endswith('.csv'):
            paths = pd.read_csv(os.path.join(modality_folder, f))['  path']
            bestresults.append([f, paths[0].split('/')[-1].split('auc')[0]])
        else:
            epochresults.append(f)
    pairedresults = {}
    for i, v in enumerate(bestresults):
        for p in epochresults:
            if v[1] in p:
                pairedresults[i] = [v[0], p]
    
    for k in pairedresults.keys():
        v = pairedresults[k]
    
        resultpd = pd.read_csv(os.path.join(modality_folder, v[0]))
        results = {}
        attrgroupnums = {}
        for name, data in resultpd.items():
            results[name.strip()] = data.values[-1]
            if 'group' in name:
                attri = name.split('_')[1]
                if attri not in attrgroupnums.keys():
                    attrgroupnums[attri] = 1
                else:
                    attrgroupnums[attri] += 1
        modelname = v[0].split('_')[1]
        
        
        vs = [results[f'esacc_attr{idx}'], results['acc'], results[f'esauc_attr{idx}'], results['auc']]
        vs = vs + [results[f'auc_attr{idx}_group{j}'] for j in range(attrgroupnums[f'attr{idx}'])]
        vs = vs + [results[f'dpd_attr{idx}'], results[f'eod_attr{idx}'], 
                   results[f'std_group_disparity_attr{idx}'],
                   results[f'max_group_disparity_attr{idx}']]
        
        npzdata = np.load(os.path.join(modality_folder, v[1], 'pred_gt_best_epoch.npz'))
        test_gt = npzdata['test_gt']
        test_pred = npzdata['test_pred']
        test_attr = npzdata['test_attr']
        vs_epoch = [test_gt, test_pred, test_attr]
       
        outcome_adv[modelname][f'attr{idx}'] = (vs, vs_epoch)
    

In [150]:
outcome_adv['efficientnet']['attr0']

([0.8738,
  0.923,
  0.7259,
  0.8084,
  0.7658,
  0.7499,
  0.821,
  0.0113,
  0.1989,
  0.0377,
  0.0879],
 [array([0., 0., 0., ..., 0., 1., 0.], dtype=float32),
  array([0.02058536, 0.01218961, 0.01847378, ..., 0.00180938, 0.10188638,
         0.00496462], dtype=float32),
  array([[2, 2, 2, ..., 2, 2, 2],
         [1, 1, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 1],
         ...,
         [0, 0, 0, ..., 0, 0, 1],
         [5, 5, 4, ..., 4, 4, 4],
         [1, 1, 0, ..., 0, 0, 2]])])

In [140]:
pairedresults

{0: ['/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/best_swin_slo_fundus_gender_ethnicity_adv.csv',
  '/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/swin_slo_fundus_lr1e-4_bz10_adv_seed13_auc0.7877'],
 1: ['/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/best_vit_slo_fundus_gender_ethnicity_adv.csv',
  '/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/vit_slo_fundus_lr1e-4_bz10_adv_seed13_auc0.7726'],
 2: ['/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/best_convnext_slo_fundus_gender_ethnicity_adv.csv',
  '/data/home/shim/pyspace/others/pyspace/ICLR_30k/Harvard-DR30k_results/results_adv/dr_slo_fundus_gender_ethnicity/convnext_slo_fundus_lr1e-4_bz10_adv_seed13_au