In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn import metrics
import tqdm

In [2]:
# Directory that holds the per-experiment probability CSVs.
# NOTE(review): this name is not read anywhere in the visible cells; patient_level_auc
# carries the same string as its `base_path` default — consider passing this instead.
csv_path = "/users/riya/race/classifier_experiments/CNN_train/outputs/probabilities/"
# Metadata CSV with (at least) the image_id, subject_id and race columns used below.
race_data_path = "/users/riya/race/csv/image_race_data.csv"

In [3]:
# Load the image -> subject/race metadata table used to annotate the probability CSVs.
race_data = pd.read_csv(race_data_path)

In [4]:
race_data.head()

Unnamed: 0,subject_id,race,variable,value,image_id,fundus_location,segmentation_location
0,BEAU-0002,black,posterior,https://tctc.ohsu.edu/uploads/30121.bmp,30121,30121.png,30121.bmp
1,BEAU-0003,white,posterior,https://tctc.ohsu.edu/uploads/30672.bmp,30672,30672.png,30672.bmp
2,BEAU-0003,white,posterior,https://tctc.ohsu.edu/uploads/30676.bmp,30676,30676.png,30676.bmp
3,BEAU-0004,white,posterior,https://tctc.ohsu.edu/uploads/30630.bmp,30630,30630.png,30630.bmp
4,BEAU-0006,white,posterior,https://tctc.ohsu.edu/uploads/30696.bmp,30696,30696.png,30696.bmp


In [5]:
def get_acc(true_values, probabilities):
    """Return 1 minus the fraction of entries whose rounded probability equals the label.

    Each probability is rounded to the nearest integer (ties go to the even
    integer, so 0.5 rounds to 0) and compared with the label at the same
    position. The returned value is ``1 - matches / n`` rounded to 4 decimals,
    i.e. the share of positions where the rounded probability and the label
    DIFFER.

    NOTE(review): the callers in this notebook appear to pass the probability
    of class 0 while the labels use 1 for the other class; under that
    convention a "difference" is a correct prediction and the result reads as
    an accuracy. Confirm against the model's output convention.

    Parameters
    ----------
    true_values : array-like
        Labels. A pandas Series with any index, a list or an ndarray.
    probabilities : array-like
        Scores in the same order and of the same length as ``true_values``.

    Returns
    -------
    numpy.float64
        ``1 - match_rate`` rounded to 4 decimals, or ``nan`` when the inputs
        are empty (previously a ZeroDivisionError).

    Raises
    ------
    AssertionError
        If the two inputs have different lengths.
    """
    # Positional arrays: drops any pandas index, so Series with mismatched
    # indexes are compared element by element, in order.
    labels = np.asarray(true_values)
    probs = np.asarray(probabilities, dtype=float)

    assert len(labels) == len(probs), "true_values and probabilities differ in length"

    if len(labels) == 0:
        # Nothing to score (e.g. a class with no patients); avoid dividing by zero.
        return np.nan

    # np.round uses round-half-to-even, the same rule as the builtin round().
    match_rate = np.mean(labels == np.round(probs))

    return np.round(1 - match_rate, 4)

In [6]:
def patient_level_auc(race_data, csv_paths,
                      base_path = "/users/riya/race/classifier_experiments/CNN_train/outputs/probabilities/"):
    """Load probability CSVs and annotate every row with image id, subject id and race code.

    Despite its name this function computes no AUC; it only prepares the
    frames that ``get_the_aucs`` consumes.

    Parameters
    ----------
    race_data : pandas.DataFrame
        Metadata with ``image_id``, ``subject_id`` and ``race`` columns. When
        an ``image_id`` occurs more than once, its first row is used.
    csv_paths : sequence of str
        File names (or sub-paths) appended to ``base_path``.
    base_path : str
        Prefix joined to each entry of ``csv_paths`` by plain string
        concatenation, so it must end with a path separator.

    Returns
    -------
    tuple
        ``(csv_paths, prob_csv_arr)`` where ``prob_csv_arr[i]`` is the frame
        read from ``csv_paths[i]`` with three added columns: ``image_id``,
        ``subject_id`` and ``race`` (0 for 'black', 1 for 'white').

    Raises
    ------
    IndexError
        If an ``img_loc`` value contains no digits, or if an extracted image
        id is absent from ``race_data``.
    """
    # Build the image_id -> subject/race lookup once instead of scanning
    # race_data twice for every row of every CSV. keep='first' reproduces the
    # "take the first matching row" behaviour of a filtered lookup.
    first_rows = race_data.drop_duplicates(subset='image_id', keep='first')
    subject_by_image = dict(zip(first_rows['image_id'], first_rows['subject_id']))
    race_by_image = dict(zip(first_rows['image_id'], first_rows['race']))
    race_codes = {'black': 0, 'white': 1}

    prob_csv_arr = []

    for path in csv_paths:
        prob_csv = pd.read_csv(base_path + path)

        # The image id is taken as the FIRST run of digits in img_loc.
        # NOTE(review): assumes no directory component of img_loc contains
        # digits before the file name — confirm against the CSV contents.
        img_id_arr = [int(re.findall(r'\d+', img_loc)[0]) for img_loc in prob_csv['img_loc']]

        missing = sorted({img_id for img_id in img_id_arr if img_id not in subject_by_image})
        if missing:
            raise IndexError(
                "image_id(s) %s from %r not found in race_data" % (missing, path)
            )

        subject_id_arr = [subject_by_image[img_id] for img_id in img_id_arr]

        race_arr = []
        for img_id in img_id_arr:
            label = race_by_image[img_id]
            if label in race_codes:
                race_arr.append(race_codes[label])
            else:
                # NOTE(review): an unrecognised label is only reported and is
                # left in the column as-is, which makes the column non-numeric
                # for any later aggregation.
                print('darn it')
                race_arr.append(label)

        prob_csv['image_id'] = img_id_arr
        prob_csv['subject_id'] = subject_id_arr
        prob_csv['race'] = race_arr

        prob_csv_arr.append(prob_csv)

    return csv_paths, prob_csv_arr

In [11]:
def get_the_aucs(csv_paths, prob_csv_arr, accs):
    """Compute a patient-level ROC AUC (and optionally per-race get_acc values) per CSV.

    For every frame, rows are grouped by ``subject_id`` and reduced to the
    median ``probability`` and median ``race``. The AUC is then computed from
    those per-subject medians, scoring each subject with ``1 - median
    probability`` (so the ``probability`` column is treated as the score of
    the class labelled 0).

    Parameters
    ----------
    csv_paths : sequence of str
        Keys for the returned dictionaries; ``csv_paths[i]`` labels
        ``prob_csv_arr[i]``.
    prob_csv_arr : sequence of pandas.DataFrame
        Frames with ``subject_id``, ``probability`` and numeric ``race``
        columns, as produced by ``patient_level_auc``.
    accs : bool
        When truthy, also return the ``get_acc`` value for the subjects whose
        median race is 1 and for those whose median race is 0.

    Returns
    -------
    dict or tuple of dict
        ``auc_dict`` alone, or ``(auc_dict, white_acc_dict, black_acc_dict)``
        when ``accs`` is truthy. Every dict maps a CSV path to a value rounded
        to 4 decimals.
    """
    # Named *_arr rather than `patient_level_auc` so the sibling function of
    # that name is not shadowed inside this body.
    auc_arr = []
    white_acc_arr = []
    black_acc_arr = []

    for prob_csv in prob_csv_arr:

        med_prob_arr = []
        med_race_arr = []

        # One pass over the frame; groups come out sorted by subject_id.
        for _, subject_rows in prob_csv.groupby('subject_id'):
            med_prob_arr.append(np.median(np.array(subject_rows['probability'])))
            med_race_arr.append(np.median(np.array(subject_rows['race'])))

        # Labels use 1 for 'white', so the score handed to roc_auc_score must
        # be the white-class score: the complement of the stored probability.
        med_auc = metrics.roc_auc_score(med_race_arr, 1 - np.array(med_prob_arr))
        auc_arr.append(np.round(med_auc, 4))

        if accs:
            acc_df = pd.DataFrame({'race_pred': med_prob_arr, 'race_true': med_race_arr})

            white_acc_df = acc_df[acc_df['race_true'] == 1]
            black_acc_df = acc_df[acc_df['race_true'] == 0]

            white_acc_arr.append(get_acc(white_acc_df['race_true'], white_acc_df['race_pred']))
            black_acc_arr.append(get_acc(black_acc_df['race_true'], black_acc_df['race_pred']))

    auc_dict = {csv_paths[i]: auc_arr[i] for i in range(len(csv_paths))}

    if accs:
        white_acc_dict = {csv_paths[i]: white_acc_arr[i] for i in range(len(csv_paths))}
        black_acc_dict = {csv_paths[i]: black_acc_arr[i] for i in range(len(csv_paths))}

        return auc_dict, white_acc_dict, black_acc_dict

    return auc_dict

In [7]:
# Probability CSVs whose names carry `dark_background`, plus the two unmasked baselines.
center_retained_arr = [
    "original_epoch50.csv",
    "skeletonized_epoch50.csv",
    "shadow_rings_dark_background_45_90_epoch50.csv",
    "shadow_rings_dark_background_45_90_skeletonized_epoch50.csv",
    "shadow_regions_dark_background_45_epoch50.csv",
    "shadow_regions_dark_background_45_skeletonized_epoch50.csv",
    "shadow_regions_dark_background_60_epoch50.csv",
    "shadow_regions_dark_background_60_skeletonized_epoch50.csv",
    "shadow_regions_dark_background_90_epoch50.csv",
    "shadow_regions_dark_background_90_skeletonized_epoch50.csv",
]

In [11]:
# Probability CSVs whose names carry `dark_center`.
back_retained_arr = [
    "shadow_rings_dark_center_45_90_epoch50.csv",
    "shadow_rings_dark_center_45_90_skeletonized_epoch50.csv",
    "shadow_regions_dark_center_45_epoch50.csv",
    "shadow_regions_dark_center_45_skeletonized_epoch50.csv",
    "shadow_regions_dark_center_60_epoch50.csv",
    "shadow_regions_dark_center_60_skeletonized_epoch50.csv",
    "shadow_regions_dark_center_90_epoch50.csv",
    "shadow_regions_dark_center_90_skeletonized_epoch50.csv",
]

In [19]:
# Probability CSVs under the `#10(macula_focus)` folder: show/hide macula, each with
# and without the `skeletonized` suffix. Paths are relative to the probabilities directory.
macula_arr = ["#10(macula_focus)/show_macula_brightened_by_0_epoch50.csv", 
              "#10(macula_focus)/hide_macula_brightened_by_0_epoch50.csv", 
              "#10(macula_focus)/show_macula_brightened_by_0_skeletonized_epoch50.csv",
              "#10(macula_focus)/hide_macula_brightened_by_0_skeletonized_epoch50.csv"]

In [25]:
# Single probability CSV under the `#8(random_pixels)` folder.
nonzero_pixel = ["#8(random_pixels)/random_pixels_brightened_by_0_skeletonized_epoch50.csv"]

In [30]:
# Probability CSVs under the `#13(random_pixels_constant_pixel_count)` folder, one per
# `brightened_by_<N>` value with N = 0, 20, ..., 120.
piv_arr = ["#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_0_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_20_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_40_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_60_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_80_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_100_epoch50.csv",
          "#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_120_epoch50.csv"]

In [31]:
# Read every CSV listed in piv_arr and attach image_id, subject_id and numeric race columns.
# Swap piv_arr for another path list to analyse a different experiment.
csv_paths, prob_csv_arr = patient_level_auc(race_data, piv_arr)

In [32]:
# Per-CSV patient-level AUC; accs=True additionally returns the get_acc value for the
# race == 1 ('white') and race == 0 ('black') subjects.
auc_dict, white_acc_dict, black_acc_dict = get_the_aucs(csv_paths, prob_csv_arr, accs = True)

In [33]:
auc_dict, white_acc_dict, black_acc_dict

({'#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_0_epoch50.csv': 0.807,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_20_epoch50.csv': 0.8107,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_40_epoch50.csv': 0.7855,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_60_epoch50.csv': 0.7742,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_80_epoch50.csv': 0.7806,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_100_epoch50.csv': 0.7349,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_120_epoch50.csv': 0.7607},
 {'#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_0_epoch50.csv': 0.6821,
  '#13(random_pixels_constant_pixel_count)/full_set_random_pixels_brightened_by_20_epoch50.csv': 0.7351,
  '#13(random_pixels_constant_pixel_count)/full_set_ran

In [29]:
# NOTE(review): saved output below is from execution count 29 and its keys match
# `nonzero_pixel`, so it presumably comes from a run made before the piv_arr run; re-running
# this cell now would show the piv_arr results instead.
auc_dict, white_acc_dict, black_acc_dict

({'#8(random_pixels)/random_pixels_brightened_by_0_skeletonized_epoch50.csv': 0.7672},
 {'#8(random_pixels)/random_pixels_brightened_by_0_skeletonized_epoch50.csv': 0.7313},
 {'#8(random_pixels)/random_pixels_brightened_by_0_skeletonized_epoch50.csv': 0.7215})

In [22]:
# NOTE(review): saved output below is from execution count 22 and its keys match
# `macula_arr`; it is not reproduced by running the notebook top to bottom.
auc_dict

{'#10(macula_focus)/show_macula_brightened_by_0_epoch50.csv': 0.9646,
 '#10(macula_focus)/hide_macula_brightened_by_0_epoch50.csv': 0.9302,
 '#10(macula_focus)/show_macula_brightened_by_0_skeletonized_epoch50.csv': 0.9252,
 '#10(macula_focus)/hide_macula_brightened_by_0_skeletonized_epoch50.csv': 0.9126}

In [23]:
# NOTE(review): saved output below is from execution count 23 and its keys match `macula_arr`.
white_acc_dict

{'#10(macula_focus)/show_macula_brightened_by_0_epoch50.csv': 0.8344,
 '#10(macula_focus)/hide_macula_brightened_by_0_epoch50.csv': 0.9272,
 '#10(macula_focus)/show_macula_brightened_by_0_skeletonized_epoch50.csv': 0.7682,
 '#10(macula_focus)/hide_macula_brightened_by_0_skeletonized_epoch50.csv': 0.8146}

In [24]:
# NOTE(review): saved output below is from execution count 24 and its keys match `macula_arr`.
black_acc_dict

{'#10(macula_focus)/show_macula_brightened_by_0_epoch50.csv': 0.9659,
 '#10(macula_focus)/hide_macula_brightened_by_0_epoch50.csv': 0.7273,
 '#10(macula_focus)/show_macula_brightened_by_0_skeletonized_epoch50.csv': 0.9205,
 '#10(macula_focus)/hide_macula_brightened_by_0_skeletonized_epoch50.csv': 0.8523}

In [14]:
# NOTE(review): saved output below is from execution count 14 and its keys match
# `back_retained_arr`; it is not reproduced by running the notebook top to bottom.
auc_dict

{'shadow_rings_dark_center_45_90_epoch50.csv': 0.9755,
 'shadow_rings_dark_center_45_90_skeletonized_epoch50.csv': 0.9499,
 'shadow_regions_dark_center_45_epoch50.csv': 0.9641,
 'shadow_regions_dark_center_45_skeletonized_epoch50.csv': 0.9643,
 'shadow_regions_dark_center_60_epoch50.csv': 0.9675,
 'shadow_regions_dark_center_60_skeletonized_epoch50.csv': 0.951,
 'shadow_regions_dark_center_90_epoch50.csv': 0.8854,
 'shadow_regions_dark_center_90_skeletonized_epoch50.csv': 0.9343}

In [10]:
# NOTE(review): saved output below is from execution count 10 and its keys match
# `center_retained_arr`; it is not reproduced by running the notebook top to bottom.
auc_dict

{'original_epoch50.csv': 0.987,
 'skeletonized_epoch50.csv': 0.9674,
 'shadow_rings_dark_background_45_90_epoch50.csv': 0.9315,
 'shadow_rings_dark_background_45_90_skeletonized_epoch50.csv': 0.9464,
 'shadow_regions_dark_background_45_epoch50.csv': 0.983,
 'shadow_regions_dark_background_45_skeletonized_epoch50.csv': 0.9028,
 'shadow_regions_dark_background_60_epoch50.csv': 0.9784,
 'shadow_regions_dark_background_60_skeletonized_epoch50.csv': 0.9321,
 'shadow_regions_dark_background_90_epoch50.csv': 0.9854,
 'shadow_regions_dark_background_90_skeletonized_epoch50.csv': 0.9624}