In [None]:
import os
import sys
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import tensorflow as tf
from models import *
from utils import *
import numpy as np
import glob
import random
import matplotlib.pyplot as plt
plt.rcParams['svg.fonttype'] = 'none'
import pandas as pd
from skimage.transform import resize
#import sklearn
import copy
#%matplotlib widget
from skimage.restoration import denoise_tv_chambolle
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import sklearn.metrics as metrics
import matplotlib
import time
import scipy
import scipy.stats as st

matplotlib.rcParams['xtick.minor.size'] = 0
matplotlib.rcParams['xtick.minor.width'] = 0

## Define utility functions, load networks


In [None]:
# Instantiate the two networks used throughout the notebook. U_Net and effnet
# are not defined in this notebook (presumably provided by the `models`
# star-import — TODO confirm). Weights are loaded later, per split / per angle.
unet = U_Net()
model = effnet()

# NOTE(review): `augmentations`, `df_test` and `batchsize` are all reassigned in
# the sparse-view classification cell below before they are first used, so the
# values set here never take effect.
augmentations = False

# NOTE(review): this path uses './Classification/' while every other cell uses
# './Classifications/' — looks like a stale leftover; confirm the file exists.
df_test = pd.read_csv('./Classification/test_dataset_slice_id.csv')

batchsize = 32

def pred_TV(img, weight):
    """Apply Chambolle total-variation denoising to every slice and channel.

    `img` is indexed with four axes (sample, row, column, channel). The input
    is divided by 4096 before denoising and the result is multiplied by 4096
    again, so the output is on the same scale as the input.

    Parameters
    ----------
    img : array-like with four axes
    weight : passed unchanged to `denoise_tv_chambolle` as `weight`

    Returns
    -------
    Array with the same shape as `img / 4096.`.
    """
    scaled = img / 4096.
    denoised = np.zeros_like(scaled)

    n_samples, n_channels = scaled.shape[0], scaled.shape[3]
    for ch in range(n_channels):
        # each 2-D slice is denoised independently, then stacked back together
        slices = [
            denoise_tv_chambolle(scaled[idx, :, :, ch], weight=weight)
            for idx in range(n_samples)
        ]
        denoised[:, :, :, ch] = np.stack(slices)

    return denoised * 4096.

def pred_UNet(img, model):
    """Run `model.predict` on the first three channels of `img`, one at a time.

    The input is divided by 4096, each of channels 0, 1 and 2 is passed to
    `model.predict` separately, the three outputs are concatenated along
    axis 3, and the result is multiplied by 4096 again.

    Parameters
    ----------
    img : array indexed as (sample, row, column, channel), at least 3 channels
    model : object exposing `predict`; its output must have an axis 3

    Returns
    -------
    Concatenated per-channel predictions, rescaled by 4096.
    """
    scaled = img / 4096.
    per_channel = [model.predict(scaled[:, :, :, ch]) for ch in (0, 1, 2)]
    return np.concatenate(per_channel, axis=3) * 4096.

def pred_EffNet(img, model):
    """Window, rescale and resize `img`, then classify it with `model`.

    The input is shifted by -1024, clipped to [0, 80], mapped linearly to
    [0, 255] and resized to 260 x 260 with `tf.image.resize` before being
    passed to `model.predict`.
    NOTE(review): the 1024 offset and 0-80 window look like a CT brain window
    on offset Hounsfield units — confirm against the data pipeline.

    Returns
    -------
    (predictions, img_re) : the output of `model.predict` and the resized
    input that was fed to the model.
    """
    windowed = (img - 1024).clip(0, 80)
    as_bytes_range = np.array((windowed / 80.) * 255.)
    img_re = tf.image.resize(as_bytes_range, [260, 260])

    return model.predict(img_re), img_re

def check_path(path):
    """Ensure that the directory `path` exists, creating parents as needed.

    Uses `exist_ok=True` instead of a separate `isdir` check so the call is
    idempotent and cannot fail when the directory is created between the
    check and the `makedirs` call.

    Raises
    ------
    FileExistsError
        If `path` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

### Classify raw, TV, and U-Net sparse-view data

In [None]:
# Hemorrhage classes (column order of the classifier output and of the labels)
# and the sparse-view datasets, named by their number of projections.
classes = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
angles = [2048, 1024, 512, 256, 128, 64]

# TV denoising weight used for each number of projections.
weights_tv = {2048:0.001, 1024:0.002, 512:0.009, 256:0.045, 128:0.153, 64:0.570}
augmentations = False
batchsize = 16

# Load the dataframe describing the test data.
# NOTE(review): the `[:50]` slice restricts the evaluation to the first 50 rows —
# confirm whether this is an intentional subset or a leftover debugging limit.
df_test = pd.read_csv(os.path.abspath("./test_data.csv"))[:50]


def _scores_to_frame(filenames, score_rows):
    """Build a frame with a 'filename' column plus one float column per class.

    `score_rows` holds one indexable row per sample; entry `k` of each row is
    stored in the column named `classes[k]`.
    """
    frame = pd.DataFrame({'filename': list(filenames)})
    for class_idx, class_name in enumerate(classes):
        frame[class_name] = [float(row[class_idx]) for row in score_rows]
    return frame


# Loop through the cross-validation splits of the classifier.
for split in range(0, 5):
    print("split: ", split)

    # EfficientNet checkpoint chosen for this split (lowest validation loss).
    model.load_weights(os.path.abspath(f'./model_weights/EfficientNet/split_{split}/model_07.h5'))

    # Loop through the sparse-view datasets.
    for angle in angles:
        print('\n', "angle: ", angle)
        # The U-Net weights depend only on the number of projections.
        unet.load_weights(os.path.abspath(f'./model_weights/U-Net/{angle}/model_75.h5'))

        path = os.path.abspath(f'./Data/{angle}/')

        gen_test = DataGen(df = df_test, path = path, batch_size=batchsize, augmentation = augmentations, train=False, shape=(512, 512))
        filename_list = []
        pred_raw_list = []
        pred_UNet_list = []
        pred_tv_list = []
        label_list = []

        # `__len__()` is called explicitly (as before) because DataGen is not
        # visible here and `len()` would additionally require a plain int.
        n_batches = gen_test.__len__()

        # Loop through the batches of this dataset.
        for i in np.arange(n_batches):
            sys.stdout.write('\r'+f'{i}/{n_batches}')

            inpt_batch, label_batch, filename_batch = gen_test[i]

            # Classify the raw input, the U-Net output and the TV-denoised input.
            logits_raw, _ = pred_EffNet(inpt_batch, model)
            prediction_UNet = pred_UNet(inpt_batch, unet)
            logits_UNet, _ = pred_EffNet(prediction_UNet, model)
            img_tv = pred_TV(inpt_batch, weight=weights_tv[angle])
            logits_tv, _ = pred_EffNet(img_tv, model)

            filename_list.extend(filename_batch)
            pred_raw_list.extend(logits_raw)
            pred_UNet_list.extend(logits_UNet)
            pred_tv_list.extend(logits_tv)
            label_list.extend(label_batch)

        # Fresh frames for every (split, angle): nothing is carried over from a
        # previous iteration or from an earlier run of this cell.
        df_subsampled = _scores_to_frame(filename_list, pred_raw_list)
        df_UNet = _scores_to_frame(filename_list, pred_UNet_list)
        df_tv = _scores_to_frame(filename_list, pred_tv_list)
        # Not written to disk in this cell; the column is now spelled 'filename'
        # (it was 'filname') so it matches the other frames.
        df_label = _scores_to_frame(filename_list, label_list)

        # One CSV per processing type, split and number of projections.
        for frame, subdir in ((df_subsampled, 'sparse'), (df_UNet, 'U-Net'), (df_tv, 'TV')):
            out_dir = os.path.abspath(f'./Classifications/{subdir}/split_{split}')
            check_path(out_dir)
            frame.to_csv(out_dir + f'/{angle}_test.csv', index=False)
        

### Classify ground truth (4096 projections) data

In [None]:
# Classify the full-view data (4096 projections) with every split's classifier
# and store the test labels alongside the predictions.
classes = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

angles = [4096]

df_subsampled = pd.DataFrame()
df_label = pd.DataFrame()

for split in range(5):
    print("split: ", split)

    # EfficientNet checkpoint chosen for this split (lowest validation loss)
    weights_file = os.path.abspath(f'./model_weights/EfficientNet/split_{split}/model_07.h5')
    model.load_weights(weights_file)

    for angle in angles:
        print('\n', "angle: ", angle)
        path = os.path.abspath(f'./Data/{angle}/')

        gen_test = DataGen(df=df_test, path=path, batch_size=batchsize,
                           augmentation=False, train=False, shape=(512, 512))
        filename_list, pred_raw_list, label_list = [], [], []

        # walk through the dataset batch by batch
        for batch_idx in np.arange(gen_test.__len__()):
            sys.stdout.write('\r' + f'{batch_idx}/{gen_test.__len__()}')

            inpt_batch, label_batch, filename_batch = gen_test.__getitem__(batch_idx)

            # classifier output on the unprocessed full-view images
            logits_raw, raw_input_dense = pred_EffNet(inpt_batch, model)

            filename_list.extend(filename_batch)
            pred_raw_list.extend(logits_raw)
            label_list.extend(label_batch)

        df_subsampled['filename'] = filename_list
        df_label['filename'] = filename_list

        # one column per hemorrhage subtype
        for col, class_name in enumerate(classes):
            df_subsampled[f'{class_name}'] = [float(scores[col]) for scores in pred_raw_list]
            df_label[f'{class_name}'] = [float(target[col]) for target in label_list]

        path_sub = os.path.abspath(f'./Classifications/sparse/split_{split}')
        path_label = os.path.abspath('./Classifications/')

        for out_dir in (path_sub, path_label):
            check_path(out_dir)

        # predictions are stored per split; the label file is rewritten on every split
        df_subsampled.to_csv(path_sub + f'/{angle}_test.csv', index=False)
        df_label.to_csv(path_label + '/label_test.csv', index=False)

### Average over splits

In [None]:
# Average the classifier outputs of the five splits, row by row, for every
# processing type and number of projections.
for typ in ("sparse", "TV", "U-Net"):
    # only the unprocessed ("sparse") folder also holds the 4096-projection results
    if typ == "sparse":
        angles = [4096, 2048, 1024, 512, 256, 128, 64]
    else:
        angles = [2048, 1024, 512, 256, 128, 64]

    for angle in angles:
        split_frames = [
            pd.read_csv(f'./Classifications/{typ}/split_{split_idx}/{angle}_test.csv')
            for split_idx in range(5)
        ]

        # Averaging groups rows by their position, so every split must list the
        # same files in the same order; fail loudly instead of mixing samples.
        reference_names = split_frames[0]['filename']
        for split_idx, frame in enumerate(split_frames[1:], start=1):
            if not frame['filename'].equals(reference_names):
                raise ValueError(
                    f"{typ}/{angle}: filenames of split_{split_idx} do not match split_0"
                )

        # numeric_only=True skips the string 'filename' column; without it
        # pandas >= 2.0 raises a TypeError when asked to average strings.
        pd_con = pd.concat(split_frames).groupby(level=0).mean(numeric_only=True)

        pd_con['filename'] = reference_names

        pd_con.to_csv(f'./Classifications/{typ}/{angle}_test_avg.csv', index=False)

### Make ROC curves

In [None]:
# Colour-blind friendly palette; one colour per number of projections.
CB_color_cycle = ['#377eb8', '#ff7f00', '#4daf4a',
                  '#f781bf', '#a65628', '#984ea3',
                  '#999999', '#e41a1c', '#dede00']

angle_colors = {
    4096: CB_color_cycle[0],
    2048: CB_color_cycle[6],
    1024: CB_color_cycle[5],
    512: CB_color_cycle[1],
    256: CB_color_cycle[2],
    128: CB_color_cycle[3],
    64: CB_color_cycle[4],
}

subtype_columns = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

# Test labels, sorted by filename so they line up with the sorted predictions.
df_truth = pd.read_csv(os.path.abspath('./Classifications/label_test.csv'))
truth_oof = df_truth.sort_values(by='filename').reset_index(drop=True).loc[:, subtype_columns].values

# One figure per hemorrhage subtype.
for i, subtype in enumerate(subtype_columns):
    plt.title(f'Receiver Operating Characteristic: {subtype}')

    for typ in ("sparse", "TV", "U-Net"):

        # the full-view curve exists only for the unprocessed data
        angles = [4096, 512, 256, 128, 64] if typ == "sparse" else [512, 256, 128, 64]

        for angle in angles:
            df_pred_test = pd.read_csv(os.path.abspath(f'./Classifications/{typ}/{angle}_test_avg.csv'))
            sorted_pred = df_pred_test.sort_values(by='filename').reset_index(drop=True)
            test_predictions = sorted_pred.loc[:, subtype_columns].values

            fpr, tpr, threshold = metrics.roc_curve(truth_oof[:, i], test_predictions[:, i])
            roc_auc = metrics.auc(fpr, tpr)

            line_color = angle_colors[angle]

            # solid = U-Net, dotted = TV, dashed = unprocessed
            if typ == 'U-Net':
                curve_label, extra = f'U-Net({angle} proj.) (AUC={roc_auc:0.2f})', {}
            elif typ == 'TV':
                curve_label, extra = f'TV({angle} proj.) (AUC={roc_auc:0.2f})', {'linestyle': ':'}
            else:
                curve_label, extra = f'{angle} projections (AUC={roc_auc:0.2f})', {'linestyle': '--'}

            plt.plot(fpr, tpr, line_color, label=curve_label, **extra)

    plt.legend(loc='lower right')
    # chance diagonal (added after the legend, so it has no legend entry)
    plt.plot([0, 1], [0, 1], color='k', linestyle='dotted')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')

    #uncomment these lines to save figures
    #plt.savefig(f'./Images/roc_tv_{subtype}.pdf', format="pdf", bbox_inches="tight")
    #plt.savefig(f'./Images/roc_tv_{subtype}.png', format="png", bbox_inches="tight")
    plt.show()
    
    

### Plot more ROC curves

In [None]:
# Grid of ROC curves: one column per hemorrhage subtype, one row per number of
# projections, and one curve per processing type in every panel.
subtype_columns = ['any', 'subdural', 'subarachnoid', 'intraparenchymal', 'intraventricular', 'epidural']
angles = [2048, 1024, 512, 256, 128, 64]
# processing type -> legend label, in plotting order
curve_labels = {"sparse": 'FBP', "U-Net": 'U-Net', "TV": 'TV'}

# Labels and predictions are both sorted by filename so their rows line up.
df_truth = pd.read_csv('./Classifications/label_test.csv')
truth_oof = df_truth.sort_values(by='filename').reset_index(drop = True).loc[:, subtype_columns].values

# Read every averaged prediction file once instead of once per subtype.
predictions = {}
for angle in angles:
    for typ in curve_labels:
        df_pred_test = pd.read_csv(f'./Classifications/{typ}/{angle}_test_avg.csv')
        predictions[(typ, angle)] = df_pred_test.sort_values(by='filename').reset_index(drop = True).loc[:, subtype_columns].values

# Shared axes: tick labels are shown only on the outer panels.
fig, axs = plt.subplots(len(angles), len(subtype_columns), sharex=True, sharey=True)
fig.set_figwidth(20)
fig.set_figheight(15)

for i, subtype in enumerate(subtype_columns):
    for j, angle in enumerate(angles):
        ax = axs[j, i]

        for typ, curve_label in curve_labels.items():
            fpr, tpr, threshold = metrics.roc_curve(truth_oof[:, i], predictions[(typ, angle)][:, i])
            ax.plot(fpr, tpr, label = curve_label)

        # chance diagonal and legend are drawn once per panel
        ax.plot([0, 1], [0, 1], color='k', linestyle='dotted')
        ax.legend(loc = 'lower right')

        # label the grid so each panel can be identified
        if j == 0:
            ax.set_title(subtype)
        if j == len(angles) - 1:
            ax.set_xlabel('False Positive Rate')
        if i == 0:
            ax.set_ylabel(f'{angle} proj.\nTrue Positive Rate')

#uncomment these lines to save figures
#plt.grid()
#plt.savefig(f'./Images/ROC_plots/roc_{subtype}_{angle}.pdf', format="pdf", bbox_inches="tight")
#plt.savefig(f'./Images/roc_combined.pdf', format="pdf", bbox_inches="tight")
plt.show()
    
    

## Calculate ROC-AUCs with DeLong

In [None]:
#implementation of delong algorithm from https://github.com/yandexdataschool/roc_comparison
# See also paper: https://ieeexplore.ieee.org/document/6851192
def compare_rocs(pred_1, pred_2, subtype, alpha=.95):
    """Compare the ROC-AUCs of two averaged prediction files on the test labels.

    Labels and predictions are each sorted by 'filename' so that their rows
    line up before they are handed to `delong_roc_test`.
    NOTE(review): `delong_roc_test` is not defined in this notebook (presumably
    from the `utils` star-import); it is expected to return
    (aucs, covariance, log10 p-value), which is why the last return value is
    `10**pvalue` — confirm against utils.

    Parameters
    ----------
    pred_1, pred_2 : dict
        Each needs the keys "proc_type" and "n_angles"; together they select
        ./Classifications/<proc_type>/<n_angles>_test_avg.csv.
    subtype : str
        Column read from the label file and from both prediction files.
    alpha : float
        Confidence level of the intervals (0.95 -> 2.5 % / 97.5 % quantiles).

    Returns
    -------
    aucs, (ci_0, ci_1), delongcov, p_value
        The confidence intervals are normal approximations, clipped to [0, 1].
    """
    def _sorted_column(csv_path):
        # one column of a CSV, ordered by filename
        frame = pd.read_csv(csv_path)
        return frame.sort_values(by='filename').reset_index(drop = True).loc[:, [subtype]].values.squeeze()

    truth_oof = _sorted_column('./Classifications/label_test.csv')
    df_1_oof = _sorted_column(f'./Classifications/{pred_1["proc_type"]}/{pred_1["n_angles"]}_test_avg.csv')
    df_2_oof = _sorted_column(f'./Classifications/{pred_2["proc_type"]}/{pred_2["n_angles"]}_test_avg.csv')
    aucs, delongcov, pvalue = delong_roc_test(truth_oof, df_1_oof, df_2_oof)

    # the diagonal of the covariance matrix holds the variance of each AUC
    auc_std_0, auc_std_1 = np.sqrt(delongcov[0][0]), np.sqrt(delongcov[1][1])
    lower_upper_q = np.abs(np.array([0, 1]) - (1 - alpha) / 2)
    # An AUC lies in [0, 1], but the normal approximation can overshoot when
    # the AUC is close to 1, so the interval is clipped to the valid range.
    ci_0 = np.clip(scipy.stats.norm.ppf(lower_upper_q, loc=aucs[0], scale=auc_std_0), 0.0, 1.0)
    ci_1 = np.clip(scipy.stats.norm.ppf(lower_upper_q, loc=aucs[1], scale=auc_std_1), 0.0, 1.0)
    return aucs, (ci_0, ci_1), delongcov, 10**(pvalue)
   
def get_auc(proc_type, n_angles, subtype, alpha=.95):
    """Return the ROC-AUC of one averaged prediction file with its confidence interval.

    Labels and predictions are each sorted by 'filename' so that their rows
    line up before they are handed to `delong_roc_variance`.
    NOTE(review): `delong_roc_variance` is not defined in this notebook
    (presumably from the `utils` star-import); it is expected to return
    (auc, variance) — confirm against utils.

    Parameters
    ----------
    proc_type, n_angles
        Select ./Classifications/<proc_type>/<n_angles>_test_avg.csv.
    subtype : str
        Column read from the label file and from the prediction file.
    alpha : float
        Confidence level of the interval (0.95 -> 2.5 % / 97.5 % quantiles).

    Returns
    -------
    auc, ci
        `ci` is a length-2 array (lower, upper) from a normal approximation,
        clipped to [0, 1].
    """
    def _sorted_column(csv_path):
        # one column of a CSV, ordered by filename
        frame = pd.read_csv(csv_path)
        return frame.sort_values(by='filename').reset_index(drop = True).loc[:, [subtype]].values.squeeze()

    truth_oof = _sorted_column('./Classifications/label_test.csv')
    df_1_oof = _sorted_column(f'./Classifications/{proc_type}/{n_angles}_test_avg.csv')
    auc, variance = delong_roc_variance(truth_oof, df_1_oof)

    auc_std = np.sqrt(variance)
    lower_upper_q = np.abs(np.array([0, 1]) - (1 - alpha) / 2)
    # An AUC lies in [0, 1], but the normal approximation can overshoot when
    # the AUC is close to 1, so the interval is clipped to the valid range.
    ci = np.clip(scipy.stats.norm.ppf(lower_upper_q, loc=auc, scale=auc_std), 0.0, 1.0)
    return auc, ci

    

In [None]:
# AUC (with confidence interval) against the number of projections, one figure
# per hemorrhage subtype. Every value is also stored in `auc_dict` under
# '<typ>_<subtype>_<angle>_mean' and '<typ>_<subtype>_<angle>_ci'.
auc_dict = {}
all_angles = [4096, 2048, 1024, 512, 256, 128, 64]
processing_types = ("sparse", "U-Net", "TV")

for subtype in ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']:
    print(subtype)

    # per processing type: AUCs, [lower, upper] error-bar lengths, raw intervals
    auc_means = {typ: [] for typ in processing_types}
    auc_errors = {typ: [[], []] for typ in processing_types}
    auc_cis = {typ: [] for typ in processing_types}

    for typ in processing_types:
        # the full-view result exists only for the unprocessed data
        angles = list(all_angles) if typ == "sparse" else all_angles[1:]

        for angle in angles:
            auc_mean, ci = get_auc(proc_type=typ, n_angles=angle, subtype=subtype)

            auc_dict[f'{typ}_{subtype}_{angle}_mean'] = auc_mean
            auc_dict[f'{typ}_{subtype}_{angle}_ci'] = ci

            auc_means[typ].append(auc_mean)
            auc_errors[typ][0].append(auc_mean - ci[0])
            auc_errors[typ][1].append(ci[1] - auc_mean)
            auc_cis[typ].append(ci)

    fig, ax = plt.subplots()
    # horizontal reference line at the full-view (4096 projections) AUC
    plt.axhline(y=auc_means["sparse"][0], color='tab:gray', linestyle=':')
    for typ, legend_name, marker in (("sparse", 'FBP', 'x'), ("U-Net", 'U-Net', '^'), ("TV", 'TV', 'o')):
        x_values = all_angles if typ == "sparse" else all_angles[1:]
        ax.errorbar(x_values, auc_means[typ], np.asarray(auc_errors[typ]),
                    label=legend_name, marker=marker, capsize=5, linewidth=2)

    ax.invert_xaxis()
    plt.title(f'ROC AUC: {subtype}')
    ax.set_xlabel('projections')
    ax.set_ylabel('AUC')
    ax.set_xscale('log')
    ax.set_xticks([4096, 2048, 1024, 512, 256, 128, 64], [4096, 2048, 1024, 512, 256, 128, 64])

    plt.legend(loc='lower left')
    ax.set_ylim([0.45, 1.01])

    plt.grid(alpha=0.3)

    #plt.savefig(f"./Images/auc_mean_{subtype}.pdf", format="pdf", bbox_inches="tight")
    #plt.savefig(f"./Images/auc_mean_{subtype}.png", format="png", bbox_inches="tight")
    plt.show()

    # numeric summary, in the order sparse, TV, U-Net
    for typ in ("sparse", "TV", "U-Net"):
        print(subtype, f"{typ} mean: ", auc_means[typ])
        print(subtype, f"{typ} ci: ", auc_cis[typ])
    

In [None]:
# Save the AUC table as CSV: one column per number of projections, one row per
# (subtype, processing type) pair. Needs `auc_dict` from the AUC plotting cell.
classes = ['any', 'subdural', 'subarachnoid', 'intraparenchymal', 'intraventricular', 'epidural']
all_processing = ["sparse", "U-Net", "TV"]

# Row labels make the table self-describing; they follow the fill order below.
row_labels = [f'{subtype}_{processing}' for subtype in classes for processing in all_processing]
df = pd.DataFrame(index=row_labels)

for angle in [4096, 2048, 1024, 512, 256, 128, 64]:
    print(angle)
    angle_list = []

    for subtype in classes:
        for processing in all_processing:
            # Only the unprocessed data exists at 4096 projections; the U-Net
            # and TV cells keep the 0 placeholder so all columns stay aligned.
            if angle == 4096 and processing != "sparse":
                angle_list.append(0)
                continue

            auc_mean = auc_dict[f'{processing}_{subtype}_{angle}_mean']
            ci = auc_dict[f'{processing}_{subtype}_{angle}_ci']

            angle_list.append(f'{auc_mean:.3f} ({ci[0]:.3f},{ci[1]:.3f})')

    df[f"{angle}"] = angle_list

# The content is CSV text, so the file gets a .csv extension (it used to be
# written as '.xlsx', which spreadsheet programs will not open as a workbook).
df.to_csv("./raw_auc_table.csv", index_label="subtype_processing")
df.head()

In [None]:
# Pairwise DeLong tests: for every subtype and number of projections, print the
# p-values of full-view vs. sparse, sparse vs. U-Net, sparse vs. TV, and
# U-Net vs. TV.

for subtype in ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']:
    print(subtype)
    for angle in [2048, 1024, 512, 256, 128, 64]:
        n_angles = f"{angle}"
        sparse_run = {"n_angles": n_angles, "proc_type": "sparse"}
        unet_run = {"n_angles": n_angles, "proc_type": "U-Net"}
        tv_run = {"n_angles": n_angles, "proc_type": "TV"}
        full_view_run = {"n_angles": "4096", "proc_type": "sparse"}

        # compare_rocs returns (aucs, cis, covariance, p-value); only the
        # p-value (last element) is reported
        comparisons = (
            (sparse_run, unet_run),
            (sparse_run, tv_run),
            (unet_run, tv_run),
            (full_view_run, sparse_run),
        )
        pvalue_raw_unet, pvalue_raw_tv, pvalue_unet_tv, pvalue_gt_raw = (
            compare_rocs(first, second, f"{subtype}")[3] for first, second in comparisons
        )

        print(f"{angle} raw-4096: {pvalue_gt_raw} raw-U-Net: {pvalue_raw_unet}, raw-TV: {pvalue_raw_tv}, TV-U-Net: {pvalue_unet_tv}")