In [1]:
#mount Google Drive under /content/gdrive/ (the result pickles loaded later in this notebook live below that path)
from google.colab import drive
#force_remount=True is passed so the cell can be re-run in the same session
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


In [None]:
!pip install seaborn

In [5]:
#imports 
import scipy
from scipy import stats
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [19]:
#plot

def plot_results(x, y, df, hue, kind = 'box', color = 'Greys', xlabel = None, ylabel = None, yticks = [0, 0.2, 0.4, 0.6, 0.8, 1], sns_legend = False, legend = False, fsize = 15, save_name = None):
  """Draw a seaborn categorical plot of column y against column x and show it.

  Args:
    x, y: column names in df for the horizontal / vertical axis.
    df: data handed to sns.catplot.
    hue: column name used to colour the groups.
    kind: catplot kind (default 'box').
    color: name of the seaborn palette; two colours are drawn from it.
    xlabel, ylabel: axis labels; default to x and y.
    yticks: tick positions for the y axis (only read here, never modified).
    sns_legend: forwarded to sns.catplot as its legend flag.
    legend: when truthy, add a matplotlib legend titled 'Dataset' anchored
      to the right-hand edge of the axes.
    fsize: font size used for ticks, axis labels and the legend.
    save_name: file name for the figure; defaults to '<x>_<y>.png'. It is
      only computed for now because saving is disabled below.
  """
  #identity checks: `== None` would go through a custom __eq__ (e.g. element-wise on arrays)
  if xlabel is None:
    xlabel = x
  if ylabel is None:
    ylabel = y
  if save_name is None:
    save_name = x + '_' + y + '.png'
  #plot
  #sns.set_style({'font.family':'serif', 'font.serif':['Times New Roman']})
  #the palette is fixed at two colours, i.e. the plot is set up for a two-level hue
  sns.catplot(x = x, y = y, data = df, hue = hue, kind = kind, palette = sns.color_palette(color, 2), legend = sns_legend)
  plt.yticks(yticks, fontsize = fsize, fontname='Times New Roman')
  plt.ylabel(ylabel, fontsize = fsize, fontname='Times New Roman')
  plt.xticks(fontsize = fsize, fontname='Times New Roman')
  plt.xlabel(xlabel, fontsize = fsize, fontname='Times New Roman')
  if legend:
    plt.legend(title = 'Dataset', fontsize = fsize, title_fontsize = fsize, loc='center left', bbox_to_anchor=(1, 0.5))

  #saving is disabled: train_params is not defined in this notebook
  #plt.savefig(train_params['save_path'] + save_name, bbox_inches="tight")
  plt.show()

#implement Anderson Darling test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html
def ad_test(no_loc, loc, val = False, alpha = 5):
  """Classify two samples as normal / non-normal with the Anderson-Darling test.

  Each sample is tested against the normal distribution (the default of
  stats.anderson). A sample is called non-normal when its statistic is
  strictly greater than the critical value tabulated for `alpha`.

  Args:
    no_loc: dice scores of the non-localized predictions.
    loc: dice scores of the localized predictions.
    val: when True, return the raw test results instead of a label.
    alpha: significance level in percent; must be one of the levels that
      stats.anderson tabulates (default 5).

  Returns:
    val False: "Normal" or "Nonnormal" when both samples agree, otherwise
      "Normal/Nonnormal" or "Nonnormal/Normal" (first label is no_loc,
      second is loc).
    val True: [result_no_loc, result_loc] as returned by stats.anderson.

  Raises:
    ValueError: if alpha is not one of the tabulated significance levels.
  """
  results = [stats.anderson(no_loc), stats.anderson(loc)]
  if val:
    return results
  labels = []
  for result in results:
    levels = np.asarray(result[2], dtype = float)
    #tolerant match so that e.g. alpha = 5 and alpha = 5.0 both resolve
    matches = np.flatnonzero(np.isclose(levels, alpha))
    if matches.size == 0:
      raise ValueError('alpha must be one of the tabulated significance levels (%) ' + str(levels.tolist()) + ', got ' + repr(alpha))
    crit = result[1][matches[0]]
    #each sample is classified on its own, so a statistic equal to the
    #critical value is consistently treated as "Normal"
    labels.append("Nonnormal" if result[0] > crit else "Normal")
  if labels[0] == labels[1]:
    return labels[0]
  return labels[0] + "/" + labels[1]

#implement t-test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
def t_test(no_loc, loc, alpha = 5e-2, val = False):
  """Independent two-sample t-test on two arrays of dice scores.

  Args:
    no_loc: dice scores of the non-localized predictions.
    loc: dice scores of the localized predictions.
    alpha: significance threshold applied to the p-value.
    val: when True, return the p-value itself.

  Returns:
    The p-value if val is True; otherwise the string "True" when
    p <= alpha and the string "False" when p > alpha.
  """
  _, p_value = stats.ttest_ind(no_loc, loc)
  if val:
    return p_value
  return "True" if p_value <= alpha else "False"

#implement Mann Whitney U test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html
def mw_test(no_loc, loc, alpha = 5e-2, val = False):
  """Two-sided Mann-Whitney U test on two arrays of dice scores.

  Args:
    no_loc: dice scores of the non-localized predictions.
    loc: dice scores of the localized predictions.
    alpha: significance threshold applied to the p-value.
    val: when True, return the p-value itself.

  Returns:
    The p-value if val is True; otherwise the string "True" when
    p <= alpha and the string "False" when p > alpha.
  """
  #alternative is pinned explicitly: the library default changed between
  #SciPy releases (older ones returned a deprecated one-sided p-value), so
  #leaving it implicit makes the result depend on the installed version
  _, p_value = stats.mannwhitneyu(no_loc, loc, alternative = 'two-sided')
  if val:
    return p_value
  return "True" if p_value <= alpha else "False"

#implement RankSum test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html
def rs_test(no_loc, loc, alpha = 5e-2, val = False):
  """Rank-sum test (stats.ranksums) on two arrays of dice scores.

  Args:
    no_loc: dice scores of the non-localized predictions.
    loc: dice scores of the localized predictions.
    alpha: significance threshold applied to the p-value.
    val: when True, return the p-value itself.

  Returns:
    The p-value if val is True; otherwise the string "True" when
    p <= alpha and the string "False" when p > alpha.
  """
  outcome = stats.ranksums(no_loc, loc)
  p_value = outcome[1]
  #raw p-value requested: skip the significance label
  if val:
    return p_value
  if p_value <= alpha:
    return "True"
  return "False"

In [22]:
#how to address the outliers? in the graphs --> seaborn sns
#https://stackoverflow.com/questions/43264095/python-seaborn-how-are-outliers-determined-in-boxplots

#the revisions to words and giving quality --> the AAA detection studies not radiologists provided 

#the significance tests for DSC comparison (here)

#test set for AAA-UNet vs the test set of BB-AAA-UNet

#how the ground truth is defined 
#manually provided by student based upon ILT shape, volume, knowledge of disease
#specific aims if possible


#connect that segmentation accelerates the biomedical imaging pipeline


In [140]:
#new Anderson Darling Test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html
def new_ad_test(scores, description, alpha = 5, display = True):
  """Anderson-Darling normality check of one sample, with an optional report.

  The sample is tested against the normal distribution (the default of
  stats.anderson) and called 'Non-normal' when the statistic is strictly
  greater than the critical value tabulated for `alpha`.

  Args:
    scores: sample to test.
    description: label printed at the top of the report.
    alpha: significance level in percent; must be one of the levels that
      stats.anderson tabulates (default 5).
    display: when True, print the report.

  Returns:
    (statistic, critical value at alpha, 'Normal' or 'Non-normal').

  Raises:
    ValueError: if alpha is not one of the tabulated significance levels.
  """
  result = stats.anderson(scores)
  statistic = result[0]
  critical_values = result[1]
  levels = np.asarray(result[2], dtype = float)
  #tolerant lookup instead of exact float equality; an untabulated alpha
  #gets an explicit error rather than an IndexError from an empty match
  matches = np.flatnonzero(np.isclose(levels, alpha))
  if matches.size == 0:
    raise ValueError('alpha must be one of the tabulated significance levels (%) ' + str(levels.tolist()) + ', got ' + repr(alpha))
  crit = critical_values[matches[0]]
  #compare
  if statistic > crit:
    p = 'Non-normal'
  else:
    p = 'Normal'
  #report
  if display:
    print('Description: ', description)
    print('Statistic: ', statistic)
    print('Critical Value: ', crit)
    print('Distribution: ', p)
    print('alpha (%): ', alpha)
  return statistic, crit, p

#new RankSum test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html
def new_rs_test(group1, group2, description, alpha = 5e-2, display = True):
  """Rank-sum test (stats.ranksums) on two samples, with an optional report.

  Args:
    group1, group2: the two samples to compare.
    description: label printed at the top of the report.
    alpha: significance threshold; the result is significant when p < alpha.
    display: when True, print the report.

  Returns:
    (statistic, p-value, "True" or "False" as a string).
  """
  statistic, p = stats.ranksums(group1, group2)
  sig = "True" if p < alpha else "False"
  if display:
    report = (
      ('RankSum on: ', description),
      ('Statistic: ', statistic),
      ('p-value: ', p),
      ('Significance: ', sig),
      ('alpha: ', alpha),
    )
    for label, value in report:
      print(label, value)
  return statistic, p, sig

#new  MannWhitneyU test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html
def new_mwu_test(group1, group2, description, alpha = 5e-2, display = True):
  """Two-sided Mann-Whitney U test on two samples, with an optional report.

  Args:
    group1, group2: the two samples to compare.
    description: label printed at the top of the report.
    alpha: significance threshold; the result is significant when p < alpha.
    display: when True, print the report.

  Returns:
    (U statistic, p-value, "True" or "False" as a string).
  """
  #alternative is pinned explicitly: the library default changed between
  #SciPy releases (older ones returned a deprecated one-sided p-value), so
  #leaving it implicit makes the reported p-value depend on the installed version
  p_stat = stats.mannwhitneyu(group1, group2, alternative = 'two-sided')
  #statistic
  statistic = p_stat[0]
  #p-value
  p = p_stat[1]
  #check if significant
  if p < alpha:
    sig = "True"
  else:
    sig = "False"
  #display
  if display:
    print('MannWhitneyU on: ', description)
    print('Statistic: ', statistic)
    print('p-value: ', p)
    print('Significance: ', sig)
    print('alpha: ', alpha)
  return statistic, p, sig

#new t-test
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
def new_t_test(group1, group2, description, alpha = 5e-2, display = True):
  """Independent two-sample t-test on two samples, with an optional report.

  Args:
    group1, group2: the two samples to compare.
    description: label printed at the top of the report.
    alpha: significance threshold; the result is significant when p < alpha.
    display: when True, print the report.

  Returns:
    (t statistic, p-value, "True" or "False" as a string).
  """
  outcome = stats.ttest_ind(group1, group2)
  statistic, p = outcome[0], outcome[1]
  #significance label is kept as a string
  sig = "False"
  if p < alpha:
    sig = "True"
  if not display:
    return statistic, p, sig
  print('t-test on: ', description)
  print('Statistic: ', statistic)
  print('p-value: ', p)
  print('Significance: ', sig)
  print('alpha: ', alpha)
  return statistic, p, sig

In [141]:
#load the saved result tables of both models
#NOTE: read_pickle can execute code stored in the file - only load pickles you produced yourself
#AAA-UNet (no bounding box)
df = pd.read_pickle('/content/gdrive/MyDrive/AAA_Project/Masters-Thesis/AAA-DICOM/AAA-UNet-results/segmentation-2022-10-19-16-05/df_results.pkl')
#BB-AAA-UNet (bounding box)
df_BB = pd.read_pickle('/content/gdrive/MyDrive/AAA_Project/Masters-Thesis/AAA-DICOM/BB-AAA-UNet-results/high-res-segmentation-2022-10-20-17-06/df_results.pkl')
#can even get results for the subgroups?
#DSC column of the rows whose DATA value is 'TEST', as numpy arrays
#AAA-UNet test set
no_loc = df.loc[df['DATA'] == 'TEST', 'DSC'].to_numpy()
#BB-AAA-UNet test set
loc = df_BB.loc[df_BB['DATA'] == 'TEST', 'DSC'].to_numpy()

In [142]:
#distribution of AAA-UNet Test Set
#Anderson-Darling normality check at the default 5% level; the function prints the report, so the returned tuple is discarded
_ = new_ad_test(no_loc, 'AAA-UNet Test Set DSC') 

Description:  AAA-UNet Test Set DSC
Statistic:  1.9992552065532792
Critical Value:  0.712
Distribution:  Non-normal
alpha (%):  5


In [143]:
#distribution of BB-AAA-UNet Test Set
#Anderson-Darling normality check at the default 5% level; the function prints the report, so the returned tuple is discarded
_ = new_ad_test(loc, 'BB-AAA-UNet Test Set DSC') 

Description:  BB-AAA-UNet Test Set DSC
Statistic:  3.536611921915423
Critical Value:  0.712
Distribution:  Non-normal
alpha (%):  5


In [144]:
#compute t-test
#NOTE(review): both samples are reported as Non-normal by the Anderson-Darling cells in this notebook,
#so read this parametric result with caution next to the rank-based tests
_ = new_t_test(no_loc, loc, 'AAA-UNet v. BB-AAA-UNet Test Set DSC')

t-test on:  AAA-UNet v. BB-AAA-UNet Test Set DSC
Statistic:  -0.915902706714024
p-value:  0.3635104433076648
Significance:  False
alpha:  0.05


In [138]:
#compute mannwhitneyu
#NOTE(review): this cell's recorded execution count (138) is lower than those of the function-definition (140)
#and data-loading (141) cells, so the stored output predates their last run - re-run top to bottom to confirm it
_ = new_mwu_test(no_loc, loc, 'AAA-UNet v. BB-AAA-UNet Test Set DSC')

MannWhitneyU on:  AAA-UNet v. BB-AAA-UNet Test Set DSC
Statistic:  271.5
p-value:  0.008488551031126466
Significance:  True
alpha:  0.05


In [139]:
#compute ranksum
#NOTE(review): this cell's recorded execution count (139) is lower than those of the function-definition (140)
#and data-loading (141) cells, so the stored output predates their last run - re-run top to bottom to confirm it
_ = new_rs_test(no_loc, loc, 'AAA-UNet v. BB-AAA-UNet Test Set DSC')

RankSum on:  AAA-UNet v. BB-AAA-UNet Test Set DSC
Statistic:  -2.639019937307825
p-value:  0.00831460908674944
Significance:  True
alpha:  0.05
