In [None]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
import os 
from scipy.stats import sem,tmean


In [None]:
def calculate_mcc(confusion_matrix):
    """Return the macro-averaged one-vs-rest Matthews correlation coefficient.

    For every class ``i`` the square confusion matrix is collapsed into a
    binary (class ``i`` vs. everything else) table, the binary MCC is computed,
    and the per-class values are averaged with equal weight.

    Parameters
    ----------
    confusion_matrix : array-like of shape (n_classes, n_classes)
        Rows are the actual classes, columns the predicted classes. Nested
        lists are accepted as well as numpy arrays.

    Returns
    -------
    numpy.float64
        Mean of the per-class MCC values. A class whose MCC denominator is
        zero (e.g. it never occurs and is never predicted) contributes 0
        instead of NaN, so one degenerate class no longer turns the whole
        average into NaN.
    """
    # Float conversion makes list inputs indexable with [i, i] and avoids
    # integer overflow in the four-way product of the denominator.
    cm = np.asarray(confusion_matrix, dtype=np.float64)
    num_classes = len(cm)
    mcc_values = np.zeros(num_classes)
    total = np.sum(cm)  # loop-invariant

    for i in range(num_classes):
        tp = cm[i, i]
        fp = np.sum(cm[:, i]) - tp
        fn = np.sum(cm[i, :]) - tp
        tn = total - tp - fp - fn

        denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        # Zero denominator: leave the pre-filled 0 rather than dividing 0/0.
        if denominator > 0:
            mcc_values[i] = (tp * tn - fp * fn) / denominator

    mcc = np.mean(mcc_values)

    return mcc

Read the responses and the correct answers

In [None]:
# Folder with the original questionnaire exports, relative to the working directory.
curr_dir = os.getcwd()
Q_dir = os.path.join(curr_dir, 'Questionnaires', 'Original')
all_files = sorted(os.listdir(Q_dir))

# One DataFrame per response file; a file counts as a response when its name
# contains an upper-case 'P'. Files are read in sorted-name order.
responses = [
    pd.read_csv(os.path.join(Q_dir, file_name))
    for file_name in all_files
    if 'P' in file_name
]

# Answer key: read without a header row, so columns are addressed by position.
# The path is built from Q_dir so both reads point at the same folder.
correct = pd.read_csv(os.path.join(Q_dir, 'Correct_answers.csv'), header=None)

# Column 4 of the answer key supplies the class labels; sorting fixes the
# row/column order of every confusion matrix built from them.
class_labels = sorted(pd.unique(correct[4]).tolist())
num_classes = len(class_labels)
class_to_idx = {clss: i for i, clss in enumerate(class_labels)}

# Index mapping for the binary real-vs-fake confusion matrix.
real_fake = {'fake': 0, 'real': 1}

Define and calculate the confusion matrices for each task and pathologist.


In [None]:
# Per-pathologist results, keyed by 'Pathologist<N>' (N follows the order of `responses`).
cf_matrix_task1_real_all = {}
cf_matrix_task1_fake_all = {}
cf_matrix_task2_all = {}
mistake_real_all = {}
mistake_fake_all = {}


for p, response in enumerate(responses):
    # Replies are taken from the first row of the response file, skipping its
    # first two columns; reply k is matched to row k of the answer key.
    all_replies = response.iloc[0].to_numpy()[2:]

    # Confusion matrices: row = answer-key value, column = pathologist's reply.
    cf_matrix_task1_real = np.zeros(shape=(num_classes, num_classes))
    cf_matrix_task1_fake = np.zeros(shape=(num_classes, num_classes))
    cf_matrix_task2 = np.zeros((2, 2))

    # Task-2 misclassification counts per class label.
    mistake_real = dict.fromkeys(class_labels, 0)
    mistake_fake = dict.fromkeys(class_labels, 0)

    for idx, reply in enumerate(all_replies):
        # Fetch the answer-key row once, positionally, instead of mixing
        # repeated .iloc / .loc lookups of the same row.
        answer = correct.iloc[idx]
        task = answer[1]    # 'first' or 'second'
        origin = answer[3]  # 'real' or 'fake'
        tissue = answer[4]  # class label

        if task == 'first':
            idx_row = class_to_idx[tissue]
            idx_column = class_to_idx[reply]
            if origin == 'real':
                cf_matrix_task1_real[idx_row, idx_column] += 1
            else:
                cf_matrix_task1_fake[idx_row, idx_column] += 1
        elif task == 'second':
            idx_row = real_fake[origin]
            idx_column = real_fake[reply]
            cf_matrix_task2[idx_row, idx_column] += 1
            if idx_row != idx_column:
                # Attribute the mistake to the class label of the tile.
                if origin == 'real':
                    mistake_real[tissue] += 1
                elif origin == 'fake':
                    mistake_fake[tissue] += 1

    dic_name = 'Pathologist' + str(p + 1)
    cf_matrix_task1_fake_all[dic_name] = cf_matrix_task1_fake
    cf_matrix_task1_real_all[dic_name] = cf_matrix_task1_real
    cf_matrix_task2_all[dic_name] = cf_matrix_task2
    mistake_fake_all[dic_name] = mistake_fake
    mistake_real_all[dic_name] = mistake_real


Visualize the results of the first task on the real data 

In [None]:
# One heatmap per pathologist plus a final 'Combined' panel for task 1 on real tiles.
num_pathologists = len(cf_matrix_task1_real_all)
num_per_tissue = 10  # upper end of the per-pathologist colour scale
                     # (presumably the number of tiles per tissue — confirm)
color = 'Blues'
width_ratios = [1] * (num_pathologists + 1)
figsz = ((num_pathologists+1)*3.54, 3.54)
clss_labels = class_labels
font_properties = {'fontname': 'sans-serif', 'fontsize': 12}
sup_ttl = 'Tissue detection task on real tiles'
sup_font_properties = {'fontname': 'sans-serif', 'fontsize': 14}

fig, axes = plt.subplots(1, num_pathologists+1,
                         gridspec_kw={'width_ratios': width_ratios},
                         figsize=figsz, dpi=600)
fig.suptitle(sup_ttl, **sup_font_properties)

conf_matrix_total = np.zeros((num_classes, num_classes))
for num, (key, conf_matrix) in enumerate(cf_matrix_task1_real_all.items()):
    conf_matrix_total += conf_matrix
    df_conf_matrix = pd.DataFrame(conf_matrix, index=list(clss_labels), columns=list(clss_labels))
    # fmt=".0f" is now applied to every panel, including the first one,
    # so all annotations are formatted the same way.
    g = sn.heatmap(df_conf_matrix, fmt=".0f", cmap=color, annot=True, vmin=0,
                   vmax=num_per_tissue, cbar=False, square=True, ax=axes[num])
    if num == 0:
        # Only the left-most panel keeps its y axis.
        g.set_ylabel('actual', **font_properties)
    else:
        g.set_yticks([])
    g.set_xlabel('predicted', **font_properties)
    g.set_title(key, **font_properties)

# Combined panel: always the last axes. cbar_ax is dropped because it has no
# effect when cbar=False.
df_conf_matrix_total = pd.DataFrame(conf_matrix_total, index=list(clss_labels), columns=list(clss_labels))
g = sn.heatmap(df_conf_matrix_total, fmt=".0f", cmap=color, annot=True, square=True, vmin=0,
               vmax=num_per_tissue*num_pathologists, cbar=False, ax=axes[-1])
g.set_yticks([])
g.set_xlabel('predicted', **font_properties)
g.set_title('Combined', **font_properties)

for ax in axes:
    ax.tick_params(axis='both', labelsize=10, width=1)
plt.tight_layout()

# Per-pathologist and pooled MCC / accuracy.
for key, cf_matrix in cf_matrix_task1_real_all.items():
    mcc = calculate_mcc(cf_matrix)
    acc = np.diagonal(cf_matrix).sum() / np.sum(cf_matrix)
    print(key,': ','MCC: ',mcc,' Accuracy: ',acc*100,'%')
mcc = calculate_mcc(conf_matrix_total)
acc = np.diagonal(conf_matrix_total).sum() / np.sum(conf_matrix_total)
print('All pathologists combined: MCC:',mcc,' Accuracy: ',acc*100,'%')

plt.savefig(os.path.join(os.getcwd(),'task1_real_pathologists.png'),dpi=600)
plt.savefig(os.path.join(os.getcwd(),'task1_real_pathologists.svg'))


Visualize the results of the first task on the fake data 

In [None]:
# One heatmap per pathologist plus a final 'Combined' panel for task 1 on fake tiles.
num_pathologists = len(cf_matrix_task1_fake_all)
num_per_tissue = 10  # upper end of the per-pathologist colour scale
                     # (presumably the number of tiles per tissue — confirm)
color = 'Blues'
width_ratios = [1] * (num_pathologists + 1)
figsz = ((num_pathologists+1)*3.54, 3.54)
clss_labels = class_labels
font_properties = {'fontname': 'sans-serif', 'fontsize': 12}
sup_ttl = 'Tissue detection task on fake tiles'
sup_font_properties = {'fontname': 'sans-serif', 'fontsize': 14}

fig, axes = plt.subplots(1, num_pathologists+1,
                         gridspec_kw={'width_ratios': width_ratios},
                         figsize=figsz, dpi=600)
fig.suptitle(sup_ttl, **sup_font_properties)

conf_matrix_total = np.zeros((num_classes, num_classes))
for num, (key, conf_matrix) in enumerate(cf_matrix_task1_fake_all.items()):
    conf_matrix_total += conf_matrix
    df_conf_matrix = pd.DataFrame(conf_matrix, index=list(clss_labels), columns=list(clss_labels))
    # Both original branches drew the same heatmap; only the y-axis handling differs.
    g = sn.heatmap(df_conf_matrix, cmap=color, annot=True, fmt=".0f", vmin=0,
                   vmax=num_per_tissue, cbar=False, square=True, ax=axes[num])
    if num == 0:
        # Only the left-most panel keeps its y axis.
        g.set_ylabel('actual', **font_properties)
    else:
        g.set_yticks([])
    g.set_xlabel('predicted', **font_properties)
    g.set_title(key, **font_properties)

# Combined panel: always the last axes. cbar_ax is dropped because it has no
# effect when cbar=False.
df_conf_matrix_total = pd.DataFrame(conf_matrix_total, index=list(clss_labels), columns=list(clss_labels))
g = sn.heatmap(df_conf_matrix_total, annot=True, cmap=color, fmt=".0f", square=True, vmin=0,
               vmax=num_per_tissue*num_pathologists, cbar=False, ax=axes[-1])
g.set_yticks([])
g.set_xlabel('predicted', **font_properties)
g.set_title('Combined', **font_properties)

for ax in axes:
    ax.tick_params(axis='both', labelsize=10, width=1)

plt.tight_layout()

# Per-pathologist and pooled MCC / accuracy.
for key, cf_matrix in cf_matrix_task1_fake_all.items():
    mcc = calculate_mcc(cf_matrix)
    acc = np.diagonal(cf_matrix).sum() / np.sum(cf_matrix)
    print(key,': ','MCC: ',mcc,' Accuracy: ',acc*100,'%')
mcc = calculate_mcc(conf_matrix_total)
acc = np.diagonal(conf_matrix_total).sum() / np.sum(conf_matrix_total)
print('All pathologists combined: MCC:',mcc,' Accuracy: ',acc*100,'%')

plt.savefig(os.path.join(os.getcwd(),'task1_fake_pathologists.png'),dpi=600)
plt.savefig(os.path.join(os.getcwd(),'task1_fake_pathologists.svg'))

Visualize the results of the second task (real vs fake) 

In [None]:
# One 2x2 heatmap per pathologist plus a pooled panel for task 2 (real vs fake).
# The panel count is taken from the task-2 results that are actually plotted.
num_pathologists = len(cf_matrix_task2_all)
num_per_type = 50  # upper end of the per-pathologist colour scale
                   # (presumably the number of tiles per type — confirm)
color = 'Blues'
font_properties = {'fontname': 'sans-serif', 'fontsize': 12}
figsz = ((num_pathologists+1)*3.54, 3.54)
# Display labels in the index order of `real_fake` (fake -> 0, real -> 1).
clss_labels = ['generated','real']
sup_ttl = 'fake vs real'
sup_font_properties = {'fontname': 'sans-serif', 'fontsize': 14}
width_ratios = [1] * (num_pathologists + 1)

fig, axes = plt.subplots(1, num_pathologists+1,
                         gridspec_kw={'width_ratios': width_ratios},
                         figsize=figsz, dpi=600)
fig.suptitle(sup_ttl, **sup_font_properties)

conf_matrix_total = np.zeros((len(clss_labels), len(clss_labels)))
for num, (key, conf_matrix) in enumerate(cf_matrix_task2_all.items()):
    conf_matrix_total += conf_matrix
    df_conf_matrix = pd.DataFrame(conf_matrix, index=list(clss_labels), columns=list(clss_labels))
    # The two original branches issued the same heatmap call; only the
    # y-axis handling differs between the first panel and the rest.
    g = sn.heatmap(df_conf_matrix, square=True, cmap=color, fmt=".0f", annot=True,
                   vmin=0, vmax=num_per_type, cbar=False, ax=axes[num])
    if num == 0:
        g.set_ylabel('actual', **font_properties)
    else:
        g.set_yticks([])
    g.set_xlabel('predicted', **font_properties)
    g.set_title(key, **font_properties)

# Pooled panel: always the last axes. cbar_ax is dropped because it has no
# effect when cbar=False.
df_conf_matrix_total = pd.DataFrame(conf_matrix_total, index=list(clss_labels), columns=list(clss_labels))
g = sn.heatmap(df_conf_matrix_total, annot=True, cmap=color, fmt=".0f", square=True, vmin=0,
               vmax=num_per_type*num_pathologists, cbar=False, ax=axes[-1])
g.set_yticks([])
g.set_xlabel('predicted', **font_properties)
g.set_title('overall', **font_properties)

for ax in axes:
    ax.tick_params(axis='both', labelsize=10, width=1)

plt.tight_layout()

# Per-pathologist and pooled MCC / accuracy.
for key, cf_matrix in cf_matrix_task2_all.items():
    mcc = calculate_mcc(cf_matrix)
    acc = np.diagonal(cf_matrix).sum() / np.sum(cf_matrix)
    print(key,': ','MCC: ',mcc,' Accuracy: ',acc*100,'%')
mcc = calculate_mcc(conf_matrix_total)
acc = np.diagonal(conf_matrix_total).sum() / np.sum(conf_matrix_total)
print('All pathologists combined: MCC:',mcc,' Accuracy: ',acc*100,'%')

plt.savefig(os.path.join(os.getcwd(),'task2_pathologists.png'),dpi=600)
plt.savefig(os.path.join(os.getcwd(),'task2_pathologists.svg'))

Visualize the number of errors per tissue for the second task

In [None]:
# Mean and standard error (across pathologists) of the task-2 mistakes per tissue.
mistake_real_avg = {}
mistake_real_se = {}
mistake_fake_avg = {}
mistake_fake_se = {}

for tissue in class_labels:
    tmp_real = [mistake_real_all[key][tissue] for key in mistake_real_all]
    tmp_fake = [mistake_fake_all[key][tissue] for key in mistake_real_all]
    mistake_real_avg[tissue] = tmean(tmp_real)
    mistake_real_se[tissue] = sem(tmp_real)
    mistake_fake_avg[tissue] = tmean(tmp_fake)
    mistake_fake_se[tissue] = sem(tmp_fake)

figsz = ((num_pathologists+1)*3.54,3.54)
fig = plt.figure(figsize=figsz,dpi=600)
font_properties = {'fontname': 'sans-serif', 'fontsize': 12}

# (means, standard errors, title) for the left and right panel respectively.
panels = (
    (mistake_real_avg, mistake_real_se, 'real tiles detected as fake'),
    (mistake_fake_avg, mistake_fake_se, 'fake tiles detected as real'),
)
panel_axes = []
for position, (data, se_by_tissue, ttl) in enumerate(panels, start=1):
    names = list(data.keys())
    values = list(data.values())
    err = list(se_by_tissue.values())

    panel_ax = fig.add_subplot(1, 2, position)
    panel_ax.bar(range(len(data)), values, tick_label=names, linewidth=1)
    # Asymmetric error bars: zero below the bar top, one standard error above.
    panel_ax.errorbar(range(len(data)), values, yerr=[np.zeros(len(err)), err],
                      fmt='o', capsize=4, color='red', linewidth=0.75)
    panel_ax.set_xlabel("tissues", **font_properties)
    if position == 1:
        # Only the left panel carries the y-axis label ...
        panel_ax.set_ylabel("# of mistakes", **font_properties)
    panel_ax.set_title(ttl, **font_properties)
    if position == 2:
        # ... and the right panel hides its tick labels.
        panel_ax.set_yticklabels([])
    panel_ax.tick_params(axis='both', labelsize=10, width=1)
    panel_axes.append(panel_ax)

ax1, ax2 = panel_axes

# Give both panels the same y range and tick positions.
max_y = max(ax1.get_ylim()[1], ax2.get_ylim()[1])
for shared_ax in (ax1, ax2):
    shared_ax.set_ylim(0, max_y)
    shared_ax.set_yticks(np.arange(0, int(max_y+2), 2))


plt.tight_layout()
plt.savefig(os.path.join(os.getcwd(),'error_tissues.png'),dpi=600)
plt.savefig(os.path.join(os.getcwd(),'error_tissues.svg'))

The follow-up questionnaire

In [None]:
# Load the follow-up questionnaire: the first file (in sorted order) of the Followup folder.
curr_dir = os.getcwd()
Q_dir = os.path.join(curr_dir, 'Questionnaires', 'Followup')
followup_files = sorted(os.listdir(Q_dir))
followup_path = os.path.join(Q_dir, followup_files[0])
answers = pd.read_csv(followup_path)

# Rotate the first three rows (old row 2 -> 0, old 0 -> 1, old 1 -> 2).
# Re-selecting rows by position avoids the tuple assignment whose third
# operand was not copied and could alias a row that had just been overwritten.
row_order = [2, 0, 1] + list(range(3, len(answers)))
answers = answers.iloc[row_order].reset_index(drop=True)

# NOTE: this is the row count minus one; the names list below adds the one
# back, so every row of `answers` receives a 'Pathologist<N>' label.
num_pathologists = answers.shape[0] - 1
num_question = answers.shape[1] - 1

# Columns 1-3 plus the last column form the first feature group; columns 4
# up to (not including) the last form the second. Column 0 is not used.
image_features = np.array(
    np.concatenate((answers.values[:, 1:4], answers.values[:, -1].reshape(-1, 1)), axis=1),
    dtype=np.float64,
)
biological_features = np.array(answers.values[:, 4:-1], dtype=np.float64)

names = ['Pathologist' + str(num+1) for num in range(num_pathologists+1)]
names.append('Combined')

# Per-row mean score and standard error (population std / sqrt(#questions in the group)).
values1 = np.mean(image_features, axis=1)
se1 = np.std(image_features, axis=1) / np.sqrt(image_features.shape[1])

values2 = np.mean(biological_features, axis=1)
se2 = np.std(biological_features, axis=1) / np.sqrt(biological_features.shape[1])

# 'Combined' entry. The spread must be computed from the per-row means only,
# i.e. BEFORE the overall mean is appended; otherwise the mean itself is
# counted as an extra observation and shrinks the standard error.
# np.std (ddof=0) divided by sqrt(rows - 1) equals the ddof=1 standard error
# of the mean. NOTE(review): the per-row errors above use ddof=0 / sqrt(n) —
# confirm which estimator is intended throughout.
combined_se1 = np.std(values1) / np.sqrt(num_pathologists)
combined_se2 = np.std(values2) / np.sqrt(num_pathologists)

values1 = np.append(values1, np.mean(values1))
values2 = np.append(values2, np.mean(values2))
se1 = np.append(se1, combined_se1)
se2 = np.append(se2, combined_se2)


In [None]:

# Grouped bar chart of the follow-up scores: two bars per entry of `names`.
figsz = ((num_pathologists+1)*3.54,3.54)
fig = plt.figure(figsize=figsz,dpi=600)
font_properties = {'fontname': 'sans-serif', 'fontsize': 12}
ax = fig.add_subplot(1, 1, 1)

bar_width = 0.35
x1 = np.arange(len(names))
x2 = x1 + bar_width

# (x positions, bar heights, standard errors, legend label, bar colour)
bar_groups = (
    (x1, values1, se1, 'Visual attributes', 'tab:cyan'),
    (x2, values2, se2, 'Biological attributes', 'tab:gray'),
)
for positions, heights, errors, group_label, group_color in bar_groups:
    ax.bar(positions, heights, linewidth=1, width=bar_width, label=group_label, color=group_color)
    # Error bars extend upwards only: zero below, one standard error above.
    ax.errorbar(positions, heights, yerr=[np.zeros(len(errors)), errors],
                fmt='o', capsize=4, color='red', linewidth=0.75)

ax.set_ylabel("average score",**font_properties)
ax.tick_params(axis='both', labelsize=10,width = 1)

# Centre each tick between the two bars of its group.
ax.set_xticks((x1 + x2)/ 2)
ax.set_xticklabels(names)
ax.legend()

plt.tight_layout()
plt.savefig(os.path.join(os.getcwd(),'Q2.png'),dpi=600)
plt.savefig(os.path.join(os.getcwd(),'Q2.svg'))


In [None]:
# Dump the plotted means and standard errors of both feature groups, one array per line.
print(values1, se1, values2, se2, sep='\n')