In [1]:
import sqlite3
from scipy import stats
import numpy as np

In [2]:
algos = ['Deep Lift Shape', 
        'Focused Attention',
        'GradCAM',
        'Guided GradCAM',
        'Integrated Gradients',
        'LRP',
        'Attention Rollout',
        'ViT LRP']

#filters = ['LRP', 'GradCAM', 'Focused Attention','Attention Rollout', 'ViT LRP']
filters = ['LRP', 'GradCAM','Attention Rollout', 'ViT LRP', 'Focused Attention']
id_experts = [3,6,7,8]

In [3]:
from bokeh.io import output_file, show, output_notebook
from bokeh.models import ColumnDataSource
import math
from bokeh.palettes import Spectral8, Spectral5
from bokeh.plotting import figure

def plot_histogram(values, labels, title, filter_labels=None, show_value=True):
    if filter_labels:
        indexes = []
        values = np.asarray(values)
        for f in filter_labels:
            indexes.append(labels.index(f))
        indexes = np.asarray(indexes)
        values = values[indexes]
        labels = filter_labels
    source = ColumnDataSource(data=dict(labels=labels, 
                                        y=values, 
                                        text_values = ['%.2f'%v for v in values],
                                        color=Spectral5))
    
    p = figure(x_range=labels, y_range=(0,5), height=400, title=title)
    p.vbar(x='labels', top='y', width=0.9, color='color', source=source)
    
    if show_value:
        
        p.text(x='labels', 
               y='y',
               x_offset=-10,
               text='text_values', source=source)
    
    p.xgrid.grid_line_color = None
    p.legend.orientation = "horizontal"
    p.legend.location = "top_center"
    output_notebook()
    show(p)

def get_result(expert_id):
    con = sqlite3.connect('database_deployed_4.db')
    cur = con.cursor()
    rows = []
    for row in cur.execute(f'''SELECT * FROM revisions WHERE expert_id=={expert_id} ORDER BY data_id'''):
        row = [int(r) for r in row[3:]]
        rows.append(row)
    score = np.asarray(rows)
    if expert_id==8:
        score = np.clip(score+1, 1, 5)
    return score
def plot_result(expert_id, fundus_plot=True, oct_plot=True):
    results = get_result(expert_id)
    if oct_plot:
        plot_histogram(results[27:].mean(0).tolist(), algos, f'OCT-Expert {expert_id}', filters)
    if fundus_plot:
        plot_histogram(results[:27].mean(0).tolist(), algos, f'Fundus-Expert {expert_id}', filters)

for i in id_experts:
    plot_result(i, oct_plot=False)


You are attempting to set `plot.legend.orientation` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.

You are attempting to set `plot.legend.location` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



In [4]:
for i in id_experts:
    plot_result(i, fundus_plot=False)

In [5]:
all_scores = np.asarray([get_result(i) for i in id_experts])

In [6]:
mean_score = all_scores.mean(0)
plot_histogram(mean_score[27:].mean(0).tolist(), algos, None, filters)
plot_histogram(mean_score[:27].mean(0).tolist(), algos, None, filters)

In [7]:
mean_score.shape

(177, 8)

In [8]:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap

disease = ['CNV', 'Drusen', 'DME']
values = mean_score
indexes = []
values = np.asarray(values)
for f in filters:
    indexes.append(algos.index(f))
indexes = np.asarray(indexes)
values = values[:, indexes]
labels = filters

data = {l:[values[27:77, i].mean(), values[77:127, i].mean(), values[127:177, i].mean()] for i, l in enumerate(labels)}
data['disease'] = disease
data

x = [ (d, algo) for d in disease for algo in labels ]
counts = sum(zip(*[data[l] for l in labels]), ()) # like an hstack
source = ColumnDataSource(data=dict(x=x, counts=counts, text_values = ['%.2f'%c for c in counts]))

p = figure(x_range=FactorRange(*x), height=450, y_range=(0,5))

p.vbar(x='x', top='counts', width=0.9, source=source, line_color=None,
      fill_color=factor_cmap('x', palette=Spectral5, factors=labels, start=1, end=2))

p.text(x='x', y='counts', text='text_values', source=source, x_offset=-12, text_font_size={'value': '12px'})

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)

In [9]:
values.shape

(177, 5)

In [10]:
results = np.asarray(mean_score)
indexes_to_keep = np.asarray([algos.index(f) for f in filters])
results = results[:, indexes_to_keep]
groups = [results[:, i] for i in range(results.shape[1])]

stats.friedmanchisquare(*groups)

FriedmanchisquareResult(statistic=358.9143876337695, pvalue=2.0849958126789935e-76)

In [11]:
def build_fleiss_kappa_matrix(scores):
    """
    :param scores: Tensor of shape (:attr:`E`, :attr:`N`, :attr:`A`) where `E` is the number of experts, `N` the number of images `A` the number of algorithms.\
    We build a matrix M (:attr:`N`, :attr:`E`) for the fless_kappa where `M[i, j]` 
    represent the number of raters who chose the `i`th image to be the best categorized by `j`th category.
    """
    
    
    return M

def fleiss_kappa(M):
    """
    See `Fleiss' Kappa <https://en.wikipedia.org/wiki/Fleiss%27_kappa>`_.
    :param M: a matrix of shape (:attr:`N`, :attr:`k`) where `N` is the number of subjects and `k` 
    is the number of categories into which assignments are made. `M[i, j]` 
    represent the number of raters who assigned the `i`th subject to the `j`th category.
    :type M: numpy matrix
    """
    N, k = M.shape  # N is # of items, k is # of categories
    n_annotators = float(np.sum(M[0, :]))  # # of annotators
    p = np.sum(M, axis=0) / (N * n_annotators)
    P = (np.sum(M * M, axis=1) - n_annotators) / (n_annotators * (n_annotators - 1))
    Pbar = np.sum(P) / N
    PbarE = np.sum(p * p)

    kappa = (Pbar - PbarE) / (1 - PbarE)

    return kappa



In [12]:
def score_to_rank(scores):
    e, n, k = scores.shape
    scores = scores.copy().reshape(e*n, k)
    max_scores = np.amax(scores, 1, keepdims=True)
    scores = scores== max_scores
    return scores.reshape(e, n, k).astype(np.uint8)
max_scores = score_to_rank(all_scores)

max_scores.shape

(4, 177, 8)

In [13]:
import krippendorff as kp

for i, alg in enumerate(algos):
    score = kp.alpha(reliability_data=max_scores[:,:,i], level_of_measurement="nominal")
    print(f'Algos: {alg}, Score: {score}')

Algos: Deep Lift Shape, Score: 0.04447005714834551
Algos: Focused Attention, Score: 0.06743890447753975
Algos: GradCAM, Score: -0.019692333312516963
Algos: Guided GradCAM, Score: 0.04014184397163134
Algos: Integrated Gradients, Score: -0.00826416060262547
Algos: LRP, Score: 0.05158867042275117
Algos: Attention Rollout, Score: 0.008372680649685105
Algos: ViT LRP, Score: -0.0008122157244963013


In [14]:
import itertools
from sklearn.metrics import cohen_kappa_score
dices = []
alphas = []
for p in itertools.combinations(range(4), 2):
    x1 = all_scores[p[0]]
    x2 = all_scores[p[1]]
    dice = np.sum(x1[x2==1])*2.0 / (np.sum(x1) + np.sum(x2))
    dices.append(dice)
np.mean(dices)

0.30702716668051205

In [15]:
e, u, a = all_scores.shape
M = all_scores.reshape(e, u*a)
for pair in itertools.combinations(range(4), 2):
    score = kp.alpha(reliability_data=M[pair, :])
    print(f"Pair: {pair}, score: {score}")

Pair: (0, 1), score: 0.30596006669452425
Pair: (0, 2), score: 0.3230255205159652
Pair: (0, 3), score: 0.22971817692405827
Pair: (1, 2), score: 0.6673233155060422
Pair: (1, 3), score: 0.4994664819065008
Pair: (2, 3), score: 0.44208148559362515


In [16]:
kp.alpha(reliability_data=M)

0.4069195035140569