# Results: Braak Stage Inter-rater Agreement Analysis

In [None]:
# imports
from pandas import read_csv, DataFrame
from itertools import combinations
from sklearn.metrics import cohen_kappa_score
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

from os import makedirs
from os.path import join

from nft_helpers.girder_dsa import login
from nft_helpers.utils import load_yaml
from nft_helpers.plot import format_plot_edges, plot_histogram

# Load the project configuration and make sure the output directory for this
# analysis exists before any cell tries to write into it.
cf = load_yaml()
save_dir = join(cf.datadir, 'results/inter-rater-agreement')
# exist_ok expects a boolean. The previous string 'True' only worked because
# any non-empty string is truthy (so would 'False').
makedirs(save_dir, exist_ok=True)
random_state = 68  # seed for the stochastic steps (bootstrap resampling)

In [None]:
# Authenticate girder client.
# The API endpoint is a URL, so it is assembled with explicit forward slashes.
# os.path.join would insert the OS path separator (a backslash on Windows),
# which produces an invalid URL.
api_url = f"{str(cf.dsaURL).rstrip('/')}/api/v1"
gc = login(api_url, username=cf.user, password=cf.password)

## Dataset Overview
52 Emory cases with regions: hippocampus*, amygdala, and the temporal & occipital cortex.

\* Two cases had both a left and a right hippocampus WSI.

In [None]:
# Case-level metadata, restricted to the inference cohort and ordered by case.
cases_df = read_csv('csvs/cases.csv').sort_values(by='case')
in_cohort = cases_df.cohort == 'Inference-Cohort-1'
cases_df = cases_df[in_cohort].reset_index(drop=True)

# Publication-friendly race labels (applied as a whole-frame value replace).
race_labels = {
    'Caucasian': 'White',
    'Black / African American': 'African American',
}
cases_df = cases_df.replace(race_labels)

# WSI table of the annotated cohort. It links each case to a DSA item whose
# metadata holds the fields missing from the cases CSV (CERAD below).
# NOTE(review): cases come from 'Inference-Cohort-1' while WSIs are filtered
# to 'Annotated-Cohort' - confirm this pairing is intended.
wsi_df = read_csv('csvs/wsis.csv')
wsi_df = wsi_df[wsi_df.cohort == 'Annotated-Cohort']

demographic_columns = [
    'case', 'Age at Death', 'Sex', 'Race/Ethnicity',
    'Primary NP Diagnosis', 'Braak Stage', 'Thal', 'CERAD NP Score', 'ABC'
]
demographic_rows = []

for idx, case_row in cases_df.iterrows():
    # Ages of 90 and above are reported as the string '90+'.
    age = int(case_row.age_at_death)
    age_label = age if age < 90 else '90+'

    # Get missing metadata from the DSA item referenced by the first WSI row
    # of this case.
    first_wsi = wsi_df[wsi_df.case == case_row.case].iloc[0]
    item_meta = gc.getItem(first_wsi.parent_id)['meta']

    sex_code = 'F' if case_row.sex == 'female' else 'M'

    demographic_rows.append((
        idx + 1,  # 1-based case number
        age_label,
        sex_code,
        case_row.race,
        case_row.Primary_NP_Dx,
        case_row.Braak_stage,
        int(case_row.Thal),
        int(item_meta['CERAD']),
        int(case_row.ABC),
    ))

demographics = DataFrame(demographic_rows, columns=demographic_columns)
demographics.to_csv(join(save_dir, 'demographics.csv'), index=False)
demographics.head()

## Braak Stage Inter-rater Analysis 
Following the methods described in Montine *et al.* 2016, Alzheimers Dement.

In [None]:
# Histogram of pair kappas.
# Compile the Braak stage for each case by the raters.

# Full ordinal Braak scale (0, I-VI). It is passed explicitly to
# cohen_kappa_score because the linear/quadratic weights are built from the
# *position* of each label among the labels it sees. Without it, a stage that
# no rater in a pair used would be skipped and the remaining stages treated
# as adjacent, understating the disagreement penalty.
braak_labels = list(range(7))

stages = []
# na=False: rows without an annotator are excluded instead of breaking the
# boolean mask.
raters = sorted(
    wsi_df[wsi_df.annotator.str.contains('expert', na=False)]
    .annotator.unique()
)
cases = sorted(wsi_df.case.unique())

for i, case in enumerate(cases):
    row = [i+1]  # 1-based case number

    for rater in raters:
        # The stage is read from the first WSI row of this (case, rater) pair.
        rated = wsi_df[(wsi_df.case == case) & (wsi_df.annotator == rater)]
        row.append(int(rated.iloc[0].Braak_stage))

    stages.append(row)

# Index: cases
# Columns: experts
# Values: Braak stages
stages = DataFrame(stages, index=cases, columns=['case'] + raters)

# Values outside the label set would be silently dropped from the confusion
# matrix, so fail loudly instead.
assert stages[raters].isin(braak_labels).all().all(), \
    'Braak stages must be integers in the range 0-6.'

stages.to_csv(join(save_dir, 'Braak-stages.csv'), index=False)

# Pair kappa histogram.
pair_kappas = []

for pair in combinations(raters, 2):
    rater1, rater2 = sorted(pair)

    # Calculate the quadratic-weighted kappa on the full Braak scale.
    pair_kappas.append([
        f'{rater1}-{rater2}',
        cohen_kappa_score(stages[rater1], stages[rater2],
                          labels=braak_labels, weights='quadratic')
    ])

pair_kappas = DataFrame(pair_kappas, columns=['Raters', 'k'])

kwargs = {'binwidth': 0.050, 'binrange': [0, 1.], 'edgecolor': 'k',
          'color': 'w', 'linewidth': 2}

ax = plot_histogram(
    pair_kappas.k,
    spine_lw=2,
    x_label='Paired Kappas',
    title='Braak Stage',
    y_freq=1,
    **kwargs
)
plt.xlim([0, 1.])
plt.savefig(join(save_dir, 'pair-kappas-hist.png'), bbox_inches='tight',
            dpi=300)
plt.show()
display(stages.head())
display(pair_kappas.head())

In [None]:
# Perform jackknife approach on the raters: leave each rater out in turn and
# average the pairwise weighted kappas of the remaining raters.

# Full ordinal Braak scale (0, I-VI). cohen_kappa_score derives its quadratic
# weights from label positions, so the complete scale is passed to keep the
# distance between stages correct even when a pair never used some stage.
braak_labels = list(range(7))

jk_raters = []

for rater in raters:
    remaining = [r for r in raters if r != rater]

    # One kappa per remaining pair (plain floats, no nested lists).
    jk_kappas = [
        cohen_kappa_score(stages[first], stages[second],
                          labels=braak_labels, weights='quadratic')
        for first, second in combinations(remaining, 2)
    ]

    jk_raters.append([rater, np.mean(jk_kappas)])

jk_raters = DataFrame(jk_raters, columns=['Rater Removed', 'Average Kappa'])
jk_raters.to_csv(join(save_dir, 'jackknife-remove-rater.csv'), index=False)
jk_raters

In [None]:
# Bootstrap approach to calculate 95% CI of the mean pairwise kappa.

# Full ordinal Braak scale (0, I-VI). A resample can easily miss a stage;
# cohen_kappa_score builds its quadratic weights from label positions, so
# without the explicit scale the weighting would differ between resamples.
braak_labels = list(range(7))

# DataFrame.sample is called without random_state below, so it draws from
# NumPy's global generator; seeding it here makes the cell reproducible.
np.random.seed(random_state)
bs_mean_kappas = []

for _ in tqdm(range(1000)):
    # Sample the cases with replacement.
    bs_stages = stages.sample(frac=1, replace=True)

    bs_kappas = [
        cohen_kappa_score(bs_stages[first], bs_stages[second],
                          labels=braak_labels, weights='quadratic')
        for first, second in combinations(raters, 2)
    ]

    bs_mean_kappas.append(np.mean(bs_kappas))

# Percentile interval; np.percentile does not need pre-sorted input.
low_ci, high_ci = np.percentile(bs_mean_kappas, [2.5, 97.5])

In [None]:
# For each case plot a vertical line based on the stages provided by raters.
# Add average stage column and order the cases by it (x position = rank).
stages['Braak Stage'] = stages[raters].mean(axis=1)
stages = stages.sort_values(by='Braak Stage').reset_index(drop=True)

# Marker per agreement count, i.e. how many raters gave the same stage.
# Only counts 1-5 are mapped; more than five agreeing raters would raise a
# KeyError below.
scatter_map = {1: '.', 2: 'v', 3: 's', 4: 'P', 5: 'X'}

# Plot a scatter plot with vertical lines.
plt.figure(figsize=(10,5))

data = {}

for i, r in stages.iterrows():
    # Count how many raters assigned each stage to this case and group the
    # (case position, stage) points by that count.
    for stage, counts in r[raters].value_counts().items():
        counts = int(counts)

        if counts not in data:
            data[counts] = {'x': [], 'y': []}

        data[counts]['x'].append(i+1)
        data[counts]['y'].append(stage)

counts = sorted(data)
for count in counts:
    x, y = data[count]['x'], data[count]['y']

    # Label every series with its own count so the legend stays correct even
    # when some agreement count never occurs in the data.
    plt.scatter(x, y, marker=scatter_map[count], s=50,
                c=f'#{cf.colors[count-1]}', label=str(count))

plt.legend(fontsize=16, title='Rater Agreement (Count)', title_fontsize=16)

# Dashed line spanning the lowest to the highest stage given to each case.
for i, r in stages.iterrows():
    min_y = int(r[raters].min())
    max_y = int(r[raters].max())

    if min_y != max_y:
        plt.vlines(i+1, min_y, max_y, color='k', alpha=0.5, zorder=0,
                   linestyles='dashed')

ax = plt.gca()
format_plot_edges(ax)
ax.tick_params(axis='both', which='both', direction='out', length=10,
                width=3)
# Cases sit at x = 1..len(stages); leave one unit of margin on both sides so
# the last case is not clipped by the right axis edge.
plt.xlim([0, len(stages) + 1])

plt.xticks(np.arange(1, len(stages)+1), stages.case.tolist(), fontsize=10,
           rotation=270)

plt.yticks(
    ticks=np.arange(0, 7),
    labels=('0', 'I', 'II', 'III', 'IV', 'V', 'VI'),
    fontsize=18,
)
plt.ylabel('Braak Stage', fontsize=18)
plt.xlabel('Case', fontsize=18)
plt.title('Braak Stages', fontsize=18)
# plt.savefig(join(save_dir, 'Braak-stages.png'), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Summarize the pair-kappa statistics as a small plain-text report, save it
# next to the other outputs and echo it in the notebook.
report_title = 'Braak Stage Inter-rater Analysis'
kappa_values = pair_kappas.k

average_line = (
    f'   - Average: {np.mean(kappa_values):.4f} (range: '
    f'{min(kappa_values):.4f} - {max(kappa_values):.4f})'
)
ci_line = f'   - 95% CI: {low_ci:.4f} - {high_ci:.4f}  (1000 bootstraps)'

# Title, an underline of matching length, a blank line, then the statistics.
# The report has no trailing newline.
results = '\n'.join([
    report_title,
    '-' * len(report_title),
    '',
    "Weighted Cohen's Kappas between pair of raters:",
    average_line,
    ci_line,
])

report_path = join(save_dir, 'inter-rater-agreement.txt')
with open(report_path, 'w') as fh:
    fh.write(results)

print(results)