# Results: Annotator Models

In [None]:
# Imports
from pandas import read_csv, DataFrame
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np

from os import makedirs
from os.path import join

from nft_helpers import compile_model_results
from nft_helpers.utils import load_yaml, imread
from nft_helpers.interactive import model_bars, confusion_matrices
from nft_helpers.box_and_contours import line_to_xys
from nft_helpers.girder_dsa import login, get_tile_metadata

In [None]:
# Parameters: paths, model/dataset names, plotting options, and the DSA client.
cf = load_yaml()

# Directory where the tables produced by this notebook are written.
save_dir = join(cf.datadir, 'results/annotator-models')
makedirs(save_dir, exist_ok=True)

# Parent results directory.
results_dir = join(cf.datadir, 'results')

# The annotator names double as the model names.
annotators = [
    'novice1', 'novice2', 'novice3', 'expert1', 'expert2', 'expert3', 'expert4',
    'expert5'
]
models = list(annotators)  # separate list object, same names
datasets = ['val', 'test', 'test-roi', 'test-external-roi']

# Compile the model results a single time; every later cell shares this value.
model_results = compile_model_results(join(cf.datadir, 'models'))

kwargs = {  # plotting
    'errorbar': 'se', 'edgecolor': 'k', 'lw': 3, 'width': 0.5, 'errcolor': 'k',
    'errwidth': 3, 'capsize': 0.25
}

# Authenticated client for the DSA API; credentials come from the config.
gc = login(join(cf.dsaURL, 'api/v1'), username=cf.user, password=cf.password)

## Table: Annotations Summary

In [None]:
# Per-annotator summary of annotation counts and coverage.
ann_df = read_csv('csvs/annotations.csv').fillna('')

# One row per annotator; the DataFrame is built from these rows afterwards.
summary_rows = []

for annotator in annotators:
    # Skip blank names and the 'novice4' annotator.
    if not annotator or annotator == 'novice4':
        continue

    # Subset to the annotations made by this annotator.
    annotator_df = ann_df[ann_df.annotator == annotator]

    # Number of annotations per label; a missing label counts as zero.
    class_counts = annotator_df.label.value_counts()

    summary_rows.append([
        annotator,
        class_counts.get('Pre-NFT', 0),
        class_counts.get('iNFT', 0),
        len(annotator_df.roi_im_path.unique()),
        len(annotator_df.wsi_name.unique()),
        len(annotator_df.case.unique()),
    ])

summary_columns = [
    'Annotator', 'Pre-NFT\nAnnotations', 'iNFT\nAnnotations', '# ROIs',
    '# WSIs', '# Cases'
]
ann_summary_df = DataFrame(summary_rows, columns=summary_columns)

ann_summary_df.to_csv(join(save_dir, 'annotations-summary.csv'), index=False)
ann_summary_df

## Table: Annotator Model Performance

In [None]:
# Table showing model performance: mean ± std. dev. of the F1 scores of each
# annotator model on the 'val', 'test', and 'test-roi' datasets.


def _mean_pm_std(scores):
    """Format a Series of scores as 'mean ± std', two decimals each."""
    return f'{scores.mean():.2f}' + u' \u00B1 ' + f'{scores.std():.2f}'


# (value of the `label` column, score column) per metric group, in the order
# the groups appear in the table.
metric_specs = [
    ('Pre-NFT', 'F1 score (Pre-NFT)'),
    ('iNFT', 'F1 score (iNFT)'),
    ('all', 'macro F1 score'),
]

# Dataset keys matching the 'Val', 'Test', 'Emory-Test' sub-columns.
table_datasets = ['val', 'test', 'test-roi']

# The first row acts as a sub-header naming the dataset of each column.
perf_rows = [
    ['', 'Val', 'Test', 'Emory-Test', 'Val', 'Test', 'Emory-Test', 'Val',
     'Test', 'Emory-Test']
]

for annotator in annotators:
    ann_results = model_results[model_results.model == annotator]

    row = [annotator]

    # Three datasets for each metric group, giving nine score cells per row.
    for label, score_col in metric_specs:
        for dataset in table_datasets:
            subset = ann_results[
                (ann_results.dataset == dataset) & (ann_results.label == label)
            ]
            row.append(_mean_pm_std(subset[score_col]))

    perf_rows.append(row)

perf_df = DataFrame(
    perf_rows,
    columns=['', 'Pre-NFT F1 Score', '', '', 'iNFT F1 Score', '', '',
             'Macro F1 Score', '', '']
)
# Every cell is already a formatted string; float_format is kept unchanged.
perf_df.to_csv(join(save_dir, 'annotator-model-results.csv'),
               float_format='%11.2f', index=False)
perf_df

## Model Result Interactive
Allow viewing results with different datasets and metrics for the annotator models.

In [None]:
# Interactive bar plots of the annotator-model results; the plotting options
# stored in `kwargs` are forwarded as keyword arguments.
model_bars(model_results, datasets=datasets, models=models, **kwargs)

## Confusion Matrices
Allow selecting a dataset and model to show its confusion matrix.

Add a toggle for showing a version with Pre-NFT and iNFT grouped into a single class.

In [None]:
# Interactive confusion matrices for the annotator models.
confusion_matrices(
    model_results,
    datasets=datasets,
    models=models,
)

## Emory Hold-out Dataset Table
For each ROI in the Emory hold-out dataset, add the metadata to a table.

In [None]:
# Build the metadata table for the ROIs in the 'test-roi' group.

# ROI metadata, restricted to the hold-out group.
rois_df = read_csv('csvs/labeled-rois.csv')
rois_df = rois_df[rois_df.roi_group == 'test-roi']

# Case metadata, restricted to the cases that appear in those ROIs.
cases_df = read_csv('csvs/cases.csv')
cases_df = cases_df[cases_df.case.isin(rois_df.case.unique())]

# Tile metadata is requested once per WSI and reused for ROIs sharing that WSI.
tile_metadata_by_wsi = {}

holdout_rows = []

# ROI numbers in the table start at 1.
for roi_number, (_, r) in enumerate(rois_df.iterrows(), start=1):
    # Find case metadata (first matching row for this ROI's case).
    case_meta = cases_df[cases_df.case == r.case].iloc[0]

    if r.wsi_id not in tile_metadata_by_wsi:
        tile_metadata_by_wsi[r.wsi_id] = get_tile_metadata(gc, r.wsi_id)
    tile_metadata = tile_metadata_by_wsi[r.wsi_id]

    # Calculate the width and height of the ROI in microns.
    # Presumably mm_x / mm_y are millimetres per pixel - TODO confirm.
    # int() truncates any fractional micron.
    w = int(tile_metadata['mm_x'] * r.roi_width * 1000)
    h = int(tile_metadata['mm_y'] * r.roi_height * 1000)

    holdout_rows.append([
        roi_number,
        r.case,
        r.wsi_name,
        r.Braak_stage,
        r.region,
        w,
        h,
        case_meta.Primary_NP_Dx,
    ])

# Compile into table to save.
holdout_df = DataFrame(
    holdout_rows,
    columns=['ROI #', 'Case', 'WSI Filename', 'Braak Stage', 'Brain Region',
             'Width (microns)', 'Height (microns)',
             'Primary Neuropathology Diagnosis'
            ]
)

holdout_df.to_csv(join(save_dir, 'Emory-holdout-metadata.csv'), index=False)
holdout_df

## Average Size of ROIs

In [None]:
# Report the mean ROI size (in pixels) for the ROIs used in annotator models.
rois_df = read_csv('csvs/labeled-rois.csv')

# Keep only the ROIs that belong to the 'ROIv2' and 'ROIv1' groups.
in_selected_groups = rois_df.roi_group.isin(('ROIv2', 'ROIv1'))
rois_df = rois_df[in_selected_groups]

roi_widths = rois_df.roi_width.astype(int)
roi_heights = rois_df.roi_height.astype(int)

mean_width = np.mean(roi_widths)
mean_height = np.mean(roi_heights)

print(
    'Average size of ROIs in pixels (width x height): '
    f'{mean_width:.0f} x {mean_height:.0f}'
)

## Average Size of Large ROIs

In [None]:
# Load the model-assisted-labeling ROIs and keep only the 'ROIv3' group.
rois_df = read_csv('../data/datasets/model-assisted-labeling/rois.csv')
is_roiv3 = rois_df.group == 'ROIv3'
rois_df = rois_df[is_roiv3]

# NOTE(review): the size summary below is disabled, so this cell currently
# prints nothing. Confirm this CSV has roi_width / roi_height columns before
# re-enabling it.
# roi_widths = rois_df.roi_width.astype(int)
# roi_heights = rois_df.roi_height.astype(int)

# print(
#     'Average size of ROIs in pixels (width x height): '
#     f'{np.mean(roi_widths):.0f} x {np.mean(roi_heights):.0f}'
# )