# Results: Model Assisted Labeling

In [None]:
# Imports
from nft_helpers.plot import plot_bars
import matplotlib.pyplot as plt
from pandas import read_csv, DataFrame

from os import makedirs
from os.path import join, isfile

from nft_helpers import compile_model_results
from nft_helpers.utils import load_yaml, im_to_txt_path
from nft_helpers.interactive import model_bars, confusion_matrices
from nft_helpers.yolov5.utils import read_yolo_label

# Parameters
# Project configuration and the results compiled from the models directory.
cf = load_yaml()
model_results = compile_model_results(join(cf.datadir, 'models'))

# Bar styling passed through to the plotting helpers used in this notebook.
kwargs = dict(
    errorbar='se',
    edgecolor='k',
    lw=3,
    width=0.5,
    errcolor='k',
    errwidth=3,
    capsize=0.25,
)

# Input directory (model assisted labeling files) and output directory
# (results); the output directory is created if it does not exist yet.
dataset_dir = join(cf.datadir, 'datasets/model-assisted-labeling')
save_dir = join(cf.datadir, 'results/model-assisted-labeling')
makedirs(save_dir, exist_ok=True)

# Configured colors with a leading '#' added.
COLORS = ['#{}'.format(color) for color in cf.colors]

## Models Trained with *n*-Model Agreement Labels
Models are trained on a large dataset whose labels are created from the agreement of model predictions.

Workflow:
1. Train models on human-annotated datasets (annotator-specific models).
2. Select a large dataset of ROIs without labels.
3. Predict on these unlabeled ROIs with the annotator models.
4. Combine model predictions using *n*-consensus\* voting.
5. Train models with the now-labeled datasets.

\* *n* can range from a single model (least stringent) to all models having to agree (most stringent).

In [None]:
# Interactive - consensus labeled models only (1 through 8 models).
models = [f'{n}-models-consensus' for n in range(1, 9)]
datasets = ['val', 'test-roi', 'test-external-roi']

model_bars(model_results, datasets=datasets, models=models, **kwargs)

In [None]:
# Tailor the plot for the figure.
# Plots the 8 consensus models on the Emory test dataset ('test-roi'),
# showing the macro F1 score computed over all labels.
plot_df = model_results[
    (model_results.label == 'all') & (model_results.dataset == 'test-roi')
]

# Figure-specific styling goes into a copy so the shared `kwargs` dict is not
# mutated; otherwise the hatch/color set here would leak into every later
# cell that passes **kwargs, depending on which cells were run first.
plot_kwargs = {**kwargs, 'hatch': '/', 'color': '#FFC107'}

ax = plot_bars(
    plot_df,
    x_col='model',
    y_col='macro F1 score',
    order=[f'{n}-models-consensus' for n in range(1, 9)],
    x_tick_rotation=0,
    **plot_kwargs
)

# Annotate each bar with its value.
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', padding=20)

# Replace the long model names with the number of models in consensus.
ax.set_xticklabels(['1', '2', '3', '4', '5', '6', '7', '8'])
plt.xlabel('Number of Models in Consensus', fontsize=18)
plt.title('Performance on Emory Holdout Dataset', fontsize=18,
          fontweight='bold')
plt.ylim([0, 1])
plt.savefig(
    join(save_dir, 'consensus-model-performance.png'), dpi=300,
    bbox_inches='tight'
)
plt.show()

## Model Assisted Labeling
Compare to 4-models-consensus, since that is where I started the model-assisted labeling.

In [None]:
# Interactive for model assisted labeling models (iterations 1 through 8).
models = [f'iteration{n}' for n in range(1, 9)]
datasets = ['test-roi', 'test-external-roi']

model_bars(model_results, datasets=datasets, models=models, **kwargs)

In [None]:
# Plot the Pre-NFT F1 score of the iteration models on 'test-roi'.
plot_df = model_results[
    (model_results.label == 'Pre-NFT') & (model_results.dataset == 'test-roi')
]

# Figure-specific styling goes into a copy so the shared `kwargs` dict is not
# mutated; otherwise the hatch/color set here would leak into every later
# cell that passes **kwargs.
plot_kwargs = {**kwargs, 'hatch': cf.hatches[1], 'color': COLORS[1]}

ax = plot_bars(
    plot_df,
    x_col='model',
    # The rows are filtered to the 'Pre-NFT' label and the y-axis is labeled
    # 'Pre-NFT F1 Score', so the Pre-NFT column is the one to plot (the iNFT
    # column was referenced here before, which did not match either).
    y_col='F1 score (Pre-NFT)',
    order=[
        'iteration1', 'iteration2', 'iteration3', 'iteration4', 'iteration5',
        'iteration6', 'iteration7', 'iteration8', 'iteration8-cleaned-only'
    ],
    x_tick_rotation=0,
    **plot_kwargs
)

# Annotate each bar with its value.
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', padding=20)

# Short tick labels: iteration number, '8c' for the cleaned-only model.
ax.set_xticklabels(['1', '2', '3', '4', '5', '6', '7', '8', '8c'])
plt.xlabel('Iteration Number', fontsize=18)
plt.ylabel('Pre-NFT F1 Score', fontsize=18)
plt.title('Model-assisted Labeling Models', fontsize=18,
          fontweight='bold')
plt.ylim([0, 1])
# Saving is currently disabled; uncomment to write the figure to save_dir.
# plt.savefig(
#     join(save_dir, 'model-assisted-labeling.png'), dpi=300,
#     bbox_inches='tight'
# )
plt.show()

## Table {to be determined}: Iteration models

In [None]:
# Build the results table for the Emory test dataset ('test-roi') only.
models = [
    'iteration1', 'iteration2', 'iteration3', 'iteration4', 'iteration5',
    'iteration6', 'iteration7', 'iteration8', 'iteration8-Amygdala',
    'iteration8-Hippocampus', 'iteration8-Temporal', 'iteration8-Occipital',
    'iteration8-cleaned-only', '2-models-consensus',
]

# Display name for each entry of `models`, in the same order.
model_labels = [
    'iter. 1', 'iter. 2', 'iter. 3', 'iter. 4', 'iter. 5',
    'iter. 6', 'iter. 7', 'iter. 8', 'amygdala', 'hippocampus',
    'temporal', 'occipital', 'QC ROIs', 'best consensus',
]

# The first table row acts as a second header line naming the metric in each
# column; the DataFrame columns below carry the class names.
iteration_df = [[
    '', 'Precision', 'Recall', 'F1 score', 'Precision', 'Recall', 'F1 Score',
    'Macro F1-score'
]]

for model_label, model in zip(model_labels, models):
    # Rows of this model evaluated on the test ROIs.
    on_test = (model_results.model == model) & \
        (model_results.dataset == 'test-roi')
    model_df = model_results[on_test]

    row = [model_label]

    # Precision, recall and F1 score (mean ± std) for both classes.
    for cls in ('Pre-NFT', 'iNFT'):
        cls_df = model_df[model_df.label == cls]

        for metric in ('Precision', 'Recall', 'F1 score'):
            scr = cls_df[f'{metric} ({cls})']
            row.append(f'{scr.mean():.2f} ± {scr.std():.2f}')

    # Macro F1 score over all labels.
    scr = model_df[model_df.label == 'all']['macro F1 score']
    row.append(f'{scr.mean():.2f} ± {scr.std():.2f}')

    iteration_df.append(row)

iteration_df = DataFrame(
    iteration_df,
    columns=['', 'Pre-NFT', '', '', 'iNFT', '', '', '']
)
iteration_df.to_csv(join(save_dir, 'iteration-models.csv'), index=False)
iteration_df

## Model Assisted Labeling Time Plot

In [None]:
# Read the ROIs used during model assisted labeling.
# The path is built from `dataset_dir` (as the timer logs are) instead of a
# hard-coded absolute path, so it follows the configured data directory.
# NOTE(review): assumes cf.datadir resolves to the previously hard-coded
# '/workspace/data' location -- confirm.
mal_df = read_csv(join(dataset_dir, 'model-assisted-labeling.csv'))
mal_df.iloc[0].fp

In [None]:
# Plot performance (micro F1-score) for the iteration models in the same plot
# as the labeling time per object.
xs = range(1, 9)

y1 = []  # mean micro F1 score of each iteration model on 'test-roi'
y2 = []  # total logged time divided by the number of labeled boxes

for x in xs:
    # Append the scalar mean (not a one-element list) so y1 is a flat series.
    y1.append(
        model_results[
            (model_results.model == f'iteration{x}')
            & (model_results.dataset == 'test-roi')
            & (model_results.label == 'all')
        ]['micro F1 score'].mean()
    )

    # Sum the integer entries of this iteration's timer log. Lines are
    # checked after stripping so blank lines (which still contain '\n') are
    # skipped instead of crashing int().
    # NOTE(review): entries are presumably seconds, given the axis label
    # below -- confirm.
    with open(join(dataset_dir, f'timer-logs/{x}.txt'), 'r') as fh:
        t = sum(int(line) for line in fh if line.strip())

    # Count the boxes in the label files of the ROIs from this iteration;
    # ROIs without a label file contribute nothing.
    n = 0

    for fp in mal_df[mal_df.iteration == x].fp:
        label_fp = im_to_txt_path(fp)

        if isfile(label_fp):
            n += len(read_yolo_label(label_fp))

    # Avoid ZeroDivisionError when no boxes were found; NaN leaves a gap in
    # the plotted line instead.
    y2.append(t / n if n else float('nan'))

fig, ax1 = plt.subplots()
ax1.plot(xs, y1, c=f'#{cf.colors[0]}', marker='o')
plt.yticks(fontweight='bold', fontsize=16)

# Second y-axis sharing the same x-axis for the time series.
ax2 = ax1.twinx()
ax2.plot(xs, y2, c=f'#{cf.colors[1]}', marker='^')
plt.yticks(fontweight='bold', fontsize=16)
fig.tight_layout()

ax1.legend(['Micro F1 Score'], bbox_to_anchor=(0.5, -0.32, 0.5, 0.5))
ax2.legend(['Iteration Time'], loc='lower right')
ax1.set_ylabel('Micro F1 Score', fontsize=18, fontweight='bold')
ax2.set_ylabel('Time for each object (seconds)', fontsize=18, fontweight='bold')
ax1.set_xlabel(
    'Model-assisted Labeling Iteration', fontsize=18, fontweight='bold')
plt.title('Model performance & Iteration Time', fontsize=18, fontweight='bold')

# Same frame styling on both axes: thick spines, hidden top, thick ticks.
for axis in (ax1, ax2):
    axis.spines['top'].set_visible(False)

    for side in ('right', 'bottom', 'left'):
        axis.spines[side].set_linewidth(3)

    axis.tick_params(
        axis='both', which='both', direction='out', length=10, width=3)

plt.savefig(
    join(save_dir, 'iteration-performance-and-time.png'), dpi=300,
    bbox_inches='tight'
)
plt.show()

## Compare annotator models against best set of models

In [None]:
# Interactive comparison of the annotator models against the consensus and
# iteration models.
models = [
    'novice1', 'novice2', 'novice3', 'expert1', 'expert2', 'expert3', 'expert4',
    'expert5', '2-models-consensus', '4-models-consensus', 'iteration8',
    'iteration8-cleaned-only'
]

# Assign to `datasets` (this list was previously stored in an unused
# `datasets3`), so the call below no longer depends on whatever value an
# earlier-run cell left in `datasets`.
datasets = ['test-roi', 'test-external-roi']

model_bars(
    model_results,
    datasets=datasets,
    models=models,
    **kwargs
)

## Confusion Matrices

In [None]:
# Interactive confusion matrices for every model present in the results,
# listed in sorted order.
datasets = ['test-roi', 'test-external-roi']
models = sorted(model_results.model.unique())

confusion_matrices(model_results, models=models, datasets=datasets)

## Extras

## Model trained with only cleaned up ROIs

In [None]:
# Interactive comparison of the cleaned-only model with the full iteration 8
# model and the 2-models-consensus model.
models = [
    'iteration8', 'iteration8-cleaned-only', '2-models-consensus'
]

# Assign to `datasets` (this list was previously stored in an unused
# `datasets3`), so the call below no longer depends on whatever value an
# earlier-run cell left in `datasets`.
datasets = ['test-roi', 'test-external-roi']

model_bars(
    model_results,
    datasets=datasets,
    models=models,
    **kwargs
)

### Models Trained on Region Datasets

In [None]:
# Interactive comparison of the iteration 8 model with its region-specific
# variants.
models = [
    'iteration8', 'iteration8-Hippocampus', 'iteration8-Amygdala',
    'iteration8-Temporal', 'iteration8-Occipital'
]

# Assign to `datasets` (this list was previously stored in an unused
# `datasets3`), so the call below no longer depends on whatever value an
# earlier-run cell left in `datasets`.
datasets = ['test-roi', 'test-external-roi']

model_bars(
    model_results,
    datasets=datasets,
    models=models,
    **kwargs
)

## Models with Additional Background ROIs

In [None]:
# Interactive comparison of the cleaned-only model against the model listed as
# 'additional-background-rois'.
datasets = ['val', 'test-roi', 'test-external-roi']
models = ['iteration8-cleaned-only', 'additional-background-rois']

model_bars(model_results, datasets=datasets, models=models, **kwargs)

## Choose Which Models to Plot

In [None]:
# Interactive plot for a hand-picked set of models; edit the list to choose
# which models are shown.
datasets = ['val', 'test-roi', 'test-external-roi']
models = [
    'iteration8-cleaned-only',
    'additional-background-rois',
    'inter-annotator-agreement',
]

model_bars(model_results, datasets=datasets, models=models, **kwargs)