# Introduction

This notebook creates plots that summarise results over all experiments and all knock-out types. The available metrics to compare are:

- Speed summary 
- Mask proportion summary

HTML files containing the interactive plots are saved in the directory of this notebook (`analysis/hbec/`) and can be shared:

- right-click the HTML file in the JupyterLab file explorer and click download
- the HTML file will open in the current browser, after which one can save it using `ctrl+s`
- this will save both the HTML file and a corresponding folder containing vega files
- when sharing the HTML file, one must also share the folder containing the vega files


**Note that when a new experiment has been added, an experiment abbreviation needs to be added to the function `generate_mask_and_speed_summary_df` in `/src/fam13a/utils.py`. This will keep the labels in the plots shorter.**

# Imports

In [None]:
import numpy as np
import pandas as pd
import scipy as sp
import os
import re
import cv2
import json

from matplotlib import pyplot as plt
from tqdm import tqdm
from fam13a import utils
from skimage.morphology import erosion, disk, closing

import altair as alt
from altair_saver import save

# Constants

In [None]:
PROJ_ROOT = utils.here()
# declare the data input directory
HBEC_ROOT = os.path.join(PROJ_ROOT, 'data', 'processed', 'hbec')
# List the experiment IDs found in the input directory. os.listdir returns
# entries in arbitrary order, so sort them to keep the experiment order
# reproducible between runs and machines.
experiment_list = sorted(os.listdir(HBEC_ROOT))
# Print explicitly: a bare expression that is not the last statement of a
# notebook cell is evaluated but never displayed.
print(experiment_list)
# set ncpus to the number of experiments in the experiment list - make
# sure this value is less than the number of cpus in the node
NCPUS = 11

# Create Summary DF

In [None]:
# Build the summary table for every experiment in experiment_list; NCPUS is
# passed through to the helper (presumably the number of worker processes -
# confirm in fam13a.utils). Two data frames come back: the summary itself and
# a second frame of entries flagged by the helper (zero_mask_df).
summary_df, zero_mask_df = utils.generate_summary_df_for_all_experiments(experiment_list, NCPUS)

In [None]:
# Display the second frame returned by the summary helper so it can be
# inspected before plotting.
zero_mask_df

The `zero_mask_df` lists the data points that will be missing from the speed plots below. If the `mask_type` is `1_motion`, there is no average speed value for that data point for either the motion mask or the coordination mask. If the `mask_type` is `2_coordination`, there is no average speed value for that data point for the coordination mask only.

# Visualise Mask Summary Stats (Coordination vs Segmentation)

In [None]:
# Work on a speed-free view of the summary: drop the speed column, then
# remove the rows that are exact duplicates of one another.
mask_summary_df = summary_df.drop(columns=['speed']).drop_duplicates()

# Mean and standard deviation of mask_ratio per (group_id, mask_type).
ratio_per_group = mask_summary_df.groupby(['group_id', 'mask_type']).mask_ratio
std = ratio_per_group.std().reset_index().rename(columns={'mask_ratio': 'std'})
mean = ratio_per_group.mean().reset_index().rename(columns={'mask_ratio': 'mean'})
mask_agg_df = mean.merge(std, on=['mask_type', 'group_id'], how='inner')

# The same two statistics, additionally split by batch_id.
ratio_per_group_and_batch = mask_summary_df.groupby(
    ['group_id', 'mask_type', 'batch_id']
).mask_ratio
std = ratio_per_group_and_batch.std().reset_index().rename(
    columns={'mask_ratio': 'std'}
)
mean = ratio_per_group_and_batch.mean().reset_index().rename(
    columns={'mask_ratio': 'mean'}
)
mask_agg_df_per_group_per_batch = mean.merge(
    std, on=['mask_type', 'group_id', 'batch_id'], how='inner'
)

In [None]:
# Normalise the motion-mask ratios per experiment: every '1_motion' row of an
# experiment is divided by the mean mask_ratio of that experiment's
# '1_motion' rows whose group_id contains 'NT'.
normalised_mask_summary_df = mask_summary_df.copy()

# Neither mask_type nor group_id is modified below, so these row selectors
# only need to be computed once.
is_motion = normalised_mask_summary_df.mask_type == '1_motion'
is_nt = normalised_mask_summary_df.group_id.str.contains('NT')

for exp in normalised_mask_summary_df.experiment.unique():
    in_experiment = normalised_mask_summary_df.experiment == exp
    nt_average = np.mean(
        normalised_mask_summary_df.loc[
            is_motion & is_nt & in_experiment, 'mask_ratio'
        ]
    )
    # Divide the experiment's motion rows in place by its NT average.
    normalised_mask_summary_df.loc[
        is_motion & in_experiment, 'mask_ratio'
    ] /= nt_average

In [None]:
def produce_porportion_summary_plot(df, x_axis_label):
    """Build a layered Altair chart of ``mask_ratio`` per ``group_id``.

    Three layers share one 600x250 base chart built from ``df``:

    * large unfilled squares at the mean ``mask_ratio`` of each group,
    * small filled squares for every individual row (tooltip: ``batch_id``),
    * error bars with ``extent='stdev'`` around each group.

    All layers are coloured by ``experiment`` and faded to 0.2 opacity when
    their experiment is not picked through the legend-bound selections.

    NOTE(review): the y-axis and colour sort orders are read from the
    module-level names ``knockout_order`` and ``exp_order``; they must be
    defined before this function is called - confirm where they are set.

    Args:
        df: data frame handed to ``alt.Chart``; the encodings reference the
            columns ``mask_ratio``, ``group_id``, ``experiment`` and
            ``batch_id``.
        x_axis_label: title shown on the x axis of the mean-marker layer.

    Returns:
        The layered chart (mean markers + individual points + error bars).
    """
    palette = 'dark2'

    # Small factories so that every layer still receives its own freshly
    # constructed encoding objects.
    def colour_by_experiment():
        return alt.Color(
            'experiment', scale=alt.Scale(scheme=palette), sort=exp_order
        )

    def titled_group_axis():
        return alt.Y(
            'group_id:N', axis=alt.Axis(title='experiment'), sort=knockout_order
        )

    def legend_selection():
        return alt.selection_multi(fields=['experiment'], bind='legend')

    def fade_unless(picked):
        return alt.condition(picked, alt.value(1), alt.value(0.2))

    canvas = alt.Chart(df).properties(width=600, height=250)

    # Layer 1: one large hollow square at the mean of each group.
    mean_pick = legend_selection()
    mean_markers = canvas.mark_square(
        filled=False, color='black', size=300
    ).encode(
        x=alt.X(
            'mask_ratio:Q', aggregate='mean', axis=alt.Axis(title=x_axis_label)
        ),
        y=titled_group_axis(),
        color=colour_by_experiment(),
        opacity=fade_unless(mean_pick),
    ).add_selection(mean_pick).interactive()

    # Layer 2: every individual data point, with its own legend selection.
    point_pick = legend_selection()
    individual_markers = canvas.mark_square(size=25, filled=True).encode(
        x=alt.X('mask_ratio:Q'),
        y=titled_group_axis(),
        color=colour_by_experiment(),
        tooltip='batch_id:N',
        opacity=fade_unless(point_pick),
    ).add_selection(point_pick).interactive()

    # Layer 3: standard-deviation error bars, tied to the same selection as
    # the mean markers.
    spread_bars = canvas.mark_errorbar(extent='stdev').encode(
        x=alt.X('mask_ratio:Q'),
        y=alt.Y('group_id:N', sort=knockout_order),
        color=colour_by_experiment(),
        opacity=fade_unless(mean_pick),
    ).add_selection(mean_pick).interactive()

    return mean_markers + individual_markers + spread_bars

In [None]:
# a: raw motion-mask ratios.
raw_motion_rows = mask_summary_df.loc[mask_summary_df.mask_type == '1_motion']
a = produce_porportion_summary_plot(raw_motion_rows, 'motion mask')

# b: motion-mask ratios after NT normalisation.
normalised_motion_rows = normalised_mask_summary_df.loc[
    normalised_mask_summary_df.mask_type == '1_motion'
]
b = produce_porportion_summary_plot(
    normalised_motion_rows, 'motion mask (NT normalised)'
)

# c: raw coordination-mask ratios.
coordination_rows = mask_summary_df.loc[
    mask_summary_df.mask_type == '2_coordination'
]
c = produce_porportion_summary_plot(coordination_rows, 'coordinated mask')

In [None]:
# Put the two motion-mask charts side by side, with the coordination chart
# underneath, and enlarge the axis, legend and header fonts.
stacked_layout = (a | b) & c
visualise_chart = (
    stacked_layout
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .configure_legend(labelFontSize=24)
    .configure_header(labelFontSize=24)
)

# Write the interactive chart to an HTML file, then display it in the cell.
visualise_chart.save('mask-proportion-summary-across-all-experiments.html')
visualise_chart

In [None]:
# Side-by-side view of the raw and NT-normalised motion-mask charts only,
# with the same enlarged fonts as the combined chart. Built once: the
# original cell constructed this identical chart twice.
vc = (a | b).configure_axis(
    labelFontSize=18,
    titleFontSize=18
).configure_legend(
    labelFontSize=24
).configure_header(
    labelFontSize=24
)

# save csv files
mask_summary_df.to_csv('mask-proportion-summary-across-all-experiments.csv')
normalised_mask_summary_df.to_csv('mask-proportion-summary-across-all-experiments-normalised.csv')

# Display the chart; only the last expression of a notebook cell is rendered.
vc