In [None]:
# Setting options for the plots
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for both the 'retina' and the 'svg' figure formats.
%config InlineBackend.figure_formats={'retina', 'svg'}
# Override the inline backend's rc parameters so that 'savefig.dpi' is 150.
%config InlineBackend.rc={'savefig.dpi': 150}

# Experiment Report 

In [None]:
import itertools
import os
import re
import pickle
import platform
import time
import warnings

from functools import partial
from os.path import abspath, exists, join

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
from matplotlib import pyplot as plt

from IPython import sys_info
from IPython.display import display, HTML, Image, Javascript, Markdown, SVG
from rsmtool.reader import DataReader
from rsmtool.writer import DataWriter
from rsmtool.utils import (float_format_func,
                           int_or_float_format_func,
                           compute_subgroup_plot_params,
                           bold_highlighter,
                           color_highlighter,
                           show_thumbnail)

from rsmtool.version import VERSION as rsmtool_version

# Import statsmodels with FutureWarnings silenced; the filter is scoped to
# this `with` block, so warnings raised elsewhere are unaffected.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=FutureWarning)
    import statsmodels.api as sm

# Use seaborn's 'notebook' plotting context for the figures in this report.
sns.set_context('notebook')

<style type="text/css">
  div.prompt.output_prompt { 
    color: white; 
  }
  
  span.highlight_color {
    color: red;
  }
  
  span.highlight_bold {
    font-weight: bold;  
  }
    
  @media print {
    @page {
      size: landscape;
      margin: 0cm 0cm 0cm 0cm;
    }

    * {
      margin: 0px;
      padding: 0px;
    }

    #toc {
      display: none;
    }

    span.highlight_color, span.highlight_bold {
        font-weight: bolder;
        text-decoration: underline;
    }

    div.prompt.output_prompt {
      display: none;
    }
    
    h3#Python-packages, div#packages {
      display: none;
    }
  }
</style>

In [None]:
# NOTE: you will need to set the following manually
# if you are using this notebook interactively.
#
# Every setting is read from an environment variable. `os.environ.get()`
# returns None for a variable that is not set, so `int(...)` for MIN_ITEMS
# and the `.split('%%')` calls below fail if those variables are missing.
experiment_id = os.environ.get('EXPERIMENT_ID')
description = os.environ.get('DESCRIPTION')
context = os.environ.get('CONTEXT')
train_file_location = os.environ.get('TRAIN_FILE_LOCATION')
test_file_location = os.environ.get('TEST_FILE_LOCATION')
output_dir = os.environ.get('OUTPUT_DIR')
figure_dir = os.environ.get('FIGURE_DIR')
model_name = os.environ.get('MODEL_NAME')
model_type = os.environ.get('MODEL_TYPE')
skll_objective = os.environ.get('SKLL_OBJECTIVE')
file_format = os.environ.get('FILE_FORMAT')
length_column = os.environ.get('LENGTH_COLUMN')
second_human_score_column = os.environ.get('H2_COLUMN')
scaled = os.environ.get('SCALED')
# boolean switches are passed as the string '1' when enabled
standardize_features = os.environ.get('STANDARDIZE_FEATURES') == '1'
use_scaled_predictions = scaled == '1'
exclude_zero_scores = os.environ.get('EXCLUDE_ZEROS') == '1'
feature_subset_file = os.environ.get('FEATURE_SUBSET_FILE')
min_items = int(os.environ.get('MIN_ITEMS'))
use_thumbnails = os.environ.get('USE_THUMBNAILS') == '1'
predict_expected_scores = os.environ.get('PREDICT_EXPECTED_SCORES') == '1'


# groups for analysis by prompt or subgroup.
# set to 'prompt' for the standard analysis or to
# 'prompt%%subgroup1%%subgroup2' for subgroup analysis
# (group names are separated by '%%').
groups_desc_string = os.environ.get('GROUPS_FOR_DESCRIPTIVES') 
groups_desc = groups_desc_string.split('%%')
groups_eval_string = os.environ.get('GROUPS_FOR_EVALUATIONS') 
groups_eval = groups_eval_string.split('%%')

# javascript path
javascript_path = os.environ.get("JAVASCRIPT_PATH")

In [None]:
# Counter that hands out consecutive thumbnail IDs, beginning with 1.
id_generator = itertools.count(start=1)

In [None]:
# Read `sort.js` from the javascript directory and inject it into the
# notebook output.
sort_script_path = join(javascript_path, "sort.js")
with open(sort_script_path, "r", encoding="utf-8") as script_handle:
    sort_script = script_handle.read()
    display(Javascript(data=sort_script))

In [None]:
# Opening sentence of the report: the experiment ID in bold, followed by
# the experiment description.
intro_template = '''This report presents the analysis for **{}**: {}'''
Markdown(intro_template.format(experiment_id, description))

In [None]:
# Collect one bullet per enabled option; the "Notes" block is only shown
# when at least one bullet was collected.
note_bullets = []
if use_thumbnails:
    note_bullets.append("""\n  - Images in this report have been converted to """
                        """clickable thumbnails.""")
if predict_expected_scores:
    note_bullets.append("""\n  - Predictions analyzed in this report are *expected scores*, """
                        """i.e., probability-weighted averages over all score points.""")

markdown_str = ''.join(note_bullets)
if markdown_str:
    markdown_str = '**Notes**:' + markdown_str
    display(Markdown(markdown_str))

In [None]:
# Show the current local time using the locale's date/time representation.
generation_time = time.strftime('%c')
HTML(generation_time)

In [None]:
%%html
<!-- Empty container; the javascript cell at the end of the notebook fills it with the table of contents. -->
<div id="toc"></div>

In [None]:
# Read in the training and testing features, both raw and pre-processed.
# Make sure that the `spkitemid` and `candidate` columns are read as strings
# to preserve any leading zeros.
# We filter DtypeWarnings that pop up mostly in very large files.
#
# NOTE: each data frame is only defined when its file exists; later cells
# detect a missing data frame by catching `NameError`, so no defaults are
# assigned here (except for `df_feature_subset_specs`).

string_columns = ['spkitemid', 'candidate']
converter_dict = {column: str for column in string_columns}

# `DtypeWarning` is exposed as `pandas.errors.DtypeWarning`; fall back to the
# legacy `pandas.io.common` location for pandas versions without `pd.errors`.
try:
    dtype_warning_class = pd.errors.DtypeWarning
except AttributeError:
    dtype_warning_class = pd.io.common.DtypeWarning


def _experiment_file(suffix):
    """Return the path `<output_dir>/<experiment_id>_<suffix>.<file_format>`."""
    return join(output_dir, '{}_{}.{}'.format(experiment_id, suffix, file_format))


with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=dtype_warning_class)

    # --- training data ---
    if exists(train_file_location):
        df_train_orig = DataReader.read_from_file(train_file_location)

    train_file = _experiment_file('train_features')
    if exists(train_file):
        df_train = DataReader.read_from_file(train_file, converters=converter_dict)

    train_metadata_file = _experiment_file('train_metadata')
    if exists(train_metadata_file):
        df_train_metadata = DataReader.read_from_file(train_metadata_file, converters=converter_dict)

    train_other_columns_file = _experiment_file('train_other_columns')
    if exists(train_other_columns_file):
        df_train_other_columns = DataReader.read_from_file(train_other_columns_file, converters=converter_dict)

    train_length_file = _experiment_file('train_response_lengths')
    if exists(train_length_file):
        df_train_length = DataReader.read_from_file(train_length_file, converters=converter_dict)

    train_excluded_file = _experiment_file('train_excluded_responses')
    if exists(train_excluded_file):
        df_train_excluded = DataReader.read_from_file(train_excluded_file, converters=converter_dict)

    train_responses_with_excluded_flags_file = _experiment_file('train_responses_with_excluded_flags')
    if exists(train_responses_with_excluded_flags_file):
        df_train_responses_with_excluded_flags = DataReader.read_from_file(train_responses_with_excluded_flags_file,
                                                                           converters=converter_dict)

    train_preproc_file = _experiment_file('train_preprocessed_features')
    if exists(train_preproc_file):
        df_train_preproc = DataReader.read_from_file(train_preproc_file, converters=converter_dict)

    # --- evaluation data ---
    if exists(test_file_location):
        df_test_orig = DataReader.read_from_file(test_file_location)

    test_file = _experiment_file('test_features')
    if exists(test_file):
        df_test = DataReader.read_from_file(test_file, converters=converter_dict)

    test_metadata_file = _experiment_file('test_metadata')
    if exists(test_metadata_file):
        df_test_metadata = DataReader.read_from_file(test_metadata_file, converters=converter_dict)

    test_other_columns_file = _experiment_file('test_other_columns')
    if exists(test_other_columns_file):
        df_test_other_columns = DataReader.read_from_file(test_other_columns_file, converters=converter_dict)

    test_human_scores_file = _experiment_file('test_human_scores')
    if exists(test_human_scores_file):
        df_test_human_scores = DataReader.read_from_file(test_human_scores_file, converters=converter_dict)

    test_excluded_file = _experiment_file('test_excluded_responses')
    if exists(test_excluded_file):
        df_test_excluded = DataReader.read_from_file(test_excluded_file, converters=converter_dict)

    test_responses_with_excluded_flags_file = _experiment_file('test_responses_with_excluded_flags')
    if exists(test_responses_with_excluded_flags_file):
        df_test_responses_with_excluded_flags = DataReader.read_from_file(test_responses_with_excluded_flags_file,
                                                                          converters=converter_dict)

    test_preproc_file = _experiment_file('test_preprocessed_features')
    if exists(test_preproc_file):
        df_test_preproc = DataReader.read_from_file(test_preproc_file, converters=converter_dict)

    # --- predictions, features and model parameters ---
    pred_preproc_file = _experiment_file('pred_processed')
    if exists(pred_preproc_file):
        df_pred_preproc = DataReader.read_from_file(pred_preproc_file, converters=converter_dict)

    feature_file = _experiment_file('feature')
    if exists(feature_file):
        df_features = DataReader.read_from_file(feature_file, converters=converter_dict)
        features_used = [c for c in df_features.feature.values]

    betas_file = _experiment_file('betas')
    if exists(betas_file):
        df_betas = DataReader.read_from_file(betas_file)

    # `feature_subset_file` is None when the environment variable is unset;
    # `exists(None)` raises TypeError, so check for a value first.
    if feature_subset_file and exists(feature_subset_file):
        df_feature_subset_specs = DataReader.read_from_file(feature_subset_file)
    else:
        df_feature_subset_specs = None

In [None]:
# Flag the human scores in the evaluation set as continuous when the `sc1`
# column of the predictions file differs from its rounded version.
continuous_human_score = False

if exists(pred_preproc_file):
    human_scores = df_pred_preproc['sc1']
    rounded_human_scores = np.round(human_scores)
    continuous_human_score = not human_scores.equals(rounded_human_scores)

## Description of the data

In [None]:
# Responses excluded because of flags. The two data frames are only defined
# when the loading cell found the corresponding file, so an undefined name
# is counted as zero excluded responses.
try:
    num_excluded_train = len(df_train_responses_with_excluded_flags)
except NameError:
    num_excluded_train = 0

try:
    num_excluded_test = len(df_test_responses_with_excluded_flags)
except NameError:
    num_excluded_test = 0

# the training-set percentage is only computed in the `rsmtool` context
if context == 'rsmtool':
    pct_excluded_train = round(100 * num_excluded_train / len(df_train_orig), 2)
pct_excluded_test = round(100 * num_excluded_test / len(df_test_orig), 2)

# show the section only if at least one response was excluded
if not (num_excluded_train == 0 and num_excluded_test == 0):
    flag_messages = ["### Responses excluded due to flags",
                     "Total number of responses excluded due to flags:"]
    if context == 'rsmtool':
        train_template = "Training set: {} responses ({:.1f}% of the original {} responses)"
        flag_messages.append(train_template.format(num_excluded_train,
                                                   pct_excluded_train,
                                                   len(df_train_orig)))
    test_template = "Evaluation set: {} responses ({:.1f}% of the original {} responses)"
    flag_messages.append(test_template.format(num_excluded_test,
                                              pct_excluded_test,
                                              len(df_test_orig)))
    for flag_message in flag_messages:
        display(Markdown(flag_message))



### Responses excluded due to non-numeric feature values or scores

In [None]:
# Count the rows listed in the excluded-responses files. The data frames are
# only defined when the loading cell found the corresponding file, so an
# undefined name is counted as zero excluded rows.
try:
    num_missing_rows_train = len(df_train_excluded)
except NameError:
    num_missing_rows_train = 0

# the training-set percentage is only computed in the `rsmtool` context
if context == 'rsmtool':
    pct_missing_rows_train = 100*num_missing_rows_train/len(df_train_orig)

# Catch only NameError (as for the training set) so that unrelated errors,
# including KeyboardInterrupt, are not silently swallowed.
try:
    num_missing_rows_test = len(df_test_excluded)
except NameError:
    num_missing_rows_test = 0
pct_missing_rows_test = 100*num_missing_rows_test/len(df_test_orig)

In [None]:
# Note displayed below the exclusion tables whenever `min_items` is positive.
excluded_candidates_template = ("Note: if a candidate had less than {} responses left for analysis after applying all filters, "
                                "all responses from that "
                                "candidate were excluded from further analysis.")
excluded_candidates_note = Markdown(excluded_candidates_template.format(min_items))

In [None]:
# Training-set exclusions are only reported in the `rsmtool` context.
if context == 'rsmtool':
    display(Markdown("**Training set**"))
    train_summary_template = 'Total number of excluded responses: {} ({:.1f}% of the original {})'
    display(Markdown(train_summary_template.format(num_missing_rows_train,
                                                   pct_missing_rows_train,
                                                   len(df_train_orig))))
    if num_missing_rows_train != 0:
        # show the composition of the excluded responses as a sortable table
        train_excluded_analysis_name = '{}_train_excluded_composition.{}'.format(experiment_id, file_format)
        train_excluded_analysis_file = join(output_dir, train_excluded_analysis_name)
        df_train_excluded_analysis = DataReader.read_from_file(train_excluded_analysis_file)
        train_excluded_table = df_train_excluded_analysis.to_html(classes=['sortable'],
                                                                  float_format=float_format_func,
                                                                  index=False)
        display(HTML(train_excluded_table))
        if min_items > 0:
            display(excluded_candidates_note)

In [None]:
# Evaluation-set exclusions are reported in every context.
display(Markdown('**Evaluation set**'))
test_summary_template = 'Total number of excluded responses: {} ({:.1f}% of the original {})'
display(Markdown(test_summary_template.format(num_missing_rows_test,
                                              pct_missing_rows_test,
                                              len(df_test_orig))))
if num_missing_rows_test != 0:
    # show the composition of the excluded responses as a sortable table
    test_excluded_analysis_name = '{}_test_excluded_composition.{}'.format(experiment_id, file_format)
    test_excluded_analysis_file = join(output_dir, test_excluded_analysis_name)
    df_test_excluded_analysis = DataReader.read_from_file(test_excluded_analysis_file)
    test_excluded_table = df_test_excluded_analysis.to_html(classes=['sortable'],
                                                            float_format=float_format_func,
                                                            index=False)
    display(HTML(test_excluded_table))
    if min_items > 0:
        display(excluded_candidates_note)

The rest of this report is based only on the responses that were not excluded above.

In [None]:
# Section heading depends on the context; any other context shows no heading.
composition_headings = {'rsmtool': '### Composition of the training and evaluation sets',
                        'rsmeval': '### Composition of the evaluation set'}
if context in composition_headings:
    display(Markdown(composition_headings[context]))

In [None]:
# Display the data composition table (candidate/speaker, prompt and response
# statistics for training and test), followed by the number of
# double-scored responses when human scores for the test set are available.
data_composition_name = '{}_data_composition.{}'.format(experiment_id, file_format)
data_composition_file = join(output_dir, data_composition_name)
df_data_desc = DataReader.read_from_file(data_composition_file)
data_composition_table = df_data_desc.to_html(classes=['sortable'],
                                              float_format=float_format_func,
                                              index=False)
display(HTML(data_composition_table))

try:
    # `df_test_human_scores` only exists if its file was found earlier
    has_second_score = df_test_human_scores['sc2'].notnull()
    num_double_scored_responses = len(df_test_human_scores[has_second_score])
except NameError:
    pass
else:
    if exclude_zero_scores:
        zeros_included_or_excluded = 'excluded'
    else:
        zeros_included_or_excluded = 'included'
    double_scored_template = ("Total number of double scored responses in the evaluation set" 
                              " used: {} (zeros {})")
    display(Markdown(double_scored_template.format(num_double_scored_responses,
                                                   zeros_included_or_excluded)))

In [None]:
# Human-human consistency section. It is rendered only when the consistency,
# degradation and disattenuated-correlation files all exist.
consistency_file = join(output_dir, '{}_consistency.{}'.format(experiment_id, file_format))
degradation_file = join(output_dir, '{}_degradation.{}'.format(experiment_id, file_format))
disattenuation_file = join(output_dir, '{}_disattenuated_correlations.{}'.format(experiment_id, file_format))
eval_file = join(output_dir, '{}_eval.{}'.format(experiment_id,
                                                 file_format))

if exists(consistency_file) and exists(degradation_file) and exists(disattenuation_file):
    df_consistency = DataReader.read_from_file(consistency_file, index_col=0)
    df_degradation = DataReader.read_from_file(degradation_file, index_col=0)
    df_dis_corrs = DataReader.read_from_file(disattenuation_file, index_col=0)
    df_eval = DataReader.read_from_file(eval_file, index_col=0)

    # ---- Human-human agreement ----
    markdown_strs = ['## Consistency']
    markdown_strs.append('*Note: this section assumes that the score used for evaluating machine scores '
                         'is the score assigned by the first rater.*')
    markdown_strs.append('### Human-human agreement')
    markdown_strs.append("This table shows the human-human agreement on the "
                         "double-scored evaluation data.")
    if continuous_human_score:
        markdown_strs.append('For the computation of `kappa` and `wtkappa` '
                             'human scores have been rounded to the nearest integer.')

    markdown_strs.append("The following are <span class='highlight_color'>highlighted </span>: ")
    markdown_strs.append(' - Exact agreement (`exact_agr`) < 50%')
    markdown_strs.append(' - Adjacent agreement (`adj_agr`) < 95%')
    markdown_strs.append(' - Quadratic weighted kappa (`wtkappa`) < 0.7')
    markdown_strs.append(' - Pearson correlation (`corr`) < 0.7')
    display(Markdown('\n'.join(markdown_strs)))

    # display the HTML for the table with the various formatters;
    # the thresholds match the bullet list displayed above
    formatter_exact_agr = partial(color_highlighter, low=50, high=100)
    formatter_adj_agr = partial(color_highlighter, low=95, high=100)
    formatter_wtkappa_corr = partial(color_highlighter, low=0.7)
    formatter_dict = {'exact_agr': formatter_exact_agr,
                      'adj_agr': formatter_adj_agr,
                      'wtkappa': formatter_wtkappa_corr,
                      'corr': formatter_wtkappa_corr}
    display(HTML(df_consistency.to_html(index=False,
                                        escape=False,
                                        float_format=float_format_func,
                                        formatters=formatter_dict)))

    # ---- Degradation ----
    markdown_strs = ['### Degradation']
    markdown_strs.append('The next table shows the degradation in the evaluation metrics '
                         '(`diff`) when comparing the machine (`H-M`) to a second human (`H-H`). '
                         'A positive degradation value indicates better human-machine performance. '
                         'Note that the human-machine agreement is computed on the full '
                         'dataset (to get a reliable estimate) whereas the human-human '
                         'agreement is computed on the subset of responses that were double-scored.')
    markdown_strs.append("\nThe following degradation values are "
                         "<span class='highlight_color'>highlighted</span>")
    markdown_strs.append(' - `corr` < -0.1')
    markdown_strs.append(' - `wtkappa` < -0.1')
    display(Markdown('\n'.join(markdown_strs)))

    # Restrict the evaluation metrics to the columns of the degradation
    # table and repeat the human-human row once per row of `df_eval`, so
    # that the three tables share the same index and columns.
    df_eval_for_degradation = df_eval[df_degradation.columns].copy()
    df_consistency_for_degradation = pd.concat([df_consistency]*len(df_eval))
    df_consistency_for_degradation = df_consistency_for_degradation[df_degradation.columns].copy()
    df_consistency_for_degradation.index = df_eval_for_degradation.index

    # label the origin of each row before stacking the three tables
    df_consistency_for_degradation['type'] = 'H-H'
    df_eval_for_degradation['type'] = 'H-M'
    df_degradation['type'] = 'diff'

    df = pd.concat([df_consistency_for_degradation, df_eval_for_degradation, df_degradation])
    df = df[['type','corr', 'kappa', 'wtkappa', 'exact_agr', 'adj_agr', 'SMD']]
    # group the H-H / H-M / diff rows under their original index value
    df = df.reset_index()
    df = df.set_index(['index', 'type']).sort_index(level='index')
    df.index.names = [None, None]

    # display the HTML for the table with the various formatters
    formatter_corr = partial(color_highlighter, low=-0.1, high=100)
    formatter_wtkappa = partial(color_highlighter, low=-0.1, high=100)
    formatter_dict = {'corr': formatter_corr, 'wtkappa': formatter_wtkappa}
    display(HTML(df.to_html(float_format=float_format_func,
                            formatters=formatter_dict, escape=False)))

    # ---- Disattenuated correlations ----
    markdown_strs = ['### Disattenuated correlations']
    markdown_strs.append('The next table shows the correlations between human and machine scores, '
                         'the correlations between two human scores, '
                         'and disattenuated correlations between human and machine scores computed as '
                         'human-machine correlations divided by the square root of human-human '
                         'correlation. '
                         'Note that the human-machine correlation is computed on the full '
                         'dataset (to get a reliable estimate) whereas the human-human '
                         'correlation is computed on the subset of responses that were double-scored.')
    markdown_strs.append("\nThe following values are "
                         "<span class='highlight_color'>highlighted</span>")
    # keep this note in sync with `formatter_dis_corr` below: the formatter is
    # applied to the `corr_disattenuated` column with `low=0.9`
    markdown_strs.append(' - `corr_disattenuated` < 0.9')
    display(Markdown('\n'.join(markdown_strs)))
    # display the HTML for the table with the various formatters
    formatter_dis_corr = partial(color_highlighter, low=0.9)
    formatter_dict = {'corr_disattenuated': formatter_dis_corr}
    display(HTML(df_dis_corrs.to_html(index=True,
                                      escape=False,
                                      classes=['sortable'],
                                      float_format=float_format_func,
                                      formatters=formatter_dict)))
    
    
    

## Evaluation results

### Overall association statistics

The tables in this section show the standard association metrics between human scores and different types of machine scores. These results are computed on the evaluation set. `Trim` (`bound`) scores are truncated to [min-0.4998, max+0.4998]. `Trim-round` scores are computed by first truncating and then rounding the predicted score. Scaled scores are computed by re-scaling the predicted scores using mean and standard deviation of human scores as observed on the training data and mean and standard deviation of machine scores as predicted for the training set.


#### Descriptive holistic score statistics

The table shows distributional properties of human and system scores. SMD values lower than -0.15 or higher than 0.15 are <span class="highlight_color">highlighted</span>.

*Please note that for raw scores, SMD values are likely to be affected by possible differences in scale.*

In [None]:
# Label used in later captions for the kind of predictions analyzed.
if use_scaled_predictions:
    raw_or_scaled = "scaled"
else:
    raw_or_scaled = "raw"

# Load the evaluation metrics and split them into a distribution table and
# an association table; both keep the `N` column.
eval_file = join(output_dir, '{}_eval.{}'.format(experiment_id, file_format))
df_eval = DataReader.read_from_file(eval_file, index_col=0)
distribution_columns = ['N',
                        'h_mean', 'sys_mean',
                        'h_sd', 'sys_sd',
                        'h_min', 'sys_min',
                        'h_max', 'sys_max',
                        'SMD']
non_distribution_columns = [column for column in df_eval.columns
                            if column not in distribution_columns]
association_columns = ['N'] + non_distribution_columns
df_distribution = df_eval[distribution_columns]
df_association = df_eval[association_columns]

In [None]:
# Render the score distribution table at 95% font size, passing the SMD
# column through the color highlighter with bounds -0.15 and 0.15.
pd.set_option('display.width', 10)
formatter = partial(color_highlighter, low=-0.15, high=0.15)
distribution_table = df_distribution.to_html(classes=['sortable'],
                                             escape=False,
                                             formatters={'SMD': formatter},
                                             float_format=float_format_func)
HTML('<span style="font-size:95%">' + distribution_table + '</span>')

#### Association statistics


In [None]:
# Caption for the association table; the rounding note depends on whether
# the human scores are continuous.
markdown_str = ['The table shows the standard association metrics between human scores and machine scores.']
if continuous_human_score:
    markdown_str.append("Note that for computation of `kappa` and `wtkappa` both human and machine scores are rounded.")
else:
    # call the list's `append` method (the original `markdown_str_append`
    # was an undefined name and raised NameError on this branch)
    markdown_str.append("Note that for computation of `kappa` and `wtkappa` all machine scores are rounded.")

Markdown('\n'.join(markdown_str))

In [None]:
# Render the association metrics table at 95% font size.
pd.set_option('display.width', 10)
association_table = df_association.to_html(classes=['sortable'],
                                           escape=False,
                                           float_format=float_format_func)
HTML('<span style="font-size:95%">' + association_table + '</span>')

### Confusion Matrix

In [None]:
# Caption for the confusion matrix; `raw_or_scaled` names the kind of
# machine scores shown.
markdown_str = ["Confusion matrix using {} trimmed rounded scores and human scores (rows=system, columns=human).".format(raw_or_scaled)]

if continuous_human_score:
    # typo "beeen" in the rendered note fixed
    markdown_str.append("*Human scores have been rounded to the nearest integer.*")

Markdown('\n'.join(markdown_str))

In [None]:
# Load the confusion matrix and display it as the cell output.
confmat_name = '{}_confMatrix.{}'.format(experiment_id, file_format)
confmat_file = join(output_dir, confmat_name)
df_confmat = DataReader.read_from_file(confmat_file, index_col=0)
df_confmat

### Distribution of human and machine scores

In [None]:
# Caption for the score distribution histogram and table.
markdown_strs = ["The histogram and the table below show the distribution of "
                 "rounded human scores and {} trimmed rounded machine scores "
                 "(as % of all responses).".format(raw_or_scaled)]
markdown_strs.append("Differences in the table between human and machine distributions "
                     "larger than 5 percentage points are <span class='highlight_color'>highlighted</span>.")
if continuous_human_score:
    # append to `markdown_strs`, the list displayed below; the original
    # appended to `markdown_str`, so the note never appeared
    markdown_strs.append("*Human scores have been rounded to the nearest integer.*")

display(Markdown('\n'.join(markdown_strs)))

In [None]:
# Bar plot of the score distribution, saved as an SVG file in `figure_dir`
# and then shown either as a thumbnail or as a regular inline figure.
scoredist_file = join(output_dir, '{}_score_dist.{}'.format(experiment_id, file_format))
df_scoredist = DataReader.read_from_file(scoredist_file, index_col=0)
# reshape to long format (one row per score/variable pair) and drop the
# 'difference' rows, which are not plotted
df_scoredist_melted = pd.melt(df_scoredist, id_vars=['score'])
df_scoredist_melted = df_scoredist_melted[df_scoredist_melted['variable'] != 'difference']

# get the colors for the plot
colors = sns.color_palette("Greys", 2)

with sns.axes_style('whitegrid'):

    # make a barplot without a legend since we will 
    # add one manually later
    # NOTE(review): later seaborn releases renamed `factorplot` to `catplot`
    # and `size` to `height` - confirm against the pinned seaborn version.
    p = sns.factorplot("score", "value", "variable", kind="bar",
                       palette=colors, data=df_scoredist_melted, 
                       size=3, aspect=2, legend=False)
    p.set_axis_labels('score', '% of responses')
    
    # add a legend with the right colors
    # (assumes the first plotted variable is the human score distribution
    # and the second the machine one - TODO confirm against the file layout)
    axis = p.axes[0][0]
    legend = axis.legend(labels=('Human', 'Machine'), title='', frameon=True, fancybox=True)
    legend.legendHandles[0].set_color(colors[0])
    legend.legendHandles[1].set_color(colors[1])

    # save the figure before displaying it
    imgfile = join(figure_dir, '{}_score_dist.svg'.format(experiment_id))
    plt.savefig(imgfile)

    if use_thumbnails:
        # each thumbnail gets the next ID from the shared counter
        show_thumbnail(imgfile, next(id_generator))
    else:
        plt.show()

In [None]:
# Render the score distribution table, passing the `difference` column
# through the color highlighter (bounds 0 and 5, `absolute=True`).
formatter = partial(color_highlighter, low=0, high=5, absolute=True)
difference_formatters = {'difference': formatter}
df_html = df_scoredist.to_html(classes=['sortable'],
                               index=False,
                               escape=False,
                               formatters=difference_formatters)
display(HTML(df_html))

## System information

In [None]:
# Describe the machine on which the report was generated.
detected_system = platform.system()

# People might not know what 'Darwin' is, so it is reported as 'Mac OS X'
system_name = 'Mac OS X' if detected_system == 'Darwin' else detected_system

# first element of `platform.architecture()`
architecture = platform.architecture()[0]

# join the components of the rsmtool version with dots
rsmtool_version_str = '.'.join(str(component) for component in rsmtool_version)

system_info_template = ('This report was generated using rsmtool v{} on a '
                        '{} computer running {}.')
display(Markdown(system_info_template.format(rsmtool_version_str,
                                             architecture,
                                             system_name)))

### Python packages

In [None]:
# List every installed Python distribution as `name==version`, one per line.
# `pip.get_installed_distributions()` is no longer part of pip's public API,
# so the installed-package metadata is queried directly instead.
try:
    from importlib.metadata import distributions as _installed_distributions
    installed_packages = {(dist.metadata['Name'], dist.version)
                          for dist in _installed_distributions()}
except ImportError:
    # Python versions without `importlib.metadata`: use setuptools' view
    import pkg_resources
    installed_packages = {(dist.project_name, dist.version)
                          for dist in pkg_resources.working_set}

# names are lower-cased as before; entries without a name are skipped
package_names = '\n'.join(sorted("{}=={}".format(name.lower(), version)
                                 for name, version in installed_packages
                                 if name))
display(HTML('<div id="packages"><pre>{}</pre></div>'.format(package_names)))

In [None]:
%%javascript

// Code to dynamically generate table of contents at the top of the HTML file
var tocEntries = ['<ul>'];
var anchors = $('a.anchor-link');
var headingTypes = $(anchors).parent().map(function() { return $(this).prop('tagName')});
var headingTexts = $(anchors).parent().map(function() { return $(this).text()});
var subList = false;

$.each(anchors, function(i, anch) {
    var hType = headingTypes[i];
    var hText = headingTexts[i];
    hText = hText.substr(0, hText.length - 1);
    if (hType == 'H2') {
        if (subList) {
            tocEntries.push('</ul>')
            subList = false;
        }
        tocEntries.push('<li><a href="' + anch + '"</a>' + hText + '</li>')
    }
    else if (hType == 'H3') {
        if (!subList) {
            subList = true;
            tocEntries.push('<ul>')
        }
        tocEntries.push('<li><a href="' + anch + '"</a>' + hText + '</li>')
    }
});
tocEntries.push('</ul>')
$('#toc').html(tocEntries.join(' '))