In [None]:
# Setting options for the plots
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Emit inline figures in both the 'retina' and 'svg' formats.
%config InlineBackend.figure_formats={'retina', 'svg'}
# Override the 'savefig.dpi' rc parameter (150) for the inline backend.
%config InlineBackend.rc={'savefig.dpi': 150}

# Summary Report 

In [None]:
import itertools
import json
import os
import re
import pickle
import platform
import time

from collections import defaultdict as dd
from functools import partial
from os.path import abspath, dirname, exists, join
from string import Template

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
from matplotlib import pyplot as plt

from IPython import sys_info
from IPython.display import display, HTML, Image, Markdown, SVG

from rsmtool.version import VERSION as rsmtool_version

In [None]:
%%javascript

/* sortttable v2 from http://www.kryogenix.org/code/browser/sorttable */
function dean_addEvent(t,e,r){if(t.addEventListener)t.addEventListener(e,r,!1);else{r.$$guid||(r.$$guid=dean_addEvent.guid++),t.events||(t.events={});var o=t.events[e];o||(o=t.events[e]={},t["on"+e]&&(o[0]=t["on"+e])),o[r.$$guid]=r,t["on"+e]=handleEvent}}function removeEvent(t,e,r){t.removeEventListener?t.removeEventListener(e,r,!1):t.events&&t.events[e]&&delete t.events[e][r.$$guid]}function handleEvent(t){var e=!0;t=t||fixEvent(((this.ownerDocument||this.document||this).parentWindow||window).event);var r=this.events[t.type];for(var o in r)this.$$handleEvent=r[o],this.$$handleEvent(t)===!1&&(e=!1);return e}function fixEvent(t){return t.preventDefault=fixEvent.preventDefault,t.stopPropagation=fixEvent.stopPropagation,t}var stIsIE=!1;if(sorttable={init:function(){arguments.callee.done||(arguments.callee.done=!0,_timer&&clearInterval(_timer),document.createElement&&document.getElementsByTagName&&(sorttable.DATE_RE=/^(\d\d?)[\/\.-](\d\d?)[\/\.-]((\d\d)?\d\d)$/,forEach(document.getElementsByTagName("table"),function(t){-1!=t.className.search(/\bsortable\b/)&&sorttable.makeSortable(t)})))},makeSortable:function(t){if(0==t.getElementsByTagName("thead").length&&(the=document.createElement("thead"),the.appendChild(t.rows[0]),t.insertBefore(the,t.firstChild)),null==t.tHead&&(t.tHead=t.getElementsByTagName("thead")[0]),1==t.tHead.rows.length){sortbottomrows=[];for(var e=0;e<t.rows.length;e++)-1!=t.rows[e].className.search(/\bsortbottom\b/)&&(sortbottomrows[sortbottomrows.length]=t.rows[e]);if(sortbottomrows){null==t.tFoot&&(tfo=document.createElement("tfoot"),t.appendChild(tfo));for(var e=0;e<sortbottomrows.length;e++)tfo.appendChild(sortbottomrows[e]);delete sortbottomrows}headrow=t.tHead.rows[0].cells;for(var e=0;e<headrow.length;e++)headrow[e].className.match(/\bsorttable_nosort\b/)||(mtch=headrow[e].className.match(/\bsorttable_([a-z0-9]+)\b/),mtch&&(override=mtch[1]),headrow[e].sorttable_sortfunction=mtch&&"function"==typeof 
sorttable["sort_"+override]?sorttable["sort_"+override]:sorttable.guessType(t,e),headrow[e].sorttable_columnindex=e,headrow[e].sorttable_tbody=t.tBodies[0],dean_addEvent(headrow[e],"click",sorttable.innerSortFunction=function(){if(-1!=this.className.search(/\bsorttable_sorted\b/))return sorttable.reverse(this.sorttable_tbody),this.className=this.className.replace("sorttable_sorted","sorttable_sorted_reverse"),this.removeChild(document.getElementById("sorttable_sortfwdind")),sortrevind=document.createElement("span"),sortrevind.id="sorttable_sortrevind",sortrevind.innerHTML=stIsIE?'&nbsp<font face="webdings">5</font>':"&nbsp;&#x25B4;",void this.appendChild(sortrevind);if(-1!=this.className.search(/\bsorttable_sorted_reverse\b/))return sorttable.reverse(this.sorttable_tbody),this.className=this.className.replace("sorttable_sorted_reverse","sorttable_sorted"),this.removeChild(document.getElementById("sorttable_sortrevind")),sortfwdind=document.createElement("span"),sortfwdind.id="sorttable_sortfwdind",sortfwdind.innerHTML=stIsIE?'&nbsp<font face="webdings">6</font>':"&nbsp;&#x25BE;",void this.appendChild(sortfwdind);theadrow=this.parentNode,forEach(theadrow.childNodes,function(t){1==t.nodeType&&(t.className=t.className.replace("sorttable_sorted_reverse",""),t.className=t.className.replace("sorttable_sorted",""))}),sortfwdind=document.getElementById("sorttable_sortfwdind"),sortfwdind&&sortfwdind.parentNode.removeChild(sortfwdind),sortrevind=document.getElementById("sorttable_sortrevind"),sortrevind&&sortrevind.parentNode.removeChild(sortrevind),this.className+=" sorttable_sorted",sortfwdind=document.createElement("span"),sortfwdind.id="sorttable_sortfwdind",sortfwdind.innerHTML=stIsIE?'&nbsp<font face="webdings">6</font>':"&nbsp;&#x25BE;",this.appendChild(sortfwdind),row_array=[],col=this.sorttable_columnindex,rows=this.sorttable_tbody.rows;for(var 
t=0;t<rows.length;t++)row_array[row_array.length]=[sorttable.getInnerText(rows[t].cells[col]),rows[t]];row_array.sort(this.sorttable_sortfunction),tb=this.sorttable_tbody;for(var t=0;t<row_array.length;t++)tb.appendChild(row_array[t][1]);delete row_array}))}},guessType:function(t,e){sortfn=sorttable.sort_alpha;for(var r=0;r<t.tBodies[0].rows.length;r++)if(text=sorttable.getInnerText(t.tBodies[0].rows[r].cells[e]),""!=text){if(text.match(/^-?[£$¤]?[\d,.]+%?$/))return sorttable.sort_numeric;if(possdate=text.match(sorttable.DATE_RE)){if(first=parseInt(possdate[1]),second=parseInt(possdate[2]),first>12)return sorttable.sort_ddmm;if(second>12)return sorttable.sort_mmdd;sortfn=sorttable.sort_ddmm}}return sortfn},getInnerText:function(t){if(!t)return"";if(hasInputs="function"==typeof t.getElementsByTagName&&t.getElementsByTagName("input").length,null!=t.getAttribute("sorttable_customkey"))return t.getAttribute("sorttable_customkey");if("undefined"!=typeof t.textContent&&!hasInputs)return t.textContent.replace(/^\s+|\s+$/g,"");if("undefined"!=typeof t.innerText&&!hasInputs)return t.innerText.replace(/^\s+|\s+$/g,"");if("undefined"!=typeof t.text&&!hasInputs)return t.text.replace(/^\s+|\s+$/g,"");switch(t.nodeType){case 3:if("input"==t.nodeName.toLowerCase())return t.value.replace(/^\s+|\s+$/g,"");case 4:return t.nodeValue.replace(/^\s+|\s+$/g,"");case 1:case 11:for(var e="",r=0;r<t.childNodes.length;r++)e+=sorttable.getInnerText(t.childNodes[r]);return e.replace(/^\s+|\s+$/g,"");default:return""}},reverse:function(t){newrows=[];for(var e=0;e<t.rows.length;e++)newrows[newrows.length]=t.rows[e];for(var e=newrows.length-1;e>=0;e--)t.appendChild(newrows[e]);delete newrows},sort_numeric:function(t,e){return aa=parseFloat(t[0].replace(/[^0-9.-]/g,"")),isNaN(aa)&&(aa=0),bb=parseFloat(e[0].replace(/[^0-9.-]/g,"")),isNaN(bb)&&(bb=0),aa-bb},sort_alpha:function(t,e){return t[0]==e[0]?0:t[0]<e[0]?-1:1},sort_ddmm:function(t,e){return 
mtch=t[0].match(sorttable.DATE_RE),y=mtch[3],m=mtch[2],d=mtch[1],1==m.length&&(m="0"+m),1==d.length&&(d="0"+d),dt1=y+m+d,mtch=e[0].match(sorttable.DATE_RE),y=mtch[3],m=mtch[2],d=mtch[1],1==m.length&&(m="0"+m),1==d.length&&(d="0"+d),dt2=y+m+d,dt1==dt2?0:dt2>dt1?-1:1},sort_mmdd:function(t,e){return mtch=t[0].match(sorttable.DATE_RE),y=mtch[3],d=mtch[2],m=mtch[1],1==m.length&&(m="0"+m),1==d.length&&(d="0"+d),dt1=y+m+d,mtch=e[0].match(sorttable.DATE_RE),y=mtch[3],d=mtch[2],m=mtch[1],1==m.length&&(m="0"+m),1==d.length&&(d="0"+d),dt2=y+m+d,dt1==dt2?0:dt2>dt1?-1:1},shaker_sort:function(t,e){for(var r=0,o=t.length-1,n=!0;n;){n=!1;for(var s=r;o>s;++s)if(e(t[s],t[s+1])>0){var a=t[s];t[s]=t[s+1],t[s+1]=a,n=!0}if(o--,!n)break;for(var s=o;s>r;--s)if(e(t[s],t[s-1])<0){var a=t[s];t[s]=t[s-1],t[s-1]=a,n=!0}r++}}},document.addEventListener&&document.addEventListener("DOMContentLoaded",sorttable.init,!1),/WebKit/i.test(navigator.userAgent))var _timer=setInterval(function(){/loaded|complete/.test(document.readyState)&&sorttable.init()},10);window.onload=sorttable.init,dean_addEvent.guid=1,fixEvent.preventDefault=function(){this.returnValue=!1},fixEvent.stopPropagation=function(){this.cancelBubble=!0},Array.forEach||(Array.forEach=function(t,e,r){for(var o=0;o<t.length;o++)e.call(r,t[o],o,t)}),Function.prototype.forEach=function(t,e,r){for(var o in t)"undefined"==typeof this.prototype[o]&&e.call(r,t[o],o,t)},String.forEach=function(t,e,r){Array.forEach(t.split(""),function(o,n){e.call(r,o,n,t)})};var forEach=function(t,e,r){if(t){var o=Object;if(t instanceof Function)o=Function;else{if(t.forEach instanceof Function)return void t.forEach(e,r);"string"==typeof t?o=String:"number"==typeof t.length&&(o=Array)}o.forEach(t,e,r)}};

<style type="text/css">
  div.prompt.output_prompt { 
    color: white; 
  }
  
  span.highlight_color {
    color: red;
  }
  
  span.highlight_bold {
    font-weight: bold;  
  }
    
  @media print {
    @page {
      size: landscape;
      margin: 0cm 0cm 0cm 0cm;
    }

    * {
      margin: 0px;
      padding: 0px;
    }

    #toc {
      display: none;
    }

    span.highlight_color, span.highlight_bold {
        font-weight: bolder;
        text-decoration: underline;
    }

    div.prompt.output_prompt {
      display: none;
    }
    
    h3#Python-packages, div#packages {
      display: none;
    }
  }
</style>

In [None]:
# NOTE: you will need to set the following manually
# if you are using this notebook interactively.
# Every value below is read from an environment variable. `os.environ.get`
# returns `None` for an unset variable, in which case the `.split('%%')`
# calls in this cell raise `AttributeError`.
summary_id = os.environ.get('SUMMARY_ID')
description = os.environ.get('DESCRIPTION')
# '%%'-separated list of paths to the JSON configuration files of the models
jsons_string = os.environ.get('JSONS')
jsons = jsons_string.split('%%')

# groups for analysis by prompt or subgroup.
# set to 'prompt' for the standard analysis or 'prompt%%subgroup1%%subgroup2' for subgroup analysis.
groups_desc_string = os.environ.get('GROUPS_FOR_DESCRIPTIVES') 
groups_desc = groups_desc_string.split('%%')
groups_eval_string = os.environ.get('GROUPS_FOR_EVALUATIONS') 
groups_eval = groups_eval_string.split('%%')

In [None]:
# load the information about all models:
# each entry of `model_list` is a tuple of (experiment ID, parsed JSON
# configuration, directory that contains the JSON file).
model_list = []
for json_file in jsons:
    # read the configuration inside a context manager so that the
    # file handle is closed right away instead of being leaked
    with open(json_file) as json_fh:
        model_config = json.load(json_fh)
    model_id = model_config['experiment_id']
    # the per-model CSV files are looked up in the directory of the JSON file
    model_csvdir = dirname(json_file)
    model_list.append((model_id, model_config, model_csvdir))

# create a list of 

In [None]:
# Opening sentence of the report: the summary ID (in bold) followed by its description.
Markdown("This report presents the analysis for **{}**: {} \n ".format(summary_id, description))


In [None]:
# Show the current local date and time ('%c' is the locale's date and time representation).
HTML(time.strftime('%c'))

In [None]:
# Pair every model ID with the 'description' field of its configuration.
# Each `model_list` entry is a (model_id, config, csvdir) tuple.
models_and_desc = zip([entry[0] for entry in model_list],
                      [entry[1]['description'] for entry in model_list])

# One "**id**: description" paragraph per model, separated by blank lines.
model_desc_list = '\n\n'.join('**{}**: {}'.format(*pair) for pair in models_and_desc)

Markdown("The report compares the following models: \n\n {}".format(model_desc_list))

In [None]:
%%html
<!-- Empty placeholder: a javascript cell later in this notebook fills it with the table of contents via $('#toc').html(...). -->
<div id="toc"></div>

In [None]:
# define float formatting functions
def float_format_func(x, prec=3):
    """
    Format a number in fixed-point notation.

    Parameters
    ----------
    x : number
        The value to format.
    prec : int, optional
        Number of digits after the decimal point. Defaults to 3.

    Returns
    -------
    str
        `x` rendered with exactly `prec` decimal places.
    """
    # the nested replacement field injects the precision into the format spec
    return '{:.{prec}f}'.format(x, prec=prec)

def int_or_float_format_func(x, prec=3):
    """
    Format a number as an integer if it has no fractional part and as a
    fixed-point float otherwise.

    Parameters
    ----------
    x : number
        The value to format. Python and numpy integers are accepted as
        well as floats.
    prec : int, optional
        Number of decimal places used for non-integral values.
        Defaults to 3.

    Returns
    -------
    str
        The formatted value.
    """
    # `float.is_integer(x)` raises a TypeError for anything that is not a
    # `float` instance (e.g. `int` or `numpy.int64`); converting first
    # makes the check work for every real number.
    if float(x).is_integer():
        ans = '{}'.format(int(x))
    else:
        ans = float_format_func(x, prec=prec)
    return ans

def bold_highlighter(x, low=0, high=1, prec=3, absolute=False):
    """
    Format `x` and wrap it in a bold-highlighting <span> when it falls
    outside of the interval [`low`, `high`].

    Parameters
    ----------
    x : number
        The value to format.
    low : number, optional
        Values below this bound are highlighted. Defaults to 0.
    high : number, optional
        Values above this bound are highlighted. Defaults to 1.
    prec : int, optional
        Number of decimal places in the output. Defaults to 3.
    absolute : bool, optional
        If True, the absolute value of `x` is compared against the
        bounds; the displayed value is always the signed `x`.

    Returns
    -------
    str
        The formatted value, possibly wrapped in
        `<span class="highlight_bold">`.
    """
    if absolute:
        compared = abs(x)
    else:
        compared = x
    text = float_format_func(x, prec=prec)
    if compared < low or compared > high:
        return '<span class="highlight_bold">{}</span>'.format(text)
    return text

def color_highlighter(x, low=0, high=1, prec=3, absolute=False):
    """
    Format `x` and wrap it in a color-highlighting <span> when it falls
    outside of the interval [`low`, `high`].

    Parameters
    ----------
    x : number
        The value to format.
    low : number, optional
        Values below this bound are highlighted. Defaults to 0.
    high : number, optional
        Values above this bound are highlighted. Defaults to 1.
    prec : int, optional
        Number of decimal places in the output. Defaults to 3.
    absolute : bool, optional
        If True, the absolute value of `x` is compared against the
        bounds; the displayed value is always the signed `x`.

    Returns
    -------
    str
        The formatted value, possibly wrapped in
        `<span class="highlight_color">`.
    """
    if absolute:
        compared = abs(x)
    else:
        compared = x
    text = float_format_func(x, prec=prec)
    if compared < low or compared > high:
        return '<span class="highlight_color">{}</span>'.format(text)
    return text

## Model

The table shows the main model parameters for each experiment.

In [None]:
def summarize_models(model_list):
    """
    Display a sortable HTML table with the main parameters of each model.

    For every model, the file `<model_id>_betas.csv` is looked up in the
    model's CSV directory. If it exists, the number of rows (features) and
    the number of rows with a negative 'standardized' value are reported.
    If it does not exist but the configuration has a 'model' field, both
    counts are shown as '-'. Models with neither are left out. Nothing is
    displayed when no model qualifies.

    Parameters
    ----------
    model_list : list of tuples
        One (model_id, config, csvdir) tuple per model.
    """
    columns = ['model', 'N features', 'N negative', 'learner', 'train_label']

    records = []
    for (model_id, config, csvdir) in model_list:
        coef_file = os.path.join(csvdir, '{}_betas.csv'.format(model_id))
        if os.path.exists(coef_file):
            coefs = pd.read_csv(coef_file)
            n_features = len(coefs)
            # count via a boolean mask; the `.ix` indexer that was used
            # here previously has been removed from pandas
            n_negative = int((coefs['standardized'] < 0).sum())
        elif 'model' in config:
            # no coefficients available for this learner
            n_features = n_negative = '-'
        else:
            continue

        records.append({'model': model_id,
                        'N features': n_features,
                        'N negative': n_negative,
                        'learner': config['model'],
                        'train_label': config['train_label_column']})

    if records:
        df_summ = pd.DataFrame(records, columns=columns)
        display(Markdown("Model summary"))
        display(HTML(df_summ.to_html(index=False,
                                     classes=['sortable'],
                                     escape=False,
                                     float_format=int_or_float_format_func)))

# Render the model summary table for all models loaded into `model_list`.
summarize_models(model_list)

In [None]:
def summarize_model_fit():
    """
    Display a sortable HTML table with the model fit of each model.

    Reads the module-level `model_list` and, for every model whose CSV
    directory contains `<model_id>_model_fit.csv`, collects that file's
    contents tagged with the model ID. The "Model fit" heading and the
    table are only displayed if at least one such file was found.
    """
    shown_columns = ['model', 'N responses', 'N features', 'R2', 'R2_adjusted']

    collected = []
    for model_id, _config, csvdir in model_list:
        fit_path = os.path.join(csvdir, '{}_model_fit.csv'.format(model_id))
        if not os.path.exists(fit_path):
            continue
        model_fit = pd.read_csv(fit_path)
        model_fit['model'] = model_id
        collected.append(model_fit)

    # no fit files at all: stay silent
    if not collected:
        return

    df_fit = pd.concat(collected)
    display(Markdown("## Model fit"))
    table_html = df_fit[shown_columns].to_html(index=False,
                                               classes=['sortable'],
                                               escape=False,
                                               float_format=int_or_float_format_func)
    display(HTML(table_html))
    
# Render the model fit table (shown only if at least one model has a model fit file).
summarize_model_fit()

## Evaluation results

### Overall association statistics

The tables in this section show the standard association metrics between human scores and different types of machine scores. These results are computed on the evaluation set. The scores for each model have been truncated to [min-0.4998, max+0.4998]. When indicated, scaled scores are computed by re-scaling the predicted scores using the mean and standard deviation of human scores as observed on the training data and the mean and standard deviation of machine scores as predicted for the training set.


In [None]:
def read_evals(model_list):
    """
    Collect the short evaluation results of all models in one data frame.

    For every model whose CSV directory contains
    `<model_id>_eval_short.csv`, the file is read (first column as index)
    and two columns are added: 'model' with the model ID and
    'system score type', which is 'scale' if the configuration has
    'use_scaled_predictions' equal to True or a non-None 'scale_with'
    value, and 'raw' otherwise. Column names are then cut at the first
    '.', and names containing 'round' get a ' (rounded)' suffix.

    Parameters
    ----------
    model_list : list of tuples
        One (model_id, config, csvdir) tuple per model.

    Returns
    -------
    pandas.DataFrame
        The concatenated results with a fresh integer index, or an empty
        data frame if no evaluation file was found.
    """
    frames = []
    for model_id, config, csvdir in model_list:
        eval_path = os.path.join(csvdir, '{}_eval_short.csv'.format(model_id))
        if not os.path.exists(eval_path):
            continue

        model_eval = pd.read_csv(eval_path, index_col=0)
        model_eval['model'] = model_id

        # figure out whether the score was scaled
        if config.get('use_scaled_predictions') == True or config.get('scale_with') is not None:
            score_type = 'scale'
        else:
            score_type = 'raw'
        model_eval['system score type'] = score_type

        # rename the columns to remove reference to scale/raw scores
        renamed = []
        for col in model_eval.columns:
            base_name = col.split('.')[0]
            if 'round' in col:
                renamed.append('{} (rounded)'.format(base_name))
            else:
                renamed.append(base_name)
        model_eval.columns = renamed

        frames.append(model_eval)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)

# Evaluation results of all models; an empty data frame if none of them has an evaluation file.
df_eval = read_evals(model_list)

#### Descriptive holistic score statistics

The table shows distributional properties of human and system scores. SMD values lower than -0.15 or higher than 0.15 are <span class="highlight_color">highlighted</span>.

In [None]:
pd.options.display.width = 10

# SMD values outside of [-0.15, 0.15] are rendered with the highlight color
formatter = partial(color_highlighter, low=-0.15, high=0.15)

if df_eval.empty:
    display(Markdown("No information available for any of the models"))
else:
    display(HTML(df_eval[['model', 'N', 'system score type',
                          'h_mean', 'h_sd', 'sys_mean', 'sys_sd', 'SMD']]
                 .to_html(index=False,
                          classes=['sortable'],
                          escape=False,
                          formatters={'SMD': formatter},
                          float_format=int_or_float_format_func)))


#### Association statistics

The table shows the standard association metrics between human scores and machine scores. Note that some evaluations are based on rounded (`Trim-round`) scores computed by first truncating and then rounding the predicted score.

In [None]:
# Show the association metrics of every model, or a notice if there are none.
if df_eval.empty:
    display(Markdown("No information available for any of the models"))
else:
    display(HTML(df_eval[['model', 'N', 'system score type',
                          'corr', 'R2', 'RMSE',
                          'wtkappa (rounded)', 'kappa (rounded)',
                          'exact_agr (rounded)', 'adj_agr (rounded)']]
                 .to_html(index=False,
                          classes=['sortable'],
                          escape=False,
                          float_format=int_or_float_format_func)))


## System information

In [None]:
# People might not know what 'Darwin' is, so we report it as 'Mac OS X'
system_name = platform.system()
system_name = 'Mac OS X' if system_name == 'Darwin' else system_name

# the first element of the tuple returned by `platform.architecture()`
architecture = platform.architecture()[0]

# the rsmtool version: its components joined with dots
rsmtool_version_str = '.'.join(str(part) for part in rsmtool_version)

display(Markdown(('This report was generated using rsmtool v{} on a '
                  '{} computer running {}.').format(rsmtool_version_str,
                                                    architecture,
                                                    system_name)))

### Python packages

In [None]:
# List all installed Python packages as sorted "name==version" lines.
# `pip.get_installed_distributions()`, which was used here before, was an
# internal pip function that no longer exists in pip 10 and later, so the
# standard library is queried instead.
try:
    # Python 3.8+
    from importlib.metadata import distributions
    installed_packages = [(dist.metadata['Name'], dist.version)
                          for dist in distributions()]
except ImportError:
    # older Python versions: fall back to setuptools
    import pkg_resources
    installed_packages = [(dist.project_name, dist.version)
                          for dist in pkg_resources.working_set]

# lower-case the names (as the `key` attribute used previously did), skip
# distributions without a name and drop duplicate entries
package_names = '\n'.join(sorted({'{}=={}'.format(name.lower(), version)
                                  for (name, version) in installed_packages
                                  if name}))
display(HTML('<div id="packages"><pre>{}</pre></div>'.format(package_names)))

In [None]:
%%javascript

// Code to dynamically generate table of contents at the top of the HTML file.
// Every `a.anchor-link` element is looked up together with its parent heading.
// H2 headings become top-level entries and H3 headings are nested in a
// sub-list under the preceding H2 entry. Other heading levels are ignored.
// The resulting list is written into the `#toc` element.
var tocEntries = ['<ul>'];
var anchors = $('a.anchor-link');
var headingTypes = $(anchors).parent().map(function() { return $(this).prop('tagName'); });
var headingTexts = $(anchors).parent().map(function() { return $(this).text(); });
var subList = false;    // true while a nested <ul> of H3 entries is open
var entryOpen = false;  // true while the <li> of an H2 entry is open

$.each(anchors, function(i, anch) {
    var hType = headingTypes[i];
    var hText = headingTexts[i];
    // drop the last character of the heading text
    // (presumably the anchor link's own marker - TODO confirm)
    hText = hText.slice(0, -1);
    // the link text goes *inside* the <a> element
    var link = '<a href="' + anch.href + '">' + hText + '</a>';
    if (hType == 'H2') {
        if (subList) {
            tocEntries.push('</ul>');
            subList = false;
        }
        if (entryOpen) {
            tocEntries.push('</li>');
        }
        // keep the <li> open so that a sub-list can be nested inside of it
        tocEntries.push('<li>' + link);
        entryOpen = true;
    }
    else if (hType == 'H3') {
        if (!subList) {
            subList = true;
            tocEntries.push('<ul>');
        }
        tocEntries.push('<li>' + link + '</li>');
    }
});

// close whatever is still open after the last heading
if (subList) {
    tocEntries.push('</ul>');
}
if (entryOpen) {
    tocEntries.push('</li>');
}
tocEntries.push('</ul>');
$('#toc').html(tocEntries.join(' '));