In [None]:
# Setting options for the plots
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Inline figure formats to produce: 'retina' and 'svg'.
%config InlineBackend.figure_formats={'retina', 'svg'}
# Pass `savefig.dpi` = 150 to the inline backend's rc settings.
%config InlineBackend.rc={'savefig.dpi': 150}

# Experiment Report 

In [None]:
import itertools
import os
import re
import pickle
import platform
import time
import warnings

from functools import partial
from os.path import abspath, relpath, exists, join

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
from matplotlib import pyplot as plt

from IPython import sys_info
from IPython.display import display, HTML, Image, Javascript, Markdown, SVG
from rsmtool.reader import DataReader
from rsmtool.writer import DataWriter
from rsmtool.utils import (float_format_func,
                           int_or_float_format_func,
                           compute_subgroup_plot_params,
                           parse_json_with_comments,
                           bold_highlighter,
                           color_highlighter,
                           show_thumbnail)

from rsmtool.version import VERSION as rsmtool_version

# Import statsmodels with FutureWarnings silenced; the filter is only active
# inside this `with` block, so warnings raised elsewhere are unaffected.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=FutureWarning)
    import statsmodels.api as sm

# Use seaborn's 'notebook' plotting context.
sns.set_context('notebook')

In [None]:
# Locate the report directory: prefer the `RSM_REPORT_DIR` environment
# variable and fall back to the current working directory when it is unset.
try:
    rsm_report_dir = os.environ['RSM_REPORT_DIR']
except KeyError:
    rsm_report_dir = os.getcwd()

# The report settings are read from the `.environ.json` file in that directory;
# a missing file is reported with instructions on how to fix the setup.
rsm_environ_config = join(rsm_report_dir, '.environ.json')
if exists(rsm_environ_config):
    environ_config = parse_json_with_comments(rsm_environ_config)
else:
    raise FileNotFoundError(
        ('The file {} cannot be located. '
         'Please make sure that either (1) '
         'you have set the correct directory with the `RSM_REPORT_DIR` '
         'environment variable, or (2) that your `.environ.json` '
         'file is in the same directory as your notebook.').format(rsm_environ_config)
    )

<style type="text/css">
  div.prompt.output_prompt { 
    color: white; 
  }
  
  span.highlight_color {
    color: red;
  }
  
  span.highlight_bold {
    font-weight: bold;  
  }
    
  @media print {
    @page {
      size: landscape;
      margin: 0cm 0cm 0cm 0cm;
    }

    * {
      margin: 0px;
      padding: 0px;
    }

    #toc {
      display: none;
    }

    span.highlight_color, span.highlight_bold {
        font-weight: bolder;
        text-decoration: underline;
    }

    div.prompt.output_prompt {
      display: none;
    }
    
    h3#Python-packages, div#packages {
      display: none;
    }
  }
</style>

In [None]:
# NOTE: you will need to set the following manually
# if you are using this notebook interactively.
# Every value below is looked up in `environ_config` (parsed from
# `.environ.json`); a key that is absent yields `None` unless noted otherwise.
experiment_id = environ_config.get('EXPERIMENT_ID')
description = environ_config.get('DESCRIPTION')
context = environ_config.get('CONTEXT')
train_file_location = environ_config.get('TRAIN_FILE_LOCATION')
test_file_location = environ_config.get('TEST_FILE_LOCATION')
output_dir = environ_config.get('OUTPUT_DIR')
figure_dir = environ_config.get('FIGURE_DIR')
model_name = environ_config.get('MODEL_NAME')
model_type = environ_config.get('MODEL_TYPE')
skll_objective = environ_config.get('SKLL_OBJECTIVE')
file_format = environ_config.get('FILE_FORMAT')
length_column = environ_config.get('LENGTH_COLUMN')
second_human_score_column = environ_config.get('H2_COLUMN')
use_scaled_predictions = environ_config.get('SCALED')
standardize_features = environ_config.get('STANDARDIZE_FEATURES')
exclude_zero_scores = environ_config.get('EXCLUDE_ZEROS')

# `feature_subset_file` is later handed to `os.path.exists()`, which does not
# accept `None`. Fall back to the ' ' placeholder (a path that does not exist)
# both when the key is missing and when it is present but null/empty in the
# JSON file, so that the existence check simply evaluates to False.
feature_subset_file = environ_config.get('FEATURE_SUBSET_FILE') or ' '

min_items = environ_config.get('MIN_ITEMS')
use_thumbnails = environ_config.get('USE_THUMBNAILS')
predict_expected_scores = environ_config.get('PREDICT_EXPECTED_SCORES')

# groups for analysis by prompt or subgroup.
groups_desc = environ_config.get('GROUPS_FOR_DESCRIPTIVES')
groups_eval = environ_config.get('GROUPS_FOR_EVALUATIONS')

# javascript path
javascript_path = environ_config.get("JAVASCRIPT_PATH")

In [None]:
# Counter that hands out consecutive thumbnail IDs, beginning at 1.
id_generator = itertools.count(start=1)

In [None]:
# Read `sort.js` from the JavaScript directory and emit its contents
# as JavaScript output of this cell.
sort_js_file = join(javascript_path, "sort.js")
with open(sort_js_file, mode="r", encoding="utf-8") as sortf:
    display(Javascript(data=sortf.read()))

In [None]:
# Opening line of the report: the experiment ID in bold, followed by its description.
Markdown("This report presents the analysis for **{}**: {}".format(experiment_id, description))

In [None]:
# Collect one markdown bullet per report option that is switched on.
note_lines = []
if use_thumbnails:
    note_lines.append("\n  - Images in this report have been converted to "
                      "clickable thumbnails.")
if predict_expected_scores:
    note_lines.append("\n  - Predictions analyzed in this report are *expected scores*, "
                      "i.e., probability-weighted averages over all score points.")

# Only render the "Notes" section when there is at least one bullet to show.
markdown_str = ''.join(note_lines)
if markdown_str:
    markdown_str = '**Notes**:' + markdown_str
    display(Markdown(markdown_str))

In [None]:
# Show the current local time, formatted with the locale's
# date-and-time representation.
HTML(time.strftime('%c', time.localtime()))

In [None]:
%%html
<div id="toc"></div>

In [None]:
# Read in the training and testing features, both raw and pre-processed,
# together with the other per-experiment output files.
#
# - Each output file is looked up under `output_dir` using the naming scheme
#   `<experiment_id>_<suffix>.<file_format>`.
# - A data frame is only assigned when its file exists, so code that uses
#   these frames must check for the corresponding file first.
# - The `spkitemid` and `candidate` columns are read as strings
#   to preserve any leading zeros.
# - We filter DtypeWarnings that pop up mostly in very large files. The
#   warning class is taken from its public location, `pd.errors`, because
#   `pd.io.common.DtypeWarning` is not available in current pandas releases.

string_columns = ['spkitemid', 'candidate']
converter_dict = {column: str for column in string_columns}

with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=pd.errors.DtypeWarning)

    # --- training data -------------------------------------------------

    # original training file, read without the string converters
    if exists(train_file_location):
        df_train_orig = DataReader.read_from_file(train_file_location)

    train_file = join(output_dir, '{}_train_features.{}'.format(experiment_id,
                                                                file_format))
    if exists(train_file):
        df_train = DataReader.read_from_file(train_file, converters=converter_dict)

    train_metadata_file = join(output_dir, '{}_train_metadata.{}'.format(experiment_id,
                                                                         file_format))
    if exists(train_metadata_file):
        df_train_metadata = DataReader.read_from_file(train_metadata_file, converters=converter_dict)

    train_other_columns_file = join(output_dir, '{}_train_other_columns.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(train_other_columns_file):
        df_train_other_columns = DataReader.read_from_file(train_other_columns_file, converters=converter_dict)

    train_length_file = join(output_dir, '{}_train_response_lengths.{}'.format(experiment_id,
                                                                               file_format))
    if exists(train_length_file):
        df_train_length = DataReader.read_from_file(train_length_file, converters=converter_dict)

    train_excluded_file = join(output_dir, '{}_train_excluded_responses.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(train_excluded_file):
        df_train_excluded = DataReader.read_from_file(train_excluded_file, converters=converter_dict)

    train_responses_with_excluded_flags_file = join(output_dir, '{}_train_responses_with_excluded_flags.{}'.format(experiment_id,
                                                                                                                   file_format))
    if exists(train_responses_with_excluded_flags_file):
        df_train_responses_with_excluded_flags = DataReader.read_from_file(train_responses_with_excluded_flags_file,
                                                                           converters=converter_dict)

    train_preproc_file = join(output_dir, '{}_train_preprocessed_features.{}'.format(experiment_id,
                                                                                     file_format))
    if exists(train_preproc_file):
        df_train_preproc = DataReader.read_from_file(train_preproc_file, converters=converter_dict)

    # --- test data -----------------------------------------------------

    # original test file, read without the string converters
    if exists(test_file_location):
        df_test_orig = DataReader.read_from_file(test_file_location)

    test_file = join(output_dir, '{}_test_features.{}'.format(experiment_id,
                                                              file_format))
    if exists(test_file):
        df_test = DataReader.read_from_file(test_file, converters=converter_dict)

    test_metadata_file = join(output_dir, '{}_test_metadata.{}'.format(experiment_id,
                                                                       file_format))
    if exists(test_metadata_file):
        df_test_metadata = DataReader.read_from_file(test_metadata_file, converters=converter_dict)

    test_other_columns_file = join(output_dir, '{}_test_other_columns.{}'.format(experiment_id,
                                                                                 file_format))
    if exists(test_other_columns_file):
        df_test_other_columns = DataReader.read_from_file(test_other_columns_file, converters=converter_dict)

    test_human_scores_file = join(output_dir, '{}_test_human_scores.{}'.format(experiment_id,
                                                                               file_format))
    if exists(test_human_scores_file):
        df_test_human_scores = DataReader.read_from_file(test_human_scores_file, converters=converter_dict)

    test_excluded_file = join(output_dir, '{}_test_excluded_responses.{}'.format(experiment_id,
                                                                                 file_format))
    if exists(test_excluded_file):
        df_test_excluded = DataReader.read_from_file(test_excluded_file, converters=converter_dict)

    test_responses_with_excluded_flags_file = join(output_dir, '{}_test_responses_with_excluded_flags.{}'.format(experiment_id,
                                                                                                                 file_format))
    if exists(test_responses_with_excluded_flags_file):
        df_test_responses_with_excluded_flags = DataReader.read_from_file(test_responses_with_excluded_flags_file,
                                                                          converters=converter_dict)

    test_preproc_file = join(output_dir, '{}_test_preprocessed_features.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(test_preproc_file):
        df_test_preproc = DataReader.read_from_file(test_preproc_file, converters=converter_dict)

    # --- predictions, feature list and model coefficients ---------------

    pred_preproc_file = join(output_dir, '{}_pred_processed.{}'.format(experiment_id,
                                                                       file_format))
    if exists(pred_preproc_file):
        df_pred_preproc = DataReader.read_from_file(pred_preproc_file, converters=converter_dict)

    feature_file = join(output_dir, '{}_feature.{}'.format(experiment_id,
                                                           file_format))
    if exists(feature_file):
        df_features = DataReader.read_from_file(feature_file, converters=converter_dict)
        # names listed in the `feature` column of the feature file
        features_used = [c for c in df_features.feature.values]

    betas_file = join(output_dir, '{}_betas.{}'.format(experiment_id,
                                                       file_format))
    if exists(betas_file):
        df_betas = DataReader.read_from_file(betas_file)

    # unlike the frames above, the feature subset specs are always defined:
    # they are set to None when the file is not available
    if exists(feature_subset_file):
        df_feature_subset_specs = DataReader.read_from_file(feature_subset_file)
    else:
        df_feature_subset_specs = None

In [None]:
# check for continuous human scores in the evaluation set
# The flag stays False unless the processed predictions file exists and its
# `sc1` column is not equal to its own rounded values, i.e. at least one
# score in that column is not a whole number.
continuous_human_score = False

if exists(pred_preproc_file):
    # compare the `sc1` column against a rounded copy of itself
    if not df_pred_preproc['sc1'].equals(np.round(df_pred_preproc['sc1'])):
        continuous_human_score = True