Skip to content

Commit

Permalink
Merge pull request #469 from EducationalTestingService/468-explicit-n…
Browse files Browse the repository at this point in the history
…ormalization-for-coefficient-pie-chart

Fix pie chart and other minor changes
  • Loading branch information
desilinguist committed Sep 29, 2020
2 parents cbc66e9 + ee4b0e4 commit 486cd0e
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 37 deletions.
2 changes: 2 additions & 0 deletions doc/contributing.rst
Expand Up @@ -179,6 +179,8 @@ Here are some advanced tips and tricks when working with RSMTool tests.

4. The ``--pdb-errors`` and ``--pdb-failures`` options for ``nosetests`` are your friends. If you encounter test errors or test failures where the cause may not be immediately clear, re-run the ``nosetests`` command with the appropriate option. Doing so will drop you into an interactive PDB session as soon as a error (or failure) is encountered and then you inspect the variables at that point (or use "u" and "d" to go up and down the call stack). This may be particularly useful for tests in ``tests/test_cli.py`` that use ``subprocess.run()``. If these tests are erroring out, use ``--pdb-errors`` and inspect the "stderr" variable in the resulting PDB session to see what the error is.

5. In RSMTool 8.0.1 and later, the tests will pass even if any of the reports contain warnings. To catch any warnings that may appear in the reports, run the tests in strict mode (``STRICT=1 nosetests --nologcapture tests``).

.. rubric:: Footnotes

.. [#] For older versions of conda, you may have to do ``source activate rsmtool`` on Linux/macOS and ``activate rsmtool`` on Windows.
85 changes: 52 additions & 33 deletions rsmtool/configuration_parser.py
@@ -1,4 +1,6 @@
"""
Configuration parser.
Classes related to parsing configuration files
and creating configuration objects.
Expand All @@ -11,6 +13,7 @@

import json
import logging
import warnings
import re

from copy import copy, deepcopy
Expand Down Expand Up @@ -41,6 +44,8 @@

def configure(context, config_file_or_obj_or_dict):
"""
Create a Configuration object.
Get the configuration for ``context`` from the input
``config_file_or_obj_or_dict``.
Expand Down Expand Up @@ -112,7 +117,9 @@ def configure(context, config_file_or_obj_or_dict):

class Configuration:
"""
Configuration class, which encapsulates all of the
Configuration class.
Encapsulates all of the
configuration parameters and methods to access these
parameters.
"""
Expand Down Expand Up @@ -145,7 +152,6 @@ def __init__(self,
The context of the tool.
Defaults to 'rsmtool'.
"""

if not isinstance(configdict, dict):
raise TypeError('The input must be a dictionary.')

Expand All @@ -166,6 +172,8 @@ def __init__(self,

def __contains__(self, key):
"""
Key check.
Check if configuration object
contains a given key.
Expand Down Expand Up @@ -223,6 +231,8 @@ def __len__(self):

def __str__(self):
"""
Return string representation.
Return a string representation of the underlying configuration
dictionary.
Expand Down Expand Up @@ -255,6 +265,8 @@ def __iter__(self):
@property
def configdir(self):
"""
Get the path to configuration directory.
Get the path to the configuration reference directory that
will be used to resolve any relative paths in
the configuration.
Expand All @@ -269,7 +281,7 @@ def configdir(self):
@configdir.setter
def configdir(self, new_path):
"""
Set a new configuration reference directory
Set a new configuration reference directory.
Parameters
----------
Expand All @@ -278,26 +290,22 @@ def configdir(self, new_path):
directory used to resolve any relative paths
in the configuration object.
"""

if new_path is None:
raise ValueError("The `configdir` attribute cannot be set to `None` ")

# TODO: replace `Path(abspath(new_path))` with `Path(new_path).resolve()
# once this Windows bug is fixed: https://bugs.python.org/issue38671
self._configdir = Path(abspath(new_path))


@property
def context(self):
"""
Get the context.
"""
"""Get the context."""
return self._context

@context.setter
def context(self, new_context):
"""
Set a new context
Set a new context.
Parameters
----------
Expand Down Expand Up @@ -373,6 +381,8 @@ def items(self):

def pop(self, key, default=None):
"""
Remove and return value.
Remove and returns an element from
the object having the given key.
Expand Down Expand Up @@ -419,7 +429,6 @@ def save(self, output_dir=None):
output_dir : str
The path to the output directory.
"""

# save a copy of the main config into the output directory
if output_dir is None:
output_dir = Path(getcwd())
Expand All @@ -438,6 +447,8 @@ def save(self, output_dir=None):

def check_exclude_listwise(self):
"""
Check for candidate exclusion.
Check if we are excluding candidates
based on number of responses, and
add this to the configuration file
Expand All @@ -456,8 +467,9 @@ def check_flag_column(self,
flag_column='flag_column',
partition='unknown'):
"""
Make sure the `flag_column` field is in the correct format. Get
flag columns and values for filtering if any and convert single
Make sure the `flag_column` field is in the correct format.
Get flag columns and values for filtering if any and convert single
values to lists. Raises an exception if `flag_column` is not
correctly specified.
Expand Down Expand Up @@ -549,9 +561,10 @@ def check_flag_column(self,
model_eval))
return new_filter_dict


def get_trim_min_max_tolerance(self):
"""
Get trim min, trim max, and tolerance.
Get the specified trim min and max,
and trim_tolerance if any,
and make sure they are numeric.
Expand Down Expand Up @@ -621,6 +634,7 @@ def get_default_converter(self):
def get_names_and_paths(self, keys, names):
"""
Get a a list of values, given keys.
Remove any values that are None.
Parameters
Expand All @@ -641,7 +655,6 @@ def get_names_and_paths(self, keys, names):
ValueError
If there are any duplicate keys or names.
"""

assert len(keys) == len(names)

# Make sure keys are not duplicated
Expand Down Expand Up @@ -676,9 +689,7 @@ def get_names_and_paths(self, keys, names):


class ConfigurationParser:
"""
A `ConfigurationParser` class to create a `Configuration` object.
"""
"""A ``ConfigurationParser`` class to create a ``Configuration`` object."""

def __init__(self, pathlike):
"""
Expand Down Expand Up @@ -728,7 +739,9 @@ def __init__(self, pathlike):
@staticmethod
def _fix_json(json_string):
"""
Takes a bit of JSON that might have bad quotes
Fix json.
Take a bit of JSON that might have bad quotes
or capitalized booleans and fixes that stuff.
Parameters
Expand All @@ -748,6 +761,8 @@ def _fix_json(json_string):

def _parse_json_file(self, filepath):
"""
Parse json.
A private method to parse JSON configuration files and return
a Python dictionary.
Expand Down Expand Up @@ -778,9 +793,10 @@ def _parse_json_file(self, filepath):

return configdict


def parse(self, context='rsmtool'):
"""
Parse configuration file.
Parse the configuration file for which this parser was
instantiated.
Expand All @@ -801,7 +817,6 @@ def parse(self, context='rsmtool'):
A Configuration object containing the parameters in the
file that we instantiated the parser for.
"""

filepath = self._configdir / self._filename
configdict = self._parse_json_file(filepath)

Expand All @@ -815,6 +830,8 @@ def parse(self, context='rsmtool'):
@classmethod
def validate_config(cls, config, context='rsmtool'):
"""
Validate configuration file.
Ensure that all required fields are specified, add default values
values for all unspecified fields, and ensure that all specified
fields are valid.
Expand Down Expand Up @@ -844,7 +861,6 @@ def validate_config(cls, config, context='rsmtool'):
ValueError
If config does not exist, and no config passed.
"""

# make a copy of the given parameter dictionary
new_config = deepcopy(config)

Expand Down Expand Up @@ -954,8 +970,8 @@ def validate_config(cls, config, context='rsmtool'):
# linear regression model
if new_config['skll_objective']:
if not is_skll_model(new_config['model']):
logging.warning("You specified a custom SKLL objective but also chose a "
"non-SKLL model. The objective will be ignored.")
warnings.warn("You specified a custom SKLL objective but also chose a "
"non-SKLL model. The objective will be ignored.")
else:
if new_config['skll_objective'] not in SCORERS:
raise ValueError("Invalid SKLL objective. Please refer to the SKLL "
Expand All @@ -968,9 +984,9 @@ def validate_config(cls, config, context='rsmtool'):
# at run time for any invalid parameters
if new_config['skll_fixed_parameters']:
if not is_skll_model(new_config['model']):
logging.warning("You specified custom SKLL fixed parameters but "
"also chose a non-SKLL model. The parameters will "
"be ignored.")
warnings.warn("You specified custom SKLL fixed parameters but "
"also chose a non-SKLL model. The parameters will "
"be ignored.")

# 10. Check that if we are running rsmtool to ask for
# expected scores then the SKLL model type must actually
Expand All @@ -994,12 +1010,14 @@ def validate_config(cls, config, context='rsmtool'):
# 12. Raise a warning if we are specifiying a feature file but also
# telling the system to automatically select transformations
if new_config['features'] and new_config['select_transformations']:
logging.warning("You specified a feature file but also set "
"`select_transformations` to True. Any "
"transformations or signs specified in "
"the feature file will be overwritten by "
"the automatically selected transformations "
"and signs.")
# Show a warning unless a user passed a list of features.
if not isinstance(new_config['features'], list):
warnings.warn("You specified a feature file but also set "
"`select_transformations` to True. Any "
"transformations or signs specified in "
"the feature file will be overwritten by "
"the automatically selected transformations "
"and signs.")

# 13. If we have `experiment_names`, check that the length of the list
# matches the list of experiment_dirs.
Expand Down Expand Up @@ -1039,6 +1057,8 @@ def validate_config(cls, config, context='rsmtool'):
@classmethod
def process_config(cls, config):
"""
Process configuration file.
Converts fields which are read in as string to the
appropriate format. Fields which can take multiple
string values are converted to lists if they have
Expand All @@ -1061,7 +1081,6 @@ def process_config(cls, config):
NameError
If config does not exist, or no config read.
"""

# Get the parameter dictionary
new_config = deepcopy(config)

Expand Down
10 changes: 6 additions & 4 deletions rsmtool/notebooks/builtin_model.ipynb
Expand Up @@ -151,11 +151,13 @@
" ax1.set_title('Values of standardized coefficients')\n",
" ax1.set_xlabel('')\n",
" ax1.set_ylabel('')\n",
" # no pie chart if we have more than 15 features or if the feature names are long (pie chart looks ugly)\n",
" if len(features_used) <= 15 and longest_feature_name <= 10:\n",
" # no pie chart if we have more than 15 features,\n",
" # if the feature names are long (pie chart looks ugly)\n",
" # or if there are any negative coefficients.\n",
" if len(features_used) <= 15 and longest_feature_name <= 10 and (df_betas_sorted['relative']>=0).all():\n",
" ax2=fig.add_subplot(133, aspect=True)\n",
" ax2.pie(abs(df_betas_sorted['relative'].values), colors=grey_colors, \n",
" labels=df_betas_sorted['feature'].values)\n",
" labels=df_betas_sorted['feature'].values, normalize=True)\n",
" ax2.set_title('Proportional contribution of each feature')\n",
" else:\n",
" fig.set_size_inches(len(features_used), 3)\n",
Expand Down Expand Up @@ -245,7 +247,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.8.5"
}
},
"nbformat": 4,
Expand Down
29 changes: 29 additions & 0 deletions tests/test_configuration_parser.py
Expand Up @@ -3,6 +3,7 @@
import os
import tempfile
import pandas as pd
import warnings

from io import StringIO
from os import getcwd
Expand Down Expand Up @@ -305,6 +306,34 @@ def test_validate_config_min_n_without_subgroups(self):

_ = ConfigurationParser.validate_config(data)

def test_validate_config_warning_feature_file_and_transformations(self):
data = {'experiment_id': 'experiment_1',
'train_file': 'data/rsmtool_smTrain.csv',
'test_file': 'data/rsmtool_smEval.csv',
'model': 'LinearRegression',
'select_transformations': True,
'features': 'some_file.csv'}

with warnings.catch_warnings(record=True) as warning_list:
_ = ConfigurationParser.validate_config(data)
eq_(len(warning_list), 1)
ok_(issubclass(warning_list[0].category, UserWarning))


def test_validate_config_warning_feature_list_and_transformations(self):
# this should no show warnings
data = {'experiment_id': 'experiment_1',
'train_file': 'data/rsmtool_smTrain.csv',
'test_file': 'data/rsmtool_smEval.csv',
'model': 'LinearRegression',
'select_transformations': True,
'features': ['feature1', 'feature2']}

with warnings.catch_warnings(record=True) as warning_list:
_ = ConfigurationParser.validate_config(data)
eq_(len(warning_list), 0)


def test_process_fields(self):
data = {'experiment_id': 'experiment_1',
'train_file': 'data/rsmtool_smTrain.csv',
Expand Down

0 comments on commit 486cd0e

Please sign in to comment.