Merge pull request #469 from EducationalTestingService/468-explicit-n…

…ormalization-for-coefficient-pie-chart Fix pie chart and other minor changes
EducationalTestingService · Sep 29, 2020 · 486cd0e · 486cd0e
2 parents cbc66e9 + ee4b0e4
commit 486cd0e
Show file tree

Hide file tree

Showing 4 changed files with 89 additions and 37 deletions.
diff --git a/doc/contributing.rst b/doc/contributing.rst
@@ -179,6 +179,8 @@ Here are some advanced tips and tricks when working with RSMTool tests.
 
 4. The ``--pdb-errors`` and ``--pdb-failures`` options for ``nosetests`` are your friends. If you encounter test errors or test failures where the cause may not be immediately clear, re-run the ``nosetests`` command with the appropriate option. Doing so will drop you into an interactive PDB session as soon as a error (or failure) is encountered and then you inspect the variables at that point (or use "u" and "d" to go up and down the call stack). This may be particularly useful for tests in ``tests/test_cli.py`` that use ``subprocess.run()``. If these tests are erroring out, use ``--pdb-errors`` and inspect the "stderr" variable in the resulting PDB session to see what the error is.
 
+5. In RSMTool 8.0.1 and later, the tests will pass even if any of the reports contain warnings. To catch any warnings that may appear in the reports, run the tests in strict mode (``STRICT=1 nosetests --nologcapture tests``).
+
 .. rubric:: Footnotes
 
 .. [#] For older versions of conda, you may have to do ``source activate rsmtool`` on Linux/macOS and ``activate rsmtool`` on Windows.
diff --git a/rsmtool/configuration_parser.py b/rsmtool/configuration_parser.py
@@ -1,4 +1,6 @@
 """
+Configuration parser.
+
 Classes related to parsing configuration files
 and creating configuration objects.
 
@@ -11,6 +13,7 @@
 
 import json
 import logging
+import warnings
 import re
 
 from copy import copy, deepcopy
@@ -41,6 +44,8 @@
 
 def configure(context, config_file_or_obj_or_dict):
     """
+    Create a Configuration object.
+
     Get the configuration for ``context`` from the input
     ``config_file_or_obj_or_dict``.
 
@@ -112,7 +117,9 @@ def configure(context, config_file_or_obj_or_dict):
 
 class Configuration:
     """
-    Configuration class, which encapsulates all of the
+    Configuration class.
+
+    Encapsulates all of the
     configuration parameters and methods to access these
     parameters.
     """
@@ -145,7 +152,6 @@ def __init__(self,
             The context of the tool.
             Defaults to 'rsmtool'.
         """
-
         if not isinstance(configdict, dict):
             raise TypeError('The input must be a dictionary.')
 
@@ -166,6 +172,8 @@ def __init__(self,
 
     def __contains__(self, key):
         """
+        Key check.
+
         Check if configuration object
         contains a given key.
 
@@ -223,6 +231,8 @@ def __len__(self):
 
     def __str__(self):
         """
+        Return string representation.
+
         Return a string representation of the underlying configuration
         dictionary.
 
@@ -255,6 +265,8 @@ def __iter__(self):
     @property
     def configdir(self):
         """
+        Get the path to configuration directory.
+
         Get the path to the configuration reference directory that
         will be used to resolve any relative paths in
         the configuration.
@@ -269,7 +281,7 @@ def configdir(self):
     @configdir.setter
     def configdir(self, new_path):
         """
-        Set a new configuration reference directory
+        Set a new configuration reference directory.
 
         Parameters
         ----------
@@ -278,26 +290,22 @@ def configdir(self, new_path):
             directory used to resolve any relative paths
             in the configuration object.
         """
-
         if new_path is None:
             raise ValueError("The `configdir` attribute cannot be set to `None` ")
 
         # TODO: replace `Path(abspath(new_path))` with `Path(new_path).resolve()
         # once this Windows bug is fixed: https://bugs.python.org/issue38671
         self._configdir = Path(abspath(new_path))
 
-
     @property
     def context(self):
-        """
-        Get the context.
-        """
+        """Get the context."""
         return self._context
 
     @context.setter
     def context(self, new_context):
         """
-        Set a new context
+        Set a new context.
 
         Parameters
         ----------
@@ -373,6 +381,8 @@ def items(self):
 
     def pop(self, key, default=None):
         """
+        Remove and return value.
+
         Remove and returns an element from
         the object having the given key.
 
@@ -419,7 +429,6 @@ def save(self, output_dir=None):
         output_dir : str
             The path to the output directory.
         """
-
         # save a copy of the main config into the output directory
         if output_dir is None:
             output_dir = Path(getcwd())
@@ -438,6 +447,8 @@ def save(self, output_dir=None):
 
     def check_exclude_listwise(self):
         """
+        Check for candidate exclusion.
+
         Check if we are excluding candidates
         based on number of responses, and
         add this to the configuration file
@@ -456,8 +467,9 @@ def check_flag_column(self,
                           flag_column='flag_column',
                           partition='unknown'):
         """
-        Make sure the `flag_column` field is in the correct format. Get
-        flag columns and values for filtering if any and convert single
+        Make sure the `flag_column` field is in the correct format.
+
+        Get flag columns and values for filtering if any and convert single
         values to lists. Raises an exception if `flag_column` is not
         correctly specified.
 
@@ -549,9 +561,10 @@ def check_flag_column(self,
                                               model_eval))
         return new_filter_dict
 
-
     def get_trim_min_max_tolerance(self):
         """
+        Get trim min, trim max, and tolerance.
+
         Get the specified trim min and max,
         and trim_tolerance if any,
         and make sure they are numeric.
@@ -621,6 +634,7 @@ def get_default_converter(self):
     def get_names_and_paths(self, keys, names):
         """
         Get a a list of values, given keys.
+
         Remove any values that are None.
 
         Parameters
@@ -641,7 +655,6 @@ def get_names_and_paths(self, keys, names):
         ValueError
             If there are any duplicate keys or names.
         """
-
         assert len(keys) == len(names)
 
         # Make sure keys are not duplicated
@@ -676,9 +689,7 @@ def get_names_and_paths(self, keys, names):
 
 
 class ConfigurationParser:
-    """
-    A `ConfigurationParser` class to create a `Configuration` object.
-    """
+    """A ``ConfigurationParser`` class to create a ``Configuration`` object."""
 
     def __init__(self, pathlike):
         """
@@ -728,7 +739,9 @@ def __init__(self, pathlike):
     @staticmethod
     def _fix_json(json_string):
         """
-        Takes a bit of JSON that might have bad quotes
+        Fix json.
+
+        Take a bit of JSON that might have bad quotes
         or capitalized booleans and fixes that stuff.
 
         Parameters
@@ -748,6 +761,8 @@ def _fix_json(json_string):
 
     def _parse_json_file(self, filepath):
         """
+        Parse json.
+
         A private method to parse JSON configuration files and return
         a Python dictionary.
 
@@ -778,9 +793,10 @@ def _parse_json_file(self, filepath):
 
         return configdict
 
-
     def parse(self, context='rsmtool'):
         """
+        Parse configuration file.
+
         Parse the configuration file for which this parser was
         instantiated.
 
@@ -801,7 +817,6 @@ def parse(self, context='rsmtool'):
             A Configuration object containing the parameters in the
             file that we instantiated the parser for.
         """
-
         filepath = self._configdir / self._filename
         configdict = self._parse_json_file(filepath)
 
@@ -815,6 +830,8 @@ def parse(self, context='rsmtool'):
     @classmethod
     def validate_config(cls, config, context='rsmtool'):
         """
+        Validate configuration file.
+
         Ensure that all required fields are specified, add default values
         values for all unspecified fields, and ensure that all specified
         fields are valid.
@@ -844,7 +861,6 @@ def validate_config(cls, config, context='rsmtool'):
         ValueError
             If config does not exist, and no config passed.
         """
-
         # make a copy of the given parameter dictionary
         new_config = deepcopy(config)
 
@@ -954,8 +970,8 @@ def validate_config(cls, config, context='rsmtool'):
         # linear regression model
         if new_config['skll_objective']:
             if not is_skll_model(new_config['model']):
-                logging.warning("You specified a custom SKLL objective but also chose a "
-                                "non-SKLL model. The objective will be ignored.")
+                warnings.warn("You specified a custom SKLL objective but also chose a "
+                              "non-SKLL model. The objective will be ignored.")
             else:
                 if new_config['skll_objective'] not in SCORERS:
                     raise ValueError("Invalid SKLL objective. Please refer to the SKLL "
@@ -968,9 +984,9 @@ def validate_config(cls, config, context='rsmtool'):
         # at run time for any invalid parameters
         if new_config['skll_fixed_parameters']:
             if not is_skll_model(new_config['model']):
-                logging.warning("You specified custom SKLL fixed parameters but "
-                                "also chose a non-SKLL model. The parameters will "
-                                "be ignored.")
+                warnings.warn("You specified custom SKLL fixed parameters but "
+                              "also chose a non-SKLL model. The parameters will "
+                              "be ignored.")
 
         # 10. Check that if we are running rsmtool to ask for
         # expected scores then the SKLL model type must actually
@@ -994,12 +1010,14 @@ def validate_config(cls, config, context='rsmtool'):
         # 12. Raise a warning if we are specifiying a feature file but also
         # telling the system to automatically select transformations
         if new_config['features'] and new_config['select_transformations']:
-            logging.warning("You specified a feature file but also set "
-                            "`select_transformations` to True. Any "
-                            "transformations or signs specified in "
-                            "the feature file will be overwritten by "
-                            "the automatically selected transformations "
-                            "and signs.")
+            # Show a warning unless a user passed a list of features.
+            if not isinstance(new_config['features'], list):
+                warnings.warn("You specified a feature file but also set "
+                              "`select_transformations` to True. Any "
+                              "transformations or signs specified in "
+                              "the feature file will be overwritten by "
+                              "the automatically selected transformations "
+                              "and signs.")
 
         # 13. If we have `experiment_names`, check that the length of the list
         # matches the list of experiment_dirs.
@@ -1039,6 +1057,8 @@ def validate_config(cls, config, context='rsmtool'):
     @classmethod
     def process_config(cls, config):
         """
+        Process configuration file.
+
         Converts fields which are read in as string to the
         appropriate format. Fields which can take multiple
         string values are converted to lists if they have
@@ -1061,7 +1081,6 @@ def process_config(cls, config):
         NameError
             If config does not exist, or no config read.
         """
-
         # Get the parameter dictionary
         new_config = deepcopy(config)
 

diff --git a/rsmtool/notebooks/builtin_model.ipynb b/rsmtool/notebooks/builtin_model.ipynb
@@ -151,11 +151,13 @@
     "    ax1.set_title('Values of standardized coefficients')\n",
     "    ax1.set_xlabel('')\n",
     "    ax1.set_ylabel('')\n",
-    "    # no pie chart if we have more than 15 features or if the feature names are long (pie chart looks ugly)\n",
-    "    if len(features_used) <= 15 and longest_feature_name <= 10:\n",
+    "    # no pie chart if we have more than 15 features,\n",
+    "    # if the feature names are long (pie chart looks ugly)\n",
+    "    # or if there are any negative coefficients.\n",
+    "    if len(features_used) <= 15 and longest_feature_name <= 10 and (df_betas_sorted['relative']>=0).all():\n",
     "        ax2=fig.add_subplot(133, aspect=True)\n",
     "        ax2.pie(abs(df_betas_sorted['relative'].values), colors=grey_colors, \n",
-    "            labels=df_betas_sorted['feature'].values)\n",
+    "            labels=df_betas_sorted['feature'].values, normalize=True)\n",
     "        ax2.set_title('Proportional contribution of each feature')\n",
     "    else:\n",
     "        fig.set_size_inches(len(features_used), 3)\n",
@@ -245,7 +247,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

diff --git a/tests/test_configuration_parser.py b/tests/test_configuration_parser.py
@@ -3,6 +3,7 @@
 import os
 import tempfile
 import pandas as pd
+import warnings
 
 from io import StringIO
 from os import getcwd
@@ -305,6 +306,34 @@ def test_validate_config_min_n_without_subgroups(self):
 
         _ = ConfigurationParser.validate_config(data)
 
+    def test_validate_config_warning_feature_file_and_transformations(self):
+        data = {'experiment_id': 'experiment_1',
+                'train_file': 'data/rsmtool_smTrain.csv',
+                'test_file': 'data/rsmtool_smEval.csv',
+                'model': 'LinearRegression',
+                'select_transformations': True,
+                'features': 'some_file.csv'}
+
+        with warnings.catch_warnings(record=True) as warning_list:
+            _ = ConfigurationParser.validate_config(data)
+            eq_(len(warning_list), 1)
+            ok_(issubclass(warning_list[0].category, UserWarning))
+
+
+    def test_validate_config_warning_feature_list_and_transformations(self):
+        # this should no show warnings
+        data = {'experiment_id': 'experiment_1',
+                'train_file': 'data/rsmtool_smTrain.csv',
+                'test_file': 'data/rsmtool_smEval.csv',
+                'model': 'LinearRegression',
+                'select_transformations': True,
+                'features': ['feature1', 'feature2']}
+
+        with warnings.catch_warnings(record=True) as warning_list:
+            _ = ConfigurationParser.validate_config(data)
+            eq_(len(warning_list), 0)
+
+
     def test_process_fields(self):
         data = {'experiment_id': 'experiment_1',
                 'train_file': 'data/rsmtool_smTrain.csv',