Skip to content

Commit

Permalink
Merge branch 'refactor-configuration-parser' into unpin-pandas-and-numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
aloukina committed Mar 5, 2020
2 parents 13939a9 + fa56c99 commit 5fcbd9a
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 64 deletions.
97 changes: 50 additions & 47 deletions rsmtool/configuration_parser.py
Expand Up @@ -18,13 +18,9 @@
from copy import copy, deepcopy
from collections import Counter
from configparser import ConfigParser
from os import getcwd
from os.path import abspath

from os import getcwd, makedirs
from os.path import (abspath,
basename,
dirname,
join,
splitext)
from pathlib import Path
from ruamel import yaml

Expand All @@ -43,7 +39,7 @@
from skll.metrics import SCORERS

if HAS_RSMEXTRA:
from rsmextra.settings import (default_feature_subset_file,
from rsmextra.settings import (default_feature_subset_file, # noqa
default_feature_sign)


Expand Down Expand Up @@ -81,24 +77,24 @@ def wrapper(*args, **kwargs):
# split filepath into
# configdir and filename
filepath = args[-1]
kwargs['filename'] = basename(filepath)
kwargs['configdir'] = dirname(abspath(filepath))
kwargs['filename'] = Path(filepath).name
kwargs['configdir'] = Path(filepath).resolve().parent
# remove filepath from positional arguments
args = args[:-1]
return f(*args, **kwargs)
return wrapper
return decorator


def configure(tool_name, config_file_or_obj_or_dict):
def configure(context, config_file_or_obj_or_dict):
"""
Get the configuration for ``tool_name`` from the input
Get the configuration for ``context`` from the input
``config_file_or_obj_or_dict``.
Parameters
----------
tool_name : str
The name of the tool that is being configured. Must be one of
context : str
The context that is being configured. Must be one of
``rsmtool``, ``rsmeval``, ``rsmcompare``, ``rsmsummarize``, or
``rsmpredict``.
config_file_or_obj_or_dict : str or pathlib.Path or dict or Configuration
Expand Down Expand Up @@ -131,13 +127,13 @@ def configure(tool_name, config_file_or_obj_or_dict):

# Instantiate configuration parser object
parser = ConfigurationParser(config_file_or_obj_or_dict)
configuration = parser.parse(context=tool_name)
configuration = parser.parse(context=context)

elif isinstance(config_file_or_obj_or_dict, dict):

# directly instantiate the Configuration from the dictionary
configuration = Configuration(config_file_or_obj_or_dict,
context=tool_name)
context=context)

elif isinstance(config_file_or_obj_or_dict, Configuration):

Expand Down Expand Up @@ -216,10 +212,10 @@ def __init__(self,

# set configdir to `cwd` if not given and let the user know
if configdir is None:
configdir = getcwd()
configdir = Path(getcwd())
logging.info("Configuration directory will be set to {}".format(configdir))
else:
configdir = abspath(configdir)
configdir = Path(configdir).resolve()

self._config = configdict
self._configdir = configdir
Expand Down Expand Up @@ -322,17 +318,21 @@ def filepath(self):
filepath : str
The path for the config file.
"""
if not self.filename:

# raise a deprecation warning first
warnings.warn("The `filepath` attribute of the Configuration "
"object will be removed in RSMTool v8.0."
"Use the `configdir` and `filename` attributes "
"if you need the full path to the "
"configuration file", DeprecationWarning)

# then compute the deprecated attribute if we can
if not self._filename:
raise AttributeError('The `filepath` attribute is not defined '
'for this Configuration object.')
else:
warnings.warn("The `filepath` attribute of the Configuration "
"object will be removed in RSMTool v8.0."
"Use the `configdir` and `filename` attributes "
"if you need the full path to the "
"configuration file", DeprecationWarning)
filepath = join(self.configdir, self.filename)
return filepath
filepath = self._configdir / self._filename
return str(filepath)

@filepath.setter
def filepath(self, new_path):
Expand All @@ -353,8 +353,8 @@ def filepath(self, new_path):
"use `configdir` and `filename` if you "
"need to set a new path to the "
"configuration file", DeprecationWarning)
new_filename = basename(new_path)
new_configdir = dirname(abspath(new_path))
new_filename = Path(new_path).name
new_configdir = Path(new_path).resolve().parent
self._filename = new_filename
self._configdir = new_configdir

Expand All @@ -370,7 +370,7 @@ def configdir(self):
configdir : str
The path to the configuration reference directory
"""
return self._configdir
return str(self._configdir)

@configdir.setter
def configdir(self, new_path):
Expand All @@ -387,7 +387,10 @@ def configdir(self, new_path):

if new_path is None:
raise ValueError("The `configdir` attribute cannot be set to `None` ")
self._configdir = abspath(new_path)

# TODO: replace `Path(abspath(new_path))` with `Path(new_path).resolve()
# once this Windows bug is fixed: https://bugs.python.org/issue38671
self._configdir = Path(abspath(new_path))

@property
def filename(self):
Expand All @@ -402,7 +405,7 @@ def filename(self):
return self._filename

@filename.setter
def filename(self, new_path):
def filename(self, new_name):
"""
Set a new name of the configuration file
Expand All @@ -411,7 +414,7 @@ def filename(self, new_path):
new_name : str
New name of the configuration file
"""
self._filename = new_path
self._filename = new_name

@property
def context(self):
Expand Down Expand Up @@ -548,22 +551,22 @@ def save(self, output_dir=None):

# save a copy of the main config into the output directory
if output_dir is None:
output_dir = getcwd()
output_dir = Path(getcwd())

# Create output directory, if it does not exist
output_dir = join(output_dir, 'output')
makedirs(output_dir, exist_ok=True)
output_dir = Path(output_dir).resolve() / 'output'
output_dir.mkdir(parents=True, exist_ok=True)

id_field = ID_FIELDS[self._context]
outjson = join(output_dir,
'{}_{}.json'.format(self._config[id_field],
self._context))
experiment_id = self._config[id_field]
context = self._context
outjson = output_dir / f"{experiment_id}_{context}.json"

expected_fields = (CHECK_FIELDS[self._context]['required'] +
CHECK_FIELDS[self._context]['optional'])

output_config = {k: v for k, v in self._config.items() if k in expected_fields}
with open(outjson, 'w') as outfile:
with outjson.open(mode='w') as outfile:
json.dump(output_config, outfile, indent=4, separators=(',', ': '))

def check_exclude_listwise(self):
Expand Down Expand Up @@ -870,8 +873,8 @@ def _parse_json_file(self, filepath):
Parameters
----------
filepath : str
Path to the JSON configuration file.
filepath : pathlib.Path
A ``pathlib.Path`` object containing the JSON configuration filepath.
Returns
-------
Expand All @@ -898,12 +901,12 @@ def _parse_json_file(self, filepath):
def _parse_cfg_file(self, filepath):
"""
A private method to parse INI-style configuration files and
return a Python dictionary
return a Python dictionary.
Parameters
----------
filepath : str
Path to the CFG configuration file.
filepath : pathlib.Path
A ``pathlib.Path`` object containing the CFG configuration filepath.
Returns
-------
Expand Down Expand Up @@ -973,8 +976,8 @@ def parse(self, context='rsmtool'):
A Configuration object containing the parameters in the
file that we instantiated the parser for.
"""
_, extension = splitext(self._filename)
filepath = join(self._configdir, self._filename)
extension = Path(self._filename).suffix.lower()
filepath = self._configdir / self._filename
if extension == '.json':
configdict = self._parse_json_file(filepath)
else:
Expand Down Expand Up @@ -1163,7 +1166,7 @@ def validate_config(cls, config, context='rsmtool'):

# Check if we have the default subset file from rsmextra
if HAS_RSMEXTRA:
default_basename = basename(default_feature_subset_file)
default_basename = Path(default_feature_subset_file).name
new_config['feature_subset_file'] = default_feature_subset_file
logging.warning("You requested feature subsets but did not "
"specify any feature file. "
Expand All @@ -1178,7 +1181,7 @@ def validate_config(cls, config, context='rsmtool'):

# Check if we have the default subset file from rsmextra
if HAS_RSMEXTRA:
default_basename = basename(default_feature_subset_file)
default_basename = Path(default_feature_subset_file).name
new_config['feature_subset_file'] = default_feature_subset_file
logging.warning("You specified the expected sign of "
"correlation but did not specify a feature "
Expand Down
6 changes: 0 additions & 6 deletions rsmtool/preprocessor.py
Expand Up @@ -1673,12 +1673,6 @@ def process_data_rsmtool(self, config_obj, data_container_obj):
if feature_subset_file is not None:
feature_subset_file = DataReader.locate_files(feature_subset_file, configdir)

# get the experiment ID
experiment_id = config_obj['experiment_id']

# get the description
description = config_obj['description']

# get the column name for the labels for the training and testing data
train_label_column = config_obj['train_label_column']
test_label_column = config_obj['test_label_column']
Expand Down
15 changes: 11 additions & 4 deletions rsmtool/utils.py
Expand Up @@ -308,16 +308,19 @@ def convert_to_float(value):
return int_to_float(string_to_number(value))


def parse_json_with_comments(filename):
def parse_json_with_comments(pathlike):
"""
Parse a JSON file after removing any comments.
Comments can use either ``//`` for single-line
comments or or ``/* ... */`` for multi-line comments.
The input filepath can be a string or ``pathlib.Path``.
Parameters
----------
filename : str
Path to the input JSON file.
filename : str or os.PathLike
Path to the input JSON file either as a string
or as a ``pathlib.Path`` object.
Returns
-------
Expand All @@ -334,7 +337,11 @@ def parse_json_with_comments(filename):
comment_re = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?',
re.DOTALL | re.MULTILINE)

with open(filename) as file_buff:
# if we passed in a string, convert it to a Path
if isinstance(pathlike, str):
pathlike = Path(pathlike)

with open(pathlike, 'r') as file_buff:
content = ''.join(file_buff.readlines())

# Looking for comments
Expand Down
14 changes: 7 additions & 7 deletions tests/test_configuration_parser.py
Expand Up @@ -8,7 +8,7 @@
from io import StringIO
from os import getcwd
from os.path import abspath, dirname, join

from pathlib import Path
from shutil import rmtree

from numpy.testing import assert_array_equal
Expand Down Expand Up @@ -92,7 +92,7 @@ def test_load_config_from_dict(self):
"flag_column": "abc",
"model": 'LinearRegression'}

configdir = abspath('/path/to/configdir')
configdir = Path('/path/to/configdir').resolve()
configuration = Configuration(data, configdir=configdir)
eq_(configuration._configdir, configdir)

Expand All @@ -103,7 +103,7 @@ def test_load_config_from_dict_no_configdir(self):
"flag_column": "abc",
"model": 'LinearRegression'}

configdir = getcwd()
configdir = Path(getcwd())
configuration = Configuration(data)
eq_(configuration._configdir, configdir)

Expand Down Expand Up @@ -513,7 +513,7 @@ def test_init_with_filepath_as_positional_argument(self):

config = Configuration(config_dict, filepath)
eq_(config._filename, 'file.json')
eq_(config._configdir, abspath('some/path'))
eq_(config._configdir, Path('some/path').resolve())
assert len(w) == 1
assert issubclass(w[-1].category, DeprecationWarning)

Expand All @@ -526,7 +526,7 @@ def test_init_with_filename_as_kword_argument(self):

config = Configuration(config_dict, filename=filename)
eq_(config._filename, filename)
eq_(config.configdir, abspath(getcwd()))
eq_(config.configdir, getcwd())

def test_init_with_filename_and_configdir_as_kword_argument(self):
filename = 'file.json'
Expand All @@ -540,7 +540,7 @@ def test_init_with_filename_and_configdir_as_kword_argument(self):
filename=filename,
configdir=configdir)
eq_(config._filename, filename)
eq_(config._configdir, abspath(configdir))
eq_(config._configdir, Path(configdir).resolve())

def test_init_with_configdir_only_as_kword_argument(self):
configdir = 'some/path'
Expand All @@ -552,7 +552,7 @@ def test_init_with_configdir_only_as_kword_argument(self):
config = Configuration(config_dict,
configdir=configdir)
eq_(config._filename, None)
eq_(config._configdir, abspath(configdir))
eq_(config._configdir, Path(configdir).resolve())

@ raises(ValueError)
def test_init_with_filepath_positional_and_filename_keyword(self):
Expand Down

0 comments on commit 5fcbd9a

Please sign in to comment.