Skip to content

Commit

Permalink
Merge pull request #645 from EducationalTestingService/string_concatenation_etc
Browse files Browse the repository at this point in the history

Fix up some string concatenation, line length issues, etc.
  • Loading branch information
desilinguist committed Nov 24, 2020
2 parents 48dc473 + 45d13af commit 26b96c2
Show file tree
Hide file tree
Showing 32 changed files with 1,236 additions and 994 deletions.
5 changes: 3 additions & 2 deletions examples/make_titanic_example_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def main():
"""
Create directories and split CSV files into subsets.
"""
logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - '
'%(message)s'), level=logging.INFO)
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - '
'%(message)s',
level=logging.INFO)
logger = logging.getLogger(__name__)

# Create dictionary of subsets to use for creating split feature files
Expand Down
10 changes: 4 additions & 6 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,12 +757,10 @@ def parse_config_file(config_path, log_level=logging.INFO):
'learning_curve.')
if task == 'learning_curve':
if len(grid_objectives) > 0:
raise ValueError("The \"objectives\" option "
"is no longer supported for the "
"\"learning_curve\" "
"task. Please use the \"metrics\" "
"option in the [Output] "
"section instead.")
raise ValueError("The \"objectives\" option is no longer supported"
" for the \"learning_curve\" task. Please use the"
" \"metrics\" option in the [Output] section "
"instead.")
if len(output_metrics) == 0:
raise ValueError('The "metrics" option must be set when '
'the task is "learning_curve".')
Expand Down
5 changes: 2 additions & 3 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,8 @@ def _parse_and_validate_metrics(metrics, option_name, logger=None):
# `mean_squared_error` is no longer supported.
# It has been replaced by `neg_mean_squared_error`
if 'mean_squared_error' in metrics:
raise ValueError("The metric \"mean_squared_error\" "
"is no longer supported."
" please use the metric "
raise ValueError("The metric \"mean_squared_error\" is no longer "
"supported. please use the metric "
"\"neg_mean_squared_error\" instead.")

return metrics
4 changes: 2 additions & 2 deletions skll/data/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def for_path(cls, path, feature_set, **kwargs):
appropriate for the given path.
"""
# Get lowercase extension for file extension checking
ext = '.' + path.rsplit('.', 1)[-1].lower()
ext = f'.{path.rsplit(".", 1)[-1].lower()}'
return EXT_TO_WRITER[ext](path, feature_set, **kwargs)

def write(self):
Expand Down Expand Up @@ -410,7 +410,7 @@ def __init__(self, path, feature_set, pandas_kwargs=None, **kwargs):
self.id_col = kwargs.pop('id_col', 'id')
super(CSVWriter, self).__init__(path, feature_set, **kwargs)
self._pandas_kwargs = {} if pandas_kwargs is None else pandas_kwargs
self._sep = self._pandas_kwargs.pop('sep', str(','))
self._sep = self._pandas_kwargs.pop('sep', ',')
self._index = self._pandas_kwargs.pop('index', False)
self._use_pandas = True

Expand Down
37 changes: 18 additions & 19 deletions skll/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def _classify_featureset(args):

# write out the cv folds if required
if task == 'cross_validate' and save_cv_folds:
skll_fold_ids_file = experiment_name + '_skll_fold_ids.csv'
skll_fold_ids_file = f'{experiment_name}_skll_fold_ids.csv'
with open(join(results_path, skll_fold_ids_file),
'w') as output_file:
_write_skll_folds(skll_fold_ids, output_file)
Expand Down Expand Up @@ -602,21 +602,21 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
expanded_fs.append(sorted(featureset -
set(excluded_features)))
expanded_fs_names.append(
featureset_name
+ '_minus_'
+ _munge_featureset_name(excluded_features))
f'{featureset_name}_minus_'
f'{_munge_featureset_name(excluded_features)}'
)
# Otherwise, just expand removing the specified number at a time
else:
for excluded_features in combinations(features, ablation):
expanded_fs.append(sorted(featureset -
set(excluded_features)))
expanded_fs_names.append(
featureset_name
+ '_minus_'
+ _munge_featureset_name(excluded_features))
f'{featureset_name}_minus_'
f'{_munge_featureset_name(excluded_features)}'
)
# Also add version with nothing removed as baseline
expanded_fs.append(features)
expanded_fs_names.append(featureset_name + '_all')
expanded_fs_names.append(f'{featureset_name}_all')

# Replace original feature set lists
featuresets = expanded_fs
Expand All @@ -636,15 +636,14 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
# allowed
for featureset_name in featureset_names:
if len(featureset_name) > 210:
raise OSError('System generated file length '
f'"{featureset_name}" exceeds the maximum '
'length supported. Please specify names of '
'your datasets with "featureset_names". If you'
' are running ablation experiment, please '
'reduce the length of the features in '
'"featuresets" because the auto-generated name '
'would be longer than the file system can '
'handle')
raise OSError(
f'System generated file length "{featureset_name}" '
'exceeds the maximum length supported. Please specify '
'names of your datasets with "featureset_names". If you '
'are running ablation experiment, please reduce the '
'length of the features in "featuresets" because the '
'auto-generated name would be longer than the file system'
' can handle')

# if the task is learning curve, and ``metrics`` was specified, then
# assign the value of ``metrics`` to ``grid_objectives`` - this lets
Expand Down Expand Up @@ -781,14 +780,14 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',

# write out the summary results file
if (task == 'cross_validate' or task == 'evaluate') and write_summary:
summary_file_name = experiment_name + '_summary.tsv'
summary_file_name = f'{experiment_name}_summary.tsv'
with open(join(results_path,
summary_file_name), 'w', newline='') as output_file:
_write_summary_file(result_json_paths,
output_file,
ablation=ablation)
elif task == 'learning_curve':
output_file_name = experiment_name + '_summary.tsv'
output_file_name = f'{experiment_name}_summary.tsv'
output_file_path = join(results_path, output_file_name)
with open(output_file_path, 'w', newline='') as output_file:
_write_learning_curve_file(result_json_paths, output_file)
Expand Down
3 changes: 1 addition & 2 deletions skll/experiments/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ def _print_fancy_output(learner_result_dicts, output_file=sys.stdout):
raise ValueError('Result dictionary list is empty!')

lrd = learner_result_dicts[0]
print(f'Experiment Name: {lrd["experiment_name"]}',
file=output_file)
print(f'Experiment Name: {lrd["experiment_name"]}', file=output_file)
print(f'SKLL Version: {lrd["version"]}', file=output_file)
print(f'Training Set: {lrd["train_set_name"]}', file=output_file)
print(f'Training Set Size: {lrd["train_set_size"]}', file=output_file)
Expand Down

0 comments on commit 26b96c2

Please sign in to comment.