Skip to content

Commit

Permalink
Merge pull request #645 from EducationalTestingService/string_concatenation_etc
Browse files Browse the repository at this point in the history

Fix up some string concatenation, line length issues, etc.
  • Loading branch information
desilinguist committed Nov 24, 2020
2 parents 48dc473 + 45d13af commit 26b96c2
Show file tree
Hide file tree
Showing 32 changed files with 1,236 additions and 994 deletions.
5 changes: 3 additions & 2 deletions examples/make_titanic_example_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def main():
"""
Create directories and split CSV files into subsets.
"""
logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - '
'%(message)s'), level=logging.INFO)
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - '
'%(message)s',
level=logging.INFO)
logger = logging.getLogger(__name__)

# Create dictionary of subsets to use for creating split feature files
Expand Down
10 changes: 4 additions & 6 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,12 +757,10 @@ def parse_config_file(config_path, log_level=logging.INFO):
'learning_curve.')
if task == 'learning_curve':
if len(grid_objectives) > 0:
raise ValueError("The \"objectives\" option "
"is no longer supported for the "
"\"learning_curve\" "
"task. Please use the \"metrics\" "
"option in the [Output] "
"section instead.")
raise ValueError("The \"objectives\" option is no longer supported"
" for the \"learning_curve\" task. Please use the"
" \"metrics\" option in the [Output] section "
"instead.")
if len(output_metrics) == 0:
raise ValueError('The "metrics" option must be set when '
'the task is "learning_curve".')
Expand Down
5 changes: 2 additions & 3 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,8 @@ def _parse_and_validate_metrics(metrics, option_name, logger=None):
# `mean_squared_error` is no longer supported.
# It has been replaced by `neg_mean_squared_error`
if 'mean_squared_error' in metrics:
raise ValueError("The metric \"mean_squared_error\" "
"is no longer supported."
" please use the metric "
raise ValueError("The metric \"mean_squared_error\" is no longer "
"supported. please use the metric "
"\"neg_mean_squared_error\" instead.")

return metrics
4 changes: 2 additions & 2 deletions skll/data/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def for_path(cls, path, feature_set, **kwargs):
appropriate for the given path.
"""
# Get lowercase extension for file extension checking
ext = '.' + path.rsplit('.', 1)[-1].lower()
ext = f'.{path.rsplit(".", 1)[-1].lower()}'
return EXT_TO_WRITER[ext](path, feature_set, **kwargs)

def write(self):
Expand Down Expand Up @@ -410,7 +410,7 @@ def __init__(self, path, feature_set, pandas_kwargs=None, **kwargs):
self.id_col = kwargs.pop('id_col', 'id')
super(CSVWriter, self).__init__(path, feature_set, **kwargs)
self._pandas_kwargs = {} if pandas_kwargs is None else pandas_kwargs
self._sep = self._pandas_kwargs.pop('sep', str(','))
self._sep = self._pandas_kwargs.pop('sep', ',')
self._index = self._pandas_kwargs.pop('index', False)
self._use_pandas = True

Expand Down
37 changes: 18 additions & 19 deletions skll/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def _classify_featureset(args):

# write out the cv folds if required
if task == 'cross_validate' and save_cv_folds:
skll_fold_ids_file = experiment_name + '_skll_fold_ids.csv'
skll_fold_ids_file = f'{experiment_name}_skll_fold_ids.csv'
with open(join(results_path, skll_fold_ids_file),
'w') as output_file:
_write_skll_folds(skll_fold_ids, output_file)
Expand Down Expand Up @@ -602,21 +602,21 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
expanded_fs.append(sorted(featureset -
set(excluded_features)))
expanded_fs_names.append(
featureset_name
+ '_minus_'
+ _munge_featureset_name(excluded_features))
f'{featureset_name}_minus_'
f'{_munge_featureset_name(excluded_features)}'
)
# Otherwise, just expand removing the specified number at a time
else:
for excluded_features in combinations(features, ablation):
expanded_fs.append(sorted(featureset -
set(excluded_features)))
expanded_fs_names.append(
featureset_name
+ '_minus_'
+ _munge_featureset_name(excluded_features))
f'{featureset_name}_minus_'
f'{_munge_featureset_name(excluded_features)}'
)
# Also add version with nothing removed as baseline
expanded_fs.append(features)
expanded_fs_names.append(featureset_name + '_all')
expanded_fs_names.append(f'{featureset_name}_all')

# Replace original feature set lists
featuresets = expanded_fs
Expand All @@ -636,15 +636,14 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
# allowed
for featureset_name in featureset_names:
if len(featureset_name) > 210:
raise OSError('System generated file length '
f'"{featureset_name}" exceeds the maximum '
'length supported. Please specify names of '
'your datasets with "featureset_names". If you'
' are running ablation experiment, please '
'reduce the length of the features in '
'"featuresets" because the auto-generated name '
'would be longer than the file system can '
'handle')
raise OSError(
f'System generated file length "{featureset_name}" '
'exceeds the maximum length supported. Please specify '
'names of your datasets with "featureset_names". If you '
'are running ablation experiment, please reduce the '
'length of the features in "featuresets" because the '
'auto-generated name would be longer than the file system'
' can handle')

# if the task is learning curve, and ``metrics`` was specified, then
# assign the value of ``metrics`` to ``grid_objectives`` - this lets
Expand Down Expand Up @@ -781,14 +780,14 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',

# write out the summary results file
if (task == 'cross_validate' or task == 'evaluate') and write_summary:
summary_file_name = experiment_name + '_summary.tsv'
summary_file_name = f'{experiment_name}_summary.tsv'
with open(join(results_path,
summary_file_name), 'w', newline='') as output_file:
_write_summary_file(result_json_paths,
output_file,
ablation=ablation)
elif task == 'learning_curve':
output_file_name = experiment_name + '_summary.tsv'
output_file_name = f'{experiment_name}_summary.tsv'
output_file_path = join(results_path, output_file_name)
with open(output_file_path, 'w', newline='') as output_file:
_write_learning_curve_file(result_json_paths, output_file)
Expand Down
3 changes: 1 addition & 2 deletions skll/experiments/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ def _print_fancy_output(learner_result_dicts, output_file=sys.stdout):
raise ValueError('Result dictionary list is empty!')

lrd = learner_result_dicts[0]
print(f'Experiment Name: {lrd["experiment_name"]}',
file=output_file)
print(f'Experiment Name: {lrd["experiment_name"]}', file=output_file)
print(f'SKLL Version: {lrd["version"]}', file=output_file)
print(f'Training Set: {lrd["train_set_name"]}', file=output_file)
print(f'Training Set Size: {lrd["train_set_size"]}', file=output_file)
Expand Down

0 comments on commit 26b96c2

Please sign in to comment.