Merge pull request #642 from EducationalTestingService/feature/reduce_code_duplication_in_tests

Simplify and deduplicate test code
EducationalTestingService · Nov 13, 2020 · 91f4d19 · 91f4d19
2 parents 3dafc5f + 2daaa24
commit 91f4d19
Show file tree

Hide file tree

Showing 18 changed files with 764 additions and 1,215 deletions.
diff --git a/.gitignore b/.gitignore
@@ -20,43 +20,3 @@ examples/*/test
 examples/*/train
 examples/*/train+dev
 examples/*/output
-
-tests/configs/test_cv_folds1.cfg
-tests/configs/test_cv_folds2.cfg
-tests/configs/test_predict.cfg
-tests/configs/test_regression1.cfg
-tests/configs/test_summary.cfg
-tests/output
-tests/test
-tests/train/test_cv_folds1.csv
-tests/train/test_cv_folds1.jsonlines
-tests/train/test_regression1.jsonlines
-tests/train/test_summary.jsonlines
-tests/configs/test_sparse.cfg
-tests/train/test_sparse.jsonlines
-tests/train/test_merging
-tests/train/test_cv_folds.*
-tests/configs/test_ablation.cfg
-tests/configs/test_ablation_feature_hasher.cfg
-tests/configs/test_ablation_feature_hasher_sampler.cfg
-tests/configs/test_ablation_sampler.cfg
-tests/configs/test_class_map.cfg
-tests/configs/test_class_map_feature_hasher.cfg
-tests/configs/test_cv_folds1_feature_hasher_sampler.cfg
-tests/configs/test_cv_folds1_sampler.cfg
-tests/configs/test_cv_folds2_feature_hasher.cfg
-tests/configs/test_cv_folds2_feature_hasher_sampler.cfg
-tests/configs/test_cv_folds2_sampler.cfg
-tests/configs/test_predict_feature_hasher.cfg
-tests/configs/test_regression1_feature_hasher.cfg
-tests/configs/test_scaling_with.cfg
-tests/configs/test_scaling_with_feature_hasher.cfg
-tests/configs/test_scaling_without.cfg
-tests/configs/test_scaling_without_feature_hasher.cfg
-tests/configs/test_single_file.cfg
-tests/configs/test_sparse_feature_hasher.cfg
-tests/configs/test_sparse_feature_hasher_sampler.cfg
-tests/configs/test_sparse_sampler.cfg
-tests/configs/test_summary_feature_hasher.cfg
-tests/train/*.jsonlines
-tests/train/test_conversion
diff --git a/skll/data/readers.py b/skll/data/readers.py
@@ -280,7 +280,7 @@ def feat_dict_generator():
                 for ex_num, (_, _, feat_dict) in enumerate(self._sub_read(f)):
                     yield feat_dict
                     if ex_num % 100 == 0:
-                        self._print_progress('{100 * ex_num / total:.8}%')
+                        self._print_progress(f'{100 * ex_num / total:.8}%')
                 self._print_progress("100%")
 
         # extract the features dictionary

diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,10 @@
+from os.path import abspath, dirname, join
+
+_my_dir = abspath(dirname(__file__))
+config_dir = join(_my_dir, 'configs')
+backward_compatibility_dir = join(_my_dir, 'backward_compatibility')
+examples_dir = join(dirname(_my_dir), 'examples')
+output_dir = join(_my_dir, 'output')
+other_dir = join(_my_dir, 'other')
+train_dir = join(_my_dir, 'train')
+test_dir = join(_my_dir, 'test')
diff --git a/tests/test_ablation.py b/tests/test_ablation.py
@@ -10,51 +10,43 @@
 
 import csv
 import json
-import os
 
 from glob import glob
-from os.path import abspath, dirname, exists, join
+from os.path import join
+from pathlib import Path
 
 from nose.tools import eq_
 from skll.experiments import run_configuration
 from skll.utils.constants import KNOWN_DEFAULT_PARAM_GRIDS
 
+from tests import config_dir, output_dir, test_dir, train_dir
 from tests.utils import (create_jsonlines_feature_files,
-                         fill_in_config_paths)
+                         fill_in_config_paths,
+                         remove_jsonlines_feature_files,
+                         unlink)
 
 
 _ALL_MODELS = list(KNOWN_DEFAULT_PARAM_GRIDS.keys())
-_my_dir = abspath(dirname(__file__))
 
 
 def setup():
     """
     Create necessary directories for testing.
     """
-    train_dir = join(_my_dir, 'train')
-    if not exists(train_dir):
-        os.makedirs(train_dir)
-    test_dir = join(_my_dir, 'test')
-    if not exists(test_dir):
-        os.makedirs(test_dir)
-    output_dir = join(_my_dir, 'output')
-    if not exists(output_dir):
-        os.makedirs(output_dir)
+    for dir_path in [train_dir, test_dir, output_dir]:
+        Path(dir_path).mkdir(exist_ok=True)
 
     # create jsonlines feature files
-    train_path = join(_my_dir, 'train')
-    create_jsonlines_feature_files(train_path)
+    create_jsonlines_feature_files(train_dir)
 
 
 def tearDown():
     """
     Clean up after tests.
     """
-    output_dir = join(_my_dir, 'output')
-    config_dir = join(_my_dir, 'configs')
 
     for output_file in glob(join(output_dir, 'ablation_cv_*')):
-        os.unlink(output_file)
+        unlink(output_file)
 
     config_files = ['test_ablation.cfg',
                     'test_ablation_all_combos.cfg',
@@ -65,8 +57,9 @@ def tearDown():
                     'test_ablation_feature_hasher_sampler.cfg',
                     'test_ablation_feature_hasher_sampler_all_combos.cfg']
     for cf in config_files:
-        if exists(join(config_dir, cf)):
-            os.unlink(join(config_dir, cf))
+        unlink(Path(config_dir) / cf)
+
+    remove_jsonlines_feature_files(train_dir)
 
 
 def check_ablation_rows(reader):
@@ -98,24 +91,21 @@ def test_ablation_cv():
     Test ablation + cross-validation
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
-                                'test_ablation.template.cfg')
+    config_template_path = join(config_dir, 'test_ablation.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
     run_configuration(config_path, quiet=True, ablation=1)
 
     # read in the summary file and make sure it has
     # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
     # lines
-    with open(join(_my_dir, 'output', 'ablation_cv_plain_summary.tsv')) as f:
+    with open(join(output_dir, 'ablation_cv_plain_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 154)
 
     # make sure there are 7 ablated featuresets * 2 learners = 14 results files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_plain*.results')))
     eq_(num_result_files, 14)
 
@@ -125,8 +115,7 @@ def test_ablation_cv_all_combos():
     Test ablation all-combos + cross-validation
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_all_combos.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -135,15 +124,14 @@ def test_ablation_cv_all_combos():
     # read in the summary file and make sure it has
     # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
     # lines
-    with open(join(_my_dir, 'output', 'ablation_cv_plain_all_combos_summary.tsv')) as f:
+    with open(join(output_dir, 'ablation_cv_plain_all_combos_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 220)
 
     # make sure there are 10 ablated featuresets * 2 learners = 20 results
     # files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_plain_all_combos*results')))
     eq_(num_result_files, 20)
 
@@ -153,8 +141,7 @@ def test_ablation_cv_feature_hasher():
     Test ablation + cross-validation + feature hashing
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_feature_hasher.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -163,16 +150,14 @@ def test_ablation_cv_feature_hasher():
     # read in the summary file and make sure it has
     # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
     # lines
-    with open(join(_my_dir,
-                   'output',
+    with open(join(output_dir,
                    'ablation_cv_feature_hasher_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 154)
 
     # make sure there are 7 ablated featuresets * 2 learners = 14 results files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_feature_hasher_*.results')))
     eq_(num_result_files, 14)
 
@@ -182,8 +167,7 @@ def test_ablation_cv_feature_hasher_all_combos():
     Test ablation all-combos + cross-validation + feature hashing
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_feature_hasher_all_combos.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -194,17 +178,15 @@ def test_ablation_cv_feature_hasher_all_combos():
     #      * (10 folds + 1 average line)
     #      * 2 learners
     #    = 220 lines in total
-    with open(join(_my_dir,
-                   'output',
+    with open(join(output_dir,
                    'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 220)
 
     # make sure there are 10 ablated featuresets * 2 learners = 20 results
     # files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_feature_hasher_all_combos*.results')))
     eq_(num_result_files, 20)
 
@@ -214,8 +196,7 @@ def test_ablation_cv_sampler():
     Test ablation + cross-validation + samplers
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_sampler.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -224,14 +205,13 @@ def test_ablation_cv_sampler():
     # read in the summary file and make sure it has
     # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
     # lines
-    with open(join(_my_dir, 'output', 'ablation_cv_sampler_summary.tsv')) as f:
+    with open(join(output_dir, 'ablation_cv_sampler_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 154)
 
     # make sure there are 7 ablated featuresets * 2 learners = 14 results files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_sampler*.results')))
     eq_(num_result_files, 14)
 
@@ -241,8 +221,7 @@ def test_ablation_cv_all_combos_sampler():
     Test ablation all-combos + cross-validation + samplers
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_sampler_all_combos.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -251,15 +230,14 @@ def test_ablation_cv_all_combos_sampler():
     # read in the summary file and make sure it has
     # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
     # lines
-    with open(join(_my_dir, 'output', 'ablation_cv_sampler_all_combos_summary.tsv')) as f:
+    with open(join(output_dir, 'ablation_cv_sampler_all_combos_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 220)
 
     # make sure there are 10 ablated featuresets * 2 learners = 20 results
     # files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_sampler_all_combos*.results')))
     eq_(num_result_files, 20)
 
@@ -269,8 +247,7 @@ def test_ablation_cv_feature_hasher_sampler():
     Test ablation + cross-validation + feature hashing + samplers
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_feature_hasher_sampler.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -279,16 +256,14 @@ def test_ablation_cv_feature_hasher_sampler():
     # read in the summary file and make sure it has
     # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
     # lines
-    with open(join(_my_dir,
-                   'output',
+    with open(join(output_dir,
                    'ablation_cv_feature_hasher_sampler_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 154)
 
     # make sure there are 7 ablated featuresets * 2 learners = 14 results files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_feature_hasher_sampler*.results')))
     eq_(num_result_files, 14)
 
@@ -298,8 +273,7 @@ def test_ablation_cv_feature_hasher_all_combos_sampler():
     Test ablation all-combos + cross-validation + feature hashing + samplers
     """
 
-    config_template_path = join(_my_dir,
-                                'configs',
+    config_template_path = join(config_dir,
                                 'test_ablation_feature_hasher_sampler_all_combos.template.cfg')
     config_path = fill_in_config_paths(config_template_path)
 
@@ -308,16 +282,14 @@ def test_ablation_cv_feature_hasher_all_combos_sampler():
     # read in the summary file and make sure it has
     # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
     # lines
-    with open(join(_my_dir,
-                   'output',
+    with open(join(output_dir,
                    'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
         reader = csv.DictReader(f, dialect=csv.excel_tab)
         num_rows = check_ablation_rows(reader)
         eq_(num_rows, 220)
 
     # make sure there are 10 ablated featuresets * 2 learners = 20 results
     # files
-    num_result_files = len(glob(join(_my_dir,
-                                     'output',
+    num_result_files = len(glob(join(output_dir,
                                      'ablation_cv_feature_hasher_sampler_all_combos*.results')))
     eq_(num_result_files, 20)