Skip to content

Commit

Permalink
Merge pull request #642 from EducationalTestingService/feature/reduce…
Browse files Browse the repository at this point in the history
…_code_duplication_in_tests

Simplify and deduplicate test code
  • Loading branch information
desilinguist committed Nov 13, 2020
2 parents 3dafc5f + 2daaa24 commit 91f4d19
Show file tree
Hide file tree
Showing 18 changed files with 764 additions and 1,215 deletions.
40 changes: 0 additions & 40 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,43 +20,3 @@ examples/*/test
examples/*/train
examples/*/train+dev
examples/*/output

tests/configs/test_cv_folds1.cfg
tests/configs/test_cv_folds2.cfg
tests/configs/test_predict.cfg
tests/configs/test_regression1.cfg
tests/configs/test_summary.cfg
tests/output
tests/test
tests/train/test_cv_folds1.csv
tests/train/test_cv_folds1.jsonlines
tests/train/test_regression1.jsonlines
tests/train/test_summary.jsonlines
tests/configs/test_sparse.cfg
tests/train/test_sparse.jsonlines
tests/train/test_merging
tests/train/test_cv_folds.*
tests/configs/test_ablation.cfg
tests/configs/test_ablation_feature_hasher.cfg
tests/configs/test_ablation_feature_hasher_sampler.cfg
tests/configs/test_ablation_sampler.cfg
tests/configs/test_class_map.cfg
tests/configs/test_class_map_feature_hasher.cfg
tests/configs/test_cv_folds1_feature_hasher_sampler.cfg
tests/configs/test_cv_folds1_sampler.cfg
tests/configs/test_cv_folds2_feature_hasher.cfg
tests/configs/test_cv_folds2_feature_hasher_sampler.cfg
tests/configs/test_cv_folds2_sampler.cfg
tests/configs/test_predict_feature_hasher.cfg
tests/configs/test_regression1_feature_hasher.cfg
tests/configs/test_scaling_with.cfg
tests/configs/test_scaling_with_feature_hasher.cfg
tests/configs/test_scaling_without.cfg
tests/configs/test_scaling_without_feature_hasher.cfg
tests/configs/test_single_file.cfg
tests/configs/test_sparse_feature_hasher.cfg
tests/configs/test_sparse_feature_hasher_sampler.cfg
tests/configs/test_sparse_sampler.cfg
tests/configs/test_summary_feature_hasher.cfg
tests/train/*.jsonlines
tests/train/test_conversion
2 changes: 1 addition & 1 deletion skll/data/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def feat_dict_generator():
for ex_num, (_, _, feat_dict) in enumerate(self._sub_read(f)):
yield feat_dict
if ex_num % 100 == 0:
self._print_progress('{100 * ex_num / total:.8}%')
self._print_progress(f'{100 * ex_num / total:.8}%')
self._print_progress("100%")

# extract the features dictionary
Expand Down
10 changes: 10 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from os.path import abspath, dirname, join

# Absolute path of the directory containing this package; every other
# path below is derived from it.
_my_dir = abspath(dirname(__file__))

# The examples directory is a sibling of this package, one level up.
examples_dir = join(dirname(_my_dir), 'examples')

# Sub-directories that live directly inside this package.
backward_compatibility_dir = join(_my_dir, 'backward_compatibility')
config_dir = join(_my_dir, 'configs')
other_dir = join(_my_dir, 'other')
output_dir = join(_my_dir, 'output')
test_dir = join(_my_dir, 'test')
train_dir = join(_my_dir, 'train')
102 changes: 37 additions & 65 deletions tests/test_ablation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,43 @@

import csv
import json
import os

from glob import glob
from os.path import abspath, dirname, exists, join
from os.path import join
from pathlib import Path

from nose.tools import eq_
from skll.experiments import run_configuration
from skll.utils.constants import KNOWN_DEFAULT_PARAM_GRIDS

from tests import config_dir, output_dir, test_dir, train_dir
from tests.utils import (create_jsonlines_feature_files,
fill_in_config_paths)
fill_in_config_paths,
remove_jsonlines_feature_files,
unlink)


_ALL_MODELS = list(KNOWN_DEFAULT_PARAM_GRIDS.keys())
_my_dir = abspath(dirname(__file__))


def setup():
"""
Create necessary directories for testing.
"""
train_dir = join(_my_dir, 'train')
if not exists(train_dir):
os.makedirs(train_dir)
test_dir = join(_my_dir, 'test')
if not exists(test_dir):
os.makedirs(test_dir)
output_dir = join(_my_dir, 'output')
if not exists(output_dir):
os.makedirs(output_dir)
for dir_path in [train_dir, test_dir, output_dir]:
Path(dir_path).mkdir(exist_ok=True)

# create jsonlines feature files
train_path = join(_my_dir, 'train')
create_jsonlines_feature_files(train_path)
create_jsonlines_feature_files(train_dir)


def tearDown():
"""
Clean up after tests.
"""
output_dir = join(_my_dir, 'output')
config_dir = join(_my_dir, 'configs')

for output_file in glob(join(output_dir, 'ablation_cv_*')):
os.unlink(output_file)
unlink(output_file)

config_files = ['test_ablation.cfg',
'test_ablation_all_combos.cfg',
Expand All @@ -65,8 +57,9 @@ def tearDown():
'test_ablation_feature_hasher_sampler.cfg',
'test_ablation_feature_hasher_sampler_all_combos.cfg']
for cf in config_files:
if exists(join(config_dir, cf)):
os.unlink(join(config_dir, cf))
unlink(Path(config_dir) / cf)

remove_jsonlines_feature_files(train_dir)


def check_ablation_rows(reader):
Expand Down Expand Up @@ -98,24 +91,21 @@ def test_ablation_cv():
Test ablation + cross-validation
"""

config_template_path = join(_my_dir,
'configs',
'test_ablation.template.cfg')
config_template_path = join(config_dir, 'test_ablation.template.cfg')
config_path = fill_in_config_paths(config_template_path)

run_configuration(config_path, quiet=True, ablation=1)

# read in the summary file and make sure it has
# 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
# lines
with open(join(_my_dir, 'output', 'ablation_cv_plain_summary.tsv')) as f:
with open(join(output_dir, 'ablation_cv_plain_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 154)

# make sure there are 7 ablated featuresets * 2 learners = 14 results files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_plain*.results')))
eq_(num_result_files, 14)

Expand All @@ -125,8 +115,7 @@ def test_ablation_cv_all_combos():
Test ablation all-combos + cross-validation
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_all_combos.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -135,15 +124,14 @@ def test_ablation_cv_all_combos():
# read in the summary file and make sure it has
# 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
# lines
with open(join(_my_dir, 'output', 'ablation_cv_plain_all_combos_summary.tsv')) as f:
with open(join(output_dir, 'ablation_cv_plain_all_combos_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)

# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_plain_all_combos*results')))
eq_(num_result_files, 20)

Expand All @@ -153,8 +141,7 @@ def test_ablation_cv_feature_hasher():
Test ablation + cross-validation + feature hashing
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_feature_hasher.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -163,16 +150,14 @@ def test_ablation_cv_feature_hasher():
# read in the summary file and make sure it has
# 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
# lines
with open(join(_my_dir,
'output',
with open(join(output_dir,
'ablation_cv_feature_hasher_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 154)

# make sure there are 7 ablated featuresets * 2 learners = 14 results files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_feature_hasher_*.results')))
eq_(num_result_files, 14)

Expand All @@ -182,8 +167,7 @@ def test_ablation_cv_feature_hasher_all_combos():
Test ablation all-combos + cross-validation + feature hashing
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_feature_hasher_all_combos.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -194,17 +178,15 @@ def test_ablation_cv_feature_hasher_all_combos():
# * (10 folds + 1 average line)
# * 2 learners
# = 220 lines in total
with open(join(_my_dir,
'output',
with open(join(output_dir,
'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)

# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_feature_hasher_all_combos*.results')))
eq_(num_result_files, 20)

Expand All @@ -214,8 +196,7 @@ def test_ablation_cv_sampler():
Test ablation + cross-validation + samplers
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_sampler.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -224,14 +205,13 @@ def test_ablation_cv_sampler():
# read in the summary file and make sure it has
# 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
# lines
with open(join(_my_dir, 'output', 'ablation_cv_sampler_summary.tsv')) as f:
with open(join(output_dir, 'ablation_cv_sampler_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 154)

# make sure there are 7 ablated featuresets * 2 learners = 14 results files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_sampler*.results')))
eq_(num_result_files, 14)

Expand All @@ -241,8 +221,7 @@ def test_ablation_cv_all_combos_sampler():
Test ablation all-combos + cross-validation + samplers
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_sampler_all_combos.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -251,15 +230,14 @@ def test_ablation_cv_all_combos_sampler():
# read in the summary file and make sure it has
# 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
# lines
with open(join(_my_dir, 'output', 'ablation_cv_sampler_all_combos_summary.tsv')) as f:
with open(join(output_dir, 'ablation_cv_sampler_all_combos_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)

# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_sampler_all_combos*.results')))
eq_(num_result_files, 20)

Expand All @@ -269,8 +247,7 @@ def test_ablation_cv_feature_hasher_sampler():
Test ablation + cross-validation + feature hashing + samplers
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_feature_hasher_sampler.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -279,16 +256,14 @@ def test_ablation_cv_feature_hasher_sampler():
# read in the summary file and make sure it has
# 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
# lines
with open(join(_my_dir,
'output',
with open(join(output_dir,
'ablation_cv_feature_hasher_sampler_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 154)

# make sure there are 7 ablated featuresets * 2 learners = 14 results files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_feature_hasher_sampler*.results')))
eq_(num_result_files, 14)

Expand All @@ -298,8 +273,7 @@ def test_ablation_cv_feature_hasher_all_combos_sampler():
Test ablation all-combos + cross-validation + feature hashing + samplers
"""

config_template_path = join(_my_dir,
'configs',
config_template_path = join(config_dir,
'test_ablation_feature_hasher_sampler_all_combos.template.cfg')
config_path = fill_in_config_paths(config_template_path)

Expand All @@ -308,16 +282,14 @@ def test_ablation_cv_feature_hasher_all_combos_sampler():
# read in the summary file and make sure it has
# 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
# lines
with open(join(_my_dir,
'output',
with open(join(output_dir,
'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)

# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob(join(_my_dir,
'output',
num_result_files = len(glob(join(output_dir,
'ablation_cv_feature_hasher_sampler_all_combos*.results')))
eq_(num_result_files, 20)

0 comments on commit 91f4d19

Please sign in to comment.