Skip to content

Commit

Permalink
Update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Gill committed Apr 21, 2020
1 parent 9063bee commit ecee394
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 58 deletions.
2 changes: 1 addition & 1 deletion lexmapr/tests/test_output/empty_buckets.tsv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Sample_Id Sample_Desc Cleaned_Sample Matched_Components Match_Status(Macro Level) Match_Status(Micro Level) LexMapr Classification (Full List) LexMapr Bucket Third Party Bucket Third Party Classification
Sample_Id Sample_Desc Cleaned_Sample Matched_Components Match_Status(Macro Level) Match_Status(Micro Level) Third Party Classification
2 changes: 1 addition & 1 deletion lexmapr/tests/test_output/empty_buckets_with_tsv_input.tsv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Sample_Id Sample_Desc Cleaned_Sample Matched_Components Match_Status(Macro Level) Match_Status(Micro Level) LexMapr Classification (Full List) LexMapr Bucket Third Party Bucket Third Party Classification
Sample_Id Sample_Desc Cleaned_Sample Matched_Components Match_Status(Macro Level) Match_Status(Micro Level) Third Party Classification
65 changes: 9 additions & 56 deletions lexmapr/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,8 @@ class TestPipeline(unittest.TestCase):
# Wikipedia-based collocation resource.
"test_full_term_wiki_match": {"input": "test_full_term_wiki_match"},
# Bucket classification
"empty_buckets_not_full": {"input": "empty", "full": False, "bucket": True},
"empty_buckets": {"input": "empty", "bucket": True},
"empty_buckets_not_full": {"input": "empty", "full": False, "bucket": "narms"},
"empty_buckets": {"input": "empty", "bucket": "narms"},
}

@classmethod
Expand Down Expand Up @@ -420,11 +420,11 @@ def setUpClass(cls):
cls.test_files["empty_buckets_not_full_with_tsv_input"] = {
"input": os.path.join(ROOT, "tests", "test_input", "empty_with_tsv_input.tsv"),
"full": False,
"bucket": True
"bucket": "narms"
}
cls.test_files["empty_buckets_with_tsv_input"] = {
"input": os.path.join(ROOT, "tests", "test_input", "empty_with_tsv_input.tsv"),
"bucket": True
"bucket": "narms"
}

# Temporary directory for output files
Expand Down Expand Up @@ -455,7 +455,7 @@ def test_pipeline_with_files(self):
# File path to store actual output of input file
actual_output_path = os.path.join(self.tmp_dir, "actual_output.tsv")
# Run pipeline.run using input_path and actual_output_path
default_args = {"full": True, "bucket": False}
default_args = {"full": True, "bucket": None}
default_args.update(pipeline_args)
pipeline.run(argparse.Namespace(input_file=default_args["input"], config=None,
full=default_args["full"],
Expand Down Expand Up @@ -594,7 +594,8 @@ def test_ontology_table_keys(self):
"standard_resource_label_permutations", "synonyms", "abbreviations",
"non_english_words", "spelling_mistakes", "inflection_exceptions",
"stop_words", "suffixes", "parents", "buckets_ifsactop",
"buckets_lexmapr", "ifsac_labels", "ifsac_refinement", "ifsac_default"]
"buckets_lexmapr", "ifsac_labels", "ifsac_refinement", "ifsac_default",
"bucket_labels"]

self.assertCountEqual(expected_keys, ontology_lookup_table.keys())

Expand All @@ -606,7 +607,8 @@ def test_ontology_table_keys_with_multiple_ontologies(self):
"standard_resource_label_permutations", "synonyms", "abbreviations",
"non_english_words", "spelling_mistakes", "inflection_exceptions",
"stop_words", "suffixes", "parents", "buckets_ifsactop",
"buckets_lexmapr", "ifsac_labels", "ifsac_refinement", "ifsac_default"]
"buckets_lexmapr", "ifsac_labels", "ifsac_refinement", "ifsac_default",
"bucket_labels"]

self.assertCountEqual(expected_keys, ontology_lookup_table.keys())

Expand Down Expand Up @@ -935,54 +937,5 @@ def test_ontology_table_resource_labels_prioritisation_pizza_two_first(self):
self.assertDictEqual(expected_resource_labels, actual_resource_labels)


class TestClassification(unittest.TestCase):
    """Tests processes of classification of samples into buckets.

    This differs from the black-box approach taken in TestPipeline, as
    we are concerned with the mechanics behind the classification:
    whether the classification lookup table is generated, and what keys
    it contains.
    """

    # Path to the JSON lookup table that pipeline.run materialises on
    # disk when bucket classification is enabled.
    classification_table_path = os.path.join(ROOT, "resources", "classification_lookup_table.json")

    @classmethod
    def setUp(cls):
        # Remove classification lookup table before each test so every
        # test observes whether the pipeline (re)creates it from scratch.
        # NOTE(review): decorated @classmethod but named setUp, so it
        # still runs per-test (unittest resolves it through the instance);
        # confirm setUpClass was not intended.
        if os.path.exists(cls.classification_table_path):
            os.remove(cls.classification_table_path)

    @staticmethod
    def run_pipeline_with_args(bucket=None):
        """Run pipeline with some default arguments.

        :param bucket: value forwarded to pipeline.run's ``bucket``
            argument; ``None`` disables bucket classification.
        """

        # Path to input file used in all tests
        small_simple_path = os.path.join(ROOT, "tests", "test_input", "small_simple.csv")

        # Mirrors the Namespace the CLI would build; only input_file and
        # bucket vary across these tests.
        pipeline.run(argparse.Namespace(input_file=small_simple_path, config=None, full=None,
                                        output=None, version=False, bucket=bucket, no_cache=False,
                                        profile=None))

    def get_classification_lookup_table(self):
        # Load the on-disk table produced by the most recent pipeline run.
        with open(self.classification_table_path) as fp:
            return json.load(fp)

    def test_generate_classification_table(self):
        # Without bucket classification, no table should be written.
        self.run_pipeline_with_args()
        self.assertFalse(os.path.exists(self.classification_table_path))

        # NOTE(review): elsewhere in this changeset ``bucket`` migrates
        # from a boolean flag to a bucket-name string ("narms");
        # ``bucket=True`` here looks stale — confirm against pipeline.run.
        self.run_pipeline_with_args(bucket=True)
        self.assertTrue(os.path.exists(self.classification_table_path))

    def test_classification_table_keys(self):
        # Generate the table, then verify it carries exactly the expected
        # top-level keys (order-insensitive via assertCountEqual).
        self.run_pipeline_with_args(bucket=True)
        classification_table = self.get_classification_lookup_table()

        expected_keys = ["non_standard_resource_ids", "standard_resource_labels",
                         "standard_resource_label_permutations", "synonyms", "abbreviations",
                         "non_english_words", "spelling_mistakes", "inflection_exceptions",
                         "stop_words", "suffixes", "parents", "buckets_ifsactop",
                         "buckets_lexmapr", "ifsac_labels", "ifsac_refinement", "ifsac_default"]

        self.assertCountEqual(expected_keys, classification_table.keys())


# Allow the test module to be executed directly (python test_pipeline.py)
# in addition to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()

0 comments on commit ecee394

Please sign in to comment.