From e12a289dd8c215324d583c17868a480bf37c8781 Mon Sep 17 00:00:00 2001 From: Thorsten Vitt Date: Tue, 6 Oct 2020 23:21:55 +0200 Subject: [PATCH] switched tests to pytest --- .gitignore | 3 ++ requirements.txt | 4 +- setup.cfg | 4 ++ test/conftest.py | 9 ++++ test/corpus_test.py | 105 +++++++++++++++++++++----------------------- test/deltas_test.py | 77 +++++++++++--------------------- 6 files changed, 93 insertions(+), 109 deletions(-) create mode 100644 test/conftest.py diff --git a/.gitignore b/.gitignore index ed640a3..157bfb1 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ nosetests.xml .python-version NOTES +.cache +.pytest_cache +pytest.xml diff --git a/requirements.txt b/requirements.txt index 2cdda04..c9d3440 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,7 @@ Cython # FIXME Update code to work with this: # git+https://github.com/Kornel/scikit-learn#egg=scikit_learn -nose -nose-progressive -nosexcover +pytest Sphinx ipykernel nbsphinx diff --git a/setup.cfg b/setup.cfg index 9daed5b..a44ff3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,3 +4,7 @@ exe=1 with-xunit=1 with-xcoverage=true cover-package=delta + +[tool:pytest] +addopts = --doctest-modules --junit-xml pytest.xml +nb_diff_ignore = /cells/*/output diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..a251c02 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,9 @@ +import os +from pathlib import Path + +import pytest + + +@pytest.fixture(scope='session') +def testdir() -> str: + return os.fspath(Path(__file__).parent / 'corpus3') \ No newline at end of file diff --git a/test/corpus_test.py b/test/corpus_test.py index 4062c43..7abc492 100644 --- a/test/corpus_test.py +++ b/test/corpus_test.py @@ -1,78 +1,75 @@ +from pytest import approx + import delta as d import os -from nose.tools import eq_ +import pytest -testdir = None +@pytest.fixture +def feature_generator() -> d.FeatureGenerator: + return d.FeatureGenerator() -def setup_module(): - global testdir - testdir = os.path.join( - os.path.dirname( - os.path.abspath(__file__)), - 'corpus3') +def test_tokenize(): + assert list(d.FeatureGenerator().tokenize(["This is a", "simple test"])) \ + == ["This", "is", "a", "simple", "test"] -class FeatureGenerator_Test: - def setup(self): - self.gen = d.FeatureGenerator() +def test_tokenize_letters(): + fg1 = d.FeatureGenerator(token_pattern=d.LETTERS_PATTERN) + assert list(fg1.tokenize(["I don't like mondays."])) \ + == ["I", "don", "t", "like", "mondays"] - def test_tokenize(self): - assert list(self.gen.tokenize(["This is a", "simple test"])) \ - == ["This", "is", "a", "simple", "test"] - def test_tokenize_letters(self): - fg1 = d.FeatureGenerator(token_pattern=d.LETTERS_PATTERN) - assert list(fg1.tokenize(["I don't like mondays."])) \ - == ["I", "don", "t", "like", "mondays"] +def test_tokenize_words(): + fg1 = d.FeatureGenerator(token_pattern=d.WORD_PATTERN) + assert list(fg1.tokenize(["I don't like mondays."])) \ + == ["I", "don't", "like", "mondays"] - def test_tokenize_words(self): - fg1 = d.FeatureGenerator(token_pattern=d.WORD_PATTERN) - assert list(fg1.tokenize(["I don't like mondays."])) \ - == ["I", "don't", "like", "mondays"] - def test_count_tokens(self): - result = self.gen.count_tokens( +def test_count_tokens(feature_generator): + result = feature_generator.count_tokens( ["this is a test", "testing this generator"]) - assert result["this"] == 2 - assert result["generator"] == 1 - assert result.sum() == 7 + assert result["this"] == 2 + assert result["generator"] == 1 + assert result.sum() == 7 + + +def test_get_name(feature_generator): + assert feature_generator.get_name('foo/bar.baz.txt') == 'bar.baz' - def test_get_name(self): - assert self.gen.get_name('foo/bar.baz.txt') == 'bar.baz' - def test_call(self): - df = self.gen(testdir) - eq_(df.und.sum(), 25738.0) +def test_call_fg(feature_generator, testdir): + df = feature_generator(os.fspath(testdir)) + assert df.loc[:, 'und'].sum() == approx(25738.0) -class Corpus_Test: - def parse_test(self): - corpus = d.Corpus(testdir) - eq_(corpus.und.sum(), 25738.0) +## Corpus - def mfw_test(self): - corpus = d.Corpus(testdir) - rel_corpus = corpus.get_mfw_table(0) - eq_(rel_corpus.sum(axis=1).sum(), 9) +@pytest.fixture(scope='module') +def corpus(testdir): + return d.Corpus(testdir) +def test_corpus_parse(corpus): + assert corpus.und.sum() == approx(25738.0) +def test_corpus_mfw(corpus): + rel_corpus = corpus.get_mfw_table(0) + assert rel_corpus.sum(axis=1).sum() == approx(9) -class Cluster_Test: - def init_test(self): - # FIXME - corpus = d.Corpus(testdir).get_mfw_table(1000) - deltas = d.functions.cosine_delta(corpus) - hclust = d.Clustering(deltas) - fclust = hclust.fclustering() - print(fclust.describe()) - print(fclust.evaluate()) - assert fclust.data is not None +def test_integration_cluster(corpus): + # FIXME + top1000 = corpus.get_mfw_table(1000) + deltas = d.functions.cosine_delta(top1000) + hclust = d.Clustering(deltas) + fclust = hclust.fclustering() + print(fclust.describe()) + print(fclust.evaluate()) + assert fclust.adjusted_rand_index() == 1 -class Table_Describer_Test: - def md_test(self): - corpus = d.Corpus(testdir, document_describer=d.util.TableDocumentDescriber(testdir + '.csv', 'Author', 'Title')) - assert corpus.document_describer.group_name(corpus.index[-1]) == 'Raabe' +def test_table_describer(testdir): + corpus = d.Corpus(testdir, + document_describer=d.util.TableDocumentDescriber(testdir + '.csv', 'Author', 'Title')) + assert corpus.document_describer.group_name(corpus.index[-1]) in {'Raabe', 'Marlitt', 'Fontane'} diff --git a/test/deltas_test.py b/test/deltas_test.py index ebfdc73..7f1b365 100644 --- a/test/deltas_test.py +++ b/test/deltas_test.py @@ -1,63 +1,36 @@ +from pytest import approx + import delta as d import os -# from nose.tools import eq_ from math import log10, pow +import pytest -testdir = None -c1000 = None - - -def setup_module(): - global testdir - global c1000 - testdir = os.path.join( - os.path.dirname( - os.path.abspath(__file__)), - 'corpus3') - c1000 = d.Corpus(testdir).get_mfw_table(1000) - - -def feq_(result, expected, msg=None, threshold=None): - if threshold is None: - threshold = pow(10, log10(expected)-2) - if msg is None: - msg = "{} != {}".format(result, expected) - assert abs(expected - result) < threshold, msg - - -class Delta_Test: - - def check_function(self, function, expected_distance, expected_score=None): - distances = function(c1000) - sample = distances.at['Fontane,-Theodor_Der-Stechlin', - 'Fontane,-Theodor_Effi-Briest'] - feq_(sample, expected_distance, - "{} Stechlin/Effi distance is {} instead of {}!".format( - function.name, sample, expected_distance)) - if expected_score is not None: - feq_(expected_score, distances.simple_score(), - "{} simple score is {} instead of expected {}!".format( - function.name, distances.simple_score(), expected_score)) - def burrows_test(self): - self.check_function(d.functions.burrows, 0.7538867972199293) +@pytest.fixture(scope='module') +def c1000(testdir): + return d.Corpus(testdir).get_mfw_table(1000) - def linear_test(self): - self.check_function(d.functions.linear, 1149.434663563308) - def quadratic_test(self): - self.check_function(d.functions.quadratic, 1102.845003724634) +def fn_id(fn): + return fn.name if isinstance(fn, d.DeltaFunction) else None - def eder_test(self): - self.check_function(d.functions.eder, 0.3703309813454142) +@pytest.mark.parametrize("function,expected_distance", [(d.functions.burrows, 0.7538867972199293), + (d.functions.linear, 1149.434663563308), + (d.functions.quadratic, 1102.845003724634), + (d.functions.eder, 0.3703309813454142), + (d.functions.cosine_delta, 0.6156353166442046)], + ids=fn_id) +def test_distance(function, expected_distance, c1000): + distances = function(c1000) + sample = distances.at['Fontane,-Theodor_Der-Stechlin', + 'Fontane,-Theodor_Effi-Briest'] + assert sample == approx(expected_distance, rel=1e-2) - def cosine_delta_test(self): - self.check_function(d.functions.cosine_delta, 0.6156353166442046) - def composite_metric_test(self): - mcosine = d.MetricDeltaFunction('cosine', 'mcosine') - assert mcosine.fix_symmetry == True, "fix_symmetry is False!?" - mcd = d.CompositeDeltaFunction('mcosine-z_score', 'metric_cosine_delta') - assert mcd.basis.fix_symmetry == True, "basis.fix_symmetry is False!?" - self.check_function(d.functions.metric_cosine_delta, 0.6156353166442046) +def test_composite_metric(c1000): + mcosine = d.MetricDeltaFunction('cosine', 'mcosine') + assert mcosine.fix_symmetry == True, "fix_symmetry is False!?" + mcd = d.CompositeDeltaFunction('mcosine-z_score', 'metric_cosine_delta') + assert mcd.basis.fix_symmetry == True, "basis.fix_symmetry is False!?" + test_distance(d.functions.metric_cosine_delta, 0.6156353166442046, c1000)