Skip to content

Commit

Permalink
switched tests to pytest
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Oct 6, 2020
1 parent d042938 commit e12a289
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 109 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -18,3 +18,6 @@ nosetests.xml
.python-version

NOTES
.cache
.pytest_cache
pytest.xml
4 changes: 1 addition & 3 deletions requirements.txt
Expand Up @@ -5,9 +5,7 @@ Cython
# FIXME Update code to work with this:
# git+https://github.com/Kornel/scikit-learn#egg=scikit_learn

nose
nose-progressive
nosexcover
pytest
Sphinx
ipykernel
nbsphinx
4 changes: 4 additions & 0 deletions setup.cfg
Expand Up @@ -4,3 +4,7 @@ exe=1
with-xunit=1
with-xcoverage=true
cover-package=delta

[tool:pytest]
addopts = --doctest-modules --junit-xml pytest.xml
nb_diff_ignore = /cells/*/output
9 changes: 9 additions & 0 deletions test/conftest.py
@@ -0,0 +1,9 @@
import os
from pathlib import Path

import pytest


@pytest.fixture(scope='session')
def testdir() -> str:
    """Return the path of the ``corpus3`` directory that sits next to this file.

    The fixture is session-scoped, so the path is computed once per test run.
    """
    corpus_path = Path(__file__).parent / 'corpus3'
    return os.fspath(corpus_path)
105 changes: 51 additions & 54 deletions test/corpus_test.py
@@ -1,78 +1,75 @@
from pytest import approx

import delta as d
import os
from nose.tools import eq_
import pytest

testdir = None

@pytest.fixture
def feature_generator() -> d.FeatureGenerator:
    """Provide a ``FeatureGenerator`` constructed with its default arguments."""
    generator = d.FeatureGenerator()
    return generator

def setup_module():
    """Point the module-level ``testdir`` at the ``corpus3`` directory beside this file."""
    global testdir
    here = os.path.dirname(os.path.abspath(__file__))
    testdir = os.path.join(here, 'corpus3')

def test_tokenize():
    """Tokenizing several lines yields one flat sequence of words."""
    lines = ["This is a", "simple test"]
    tokens = list(d.FeatureGenerator().tokenize(lines))
    assert tokens == ["This", "is", "a", "simple", "test"]

class FeatureGenerator_Test:

def setup(self):
    """Create the default ``FeatureGenerator`` and store it on ``self.gen``."""
    generator = d.FeatureGenerator()
    self.gen = generator
def test_tokenize_letters():
    """With ``LETTERS_PATTERN`` the apostrophe splits "don't" and the final dot is dropped."""
    letter_tokenizer = d.FeatureGenerator(token_pattern=d.LETTERS_PATTERN)
    tokens = list(letter_tokenizer.tokenize(["I don't like mondays."]))
    assert tokens == ["I", "don", "t", "like", "mondays"]

def test_tokenize(self):
    """Tokenizing several lines with ``self.gen`` yields one flat sequence of words."""
    lines = ["This is a", "simple test"]
    tokens = list(self.gen.tokenize(lines))
    assert tokens == ["This", "is", "a", "simple", "test"]

def test_tokenize_letters(self):
    """With ``LETTERS_PATTERN`` the apostrophe splits "don't" and the final dot is dropped."""
    letter_tokenizer = d.FeatureGenerator(token_pattern=d.LETTERS_PATTERN)
    tokens = list(letter_tokenizer.tokenize(["I don't like mondays."]))
    assert tokens == ["I", "don", "t", "like", "mondays"]
def test_tokenize_words():
    """With ``WORD_PATTERN`` the contraction "don't" stays a single token."""
    word_tokenizer = d.FeatureGenerator(token_pattern=d.WORD_PATTERN)
    tokens = list(word_tokenizer.tokenize(["I don't like mondays."]))
    assert tokens == ["I", "don't", "like", "mondays"]

def test_tokenize_words(self):
    """With ``WORD_PATTERN`` the contraction "don't" stays a single token."""
    word_tokenizer = d.FeatureGenerator(token_pattern=d.WORD_PATTERN)
    tokens = list(word_tokenizer.tokenize(["I don't like mondays."]))
    assert tokens == ["I", "don't", "like", "mondays"]

def test_count_tokens(self):
result = self.gen.count_tokens(
def test_count_tokens(feature_generator):
result = feature_generator.count_tokens(
["this is a test", "testing this generator"])
assert result["this"] == 2
assert result["generator"] == 1
assert result.sum() == 7
assert result["this"] == 2
assert result["generator"] == 1
assert result.sum() == 7


def test_get_name(feature_generator):
    """``get_name`` strips the directory and the last extension from the path."""
    name = feature_generator.get_name('foo/bar.baz.txt')
    assert name == 'bar.baz'

def test_get_name(self):
    """``get_name`` strips the directory and the last extension from the path."""
    name = self.gen.get_name('foo/bar.baz.txt')
    assert name == 'bar.baz'

def test_call(self):
    """Calling the generator on the test directory gives the expected total for ``und``."""
    features = self.gen(testdir)
    und_total = features.und.sum()
    eq_(und_total, 25738.0)
def test_call_fg(feature_generator, testdir):
    """Calling the generator on the test directory gives the expected total for ``und``."""
    corpus_path = os.fspath(testdir)
    features = feature_generator(corpus_path)
    und_total = features.loc[:, 'und'].sum()
    assert und_total == approx(25738.0)

class Corpus_Test:

def parse_test(self):
    """A corpus loaded from the test directory has the expected total for ``und``."""
    und_total = d.Corpus(testdir).und.sum()
    eq_(und_total, 25738.0)
## Corpus

def mfw_test(self):
    """The row sums of ``get_mfw_table(0)`` add up to 9."""
    table = d.Corpus(testdir).get_mfw_table(0)
    grand_total = table.sum(axis=1).sum()
    eq_(grand_total, 9)
@pytest.fixture(scope='module')
def corpus(testdir):
    """Load the corpus from ``testdir`` once and share it between this module's tests."""
    loaded = d.Corpus(testdir)
    return loaded

def test_corpus_parse(corpus):
    """The parsed corpus has the expected total for ``und``."""
    und_total = corpus.und.sum()
    assert und_total == approx(25738.0)

def test_corpus_mfw(corpus):
    """The row sums of ``get_mfw_table(0)`` add up to (approximately) 9."""
    table = corpus.get_mfw_table(0)
    grand_total = table.sum(axis=1).sum()
    assert grand_total == approx(9)

class Cluster_Test:
    """Smoke test running corpus -> cosine delta -> clustering -> flat clustering."""

    def init_test(self):
        """Run the whole pipeline and check that the flat clustering carries data."""
        # FIXME
        mfw_table = d.Corpus(testdir).get_mfw_table(1000)
        distances = d.functions.cosine_delta(mfw_table)
        flat = d.Clustering(distances).fclustering()
        # Printed for manual inspection only; nothing below depends on the output.
        print(flat.describe())
        print(flat.evaluate())
        assert flat.data is not None
def test_integration_cluster(corpus):
    """Run corpus -> cosine delta -> clustering and expect a perfect flat clustering.

    The adjusted Rand index of the flat clustering must be 1. It is compared
    with ``approx`` like the other numeric checks in this module, so that a
    floating-point rounding difference does not fail the test.
    """
    # FIXME
    top1000 = corpus.get_mfw_table(1000)
    deltas = d.functions.cosine_delta(top1000)
    hclust = d.Clustering(deltas)
    fclust = hclust.fclustering()
    # Printed for manual inspection only; pytest shows the output on failure.
    print(fclust.describe())
    print(fclust.evaluate())
    assert fclust.adjusted_rand_index() == approx(1)

class Table_Describer_Test:
    """Checks a corpus whose document describer reads a CSV metadata table."""

    def md_test(self):
        """The last document of the corpus belongs to the group 'Raabe'."""
        # The metadata table is the file named like the corpus directory plus '.csv'.
        describer = d.util.TableDocumentDescriber(testdir + '.csv', 'Author', 'Title')
        corpus = d.Corpus(testdir, document_describer=describer)
        last_document = corpus.index[-1]
        assert corpus.document_describer.group_name(last_document) == 'Raabe'
def test_table_describer(testdir):
    """The group of the last document is one of the three authors listed below."""
    # The metadata table is the file named like the corpus directory plus '.csv'.
    describer = d.util.TableDocumentDescriber(testdir + '.csv', 'Author', 'Title')
    corpus = d.Corpus(testdir, document_describer=describer)
    last_document = corpus.index[-1]
    group = corpus.document_describer.group_name(last_document)
    assert group in {'Raabe', 'Marlitt', 'Fontane'}
77 changes: 25 additions & 52 deletions test/deltas_test.py
@@ -1,63 +1,36 @@
from pytest import approx

import delta as d
import os
# from nose.tools import eq_
from math import log10, pow
import pytest

testdir = None
c1000 = None


def setup_module():
    """Locate ``corpus3`` beside this file and load the module-level ``c1000`` from it."""
    global testdir, c1000
    here = os.path.dirname(os.path.abspath(__file__))
    testdir = os.path.join(here, 'corpus3')
    c1000 = d.Corpus(testdir).get_mfw_table(1000)


def feq_(result, expected, msg=None, threshold=None):
    """Assert that ``result`` is within ``threshold`` of ``expected``.

    Args:
        result: the value that was computed.
        expected: the reference value.
        msg: assertion message; defaults to ``"<result> != <expected>"``.
        threshold: maximum allowed absolute difference (exclusive). When
            omitted, 1% of the magnitude of ``expected`` is used.

    Raises:
        AssertionError: if the difference is not below the threshold.
    """
    if threshold is None:
        # pow(10, log10(expected) - 2) is expected / 100, but log10 raises
        # ValueError for zero or negative reference values; abs() avoids that.
        threshold = abs(expected) / 100
        if threshold == 0:
            # Tiny absolute tolerance so an exact match against 0 passes the strict '<'.
            threshold = 1e-12
    if msg is None:
        msg = "{} != {}".format(result, expected)
    assert abs(expected - result) < threshold, msg


class Delta_Test:

def check_function(self, function, expected_distance, expected_score=None):
    """Check one delta function against reference values.

    Applies ``function`` to the module-level ``c1000`` table and compares the
    distance between Fontane's 'Der Stechlin' and 'Effi Briest' with
    ``expected_distance``. If ``expected_score`` is given, the simple score
    of the distance matrix is compared with it as well.
    """
    distances = function(c1000)
    sample = distances.at['Fontane,-Theodor_Der-Stechlin',
                          'Fontane,-Theodor_Effi-Briest']
    feq_(sample, expected_distance,
         "{} Stechlin/Effi distance is {} instead of {}!".format(
             function.name, sample, expected_distance))

    if expected_score is not None:
        score = distances.simple_score()
        # feq_ takes (result, expected): the tolerance is derived from the
        # expected value, so the measured score has to come first.
        feq_(score, expected_score,
             "{} simple score is {} instead of expected {}!".format(
                 function.name, score, expected_score))

def burrows_test(self):
    """Check the reference distance for ``d.functions.burrows``."""
    expected = 0.7538867972199293
    self.check_function(d.functions.burrows, expected)
@pytest.fixture(scope='module')
def c1000(testdir):
    """Return ``get_mfw_table(1000)`` of the test corpus, built once per module."""
    full_corpus = d.Corpus(testdir)
    return full_corpus.get_mfw_table(1000)

def linear_test(self):
    """Check the reference distance for ``d.functions.linear``."""
    expected = 1149.434663563308
    self.check_function(d.functions.linear, expected)

def quadratic_test(self):
    """Check the reference distance for ``d.functions.quadratic``."""
    expected = 1102.845003724634
    self.check_function(d.functions.quadratic, expected)
def fn_id(fn):
    """Return the test id for one parametrize argument.

    Delta functions are identified by their ``name``. For any other value
    ``None`` is returned, which makes pytest fall back to its default id.
    """
    if isinstance(fn, d.DeltaFunction):
        return fn.name
    return None

def eder_test(self):
    """Check the reference distance for ``d.functions.eder``."""
    expected = 0.3703309813454142
    self.check_function(d.functions.eder, expected)
@pytest.mark.parametrize(
    "function,expected_distance",
    [
        (d.functions.burrows, 0.7538867972199293),
        (d.functions.linear, 1149.434663563308),
        (d.functions.quadratic, 1102.845003724634),
        (d.functions.eder, 0.3703309813454142),
        (d.functions.cosine_delta, 0.6156353166442046),
    ],
    ids=fn_id,
)
def test_distance(function, expected_distance, c1000):
    """Compare one sample distance of ``function`` with its reference value.

    The sample is the distance between Fontane's 'Der Stechlin' and
    'Effi Briest'; it has to match within a relative tolerance of 1%.
    """
    row, column = 'Fontane,-Theodor_Der-Stechlin', 'Fontane,-Theodor_Effi-Briest'
    stechlin_effi = function(c1000).at[row, column]
    assert stechlin_effi == approx(expected_distance, rel=1e-2)

def cosine_delta_test(self):
    """Check the reference distance for ``d.functions.cosine_delta``."""
    expected = 0.6156353166442046
    self.check_function(d.functions.cosine_delta, expected)

def composite_metric_test(self):
    """A composite delta built on a metric cosine keeps ``fix_symmetry`` and the cosine-delta value."""
    metric_cosine = d.MetricDeltaFunction('cosine', 'mcosine')
    assert metric_cosine.fix_symmetry == True, "fix_symmetry is False!?"
    composite = d.CompositeDeltaFunction('mcosine-z_score', 'metric_cosine_delta')
    assert composite.basis.fix_symmetry == True, "basis.fix_symmetry is False!?"
    # NOTE(review): presumably creating the composite above is what makes
    # d.functions.metric_cosine_delta available -- confirm against the delta package.
    expected = 0.6156353166442046
    self.check_function(d.functions.metric_cosine_delta, expected)
def test_composite_metric(c1000):
    """A composite delta built on a metric cosine keeps ``fix_symmetry`` and the cosine-delta value."""
    metric_cosine = d.MetricDeltaFunction('cosine', 'mcosine')
    assert metric_cosine.fix_symmetry == True, "fix_symmetry is False!?"
    composite = d.CompositeDeltaFunction('mcosine-z_score', 'metric_cosine_delta')
    assert composite.basis.fix_symmetry == True, "basis.fix_symmetry is False!?"
    # NOTE(review): presumably creating the composite above is what makes
    # d.functions.metric_cosine_delta available -- confirm against the delta package.
    expected = 0.6156353166442046
    # Reuse the distance check of the parametrized test as a plain function call.
    test_distance(d.functions.metric_cosine_delta, expected, c1000)

0 comments on commit e12a289

Please sign in to comment.