From 09b746b9af646fad98801836bc5f09cfb7ee44d6 Mon Sep 17 00:00:00 2001 From: Atul Varma Date: Thu, 3 Nov 2016 10:18:49 -0400 Subject: [PATCH] Add min_length, max_permutations to get_best_permutations(). --- data_capture/templatetags/analyze_contract.py | 13 +++++--- data_capture/tests/test_analyze_contract.py | 33 ++++++++++++++----- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/data_capture/templatetags/analyze_contract.py b/data_capture/templatetags/analyze_contract.py index b53616f8c..f07d7dfbf 100644 --- a/data_capture/templatetags/analyze_contract.py +++ b/data_capture/templatetags/analyze_contract.py @@ -32,7 +32,7 @@ def powerset(iterable): return chain.from_iterable(combinations(s, r) for r in range(len(s)+1)) -def get_best_permutations(vocab, lexemes): +def get_best_permutations(vocab, lexemes, min_length=4, max_permutations=8): def compare(a, b): a_len = len(a) b_len = len(b) @@ -45,11 +45,16 @@ def compare(a, b): def vocab_val(iterable): return sum([vocab[i] for i in iterable]) - permutations = list(powerset(lexemes)) + # Remove the first element, as it's the empty set. + permutations = list(powerset(lexemes))[1:] + + permutations = list(filter( + lambda x: len(' '.join(x)) >= min_length, + permutations + )) permutations.sort(key=cmp_to_key(compare), reverse=True) - # Remove the last element, as it's the empty set. - return permutations[:-1] + return permutations[:max_permutations] def get_vocab(cursor, model=Contract, field='search_index', min_ndoc=100): diff --git a/data_capture/tests/test_analyze_contract.py b/data_capture/tests/test_analyze_contract.py index 8cc0f8dd3..614fe177e 100644 --- a/data_capture/tests/test_analyze_contract.py +++ b/data_capture/tests/test_analyze_contract.py @@ -3,16 +3,33 @@ from ..templatetags.analyze_contract import get_best_permutations -class AnalyzeContractTests(TestCase): - def test_get_best_permutations_works(self): - vocab = { - 'junior': 1, - 'administrative': 2, - 'engineer': 3, - } +VOCAB = { + 'junior': 1, + 'administrative': 2, + 'engineer': 3, + 'ii': 4, +} + +class GetBestPermutationsTests(TestCase): + def test_min_length_works(self): + self.assertEqual( + get_best_permutations(VOCAB, ['engineer', 'ii'], min_length=3), + [('engineer', 'ii'), + ('engineer',)], + ) + + def test_max_permutations_works(self): + self.assertEqual( + get_best_permutations(VOCAB, ['junior', 'administrative', + 'engineer'], max_permutations=2), + [('junior', 'administrative', 'engineer'), + ('administrative', 'engineer')] + ) + + def test_it_works(self): self.assertEqual( - get_best_permutations(vocab, ['junior', 'administrative', + get_best_permutations(VOCAB, ['junior', 'administrative', 'engineer']), [('junior', 'administrative', 'engineer'), ('administrative', 'engineer'),