Skip to content

Commit

Permalink
added tests for corpora commands, ignored W503 globally
Browse files Browse the repository at this point in the history
  • Loading branch information
Oliver Sherouse committed Oct 26, 2017
1 parent a885115 commit 65a919e
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 25 deletions.
25 changes: 9 additions & 16 deletions quantgov/__main__.py
Expand Up @@ -25,14 +25,6 @@
ENCODE_OUT = 'utf-8'


def get_public_functions(library):
    """
    Collect the CLI-enabled members of a module.

    :param library: module (or any object accepted by ``vars``) to inspect

    :returns: list of the members whose ``cli`` attribute is an instance of
        ``quantgov.utils.CLISpec``
    """
    return [
        member for member in vars(library).values()
        # A missing ``cli`` attribute yields None, which is never a CLISpec.
        if isinstance(getattr(member, 'cli', None), quantgov.utils.CLISpec)
    ]


def parse_args():
parser = argparse.ArgumentParser(description=__doc__)
subparsers = parser.add_subparsers(dest='command')
Expand All @@ -43,12 +35,12 @@ def parse_args():
create.add_argument('--parent', default='master')
corpus = subparsers.add_parser('corpus')
corpus_subcommands = corpus.add_subparsers(dest='subcommand')
for func in get_public_functions(quantgov.corpora.builtins):
for command, builtin in quantgov.corpora.builtins.commands.items():
subcommand = corpus_subcommands.add_parser(
func.__name__, help=func.cli.help)
command, help=builtin.cli.help)
subcommand.add_argument(
'corpus', help='Path to a QuantGov Corpus directory')
for argument in func.cli.arguments:
for argument in builtin.cli.arguments:
flags = ((argument.flags,) if isinstance(argument.flags, str)
else argument.flags)
kwargs = {} if argument.kwargs is None else argument.kwargs
Expand All @@ -57,7 +49,7 @@ def parse_args():
'-o', '--outfile',
type=lambda x: open(x, 'w', newline='', encoding=ENCODE_OUT),
default=io.TextIOWrapper(
sys.stdout.buffer, encoding=ENCODE_OUT))
sys.stdout.buffer, newline='', encoding=ENCODE_OUT))
return parser.parse_args()


Expand Down Expand Up @@ -94,12 +86,13 @@ def start_component(args):
def run_corpus_builtin(args):
    """
    Run the builtin corpus command selected on the command line.

    Writes a header row (the driver's index labels followed by the builtin's
    columns) and then one row per processed result to ``args.outfile`` as CSV.

    :param args: parsed command-line namespace; ``corpus``, ``subcommand``
        and ``outfile`` are read here, and every remaining attribute except
        ``command`` is forwarded to the builtin as a keyword argument
    """
    driver = quantgov.load_driver(args.corpus)
    writer = csv.writer(args.outfile)
    builtin = quantgov.corpora.builtins.commands[args.subcommand]
    # Everything that is not CLI plumbing is an option of the builtin itself;
    # the same mapping feeds both the header and the per-document call.
    func_args = {
        name: value for name, value in vars(args).items()
        if name not in {'command', 'subcommand', 'outfile', 'corpus'}
    }
    writer.writerow(driver.index_labels + builtin.get_columns(func_args))
    partial = functools.partial(builtin.process_document, **func_args)
    for row in quantgov.utils.lazy_parallel(partial, driver.stream()):
        writer.writerow(row)
Expand Down
23 changes: 15 additions & 8 deletions quantgov/corpora/builtins.py
Expand Up @@ -6,6 +6,8 @@

import quantgov

commands = {}


class WordCounter():

Expand All @@ -24,26 +26,28 @@ class WordCounter():
)

@staticmethod
def get_columns():
def get_columns(args):
return ('words',)

@staticmethod
def process_document(doc, pattern):
return doc.index + (len(pattern.findall(doc.text)),)
def process_document(doc, word_pattern):
return doc.index + (len(word_pattern.findall(doc.text)),)


commands['count_words'] = WordCounter

class OccuranceCounter():

class OccurrenceCounter():

cli = quantgov.utils.CLISpec(
help="Term Counter for Specific Words",
arguments=[
quantgov.utils.CLIArg(
flags=('--terms'),
flags=('terms'),
kwargs={
'help': 'list of words to be counted',
'type': tuple,
'default': ('shall', 'must', 'may not',
'required', 'prohibited',),
'default': ['shall', 'must', 'may not',
'required', 'prohibited', ],
'nargs': '+'
}
),
Expand Down Expand Up @@ -71,3 +75,6 @@ def process_document(doc, terms, pattern):
i.groupdict()['match'] for i in combined_pattern.finditer(text)
)
return doc.index + tuple(term_counts[i] for i in terms)


commands['count_occurrences'] = OccurrenceCounter
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -3,7 +3,7 @@ universal = 1
[tool:pytest]
addopts = --flake8
flake8-ignore =
*.py W391
*.py W391 W503
*/__init__.py F401
tests/* F401 E402

9 changes: 9 additions & 0 deletions tests/pseudo_corpus/data/clean/1.txt
@@ -0,0 +1,9 @@
Lorem ipsum dolor sit amet, an dolor probatus deseruisse pri, ut nominati appellantur mel. His summo similique ei, ea eos bonorum tractatos, nam ex aperiam suscipit. Ex cum numquam signiferumque, cu eum etiam inermis dolores. Cu agam utamur debitis per, ipsum persequeris ut cum. Fuisset verterem et eum.

Porro putent ex est, at vim nulla dolor reprimique, per cu esse salutandi scribentur. In case civibus sea, pro dicam option luptatum te, mel tota nullam possim ad. His ne wisi ipsum sanctus, in porro ludus ornatus pro. Doming complectitur eu quo, diam vidit vitae no vim, sed ne numquam omittantur complectitur. Usu ex accusamus scripserit, in duo putent labitur conceptam.

Eu affert ubique eam, quod veri deserunt cu qui. Qui facilisi splendide definitionem at. Eam agam illum at, an urbanitas similique inciderint mel. An modus erant eleifend usu, veniam debitis eu sed. Cum ut zril dictas aperiri, ius ex deleniti conceptam elaboraret, usu putant epicurei gubergren et.

Delectus perpetua instructior usu in, eam ne melius efficiendi, his dico scaevola cu. At clita eruditi lucilius sed, his fierent perpetua an. Probo everti cum cu. Posse veniam altera pri ex, ut iriure facilisis vis. Mea quem feugiat ne.

Mea et nonumes neglegentur concludaturque. Duo te cetero iuvaret, cibo meis explicari usu ex. Postea voluptaria mea ex. Quo aperiri complectitur ex, et cibo consequat mediocritatem eum, mea ut animal intellegebat. Expetendis assueverit ut has, cum at agam meliore similique, ei mei laboramus aliquando concludaturque. At cum sint illud, prima paulo qui ea.
37 changes: 37 additions & 0 deletions tests/pseudo_corpus/data/clean/2.txt
@@ -0,0 +1,37 @@
Sint quando iudicabit eu his, ne qui vide apeirian, meis nemore molestie duo ei. Et eum diam libris accommodare, pri audire aliquip consequat in. His ei tale platonem voluptatibus, ut mei altera tibique detracto, probo nulla ei nec. Vis habeo causae eu. Ei duo enim audire offendit.

Cibo regione pertinacia an ius, at habeo erant sit? Fugit regione reformidans ea his. Nec possim molestie disputationi te, usu cu dolore volutpat, sonet partem cum et! Case latine interpretaris ad sit. Sed eu tamquam neglegentur. No nam esse cibo sale, et dicunt suavitate quo.

Cu vel vero feugiat constituam, suscipit quaerendum nec id, nam everti dissentias conclusionemque in. Inimicus vituperatoribus in eam, per nobis nostro delectus te. Eu quo utamur scribentur, in munere latine ocurreret nec. Et vis inermis tibique, inermis oportere et mel?

Modus facilisis maiestatis his in, error definiebas at mea! Choro delectus per cu? Pri facer noster dolorum ut, tation habemus reprehendunt ex mea? Et dico impetus mnesarchum vim.

Impedit theophrastus cu eam? Vis id commune iracundia? Mel summo tincidunt forensibus ne? Nec vide choro menandri no? Te esse munere pro?

Id usu doming inciderint, interesset delicatissimi et eam. Omittam intellegam quaerendum qui ad, quo vidit tantas essent id. Nam no nusquam apeirian, an mucius feugait persequeris pro. Et est duis vulputate? Nam no soluta graeco dignissim.

Has quem choro ex. Facilis delicata consectetuer te per, his ei tation populo, dicant fabellas definiebas te mea. Ei quo impedit prodesset, probo utinam cu sit, ne cum ferri ullum molestiae? Eu vel argumentum omittantur.

Legimus democritum cu quo? Feugait erroribus constituto ea has, eam esse mediocritatem et, at pro melius apeirian scripserit? Lorem albucius suscipit in nam? Sea munere assentior prodesset eu, phaedrum omittantur eum ut. Aliquam adipiscing te nam, mei doctus sanctus detraxit et, vis ei semper percipit nominati. Vis esse voluptua id.

Vis an ridens nominati referrentur. Ea fierent molestie vivendum est, vel an saperet iudicabit? Mutat splendide eam ex. Vel et falli aliquid detraxit, quidam dissentias ea mei! Odio indoctum intellegebat est at. Debitis hendrerit dissentias per ne, equidem verterem aliquando te per?

Ut diceret recteque eos? Vix ad albucius atomorum, mea ipsum habemus adipiscing at. Duis omnis volumus pri an, per dolorem sapientem at, adipisci salutatus intellegam eos in? Vivendum molestiae interpretaris in mel, no pri iusto noster. Eu eos vero probo ipsum, epicuri appetere iudicabit an pri, sed soluta voluptaria eu!

Sea natum velit ludus te, putant docendi ea quo. Quas solum comprehensam ut duo, ea erat fabulas insolens cum, oratio molestiae vis cu? Ut eripuit laboramus philosophia eos, vix ceteros moderatius at. Vix enim senserit intellegat ne! In dolorum expetenda sit, veniam impetus at vis, ei ius tritani contentiones! Eu quem enim periculis vis?

Ad nec mazim pertinax deseruisse, quidam laoreet praesent pro ad! Per no integre fuisset imperdiet! Cum malis voluptaria ne, appareat ponderum et sea. Amet magna definiebas per id, has viris dissentiet complectitur cu. Mel no utamur adversarium.

Mundi solet nemore in qui. Has dicit vituperata ne. Ad vim eros percipitur concludaturque? Error legere facilisi vix eu, ad mutat soleat usu. Choro delenit persequeris nam ei, no sit movet consulatu. Id vivendum adipisci percipitur eos, vim te error altera impedit, labore virtute an quo.

Quo primis vocibus at, choro alterum accusamus et ius? Pri etiam falli conclusionemque te, cum te erroribus disputando. Solum detraxit deseruisse ad eos, sit et tantas detraxit incorrupte, error errem quando ex nam. Duo et laudem officiis percipit, ad odio tacimates mei, mel eros concludaturque an.

Te legimus consetetur honestatis vim. Tantas aliquip mel ea, reque salutandi mediocritatem ne nec. Latine graecis propriae per no, homero urbanitas te eam! Cu cum phaedrum explicari dissentiet, sea probo definiebas instructior et. His dictas partiendo no, no per utinam fuisset? Commune similique concludaturque per ei, has utinam commodo no, mel et movet libris platonem. Ut ius exerci vocent, te sea velit deserunt pertinacia, vel sale falli ceteros ut.

Ad iuvaret denique sit. Mel etiam timeam disputando ne, vel eruditi copiosae eu. Ex ridens admodum deleniti qui, an philosophia consequuntur usu. Eos verterem pericula adversarium et, in mea accumsan omittantur, veri alienum appetere nam ex. Ex odio constituam est, te ius case dicunt tamquam. Vix cu error scaevola, mel epicurei consequat omittantur eu?

Eirmod cotidieque ei ius. Iisque philosophia theophrastus sea eu! Sit meliore probatus ut. Nam ei nibh habemus consetetur, ad nec gloriatur moderatius. Mentitum rationibus eloquentiam his et, mel at cibo suavitate. Quidam recusabo ad ius.

Sit modus voluptatibus ut, no eos platonem perpetua interesset? Et mei fugit impetus, pro no scaevola tacimates conceptam. Id justo convenire mei, ea debet conceptam mei, at mei reque praesent. Eos explicari definitiones te, accusam ancillae no quo. Dolorum nusquam sea at.

Autem vituperatoribus sea ea, his ad duis accusamus neglegentur. Etiam consequat nam ea, appetere sensibus ad per? Interesset interpretaris nam ut, sale.
8 changes: 8 additions & 0 deletions tests/pseudo_corpus/driver.py
@@ -0,0 +1,8 @@
import quantgov

from pathlib import Path

# Driver for the test fixture corpus: documents are read from the
# data/clean directory beside this file, with 'file' as the only index label.
driver = quantgov.corpora.RecursiveDirectoryCorpusDriver(
    index_labels=('file',),
    directory=Path(__file__).parent / 'data' / 'clean',
)
41 changes: 41 additions & 0 deletions tests/test_corpora.py
@@ -1,5 +1,8 @@
import pytest
import quantgov.corpora
import subprocess

from pathlib import Path


def build_recursive_directory_corpus(directory):
Expand Down Expand Up @@ -74,3 +77,41 @@ def test_corpus_streamer(corpus):
(('b', '2'), 'bar')
)
assert streamer.index == [('a', '1'), ('b', '2')]


PSEUDO_CORPUS_PATH = Path(__file__).resolve().parent.joinpath('pseudo_corpus')


def test_wordcount():
    """Check ``count_words`` output on the pseudo corpus with defaults."""
    command = ['quantgov', 'corpus', 'count_words', str(PSEUDO_CORPUS_PATH)]
    result = subprocess.check_output(command, universal_newlines=True)
    expected = 'file,words\n1,248\n2,800\n'
    assert result == expected


def test_wordcount_pattern():
    """Check ``count_words`` output when ``--word_pattern`` is supplied."""
    output = subprocess.check_output(
        ['quantgov', 'corpus', 'count_words', str(PSEUDO_CORPUS_PATH),
         # Raw string: '\S' is not a valid escape in a normal string literal.
         '--word_pattern', r'\S+'],
        universal_newlines=True
    )
    assert output == 'file,words\n1,248\n2,800\n'


def test_termcount():
    """Check ``count_occurrences`` output for a single term."""
    command = [
        'quantgov', 'corpus', 'count_occurrences', str(PSEUDO_CORPUS_PATH),
        'lorem',
    ]
    result = subprocess.check_output(command, universal_newlines=True)
    expected = 'file,lorem\n1,1\n2,1\n'
    assert result == expected


def test_termcount_multiple():
    """Check ``count_occurrences`` output for two terms, one multi-word."""
    command = [
        'quantgov', 'corpus', 'count_occurrences', str(PSEUDO_CORPUS_PATH),
        'lorem', 'dolor sit',
    ]
    result = subprocess.check_output(command, universal_newlines=True)
    expected = 'file,lorem,dolor sit\n1,1,1\n2,1,0\n'
    assert result == expected

0 comments on commit 65a919e

Please sign in to comment.