Skip to content

Commit

Permalink
fix(corpus): parallel parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
thvitt committed Oct 1, 2021
1 parent 8d71481 commit 3a23d14
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
2 changes: 1 addition & 1 deletion delta/corpus.py
Expand Up @@ -226,7 +226,7 @@ def process_directory(self, directory):
data = parallel(delayed(self.process_file)(filename) for filename in used_filenames)
else:
data = (self.process_file(filename) for filename in used_filenames)
- return {series.name: series for series in data}
+ return {self.get_name(fn): series for (series, fn) in zip(data, used_filenames)}

def _get_parallel_executor(self) -> Parallel:
if self.parallel:
Expand Down
7 changes: 6 additions & 1 deletion test/corpus_test.py
Expand Up @@ -73,4 +73,9 @@ def test_table_describer(testdir):

def test_featuredescriber_args(testdir):
corpus = d.Corpus(testdir, lower_case=True)
- assert 'Sie' not in corpus.columns
+ assert 'Sie' not in corpus.columns
+
+
+ def test_parallel_corpus(testdir, corpus):
+ parallel_corpus = d.Corpus(testdir, parallel=True)
+ assert (parallel_corpus == corpus).all().all()

0 comments on commit 3a23d14

Please sign in to comment.