diff --git a/delta/corpus.py b/delta/corpus.py
index e76846d..59d9141 100644
--- a/delta/corpus.py
+++ b/delta/corpus.py
@@ -311,7 +311,7 @@ def __init__(self, operation):
 
 class Corpus(pd.DataFrame):
 
-    _metadata = ['metadata']
+    _metadata = ['metadata', 'logger', 'document_describer', 'feature_generator']
 
     def __init__(self, source=None, *, subdir=None, file=None, corpus=None,
                  feature_generator=None,
diff --git a/test/corpus_test.py b/test/corpus_test.py
index 43333d8..d282b03 100644
--- a/test/corpus_test.py
+++ b/test/corpus_test.py
@@ -1,3 +1,5 @@
+import numpy as np
+import pandas as pd
 from pytest import approx
 
 import delta as d
@@ -78,4 +80,13 @@ def test_featuredescriber_args(testdir):
 
 def test_parallel_corpus(testdir, corpus):
     parallel_corpus = d.Corpus(testdir, parallel=True)
-    assert (parallel_corpus == corpus).all().all()
\ No newline at end of file
+    assert (parallel_corpus == corpus).all().all()
+
+
+@pytest.mark.parametrize("attr_name", ["logger", "metadata", "feature_generator", "document_describer", "save"])
+def test_attribute_names(attr_name):
+    df = pd.DataFrame([[17, 4], [23, 42]],
+                      columns=['foo', attr_name],
+                      index=['doc1', 'doc2'])
+    corpus = d.Corpus(df)
+    assert list(corpus[attr_name]) == [4, 42]