Skip to content

Commit

Permalink
Allow filtering by a feature list.
Browse files Browse the repository at this point in the history
Feature list may be created by RFE, for example
  • Loading branch information
thvitt committed Aug 24, 2016
1 parent 2ae2571 commit 814991f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
3 changes: 2 additions & 1 deletion delta/__init__.py
Expand Up @@ -15,7 +15,8 @@
from .deltas import registry, Normalization, DeltaFunction, \
PDistDeltaFunction, CompositeDeltaFunction
from .cluster import Clustering
from .features import get_rfe_features

__all__ = [ Corpus, FeatureGenerator, LETTERS_PATTERN, WORD_PATTERN,
registry, Normalization,
DeltaFunction, PDistDeltaFunction, CompositeDeltaFunction, Clustering ]
DeltaFunction, PDistDeltaFunction, CompositeDeltaFunction, Clustering, get_rfe_features ]
18 changes: 14 additions & 4 deletions delta/corpus.py
Expand Up @@ -321,6 +321,7 @@ def __init__(self, subdir=None, file=None, corpus=None,
self.logger = logger
self.metadata = metadata
self.document_describer = document_describer
self.feature_generator = feature_generator


def new_data(self, data, **metadata):
Expand Down Expand Up @@ -469,10 +470,19 @@ def filter_wordlist(self, filename, **kwargs):
New corpus with selected features.
"""
words = list(self._load_wordlist(filename, **kwargs))
return self.new_data(
corpus=self.loc[:, words],
complete=False,
wordlist=filename)
return self.filter_features(words, wordlist=filename)

def filter_features(self, features, **metadata):
"""
Returns a new corpus that contains only the given features.

The selection is made with ``self.loc[:, features]`` and the result is
handed to ``self.new_data`` together with ``complete=False``.

Args:
features (Iterable):
The features to select. If they are stored in a file, use
filter_wordlist instead.
**metadata:
Additional keyword arguments, forwarded unchanged to
``self.new_data``.

Returns:
The result of ``self.new_data`` for the selection.
"""
return self.new_data(self.loc[:, features],
complete=False,
**metadata)


def relative_frequencies(self):
Expand Down

0 comments on commit 814991f

Please sign in to comment.