Skip to content

Commit

Permalink
Fix bug whereby extractors that validated their direct dependencies broke Feature.version
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnVinyard committed May 17, 2016
1 parent 9c3b743 commit f13516d
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
2 changes: 1 addition & 1 deletion featureflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.8.9'
__version__ = '1.8.10'

from model import BaseModel

Expand Down
13 changes: 10 additions & 3 deletions featureflow/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from decoder import JSONDecoder, Decoder, GreedyDecoder, DecoderNode, \
BZ2Decoder, PickleDecoder
from datawriter import DataWriter, StringIODataWriter
from persistence import PersistenceSettings


class Feature(object):
Expand Down Expand Up @@ -49,7 +50,13 @@ def __str__(self):

@property
def version(self):
return self.extractor(**self.extractor_args).version
# KLUDGE: Build a shallow version of the extractor. Building a deep
# version with re-usable code is more difficult, because
# self._build_extractor relies on this version property, so there's
# a circular dependency.
dependencies = [f.extractor(**f.extractor_args) for f in self.needs]
e = self.extractor(needs=dependencies, **self.extractor_args)
return e.version

def copy(
self,
Expand Down Expand Up @@ -205,7 +212,7 @@ def _depends_on(self, _id, graph, persistence):
needs.append(e)
return needs

def _build_extractor(self, _id, graph, persistence):
def _build_extractor(self, _id, graph, persistence, never_store=False):
try:
return graph[self.key]
except KeyError:
Expand All @@ -218,7 +225,7 @@ def _build_extractor(self, _id, graph, persistence):
setattr(e, '_reader', reader)

graph[self.key] = e
if not self.store:
if never_store or not self.store:
return e

key = self.key
Expand Down
25 changes: 25 additions & 0 deletions featureflow/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ def _process(self, data):
yield self._version


class ValidatesDependencies(Node):
    """
    Test node that insists on being constructed with at least one
    dependency and otherwise passes its input through unchanged.
    """

    def __init__(self, needs=None):
        # Both ``None`` and an empty collection count as "no dependencies".
        if needs:
            super(ValidatesDependencies, self).__init__(needs=needs)
        else:
            raise ValueError('you must supply at least one dependency')

    def _process(self, data):
        # Pass-through: emit exactly what was received.
        yield data


class Echo(Node):
def __init__(self, needs=None):
super(Echo, self).__init__(needs=needs)
Expand Down Expand Up @@ -320,6 +330,21 @@ class MultipleRoots(BaseModel):

class BaseTest(object):

def test_can_use_node_that_validates_its_dependency_list(self):
    # Regression test: a model may include a feature whose extractor
    # rejects an empty dependency list in its constructor
    # (ValidatesDependencies raises ValueError when ``needs`` is falsy).
    # Processing such a model should still succeed.
    class D1(BaseModel, self.Settings):
        stream = Feature(TextStream, store=True)
        words = Feature(Tokenizer, needs=stream, store=False)
        count = JSONFeature(WordCount, needs=words, store=True)
        timestamp = JSONFeature(
            TimestampEmitter,
            version='1',
            needs=stream,
            store=True)
        # The feature under test: its extractor validates ``needs`` on
        # construction, and here it depends directly on ``stream``.
        validated = Feature(ValidatesDependencies, needs=stream, store=True)

    # Only a truthy return value from process() is checked here; the
    # stored feature values themselves are not inspected.
    _id = D1.process(stream='mary')
    self.assertTrue(_id)

def test_recomputes_when_necessary(self):
class D1(BaseModel, self.Settings):
stream = Feature(TextStream, store=True)
Expand Down

0 comments on commit f13516d

Please sign in to comment.