Merge pull request #101 from tyarkoni/various-improvements
Major improvements
tyarkoni committed Jan 1, 2017
2 parents ca7dac3 + a12cd74 commit 4592589
Showing 29 changed files with 516 additions and 290 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,6 +1,7 @@
*.pyc
*.DS_Store
*~
benchmarks/
build/
dist
dist/*
5 changes: 5 additions & 0 deletions pliers/config.py
@@ -0,0 +1,5 @@

cache_converters = False
cache_filters = False
cache_extractors = False
log_transformations = True
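
The flags above gate joblib-based caching and logging in the transformer classes changed below. A minimal usage sketch, assuming the flags are read in each transformer's __init__ (as the Converter and Extractor diffs in this commit suggest), so they must be set before a transformer is constructed:

from pliers import config

# Set flags before instantiating any transformer; caching is wired up
# in __init__, per the diffs below.
config.cache_extractors = True       # cache Extractor.transform() results via joblib
config.cache_converters = False      # leave converter caching off
config.log_transformations = True    # keep transformation logging on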
39 changes: 19 additions & 20 deletions pliers/converters/__init__.py
@@ -1,37 +1,25 @@
from abc import ABCMeta, abstractmethod, abstractproperty
from pliers.transformers import Transformer, CollectionStimMixin
from six import with_metaclass
from tempfile import mkdtemp
from joblib import Memory
from pliers.utils import memory
import importlib
from types import GeneratorType
from pliers import config


cachedir = mkdtemp()
memory = Memory(cachedir=cachedir, verbose=0)

__all__ = ['api', 'audio', 'google', 'image', 'video', 'multistep']
__all__ = ['api', 'audio', 'google', 'image', 'iterators', 'video', 'multistep']


class Converter(with_metaclass(ABCMeta, Transformer)):
''' Base class for Converters.'''

def __init__(self):
super(Converter, self).__init__()
self.convert = memory.cache(self.convert)
if config.cache_converters:
self.transform = memory.cache(self.transform)

def convert(self, stim, *args, **kwargs):
new_stim = self._convert(stim, *args, **kwargs)
if new_stim.name is None:
new_stim.name = stim.name
else:
new_stim.name = stim.name + '_' + new_stim.name
if isinstance(new_stim, CollectionStimMixin):
for s in new_stim:
if s.name is None:
s.name = stim.name
else:
s.name = stim.name + '_' + s.name
return new_stim
return self.transform(stim, *args, **kwargs)

@abstractmethod
def _convert(self, stim):
@@ -42,7 +30,18 @@ def _output_type(self):
pass

def _transform(self, stim, *args, **kwargs):
return self.convert(stim, *args, **kwargs)
new_stim = self._convert(stim, *args, **kwargs)
if isinstance(new_stim, (list, tuple, GeneratorType)):
return new_stim
if new_stim.name is None:
new_stim.name = stim.name
else:
new_stim.name = stim.name + '->' + new_stim.name
if isinstance(new_stim, CollectionStimMixin):
for s in new_stim:
if s.name is None:
s.name = stim.name
return new_stim


def get_converter(in_type, out_type):
18 changes: 4 additions & 14 deletions pliers/converters/api.py
@@ -48,24 +48,14 @@ def _convert(self, audio):

class WitTranscriptionConverter(SpeechRecognitionAPIConverter):

@property
def environ_key(self):
return 'WIT_AI_API_KEY'

@property
def recognize_method(self):
return 'recognize_wit'
environ_key = 'WIT_AI_API_KEY'
recognize_method = 'recognize_wit'


class GoogleSpeechAPIConverter(SpeechRecognitionAPIConverter):

@property
def environ_key(self):
return 'GOOGLE_API_KEY'

@property
def recognize_method(self):
return 'recognize_google'
environ_key = 'GOOGLE_APPLICATION_CREDENTIALS'
recognize_method = 'recognize_google_cloud'


class IBMSpeechAPIConverter(AudioToTextConverter):
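
A hedged sketch of the setup implied by the GoogleSpeechAPIConverter change above: the converter now reads service-account credentials from GOOGLE_APPLICATION_CREDENTIALS (rather than GOOGLE_API_KEY) and dispatches to recognize_google_cloud. The path below is a placeholder:

import os

# Placeholder path to a Google Cloud service-account JSON file; the converter
# looks this variable up via its environ_key attribute.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account.json'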
1 change: 0 additions & 1 deletion pliers/converters/google.py
@@ -8,7 +8,6 @@ class GoogleVisionAPITextConverter(GoogleVisionAPITransformer, ImageToTextConver
request_type = 'TEXT_DETECTION'
response_object = 'textAnnotations'


def __init__(self, handle_annotations='first', **kwargs):
super(GoogleVisionAPITextConverter, self).__init__(**kwargs)
self.handle_annotations = handle_annotations
34 changes: 34 additions & 0 deletions pliers/converters/iterators.py
@@ -0,0 +1,34 @@
from pliers.stimuli.video import VideoStim, DerivedVideoStim
from pliers.stimuli.image import ImageStim
from pliers.stimuli.text import ComplexTextStim, TextStim
from pliers.converters import Converter


class StimCollectionIterator(Converter):

def _convert(self, stim):
for element in stim:
yield element


class VideoFrameIterator(StimCollectionIterator):

_input_type = VideoStim
_output_type = ImageStim


class DerivedVideoFrameIterator(StimCollectionIterator):

# TODO: use VideoFrameIterator for both VideoStim and DerivedVideoStim,
# but this may require reworking _input_type to handle disjunction rather
# than the current conjunction, or making the get_converter() code walk
# up the hierarchy and use superclass iterators.

_input_type = DerivedVideoStim
_output_type = ImageStim


class ComplexTextIterator(StimCollectionIterator):

_input_type = ComplexTextStim
_output_type = TextStim
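
The iterator converters above simply yield each element of a collection stim. A minimal sketch of how one might be used, assuming VideoStim is iterable over its frames and that the generator-aware Converter._transform added in this commit passes the generator through unchanged (the filename is a placeholder):

from pliers.stimuli.video import VideoStim
from pliers.converters.iterators import VideoFrameIterator

video = VideoStim('clip.mp4')                    # placeholder filename
frames = VideoFrameIterator().transform(video)   # lazily yields frame stims
first_frame = next(iter(frames))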
2 changes: 1 addition & 1 deletion pliers/converters/multistep.py
@@ -31,7 +31,7 @@ def _convert(self, stim):
raise ValueError(msg)
else:
converter = step
stim = converter.transform(stim)
stim = converter.transform(stim)
return stim


20 changes: 6 additions & 14 deletions pliers/converters/video.py
@@ -51,6 +51,9 @@ class FrameSamplingConverter(VideoToDerivedVideoConverter):
top_n (int): takes top n frames sorted by the absolute difference
with the next frame
'''

_log_attributes = ('every', 'hertz', 'top_n')

def __init__(self, every=None, hertz=None, top_n=None):
super(FrameSamplingConverter, self).__init__()
self.every = every
@@ -62,19 +65,12 @@ def _convert(self, video):
frame_index = range(video.n_frames)
else:
frame_index = video.frame_index

if not hasattr(video, "history"):
history = pd.DataFrame(columns=["filter", "value", "n_frames"])
else:
history = video.history.copy()

if self.every is not None:
new_idx = range(video.n_frames)[::self.every]
history.loc[history.shape[0]]= ["every", self.every, len(new_idx)]
elif self.hertz is not None:
interval = int(video.fps / self.hertz)
new_idx = range(video.n_frames)[::interval]
history.loc[history.shape[0]] = ["hertz", self.hertz, len(new_idx)]
elif self.top_n is not None:
import cv2
diffs = []
@@ -85,7 +81,6 @@ def _convert(self, video):
diffs.append(sum(cv2.sumElems(cv2.absdiff(last, img))))
last = img
new_idx = sorted(range(len(diffs)), key=lambda i: diffs[i], reverse=True)[:self.top_n]
history.loc[history.shape[0]] = ["top_n", self.top_n, len(new_idx)]

frame_index = sorted(list(set(frame_index).intersection(new_idx)))

@@ -98,12 +93,9 @@ def _convert(self, video):
else:
dur = (len(video.frames) / video.fps) - onsets[i]

elem = VideoFrameStim(video=video, frame_num=f,
duration=dur)
elem = VideoFrameStim(video=video, frame_num=f, duration=dur)
elements.append(elem)

return DerivedVideoStim(filename=video.filename,
elements=elements,
frame_index=frame_index,
history=history)
return DerivedVideoStim(filename=video.filename, elements=elements,
frame_index=frame_index)
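
A hedged usage sketch for FrameSamplingConverter, assuming transform() on a VideoStim returns the DerivedVideoStim built in _convert above; exactly one of every, hertz, or top_n is supplied, matching the branching logic shown (the filename is a placeholder):

from pliers.stimuli.video import VideoStim
from pliers.converters.video import FrameSamplingConverter

video = VideoStim('clip.mp4')                # placeholder filename
sampler = FrameSamplingConverter(hertz=2)    # keep roughly two frames per second
derived = sampler.transform(video)           # DerivedVideoStim with a sampled frame_index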

36 changes: 31 additions & 5 deletions pliers/extractors/__init__.py
@@ -4,17 +4,28 @@
import pandas as pd
import numpy as np
from collections import defaultdict

from pliers import config
from pliers.utils import memory
from types import GeneratorType

__all__ = ['api', 'audio', 'google', 'image', 'text', 'video']


class Extractor(with_metaclass(ABCMeta, Transformer)):
''' Base class for Extractors.'''

def __init__(self):
super(Extractor, self).__init__()
if config.cache_extractors:
self.transform = memory.cache(self.transform)

def extract(self, stim, *args, **kwargs):
return self.transform(stim, *args, **kwargs)

def transform(self, stim, *args, **kwargs):
result = super(Extractor, self).transform(stim, *args, **kwargs)
return list(result) if isinstance(result, GeneratorType) else result

@abstractmethod
def _extract(self, stim):
pass
@@ -23,6 +34,7 @@ def _transform(self, stim, *args, **kwargs):
return self._extract(stim, *args, **kwargs)



class ExtractorResult(object):

def __init__(self, data, stim, extractor, features=None, onsets=None,
@@ -31,6 +43,7 @@ def __init__(self, data, stim, extractor, features=None, onsets=None,
self.stim = stim
self.extractor = extractor
self.features = features
self._history = None
if onsets is None:
onsets = stim.onset
self.onsets = onsets if onsets is not None else np.nan
@@ -47,6 +60,14 @@ def to_df(self, stim_name=False):
df.set_index('stim', append=True, inplace=True)
return df

@property
def history(self):
return self._history

@history.setter
def history(self, history):
self._history = history

@classmethod
def merge_features(cls, results, extractor_names=True, stim_names=True):
''' Merge a list of ExtractorResults bound to the same Stim into a
@@ -61,7 +82,7 @@ def merge_features(cls, results, extractor_names=True, stim_names=True):
'''

# Make sure all ExtractorResults are associated with same Stim.
stims = set([r.stim for r in results])
stims = set([r.stim.name for r in results])
dfs = [r.to_df() for r in results]
if len(stims) > 1:
raise ValueError("merge_features() can only be called on a set of "
@@ -94,16 +115,21 @@ def merge_features(cls, results, extractor_names=True, stim_names=True):
if durations.apply(lambda x: x.nunique()<=1, axis=1).all():
result = result.drop('duration', axis=1, level=1)

result.insert(0, 'class', results[0].stim.__class__.__name__)
result.insert(0, 'filename', results[0].stim.filename)
result.insert(0, 'history', str(results[0].history))

if stim_names:
result['stim'] = list(stims)[0].name
result['stim'] = list(stims)[0]
result.set_index('stim', append=True, inplace=True)
result = result.sort_index()

return result
return result.sort_values(['onset'])

@classmethod
def merge_stims(cls, results, stim_names=True):
results = [r.to_df(True) if isinstance(r, ExtractorResult) else r for r in results]
return pd.concat(results, axis=0)
return pd.concat(results, axis=0).sort_values('onset')


def merge_results(results, extractor_names=True, stim_names=True):
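
A hedged sketch of combining extractor output with merge_results, assuming it delegates to the merge_features/merge_stims methods above; the two image extractors are named purely for illustration and the file path is a placeholder:

from pliers.stimuli.image import ImageStim
from pliers.extractors import merge_results
from pliers.extractors.image import BrightnessExtractor, VibranceExtractor  # illustrative extractors

img = ImageStim('photo.jpg')                       # placeholder filename
results = [BrightnessExtractor().transform(img),
           VibranceExtractor().transform(img)]
df = merge_results(results)                        # single merged DataFrame of features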
6 changes: 6 additions & 0 deletions pliers/extractors/api.py
@@ -28,6 +28,8 @@ class IndicoAPIExtractor(ComplexTextExtractor):
models (list): The names of the Indico models to use.
'''

_log_attributes = ('models',)

def __init__(self, api_key=None, models=None):
ComplexTextExtractor.__init__(self)
if api_key is None:
@@ -92,6 +94,8 @@ class ClarifaiAPIExtractor(ImageExtractor):
For example, ['food', 'animal'].
'''

_log_attributes = ('model', 'select_classes')

def __init__(self, app_id=None, app_secret=None, model=None, select_classes=None):
ImageExtractor.__init__(self)
if app_id is None or app_secret is None:
@@ -106,6 +110,8 @@ def __init__(self, app_id=None, app_secret=None, model=None, select_classes=None
self.tagger = ClarifaiApi(app_id=app_id, app_secret=app_secret)
if not (model is None):
self.tagger.set_model(model)

self.model = model

if select_classes is None:
self.select_classes = None
