Skip to content

Commit

Permalink
Merge branch 'api-remote' into batch-caching
Browse files Browse the repository at this point in the history
  • Loading branch information
qmac committed Mar 31, 2018
2 parents 6c7b1ba + c71f736 commit d60f664
Show file tree
Hide file tree
Showing 18 changed files with 182 additions and 41 deletions.
10 changes: 8 additions & 2 deletions pliers/converters/api/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ class GoogleSpeechAPIConverter(GoogleAPITransformer, AudioToTextConverter):
speech_contexts (list): A list of lists of favored phrases or words
to assist the API. The inner list is a sequence of word tokens,
each outer element is a potential context.
discovery_file (str): path to discovery file containing Google
application credentials.
api_version (str): API version to use.
max_results (int): Max number of results per page.
num_retries (int): Number of times to retry query on failure.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

api_name = 'speech'
Expand Down Expand Up @@ -55,14 +62,13 @@ def _build_request(self, stim):
data = f.read()
os.remove(tmp)

content = base64.b64encode(data).decode()
if self.speech_contexts:
speech_contexts = [{'phrases': c} for c in self.speech_contexts]
else:
speech_contexts = []
request = {
'audio': {
'content': content
'content': base64.b64encode(data).decode()
},
'config': {
'encoding': 'FLAC',
Expand Down
2 changes: 2 additions & 0 deletions pliers/converters/api/ibm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class IBMSpeechAPIConverter(APITransformer, AudioToTextConverter):
be separated by (i.e. the unit each TextStim in the ComplexTextStim
elements should be). Currently, only 'words' or 'phrases' are
supported.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

_env_keys = ('IBM_USERNAME', 'IBM_PASSWORD')
Expand Down
16 changes: 15 additions & 1 deletion pliers/converters/api/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,21 @@
class MicrosoftAPITextConverter(MicrosoftVisionAPITransformer,
ImageToTextConverter):

''' Detects text within images using the Microsoft Vision API. '''
''' Detects text within images using the Microsoft Vision API.
Args:
language (str): Target language to detect in the image.
subscription_key (str): A valid subscription key for Microsoft Cognitive
Services. Only needs to be passed the first time the extractor is
initialized.
location (str): Region the subscription key has been registered in.
It will be the first part of the endpoint URL suggested by
Microsoft when you first created the key.
Examples include: westus, westcentralus, eastus
api_version (str): API version to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

api_method = 'ocr'
_log_attributes = ('subscription_key', 'api_version', 'language')
Expand Down
2 changes: 2 additions & 0 deletions pliers/converters/api/wit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class SpeechRecognitionAPIConverter(APITransformer, AudioToTextConverter):
Args:
api_key (str): API key. Must be passed explicitly or stored in
the environment variable specified in the _env_keys field.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

_log_attributes = ('api_key', 'recognize_method')
Expand Down
15 changes: 11 additions & 4 deletions pliers/extractors/api/clarifai.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ class ClarifaiAPIExtractor(APITransformer, BatchTransformerMixin,
number of label predictions returned.
select_concepts (list): List of concepts (strings) to query from the
API. For example, ['food', 'animal'].
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

_log_attributes = ('api_key', 'model', 'model_name', 'min_value',
Expand Down Expand Up @@ -88,13 +91,17 @@ def _extract(self, stims):
moc = clarifai_client.ModelOutputConfig(min_value=self.min_value,
max_concepts=self.max_concepts,
select_concepts=self.select_concepts)
output_config = moc
model_output_info = clarifai_client.ModelOutputInfo(output_config=output_config)
model_output_info = clarifai_client.ModelOutputInfo(output_config=moc)

# ExitStack lets us use filename context managers simultaneously
with ExitStack() as stack:
files = [stack.enter_context(s.get_filename()) for s in stims]
imgs = [clarifai_client.Image(filename=filename) for filename in files]
imgs = []
for s in stims:
if s.url:
imgs.append(clarifai_client.Image(url=s.url))
else:
f = stack.enter_context(s.get_filename())
imgs.append(clarifai_client.Image(filename=f))
tags = self.model.predict(imgs, model_output_info=model_output_info)

extracted = []
Expand Down
33 changes: 32 additions & 1 deletion pliers/extractors/api/indico.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ class IndicoAPIExtractor(APITransformer, BatchTransformerMixin, Extractor):
api_key (str): A valid API key for the Indico API. Only needs to be
passed the first time the extractor is initialized.
models (list): The names of the Indico models to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

_log_attributes = ('api_key', 'models', 'model_names')
Expand Down Expand Up @@ -75,8 +78,11 @@ def check_valid_keys(self):
# If valid key, a data error (None passed) is expected here
return True

def _get_tokens(self, stims):
    ''' Gather the data payload of each stim, skipping stims whose data
    is None. Order of the input stims is preserved. '''
    payloads = []
    for item in stims:
        value = item.data
        if value is not None:
            payloads.append(value)
    return payloads

def _extract(self, stims):
tokens = [stim.data for stim in stims if stim.data is not None]
tokens = self._get_tokens(stims)
scores = [model(tokens) for model in self.models]

results = []
Expand All @@ -100,6 +106,14 @@ class IndicoAPITextExtractor(TextExtractor, IndicoAPIExtractor):

''' Uses the Indico API to extract features from text, such as
sentiment extraction.
Args:
api_key (str): A valid API key for the Indico API. Only needs to be
passed the first time the extractor is initialized.
models (list): The names of the Indico models to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

def __init__(self, api_key=None, models=None, rate_limit=None,
Expand All @@ -116,6 +130,14 @@ class IndicoAPIImageExtractor(ImageExtractor, IndicoAPIExtractor):

''' Uses the Indico API to extract features from images, such as
facial emotion recognition or content filtering.
Args:
api_key (str): A valid API key for the Indico API. Only needs to be
passed the first time the extractor is initialized.
models (list): The names of the Indico models to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

def __init__(self, api_key=None, models=None, rate_limit=None,
Expand All @@ -126,3 +148,12 @@ def __init__(self, api_key=None, models=None, rate_limit=None,
models=models,
rate_limit=rate_limit,
batch_size=batch_size)

def _get_tokens(self, stims):
    ''' Build the token list for a batch of stims: a stim's URL is used
    when it is set (truthy); otherwise its data is used, and stims whose
    data is None are left out. '''
    # `url or data` yields the URL when truthy, else the data unchanged
    # (the right-hand operand is returned without a truthiness test).
    candidates = (s.url or s.data for s in stims)
    return [tok for tok in candidates if tok is not None]
45 changes: 33 additions & 12 deletions pliers/extractors/api/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,25 @@ class MicrosoftAPIFaceExtractor(MicrosoftAPITransformer, ImageExtractor):
image using the Microsoft Azure Cognitive Services API.
Args:
face_id (bool): return faceIds of the detected faces or not. The
face_id (bool): Return faceIds of the detected faces or not. The
default value is False.
landmarks (str): return face landmarks of the detected faces or
landmarks (str): Return face landmarks of the detected faces or
not. The default value is False.
attributes (list): one or more specified face attributes as strings.
attributes (list): One or more specified face attributes as strings.
Supported face attributes include accessories, age, blur, emotion,
exposure, facialHair, gender, glasses, hair, headPose, makeup,
noise, occlusion, and smile. Note that each attribute has
additional computational and time cost.
subscription_key (str): A valid subscription key for Microsoft Cognitive
Services. Only needs to be passed the first time the extractor is
initialized.
location (str): Region the subscription key has been registered in.
It will be the first part of the endpoint URL suggested by
Microsoft when you first created the key.
Examples include: westus, westcentralus, eastus
api_version (str): API version to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

api_name = 'face'
Expand Down Expand Up @@ -116,11 +126,21 @@ class MicrosoftVisionAPIExtractor(MicrosoftVisionAPITransformer,
''' Base MicrosoftVisionAPIExtractor class.
Args:
features (list): one or more specified vision features as strings.
features (list): One or more specified vision features as strings.
Supported vision features include Tags, Categories, ImageType,
Color, and Adult. Note that each attribute has additional
computational and time cost. By default extracts all visual
features from an image.
subscription_key (str): A valid subscription key for Microsoft Cognitive
Services. Only needs to be passed the first time the extractor is
initialized.
location (str): Region the subscription key has been registered in.
It will be the first part of the endpoint URL suggested by
Microsoft when you first created the key.
Examples include: westus, westcentralus, eastus
api_version (str): API version to use.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
'''

api_method = 'analyze'
Expand Down Expand Up @@ -149,14 +169,15 @@ def _to_df(self, result):
data_dict = {}
for feat in self.features:
feat = feat[0].lower() + feat[1:]
if feat == 'tags':
for tag in result._data[feat]:
data_dict[tag['name']] = tag['confidence']
elif feat == 'categories':
for cat in result._data[feat]:
data_dict[cat['name']] = cat['score']
else:
data_dict.update(result._data[feat])
if feat in result._data:
if feat == 'tags':
for tag in result._data[feat]:
data_dict[tag['name']] = tag['confidence']
elif feat == 'categories':
for cat in result._data[feat]:
data_dict[cat['name']] = cat['score']
else:
data_dict.update(result._data[feat])
return pd.DataFrame([data_dict.values()], columns=data_dict.keys())


Expand Down
2 changes: 1 addition & 1 deletion pliers/stimuli/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, filename=None, onset=None, sampling_rate=None, url=None,
self.data = self.data.mean(axis=1)

super(AudioStim, self).__init__(
filename, onset=onset, duration=duration, order=order)
filename, onset=onset, duration=duration, order=order, url=url)

@staticmethod
def get_sampling_rate(filename):
Expand Down
3 changes: 2 additions & 1 deletion pliers/stimuli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ class Stim(with_metaclass(ABCMeta)):
'''

def __init__(self, filename=None, onset=None, duration=None, order=None,
name=None):
name=None, url=None):

self.filename = filename
self.onset = onset
self.duration = duration
self.order = order
self._history = None
self.url = url

if name is None:
name = '' if self.filename is None else basename(self.filename)
Expand Down
2 changes: 1 addition & 1 deletion pliers/stimuli/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, filename=None, onset=None, duration=None, data=None,
filename = url
self.data = data
super(ImageStim, self).__init__(filename, onset=onset,
duration=duration)
duration=duration, url=url)

def save(self, path):
imsave(path, self.data)
3 changes: 2 additions & 1 deletion pliers/stimuli/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def __init__(self, filename=None, text=None, onset=None, duration=None,
text = urlopen(url).read()
self.text = text
name = 'text[%s]' % text[:40] # Truncate at 40 chars
super(TextStim, self).__init__(filename, onset, duration, order, name)
super(TextStim, self).__init__(filename, onset, duration, order,
name=name, url=url)

@property
def data(self):
Expand Down
3 changes: 2 additions & 1 deletion pliers/stimuli/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def __init__(self, filename=None, frame_index=None, onset=None, url=None,
self.n_frames = len(self.frame_index)
super(VideoFrameCollectionStim, self).__init__(filename,
onset=onset,
duration=duration)
duration=duration,
url=url)

def _load_clip(self):
audio_fps = AudioStim.get_sampling_rate(self.filename)
Expand Down
6 changes: 6 additions & 0 deletions pliers/tests/extractors/api/test_clarifai_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def test_clarifai_api_extractor():
assert result.shape == (1, 6)
assert 'cat' in result.columns and 'dog' in result.columns

url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
stim = ImageStim(url=url)
result = ClarifaiAPIExtractor(max_concepts=5).transform(stim).to_df()
assert result.shape == (1, 9)
assert result['symbol'][0] > 0.8

ext = ClarifaiAPIExtractor(api_key='nogood')
assert not ext.validate_keys()

Expand Down
5 changes: 5 additions & 0 deletions pliers/tests/extractors/api/test_google_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ def test_google_vision_api_label_extractor():
assert 'apple' in result.columns
assert result['apple'][0] > 0.75

url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
stim = ImageStim(url=url)
result = ext.transform(stim).to_df()
assert result['orange'][0] > 0.7

ext = GoogleVisionAPILabelExtractor(discovery_file='nogood')
assert not ext.validate_keys()

Expand Down
5 changes: 5 additions & 0 deletions pliers/tests/extractors/api/test_indico_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def test_indico_api_image_extractor():
assert set(result2.columns) == outdfKeysCheck
assert result2['fer_Happy'][0] > 0.7

url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
stim = ImageStim(url=url)
result = ext.transform(stim).to_df()
assert result['fer_Neutral'][0] > 0.1


@pytest.mark.skipif("'INDICO_APP_KEY' not in os.environ")
def test_indico_api_extractor_large():
Expand Down
5 changes: 5 additions & 0 deletions pliers/tests/extractors/api/test_microsoft_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def test_microsoft_vision_api_tag_extractor():
assert 'apple' in res.columns
assert res['apple'][0] > 0.7

url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
stim = ImageStim(url=url)
result = ext.transform(stim).to_df()
assert result['plate'][0] > 0.1 # doesn't give great labels


@pytest.mark.requires_payment
@pytest.mark.skipif("'MICROSOFT_VISION_SUBSCRIPTION_KEY' not in os.environ")
Expand Down

0 comments on commit d60f664

Please sign in to comment.