From 9c8447dcf974b57d5e0f5500d03c94109b1d6a67 Mon Sep 17 00:00:00 2001 From: qmac Date: Mon, 12 Feb 2018 01:31:20 -0600 Subject: [PATCH] google extractors using _to_df --- pliers/extractors/google.py | 67 +++++++------------ .../extractors/test_google_extractors.py | 37 +++++----- 2 files changed, 45 insertions(+), 59 deletions(-) diff --git a/pliers/extractors/google.py b/pliers/extractors/google.py index cf0ca66b..a1312548 100644 --- a/pliers/extractors/google.py +++ b/pliers/extractors/google.py @@ -4,6 +4,7 @@ from pliers.transformers import GoogleVisionAPITransformer from pliers.extractors.base import ExtractorResult import numpy as np +import pandas as pd class GoogleVisionAPIExtractor(GoogleVisionAPITransformer, ImageExtractor): @@ -19,17 +20,12 @@ def _extract(self, stims): results = [] for i, response in enumerate(responses): if response and self.response_object in response: - annotations = response[self.response_object] - features, values = self._parse_annotations(annotations) - values = [values] - results.append(ExtractorResult(values, stims[i], self, - features=features)) + raw = response[self.response_object] + results.append(ExtractorResult(None, stims[i], self, raw=raw)) elif 'error' in response: raise Exception(response['error']['message']) - else: - results.append(ExtractorResult([[]], stims[i], self, - features=[])) + results.append(ExtractorResult(None, stims[i], self, raw=[{}])) return results @@ -41,13 +37,12 @@ class GoogleVisionAPIFaceExtractor(GoogleVisionAPIExtractor): request_type = 'FACE_DETECTION' response_object = 'faceAnnotations' - def _parse_annotations(self, annotations): - features = [] - values = [] - + def _to_df(self, result): + annotations = result.raw if self.handle_annotations == 'first': annotations = [annotations[0]] + face_results = [] for i, annotation in enumerate(annotations): data_dict = {} for field, val in annotation.items(): @@ -68,11 +63,9 @@ def _parse_annotations(self, annotations): else: data_dict[field] = val - names = ['face%d_%s' % (i+1, n) for n in data_dict.keys()] - features += names - values += list(data_dict.values()) + face_results.append(data_dict) - return features, values + return pd.DataFrame(face_results) class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor): @@ -82,13 +75,9 @@ class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor): request_type = 'LABEL_DETECTION' response_object = 'labelAnnotations' - def _parse_annotations(self, annotations): - features = [] - values = [] - for annotation in annotations: - features.append(annotation['description']) - values.append(annotation['score']) - return features, values + def _to_df(self, result): + res = {label['description']: label['score'] for label in result.raw} + return pd.DataFrame([res]) class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor): @@ -98,15 +87,13 @@ class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor): request_type = 'IMAGE_PROPERTIES' response_object = 'imagePropertiesAnnotation' - def _parse_annotations(self, annotation): - colors = annotation['dominantColors']['colors'] - features = [] - values = [] + def _to_df(self, result): + colors = result.raw['dominantColors']['colors'] + data_dict = {} for color in colors: rgb = color['color'] - features.append((rgb['red'], rgb['green'], rgb['blue'])) - values.append(color['score']) - return features, values + data_dict[(rgb['red'], rgb['green'], rgb['blue'])] = color['score'] + return pd.DataFrame([data_dict]) class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor): @@ -116,8 +103,8 @@ class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor): request_type = 'SAFE_SEARCH_DETECTION' response_object = 'safeSearchAnnotation' - def _parse_annotations(self, annotation): - return list(annotation.keys()), list(annotation.values()) + def _to_df(self, result): + return pd.DataFrame([result.raw]) class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor): @@ -127,12 +114,10 @@ class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor): request_type = 'WEB_DETECTION' response_object = 'webDetection' - def _parse_annotations(self, annotations): - features = [] - values = [] - if 'webEntities' in annotations: - for annotation in annotations['webEntities']: - if 'description' in annotation and 'score' in annotation: - features.append(annotation['description']) - values.append(annotation['score']) - return features, values + def _to_df(self, result): + data_dict = {} + if 'webEntities' in result.raw: + for entity in result.raw['webEntities']: + if 'description' in entity and 'score' in entity: + data_dict[entity['description']] = entity['score'] + return pd.DataFrame([data_dict]) diff --git a/pliers/tests/extractors/test_google_extractors.py b/pliers/tests/extractors/test_google_extractors.py index 4cb93829..dd3db114 100644 --- a/pliers/tests/extractors/test_google_extractors.py +++ b/pliers/tests/extractors/test_google_extractors.py @@ -4,7 +4,8 @@ GoogleVisionAPIPropertyExtractor, GoogleVisionAPISafeSearchExtractor, GoogleVisionAPIWebEntitiesExtractor, - ExtractorResult, merge_results) + ExtractorResult, + merge_results) from pliers.extractors.google import GoogleVisionAPIExtractor from pliers.stimuli import ImageStim, VideoStim import pytest @@ -33,12 +34,12 @@ def test_google_vision_api_face_extractor_inits(): filename = join( get_test_data_path(), 'payloads', 'google_vision_api_face_payload.json') response = json.load(open(filename, 'r')) - features, data = ext._parse_annotations(response['faceAnnotations']) - assert len(features) == len(data) - assert data[features.index('face1_angerLikelihood')] == 'VERY_UNLIKELY' - assert data[ - features.index('face1_landmark_LEFT_EYE_BOTTOM_BOUNDARY_y')] == 257.023 - assert np.isnan(data[features.index('face1_boundingPoly_vertex2_y')]) + stim = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg')) + res = ExtractorResult(None, stim, ext, raw=response['faceAnnotations']) + df = res.to_df() + assert df['angerLikelihood'][0] == 'VERY_UNLIKELY' + assert df['landmark_LEFT_EYE_BOTTOM_BOUNDARY_y'][0] == 257.023 + assert np.isnan(df['boundingPoly_vertex2_y'][0]) @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ") @@ -47,9 +48,9 @@ def test_google_vision_api_face_extractor(): filename = join(get_test_data_path(), 'image', 'obama.jpg') stim = ImageStim(filename) result = ext.transform(stim).to_df() - assert 'face1_joyLikelihood' in result.columns - assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY' - assert float(result['face1_face_detectionConfidence'][0]) > 0.7 + assert 'joyLikelihood' in result.columns + assert result['joyLikelihood'][0] == 'VERY_LIKELY' + assert float(result['face_detectionConfidence'][0]) > 0.7 @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ") @@ -59,12 +60,12 @@ def test_google_vision_multiple_face_extraction(): # Only first record ext = GoogleVisionAPIFaceExtractor(handle_annotations='first') result1 = ext.transform(stim).to_df() - assert 'face1_joyLikelihood' in result1.columns + assert 'joyLikelihood' in result1.columns # All records ext = GoogleVisionAPIFaceExtractor() result2 = ext.transform(stim).to_df() - assert 'face2_joyLikelihood' in result2.columns - assert result2.shape[1] > result1.shape[1] + assert 'joyLikelihood' in result2.columns + assert result2.shape[0] > result1.shape[0] @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ") @@ -77,23 +78,23 @@ def test_google_vision_face_batch(): result = ext.transform(stims) result = merge_results(result, format='wide', extractor_names=False) assert result.shape == (2, 139) - assert 'face1_joyLikelihood' in result.columns - assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY' - assert result['face1_joyLikelihood'][1] == 'VERY_LIKELY' + assert 'joyLikelihood' in result.columns + assert result['joyLikelihood'][0] == 'VERY_LIKELY' + assert result['joyLikelihood'][1] == 'VERY_LIKELY' video = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4')) conv = FrameSamplingFilter(every=10) video = conv.transform(video) result = ext.transform(video) result = merge_results(result, format='wide', extractor_names=False) - assert 'face1_joyLikelihood' in result.columns + assert 'joyLikelihood' in result.columns assert result.shape == (11, 139) video = VideoStim(join(get_test_data_path(), 'video', 'small.mp4')) video = conv.transform(video) result = ext.transform(video) result = merge_results(result, format='wide', extractor_names=False) - assert 'face1_joyLikelihood' not in result.columns + assert 'joyLikelihood' not in result.columns assert len(result) == 0