-
Notifications
You must be signed in to change notification settings - Fork 67
/
microsoft.py
32 lines (23 loc) · 998 Bytes
/
microsoft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
''' Microsoft Azure API-based Converter classes. '''
from .image import ImageToTextConverter
from pliers.stimuli.text import TextStim
from pliers.transformers import MicrosoftVisionAPITransformer
class MicrosoftAPITextConverter(MicrosoftVisionAPITransformer,
                                ImageToTextConverter):
    ''' Detects text within images using the Microsoft Vision API.

    Args:
        language (str): Value sent to the API as the 'language' query
            parameter. Defaults to 'en'.
        kwargs: Any remaining keyword arguments; forwarded unchanged to the
            parent class initializer.
    '''

    api_method = 'ocr'
    _log_attributes = ('api_version', 'language')

    def __init__(self, language='en', **kwargs):
        # The language is stored on the instance before the parent
        # initializer runs.
        self.language = language
        super(MicrosoftAPITextConverter, self).__init__(**kwargs)

    def _convert(self, stim):
        ''' Query the API for the given stim and wrap the detected text.

        The response is read as response['regions'] -> region['lines'] ->
        line['words'] -> word['text']. The words of each line are joined
        with single spaces, and the resulting lines are joined with newlines.

        Args:
            stim: The stimulus handed to self._query_api.

        Returns:
            A TextStim whose text is the newline-joined detected lines.
        '''
        response = self._query_api(stim, {
            'language': self.language,
            'detectOrientation': False
        })
        # Flatten regions -> lines in encounter order; one output line per
        # detected line.
        text = '\n'.join(
            ' '.join(word['text'] for word in line['words'])
            for region in response['regions']
            for line in region['lines']
        )
        return TextStim(text=text)