-
Notifications
You must be signed in to change notification settings - Fork 6.3k
/
hybrid_tutorial.py
278 lines (214 loc) · 8.2 KB
/
hybrid_tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# [START translate_hybrid_imports]
import html
import os
# Imports the Google Cloud client libraries
from google.api_core.exceptions import AlreadyExists
from google.cloud import texttospeech
from google.cloud import translate_v3beta1 as translate
from google.cloud import vision
# [END translate_hybrid_imports]
# [START translate_hybrid_project_id]
# Extract the GCP project id from the GOOGLE_CLOUD_PROJECT environment variable.
# The lookup uses os.environ[...], so a KeyError is raised when this module is
# loaded if the variable is not set.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
# [END translate_hybrid_project_id]
# [START translate_hybrid_vision]
def pic_to_text(infile: str) -> str:
    """Detects text in an image file

    Args:
        infile: path to image file

    Returns:
        String of text detected in image

    Raises:
        RuntimeError: if the Vision API response carries an error message
    """
    # Instantiates a client
    client = vision.ImageAnnotatorClient()

    # Opens the input image file
    with open(infile, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    # For dense text, use document_text_detection
    # For less dense text, use text_detection
    response = client.document_text_detection(image=image)

    # A failed annotation is reported through response.error (see the Vision
    # API reference for AnnotateImageResponse). Surface it here instead of
    # silently handing an empty string to the translation and speech steps.
    if response.error.message:
        raise RuntimeError(
            f"Vision API could not process {infile}: {response.error.message}"
        )

    text = response.full_text_annotation.text
    print(f"Detected text: {text}")

    return text
# [END translate_hybrid_vision]
# [START translate_hybrid_create_glossary]
def create_glossary(
    languages: list,
    project_id: str,
    glossary_name: str,
    glossary_uri: str,
) -> str:
    """Creates a GCP glossary resource from a glossary file in Cloud Storage

    The glossary file must already have been uploaded to Cloud Storage.
    If a glossary with the same name already exists, nothing new is created.

    Args:
        languages: list of languages in the glossary
        project_id: GCP project id
        glossary_name: name you want to give this glossary resource
        glossary_uri: the uri of the glossary you uploaded to Cloud Storage

    Returns:
        name of the created or existing glossary
    """
    # Data center location used for the glossary resource
    region = "us-central1"

    translation_client = translate.TranslationServiceClient()

    # Describe the glossary: its full resource name, the languages it
    # covers, and the Cloud Storage file that holds its entries
    glossary_resource = translate.Glossary(
        name=translation_client.glossary_path(project_id, region, glossary_name),
        language_codes_set=translate.Glossary.LanguageCodesSet(
            language_codes=languages
        ),
        input_config=translate.GlossaryInputConfig(
            gcs_source=translate.GcsSource(input_uri=glossary_uri)
        ),
    )

    try:
        # Start the creation and wait up to 90 seconds for it to finish
        pending = translation_client.create_glossary(
            parent=f"projects/{project_id}/locations/{region}",
            glossary=glossary_resource,
        )
        pending.result(timeout=90)
    except AlreadyExists:
        # A glossary called glossary_name is already there; reuse it
        print(
            "The glossary "
            + glossary_name
            + " already exists. No new glossary was created."
        )
    else:
        print("Created glossary " + glossary_name + ".")

    return glossary_name
# [END translate_hybrid_create_glossary]
# [START translate_hybrid_translate]
def translate_text(
    text: str,
    source_language_code: str,
    target_language_code: str,
    project_id: str,
    glossary_name: str,
) -> str:
    """Translates text to a given language using a glossary

    Args:
        text: String of text to translate
        source_language_code: language of input text
        target_language_code: language of output text
        project_id: GCP project id
        glossary_name: name you gave your project's glossary
            resource when you created it

    Returns:
        String of translated text
    """
    # Data center location that holds the glossary
    region = "us-central1"

    translation_client = translate.TranslationServiceClient()

    # Full resource path of the glossary to apply
    glossary_path = translation_client.glossary_path(
        project_id, region, glossary_name
    )

    request = {
        "parent": f"projects/{project_id}/locations/{region}",
        "contents": [text],
        "mime_type": "text/plain",  # mime types: text/plain, text/html
        "source_language_code": source_language_code,
        "target_language_code": target_language_code,
        "glossary_config": translate.TranslateTextGlossaryConfig(
            glossary=glossary_path
        ),
    }
    response = translation_client.translate_text(request=request)

    # Only one string was sent, so take the first glossary translation
    return response.glossary_translations[0].translated_text
# [END translate_hybrid_translate]
# [START translate_hybrid_tts]
def text_to_speech(text: str, outfile: str) -> str:
    """Converts plaintext to SSML and generates synthetic audio from SSML

    Args:
        text: text to synthesize
        outfile: filename to use to store synthetic audio

    Returns:
        The outfile name that the MP3 audio was written to
    """
    # Replace special characters with HTML Ampersand Character Codes
    # so the API does not confuse the text with SSML commands
    # For example, '<' --> '&lt;' and '&' --> '&amp;'
    safe_text = html.escape(text)

    # Convert plaintext to SSML: add a two second break after each
    # newline, then wrap everything in a <speak> element
    paused_text = safe_text.replace("\n", '\n<break time="2s"/>')
    ssml = f"<speak>{paused_text}</speak>"

    # Instantiates a client
    tts_client = texttospeech.TextToSpeechClient()

    # Bundle the SSML input, the voice (language code "en-US", SSML voice
    # gender "MALE") and the audio format (MP3) into one request
    request = texttospeech.SynthesizeSpeechRequest(
        input=texttospeech.SynthesisInput(ssml=ssml),
        voice=texttospeech.VoiceSelectionParams(
            language_code="en-US",
            ssml_gender=texttospeech.SsmlVoiceGender.MALE,
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        ),
    )
    response = tts_client.synthesize_speech(request=request)

    # Writes the synthetic audio to the output file
    with open(outfile, "wb") as audio_file:
        audio_file.write(response.audio_content)
        print("Audio content written to file " + outfile)
    # [END translate_hybrid_tts]
    return outfile
# [START translate_hybrid_integration]
def main() -> None:
    """Runs the tutorial end to end

    Creates a glossary, detects the text in a photo, translates that text
    from French to English using the glossary, and writes the spoken
    translation to an audio file.

    Args:
        None

    Returns:
        None
    """
    # Photo from which to extract text
    image_path = "resources/example.png"
    # Name of file that will hold synthetic speech
    audio_path = "resources/example.mp3"

    # Languages in the glossary; this list must match the languages in
    # the glossary file. Here, the glossary includes French and English
    languages = ["fr", "en"]
    # Name that will be assigned to your project's glossary resource
    requested_name = "bistro-glossary"
    # uri of .csv file uploaded to Cloud Storage
    csv_uri = "gs://cloud-samples-data/translation/bistro_glossary.csv"

    glossary = create_glossary(languages, PROJECT_ID, requested_name, csv_uri)

    # photo -> detected text
    detected_text = pic_to_text(image_path)

    # detected text -> translated text
    translated_text = translate_text(
        detected_text, "fr", "en", PROJECT_ID, glossary
    )

    # translated text -> synthetic audio
    text_to_speech(translated_text, audio_path)
# [END translate_hybrid_integration]
# Run the tutorial only when this file is executed as a script
if __name__ == "__main__":
    main()