In [1]:
"""Synthesizes speech from the input string of text or ssml.
Make sure to be working in a virtual environment.

Note: ssml must be well-formed according to:
    https://www.w3.org/TR/speech-synthesis/
"""
from google.cloud import texttospeech

In [2]:
# Create the Text-to-Speech API client that the later synthesis cell uses to
# send its request.
# NOTE(review): no credentials are passed here, so the client presumably picks
# them up from the environment — confirm this is configured before running.
client = texttospeech.TextToSpeechClient()

In [3]:
# Plain-text payload that will be converted to speech (passed via `text=`).
synthesis_input = texttospeech.SynthesisInput(
    text="Hello world, hello world, hello world, hello world!",
)

In [4]:
# Voice selection: ask for an "en-US" voice with the NEUTRAL SSML gender.
voice = texttospeech.VoiceSelectionParams(
    language_code="en-US",
    ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
)

In [33]:
# Output format for the synthesized audio. LINEAR16 is used here; the other
# members of texttospeech.AudioEncoding (MP3, OGG_OPUS, MULAW, ALAW) can be
# substituted by changing `audio_encoding`.
audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.LINEAR16)

In [14]:
# Show the name of every member of the AudioEncoding enum.
print([encoding.name for encoding in texttospeech.AudioEncoding])

['AUDIO_ENCODING_UNSPECIFIED', 'LINEAR16', 'MP3', 'OGG_OPUS', 'MULAW', 'ALAW']


In [34]:
# Send the synthesis request: the text input, the chosen voice parameters and
# the requested audio format are passed together in a single call.
response = client.synthesize_speech(
    input=synthesis_input,
    voice=voice,
    audio_config=audio_config,
)

In [35]:
# The response's audio_content is binary, so the file is opened in "wb" mode.
# The filename is named once so open() and the status message cannot drift apart.
output_filename = "output.wav"
with open(output_filename, "wb") as out:
    # Write the synthesized audio bytes to the output file.
    out.write(response.audio_content)
# Report success only after the `with` block has closed (and flushed) the file.
print(f'Audio content written to file "{output_filename}"')

Audio content written to file "output.wav"
