In [None]:
from typing import Sequence
import os

import google.cloud.texttospeech as tts

# Tell the Google client library where to find the key file used by this
# notebook. The assignment is unconditional, so it replaces any value that
# was already present in the environment.
GCLOUD_KEY_PATH = "./files/gcloud_key.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GCLOUD_KEY_PATH


def unique_languages_from_voices(voices: Sequence[tts.Voice]):
    """Collect the distinct language codes advertised by a list of voices.

    Args:
        voices: Voice objects; each one's ``language_codes`` attribute is
            iterated.

    Returns:
        A set holding every language code encountered, without duplicates.
        An empty input yields an empty set.
    """
    return {code for entry in voices for code in entry.language_codes}


def list_languages():
    """Print every language code offered by the Text-to-Speech service.

    Creates a ``TextToSpeechClient``, calls ``list_voices()``, reduces the
    returned voices to their unique language codes and prints them sorted,
    five per row, each right-aligned in a 10-character column. A centered
    header line with the total count is printed first.

    Every row, including a final partial one, ends with a newline so that
    output printed afterwards never continues on the last language row.
    """
    client = tts.TextToSpeechClient()
    response = client.list_voices()
    languages = sorted(unique_languages_from_voices(response.voices))

    columns = 5  # number of language codes printed per row
    print(f" Languages: {len(languages)} ".center(60, "-"))
    for row_start in range(0, len(languages), columns):
        row = languages[row_start:row_start + columns]
        print("".join(f"{language:>10}" for language in row))
        

# Run the listing right away so the cell output shows the available language
# codes. This issues a list_voices() request through a TextToSpeechClient.
list_languages()

In [None]:
def synthesize_ssml(ssml_text, voice, audio_config):
    """Synthesize speech from SSML markup via the Text-to-Speech client.

    Args:
        ssml_text: SSML markup, passed as the ``ssml`` field of a
            ``SynthesisInput``.
        voice: Voice selection forwarded unchanged to ``synthesize_speech``.
        audio_config: Audio configuration forwarded unchanged to
            ``synthesize_speech``.

    Returns:
        A ``(response, input_text)`` tuple: the value returned by
        ``synthesize_speech`` and the ``SynthesisInput`` that was sent.
    """
    # A fresh client is created on every call, before the input is built.
    tts_client = tts.TextToSpeechClient()
    synthesis_input = tts.SynthesisInput(ssml=ssml_text)

    request_fields = {
        "input": synthesis_input,
        "voice": voice,
        "audio_config": audio_config,
    }
    api_response = tts_client.synthesize_speech(**request_fields)

    return api_response, synthesis_input

# Voice selection: German ("de-DE"), female, voice name "de-DE-Wavenet-F".
# Alternative voice names noted here earlier: "de-DE-Standard-F", "de-DE-Wavenet-B".
_de_voice_fields = {
    "language_code": "de-DE",
    "name": "de-DE-Wavenet-F",
    "ssml_gender": tts.SsmlVoiceGender.FEMALE,
}
de_voice = tts.VoiceSelectionParams(**_de_voice_fields)

# Audio settings: request MP3-encoded output.
mp3_audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.MP3)


# Two SSML samples built around the same German greeting; only the wrapping
# element differs.
# NOTE(review): `text` has no <speak> root element, unlike `text2` — confirm
# the service accepts it before passing it to synthesize_ssml.
_greeting = (
    "Guten Morgen, Keesekuchenbaum! "
    "Hoffentlich hast Du Dein Frühstück schon gegessen, "
    "denn hier wird nur gespielt! "
    "Viel Spaß!"
)
text = '<prosody rate="medium" pitch="high">' + _greeting + "</prosody>"
text2 = "<speak>" + _greeting + "</speak>"


# Synthesize the <speak>-wrapped sample with the German voice and MP3 config,
# then write the returned audio bytes to disk.
filename = "test2.mp3"
response, input_text = synthesize_ssml(
    ssml_text=text2,
    voice=de_voice,
    audio_config=mp3_audio_config,
)

with open(filename, mode="wb") as out:
    out.write(response.audio_content)
    print(f"Audio content written to {filename}")