In [16]:
from typing import Sequence

import google.cloud.texttospeech as tts
# Output directory for synthesized audio; text_to_wav writes its .wav files here.
base_dir = "data/Google_tts"

def unique_languages_from_voices(voices: Sequence[tts.Voice]):
    """Collect the distinct language codes listed by a sequence of voices.

    Args:
        voices: Voice objects; each one exposes an iterable
            ``language_codes`` attribute.

    Returns:
        A set containing every language code found across all voices
        (an empty set when ``voices`` is empty).
    """
    return {code for entry in voices for code in entry.language_codes}


def list_languages():
    """Print the language codes found across the voices returned by the service.

    Calls ``list_voices()`` on a new client, reduces the returned voices to
    their distinct language codes, and prints those codes in sorted order,
    right-aligned in 10-character columns with five codes per row, under a
    centered header that shows how many codes were found.
    """
    voices = tts.TextToSpeechClient().list_voices().voices
    codes = sorted(unique_languages_from_voices(voices))

    print(f" Languages: {len(codes)} ".center(60, "-"))
    for position, code in enumerate(codes, start=1):
        # End the row after every fifth code; otherwise stay on the same line.
        terminator = "" if position % 5 else "\n"
        print(f"{code:>10}", end=terminator)


In [17]:
# Print the language codes found across all voices returned by the service.
list_languages()


---------------------- Languages: 57 -----------------------
     af-ZA     ar-XA     bg-BG     bn-IN     ca-ES
    cmn-CN    cmn-TW     cs-CZ     da-DK     de-DE
     el-GR     en-AU     en-GB     en-IN     en-US
     es-ES     es-US     eu-ES     fi-FI    fil-PH
     fr-CA     fr-FR     gl-ES     gu-IN     he-IL
     hi-IN     hu-HU     id-ID     is-IS     it-IT
     ja-JP     kn-IN     ko-KR     lt-LT     lv-LV
     ml-IN     mr-IN     ms-MY     nb-NO     nl-BE
     nl-NL     pa-IN     pl-PL     pt-BR     pt-PT
     ro-RO     ru-RU     sk-SK     sr-RS     sv-SE
     ta-IN     te-IN     th-TH     tr-TR     uk-UA
     vi-VN    yue-HK

In [18]:
import google.cloud.texttospeech as tts


def list_voices(language_code=None):
    """Print a table of the voices returned by the service, sorted by name.

    Args:
        language_code: Forwarded unchanged as the ``language_code`` argument
            of ``client.list_voices``; defaults to ``None``.

    Each row shows the voice's language codes (comma-separated), its name,
    the name of its SSML gender enum value, and its natural sample rate in
    Hz. A centered header line with the number of voices is printed first.
    """
    client = tts.TextToSpeechClient()
    voices = list(client.list_voices(language_code=language_code).voices)
    voices.sort(key=lambda item: item.name)

    print(f" Voices: {len(voices)} ".center(60, "-"))
    for item in voices:
        languages = ", ".join(item.language_codes)
        # Convert the raw enum value into its symbolic name.
        gender = tts.SsmlVoiceGender(item.ssml_gender).name
        print(
            f"{languages:<8} | {item.name:<24} | {gender:<8} "
            f"| {item.natural_sample_rate_hertz:,} Hz"
        )


In [19]:
# List the voices returned for the "en" language code.
list_voices("en")


----------------------- Voices: 109 ------------------------
en-AU    | en-AU-Neural2-A          | FEMALE   | 24,000 Hz
en-AU    | en-AU-Neural2-B          | MALE     | 24,000 Hz
en-AU    | en-AU-Neural2-C          | FEMALE   | 24,000 Hz
en-AU    | en-AU-Neural2-D          | MALE     | 24,000 Hz
en-AU    | en-AU-News-E             | FEMALE   | 24,000 Hz
en-AU    | en-AU-News-F             | FEMALE   | 24,000 Hz
en-AU    | en-AU-News-G             | MALE     | 24,000 Hz
en-AU    | en-AU-Polyglot-1         | MALE     | 24,000 Hz
en-AU    | en-AU-Standard-A         | FEMALE   | 24,000 Hz
en-AU    | en-AU-Standard-A         | FEMALE   | 24,000 Hz
en-AU    | en-AU-Standard-B         | MALE     | 24,000 Hz
en-AU    | en-AU-Standard-B         | MALE     | 24,000 Hz
en-AU    | en-AU-Standard-C         | FEMALE   | 24,000 Hz
en-AU    | en-AU-Standard-C         | FEMALE   | 24,000 Hz
en-AU    | en-AU-Standard-D         | MALE     | 24,000 Hz
en-AU    | en-AU-Standard-D         | MALE     | 24,00

## 6. Synthesize audio from text

In [20]:
def text_to_wav(voice_name: str, text: str, out_file=None):
    """Synthesize ``text`` with the given voice and save the audio as a .wav file.

    Args:
        voice_name: Full voice name, e.g. ``"en-US-Studio-O"``. The language
            code sent to the service is built from its first two
            dash-separated parts (``"en-US"`` in that example).
        text: Plain text to synthesize.
        out_file: Base name (without extension) of the output file inside
            ``base_dir``. When ``None``, ``voice_name`` is used instead.

    Side effects:
        Creates the output directory if it does not exist, writes
        ``{base_dir}/{name}.wav`` (overwriting any existing file) and prints
        the path of the saved file.
    """
    # Function-scope import keeps this cell self-contained.
    import os

    language_code = "-".join(voice_name.split("-")[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(
        language_code=language_code, name=voice_name
    )
    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16)

    stem = voice_name if out_file is None else out_file
    filename = f"{base_dir}/{stem}.wav"

    # Make sure the target directory exists before calling the service, so a
    # missing directory does not surface as FileNotFoundError only after the
    # synthesis request has already been made.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    client = tts.TextToSpeechClient()
    response = client.synthesize_speech(
        input=text_input,
        voice=voice_params,
        audio_config=audio_config,
    )

    with open(filename, "wb") as out:
        out.write(response.audio_content)
    # Report success only after the file has been flushed and closed.
    print(f'Generated speech saved to "{filename}"')


In [22]:
# Base name only: text_to_wav prepends base_dir and appends ".wav".
output_file = "ask_the_whether"
# NOTE(review): the prompt text and the file name look like they contain
# typos ("make"/"idea"/"for visit", "whether") — confirm the intended wording.
text_to_wav("en-US-Studio-O", "What make Vietnam the idea place for visit",output_file)


Generated speech saved to "data/Google_tts/ask_the_whether.wav"
