In [None]:
import google.cloud.texttospeech as tts
import os
from tqdm import tqdm
def unique_languages_from_voices(voices):
    """Collect the distinct language codes advertised by a group of voices.

    Args:
        voices: Iterable of objects exposing an iterable ``language_codes``
            attribute.

    Returns:
        A set holding every language code that appears on at least one voice.
    """
    return {code for entry in voices for code in entry.language_codes}

def list_languages():
    """Print every language code offered by the Text-to-Speech service.

    Fetches the voice list through a new ``TextToSpeechClient``, reduces it to
    the distinct language codes, and prints them in sorted order, five
    right-aligned columns per row, below a dash-padded header with the count.
    """
    voices_response = tts.TextToSpeechClient().list_voices()
    codes = sorted(unique_languages_from_voices(voices_response.voices))

    header = f" Languages: {len(codes)} "
    print(header.center(60, "-"))
    for position, code in enumerate(codes):
        # Break the row after every fifth entry; otherwise stay on the line.
        line_end = "\n" if position % 5 == 4 else ""
        print(f"{code:>10}", end=line_end)
        
def list_voices(language_code=None):
    """Return the names of the voices offered by the Text-to-Speech service.

    Args:
        language_code: Optional language filter forwarded unchanged to the
            client's ``list_voices`` call (the notebook passes ``"en"``).
            ``None`` is forwarded as-is.

    Returns:
        A list of voice-name strings sorted in ascending order.
    """
    client = tts.TextToSpeechClient()
    response = client.list_voices(language_code=language_code)
    # Only the names are needed by callers, so sort them directly instead of
    # computing per-voice details (languages, gender, sample rate) that were
    # never used.
    return sorted(voice.name for voice in response.voices)
        
def text_to_wav(voice_name: str, text: str, outputFileName: str):
    """Synthesize ``text`` with the given voice and write the audio to disk.

    Args:
        voice_name: Full voice identifier; its first two dash-separated parts
            are used as the language code (e.g. "en-US-Wavenet-A" -> "en-US").
        text: Plain text to be spoken.
        outputFileName: Path of the file that receives the audio bytes.
    """
    locale = "-".join(voice_name.split("-")[:2])

    # One request per call: LINEAR16-encoded audio for the selected voice.
    speech = tts.TextToSpeechClient().synthesize_speech(
        input=tts.SynthesisInput(text=text),
        voice=tts.VoiceSelectionParams(language_code=locale, name=voice_name),
        audio_config=tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16),
    )

    with open(outputFileName, "wb") as wav_file:
        wav_file.write(speech.audio_content)
        print(f'Generated speech saved to "{outputFileName}"')

def createFolder(folderLocation):
    """Create ``folderLocation`` as an empty directory.

    Any existing directory tree at that path is deleted first, so the result
    is always a fresh, empty folder. Missing parent directories are created.

    Args:
        folderLocation: Path of the directory to (re)create.

    Raises:
        OSError: If an existing tree cannot be removed or the directory
            cannot be created (e.g. permission problems, or the path is a
            regular file).
    """
    # Imported locally because the file's import block does not provide
    # shutil; without it the rmtree call raised NameError, which the old bare
    # ``except`` hid, so existing folders were never actually cleared.
    import shutil

    try:
        shutil.rmtree(folderLocation)
    except FileNotFoundError:
        # Nothing to clear: the folder does not exist yet.
        pass
    os.makedirs(folderLocation)

In [None]:
# ------------------------------ Parameter settings ------------------------------
# Spoken commands to synthesize. Edit the pieces below to change the set:
# fixed mode commands, one "tooth N" command for N = 1..32, then the
# navigation / movement / history commands.
voiceCommandList = (
    ["Yomi", "Yomi pause", "Yomi free", "Yomi baseline", "Yomi guided"]
    + [f"Yomi tooth {toothNumber}" for toothNumber in range(1, 33)]
    + [
        "Yomi previous page",
        "Yomi next page",
        "Yomi previous step",
        "Yomi next step",
        "Yomi previous bit",
        "Yomi next bit",
        "Yomi move buccal",
        "Yomi move lingual",
        "Yomi move distal",
        "Yomi move mesial",
        "Yomi move apical",
        "Yomi move coronal",
        "Yomi undo",
        "Yomi redo",
    ]
)

# Root folder for the generated audio. Per the original instructions, create
# this folder yourself first, then point this setting at it.
resultFolder = "C:/Users/RuichenHe/source/Python/VoiceSyn"

In [None]:
# Generate one audio file per (voice, command) pair: each English voice gets
# its own sub-folder of resultFolder, with one "<command>.wav" per command
# (spaces in the command replaced by underscores).
import time

# Upper bound on synthesis attempts per file. The previous unbounded retry
# with a bare ``except`` looped forever on persistent failures (bad
# credentials, quota, unwritable path) and also swallowed KeyboardInterrupt,
# so the cell could not be stopped.
maxAttemptsPerCommand = 5

readerNameList = list_voices("en")
for readerName in tqdm(readerNameList):
    resultLocation = os.path.join(resultFolder, readerName)
    createFolder(resultLocation)
    for currentCommand in voiceCommandList:
        outputPath = os.path.join(resultLocation, currentCommand.replace(" ", "_") + ".wav")
        for attempt in range(1, maxAttemptsPerCommand + 1):
            try:
                text_to_wav(readerName, currentCommand, outputPath)
                break
            except Exception as error:
                # Report instead of silently retrying so failures are visible.
                print(f'Attempt {attempt}/{maxAttemptsPerCommand} failed for "{outputPath}": {error}')
                if attempt < maxAttemptsPerCommand:
                    # Brief, growing pause before the next try.
                    time.sleep(attempt)
        else:
            # All attempts failed: skip this file and keep going with the rest.
            print(f'Giving up on "{outputPath}" after {maxAttemptsPerCommand} attempts')