In [1]:
# Note: to use this code, please follow the instructions in
# https://tts.readthedocs.io/en/latest/installation.html to install the TTS package.

import os
import sys
# pylint: disable=redefined-outer-name, unused-argument
from pathlib import Path
import TTS
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Most of this code is adapted from TTS/TTS/bin/synthesize.py


# Set up Synth for tts model
def setupttsmodel(modelpath='tts_models/multilingual/multi-dataset/your_tts', use_cuda=False):
    """Download a released TTS model by name and wrap it in a ``Synthesizer``.

    Parameters
    ----------
    modelpath : str
        Model name handed to ``ModelManager.download_model``. Defaults to
        the multilingual YourTTS model.
    use_cuda : bool
        Forwarded to ``Synthesizer`` as its last positional argument.
        Defaults to ``False`` (the original code always passed a falsy value).

    Returns
    -------
    Synthesizer
        Built from the downloaded model/config paths and, when the model
        entry names a default vocoder, the downloaded vocoder paths.
    """
    # The model catalogue is located relative to the installed TTS.tts package.
    models_json = Path(TTS.tts.__file__).parent / "../.models.json"
    manager = ModelManager(models_json)

    # Fetch the model that was actually requested; the name used to be
    # hard-coded here, which silently ignored the `modelpath` argument.
    model_path, config_path, model_item = manager.download_model(modelpath)

    # If the model entry declares a default vocoder, download it too so the
    # synthesizer receives it (mirrors the upstream synthesize script).
    vocoder_path = None
    vocoder_config_path = None
    vocoder_name = model_item.get("default_vocoder")
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # No custom speaker/language files or speaker encoder are supplied.
    speakers_file_path = None
    language_ids_file_path = None
    encoder_path = None
    encoder_config_path = None

    return Synthesizer(
        model_path,
        config_path,
        speakers_file_path,
        language_ids_file_path,
        vocoder_path,
        vocoder_config_path,
        encoder_path,
        encoder_config_path,
        use_cuda,
    )





In [3]:
# Build the synthesizer once with the default model; later cells reuse `synth`.
synth = setupttsmodel()

 > tts_models/multilingual/multi-dataset/your_tts is already downloaded.
 > Using model: vits
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:0.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:45
 | > do_sound_norm:False
 | > do_amp_to_db_linear:False
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:64
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_

In [4]:
def usesynth(
    synth,
    text="This is a test of the model",
    speaker_idx=None,
    lang='en',
    speaker_wav=None,
):
    """Run ``synth.tts`` on ``text`` and return its result unchanged.

    The four values are forwarded positionally, in the order
    ``text, speaker_idx, lang, speaker_wav``.
    """
    tts_args = (text, speaker_idx, lang, speaker_wav)
    return synth.tts(*tts_args)

In [5]:
import librosa
from IPython.display import Audio


# Synthesize a short English passage, passing TomSawyerTest.wav as `speaker_wav`.
wav = usesynth(
    synth,
    text=(
        "He is well-known friends with the social outcast Huckleberry Finn "
        "and the neighborhood child Joe Harper. "
        "Although never told to anyone but the female herself, "
        "Tom also has an infatuation with classmate Rebecca Thatcher. "
    ),
    speaker_idx=None,
    lang='en',
    speaker_wav='TomSawyerTest.wav',
)

# Last expression of the cell: display an audio player for `wav` at rate 16000.
Audio(wav, rate=16000)


 > Text splitted to sentences.
['He is well-known friends with the social outcast Huckleberry Finn and the neighborhood child Joe Harper.', 'Although never told to anyone but the female herself, Tom also has an infatuation with classmate Rebecca Thatcher.']




 > Processing time: 4.538602352142334
 > Real-time factor: 0.29149661863470355


In [30]:
import pyaudio

In [21]:
# Long multi-sentence English passage used as input for the synthesis cell that follows.
text = "What the heck did you just frickin’ say about me, you little whiner? I’ll have you know I graduated top of my class in the Navy Seals, and I’ve been involved in numerous secret raids on Al-Quaeda, and I have over three hundred confirmed kills.I am trained in gorilla warfare and I’m the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the frick out with precision the likes of which has never been seen before on this Earth, mark my frickin’ words. You think you can get away with saying that crap to me over the Internet? Think again, buddy. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, buddy. The storm that wipes out the pathetic little thing you call your life. You’re frickin dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that’s just with my bare hands.Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable butt off the face of the continent, you little poopy-head. If only you could have known what unholy retribution your little “clever” comment was about to bring down upon you, maybe you would have held your stinking tongue.But you couldn’t, you didn’t, and now you’re paying the price, you big jerk. I will rain fury all over you and you will drown in it. You’re in frickin’ trouble, mister."


In [9]:

# Synthesize the current `text` in English, passing TomSawyerTest.wav as `speaker_wav`.
wav = usesynth(
    synth,
    text=text,
    speaker_idx=None,
    lang='en',
    speaker_wav='TomSawyerTest.wav',
)


 > Text splitted to sentences.
['What the heck did you just frickin’ say about me, you little whiner?', 'I’ll have you know I graduated top of my class in the Navy Seals, and I’ve been involved in numerous secret raids on Al-Quaeda, and I have over three hundred confirmed kills.', 'I am trained in gorilla warfare and I’m the top sniper in the entire US armed forces.', 'You are nothing to me but just another target.', 'I will wipe you the frick out with precision the likes of which has never been seen before on this Earth, mark my frickin’ words.', 'You think you can get away with saying that crap to me over the Internet?', 'Think again, buddy.', 'As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, buddy.', 'The storm that wipes out the pathetic little thing you call your life.', 'You’re frickin dead, kid.', 'I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that’s j

In [14]:
# Play back at 16000 Hz, the sample rate reported when the model was loaded and
# the rate used by the other playback cells; 17000 played the clip too fast and
# shifted its pitch.
Audio(wav, rate=16000)


In [40]:
# French test sentence; note this rebinds `text`, replacing the English passage.
text = "Battre le fer pendant qu’il est chaud"

In [41]:
# Synthesize the current `text` with the 'fr-fr' language id, passing
# TomSawyerTest.wav as `speaker_wav`.
wav = usesynth(
    synth,
    text=text,
    speaker_idx=None,
    lang='fr-fr',
    speaker_wav='TomSawyerTest.wav',
)


 > Text splitted to sentences.
['Battre le fer pendant qu’il est chaud']
 > Processing time: 2.619337320327759
 > Real-time factor: 1.341186543946625


In [42]:
# Display an audio player for the current `wav` at rate 16000.
playback_rate = 16000
Audio(wav, rate=playback_rate)


In [24]:
# Save the current `wav` to disk. The file name now carries an explicit .wav
# extension so audio players and file browsers recognize the file.
synth.save_wav(wav, "FrenchNavySealCopypasta.wav")

In [35]:
# Show the language-name -> id mapping held by the loaded model's language manager;
# its keys are the values accepted for `lang` in the synthesis cells above.
synth.tts_model.language_manager.language_id_mapping


{'en': 0, 'fr-fr': 1, 'pt-br': 2}