# Text to Speech - Mozilla

### Transform the input sentence into speech.

Original Author: [Mozilla](https://github.com/mozilla)

Provided by: [BreezeWhite](https://github.com/BreezeWhite)

Original GitHub: https://github.com/mozilla/TTS/

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/BreezeWhite/interesting-colabs/blob/main/scripts/MozillaTTS.ipynb)

In [None]:
#@title Setup

!apt-get install python-espeak
!pip install git+https://github.com/mozilla/TTS

%load_ext autoreload
%autoreload 2


import os


def make_download_url(fid, out_name):
  """Build the `wget` shell command that downloads one file to `out_name`.

  As a side effect the parent directory of `out_name` is created when the
  path has one, so the returned command can be executed right away.

  Args:
    fid: File ID inserted as the `id` query parameter of the
      docs.google.com download URL.
    out_name: Local path the downloaded file is written to (`wget -O`).

  Returns:
    The command line as a string (it ends with a newline and indentation).
  """
  # A bare file name has an empty dirname and os.makedirs('') raises
  # FileNotFoundError, so only create the directory when there is one.
  parent_dir = os.path.dirname(out_name)
  if parent_dir:
    os.makedirs(parent_dir, exist_ok=True)
  return f'''wget "https://docs.google.com/uc?export=download&confirm=t&id={fid}" \
      -O '{out_name}'
    '''

# TTS checkpoints
!{make_download_url('1NFsfhH8W8AgcfJ-BsL8CYAwQfZ5k4T-n', 'glow-tts/model.pth.tar')}
!{make_download_url('1IAROF3yy9qTK43vG_-R67y3Py9yYbD6t', 'glow-tts/config.json')}
!{make_download_url('1CFoPDQBnhfBFu2Gc0TBSJn8o-TuNKQn7', 'tacotron2-DCA/model.pth.tar')}
!{make_download_url('1lWSscNfKet1zZSJCNirOn7v9bigUZ8C1', 'tacotron2-DCA/config.json')}
!{make_download_url('1qevpGRVHPmzfiRBNuugLMX62x1k7B5vK', 'tacotron2-DCA/scale_stats.npy')}
!{make_download_url('1VXAwiq6N-Viq3rsSXlf43bdoi0jSvMAJ', 'speedy-speech-wn/model.pth.tar')}
!{make_download_url('1KvZilhsNP3EumVggDcD46yd834eO5hR3', 'speedy-speech-wn/config.json')}
!{make_download_url('1Ju7apZ5JlgsVECcETL-GEx3DRoNzWfkR', 'speedy-speech-wn/scale_stats.npy')}

!sed -i 's#/root/LJSpeech-1.1/scale_stats.npy#./tacotron2-DCA/scale_stats.npy#g' tacotron2-DCA/config.json
!sed -i 's#/root/LJSpeech-1.1/scale_stats.npy#./speedy-speech-wn/scale_stats.npy#g' speedy-speech-wn/config.json


# Vocoder checkpoints
!{make_download_url('1r2g90JaZsfCj9dJkI9ioIU6JCFMPRqi6', 'wavegrad/model.pth.tar')}
!{make_download_url('1POrrLf5YEpZyjvWyMccj1nGCVc94mR6s', 'wavegrad/config.json')}
!{make_download_url('1Vwbv4t-N1i3jXqI0bgKAhShAEO097sK0', 'wavegrad/scale_stats.npy')}
!{make_download_url('1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K', 'fullband-melgan/model.pth.tar')}
!{make_download_url('1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu', 'fullband-melgan/config.json')}
!{make_download_url('11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU', 'fullband-melgan/scale_stats.npy')}
!mkdir multiband-melgan
!cp fullband-melgan/* multiband-melgan  # fullband-melgan and multiband-melgan has the same GD file ID.

!sed -i 's#/root/scale_stats_wavegrad.npy#./wavegrad/scale_stats.npy#g' wavegrad/config.json
!sed -i 's#/root/scale_stats_wavegrad.npy#./multiband-melgan/scale_stats.npy#g' multiband-melgan/config.json
!sed -i 's#./scale_stats.npy#./fullband-melgan/scale_stats.npy#g' fullband-melgan/config.json


In [81]:
#@title Run

import html
import tempfile
import subprocess
from pathlib import Path
from IPython.display import display, Audio, HTML, clear_output


text = "A woman is yelling madly and loudly at a innocent cat in front of a table during the dinner in a France restaurant."  #@param {type:"string"}
tts_model = "tacotron2-DCA"  #@param ["glow-tts", "tacotron2-DCA", "speedy-speech-wn"]
vocoder = "fullband-melgan"  #@param ["wavegrad", "fullband-melgan", "multiband-melgan"]

# The scratch folder only has to live until the audio has been loaded for
# display, so let the context manager remove it afterwards.
with tempfile.TemporaryDirectory() as tmp_folder:
  # Run `tts` directly with an argument list instead of through the shell:
  #  * the return code is the real exit status of `tts` (a separate
  #    `echo $?` shell always reports 0), and
  #  * quotes, `$` or backticks inside `text` are passed through literally.
  result = subprocess.run(
      [
          "tts",
          "--use_cuda", "True",
          "--text", text,
          "--model_path", f"{tts_model}/model.pth.tar",
          "--config_path", f"{tts_model}/config.json",
          "--vocoder_path", f"{vocoder}/model.pth.tar",
          "--vocoder_config_path", f"{vocoder}/config.json",
          "--out_path", tmp_folder,
      ],
      stdout=subprocess.PIPE,
      stderr=subprocess.STDOUT,  # keep log lines and errors in one stream
      text=True,
  )

  # Only look for output when synthesis succeeded; sorting makes the pick
  # deterministic should the folder ever hold more than one file.
  audio_files = sorted(Path(tmp_folder).iterdir()) if result.returncode == 0 else []

  if audio_files:
    clear_output()
    # Escape the sentence so characters such as `<` or `&` show up verbatim.
    display(HTML(f'<h3>{html.escape(text)}</h3>'))
    # With `filename=` the audio is embedded in the cell output, so it stays
    # playable after the scratch folder is gone.
    display(Audio(filename=str(audio_files[0])))
  else:
    # Show the captured log so the reason for the failure is visible.
    print(result.stdout)
    print(f'tts did not produce any audio (exit code {result.returncode}).')