<a href="https://colab.research.google.com/github/Nikav7/Assignment_7_Veronica/blob/main/Assignment7_Veronica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**TRAINING A TTS ON LJSPEECH DATASET WITH GLOW-TTS MODEL FROM COQUI-AI FOR ITALIAN**

Repo: https://github.com/coqui-ai/TTS.git

This is a demo, meant only for looking at the code.

In [None]:
# Mount Google Drive so the output of model training can be saved there.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Print the current working directory.
!pwd

In [None]:
#CLONE THE COQUI TTS REPO, INSTALL THE TTS AND LIST THE AVAILABLE MODELS

!git clone https://github.com/coqui-ai/TTS

In [None]:
# Change the notebook's working directory to the cloned TTS repository.
%cd /content/TTS

In [None]:
# install espeak backend
!sudo apt-get install espeak-ng

In [None]:
!pip install -r requirements.txt
!python setup.py install

In [None]:
!pip install tts
!tts --list_models

In [None]:
# Test with the LJSpeech/Glow-TTS model (tts_models/en/ljspeech/glow-tts) and its default vocoder model.
# The output path given on the command line is /content/audio_test.wav.
!tts --text "Thank you for trying, hope you like this Voice" --model_name "tts_models/en/ljspeech/glow-tts" --out_path /content/audio_test.wav

In [11]:
# Go back to /content before downloading the dataset: the download cell fetches and
# extracts into the current directory, and the training cell reads /content/LJSpeech-1.1/.
%cd /content

/content


In [None]:
#DOWNLOAD LJSPEECH DATASET TO TRAIN GLOW-TTS MODEL ON IT
!wget http://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2

#DECOMPRESS
!tar -xjf LJSpeech-1.1.tar.bz2

In [None]:
# NOTE(review): this directory change is commented out, so running the cell does nothing.
# Uncomment it only if the following cell must run from inside the cloned repository.
#%cd /content/TTS

**GLOW TTS MODEL TRAINING with Italian phoneme language**

Note: sometimes importing the first module (TTS.tts.configs.glow_tts_config) raises an error; restarting the runtime makes it work.

IMPORTANT: to try with English, change the phoneme language (`phoneme_language`) to "en-us".

In [None]:
# This takes a long time, but if you want to give it a try,
# you can lower the number of epochs (the `epochs` argument of GlowTTSConfig below).

import os

# TrainingArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs

# GlowTTSConfig: all model related values for training, validating and testing.
from TTS.tts.configs.glow_tts_config import GlowTTSConfig

# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

# Folder that receives the training output. It is passed to the model config and to the
# Trainer below, and the phoneme cache is placed inside it.
output_path = "/content/drive/MyDrive/trainoutput-2"  # HERE YOU HAVE TO SPECIFY A FOLDER CREATED ON YOUR DRIVE

# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# The dataset is addressed by its own absolute path rather than joined onto output_path:
# os.path.join() discards every earlier component once a later one is absolute, so joining
# the two would yield this same path while suggesting the data lives under output_path.
dataset_path = "/content/LJSpeech-1.1/"

dataset_config = BaseDatasetConfig(
    formatter="ljspeech", meta_file_train="metadata.csv", path=dataset_path
)

# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
# The keyword arguments are grouped by purpose; the values are unchanged.
config = GlowTTSConfig(
    # output location and dataset(s)
    output_path=output_path,
    datasets=[dataset_config],
    # training / evaluation schedule
    epochs=1000,
    batch_size=32,
    eval_batch_size=16,
    run_eval=True,
    test_delay_epochs=-1,
    mixed_precision=True,
    # data-loader workers
    num_loader_workers=4,
    num_eval_loader_workers=4,
    # text processing: phoneme-based input with "it-it" as the phoneme language
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="it-it",
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    # console logging
    print_step=25,
    print_eval=False,
)

# INITIALIZE THE AUDIO PROCESSOR
# Built from the training config; it is handed to the model below.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# init_from_config returns the tokenizer together with a config object; `config` is
# rebound to that returned object, so every later statement uses it.
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Load the samples described by dataset_config and split them into a training and an
# evaluation set; the split sizes are read from the config.
train_samples, eval_samples = load_tts_samples(
    dataset_config,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
)

# INITIALIZE THE MODEL
# The model is built from the config (model details such as the number of layers and the
# embedding size), the audio processor and the tokenizer.
# No speaker manager is passed; speaker managers are used by multi-speaker models.
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)

# INITIALIZE THE TRAINER
# Default trainer arguments, the config, the output folder, the model and both sample sets.
trainer_args = TrainerArgs()
trainer = Trainer(
    trainer_args,
    config,
    output_path,
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)

# Start training.
trainer.fit()

In [None]:
# Run the tts-server command line tool with its --list_models flag.
!tts-server --list_models