<a href="https://colab.research.google.com/github/Prim9000/Thai_TTS/blob/main/Train_TTS_Github.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Extracting the Dataset

In [1]:
import os
import shutil
def download():
    """Download the TSync2 corpus and unpack it under /content.

    The archive is fetched with wget into the current working directory,
    unzipped there, and deleted. The extracted ``wav`` and ``wrd_ph`` folders
    are then moved from /content/TSync2 to /content, and the now-redundant
    /content/TSync2 directory is removed.

    Raises:
        RuntimeError: if wget exits with a non-zero status.
    """
    url = "https://github.com/korakot/corpus/releases/download/v1.0/AIFORTHAI-TSync2Corpus.zip"
    archive = "AIFORTHAI-TSync2Corpus.zip"
    print("NECTEC licensed TSync2 under CC-BY-NC-SA")
    print("Start downloading: .. ")
    # Fail early with a clear message instead of a later FileNotFoundError.
    if os.system(f"wget {url}") != 0:
        raise RuntimeError(f"Download failed: {url}")
    os.system(f"unzip {archive}")
    if os.path.exists(archive):
        os.remove(archive)
    shutil.move('/content/TSync2/wav','/content/wav')
    shutil.move('/content/TSync2/wrd_ph','/content/wrd_ph')
    # Plain `rm` cannot delete a directory; remove the leftover tree explicitly.
    shutil.rmtree('/content/TSync2', ignore_errors=True)
    print("Finished")

In [2]:
# Fetch the corpus; this leaves the audio in /content/wav and the
# transcriptions in /content/wrd_ph.
download()

NECTEC licensed TSync2 under CC-BY-NC-SA
Start downloading: .. 
Finished


In [3]:
# Location of a copy of the corpus archive on Google Drive.
# NOTE(review): no drive.mount cell is visible in this notebook; presumably
# Drive is mounted at /content/drive beforehand — confirm.
path = "/content/drive/MyDrive/TSync2/TSync2.zip"

In [4]:
!unzip /content/drive/MyDrive/TSync2/TSync2.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: TSync2/wrd_ph/tsync2_noon_28_4129.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_72_9141.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_32_7494.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_70_7815_2.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_50_6507.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_41_4340.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_29_1209.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_29_145.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_25_1948.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_63_6187.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_74_759_1.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_77_4600.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_39_3351.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_30_5149.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_52_1728.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_19_1613_2.txt  
  inflating: TSync2/wrd_ph/tsync2_noon_99_394.txt  
  inflating: TSync2/wrd_ph/tsyn

# Trim silence (sampling rate = 22050 Hz)

In [5]:
import matplotlib.pyplot as plt
import os
import librosa
import shutil
import soundfile as sf
from tqdm.auto import tqdm

In [6]:
def trim(directory,filename,sr=22050, threshold=20, output_dir='/content/wav'):
  """Trim leading and trailing silence from one audio file and save it as WAV.

  The file is loaded with ``librosa.load`` at sampling rate ``sr``, trimmed
  with ``librosa.effects.trim`` and written to ``output_dir`` under the input
  file's base name with a ``.wav`` extension. The input file is only
  overwritten when it already lives in ``output_dir`` under that same name.

  Args:
    directory: Folder containing the input file.
    filename: Name of the input file inside ``directory``.
    sr: Sampling rate passed to ``librosa.load`` and used when writing.
    threshold: Value passed as ``top_db`` to ``librosa.effects.trim``.
    output_dir: Folder that receives the trimmed file.
  """
  # splitext copes with extensions of any length (or none), unlike [:-4].
  stem = os.path.splitext(filename)[0]
  new_filename = "{}.wav".format(stem)
  signal, sr = librosa.load(os.path.join(directory,filename), sr=sr)
  trimmed, _ = librosa.effects.trim(signal, top_db=threshold)
  os.makedirs(output_dir, exist_ok=True)
  # Write straight to the destination instead of writing and then moving.
  sf.write(os.path.join(output_dir, new_filename), trimmed, samplerate=sr)

In [None]:
source = '/content/wav/'
# Collect failures instead of silently discarding them, so unreadable or
# untrimmed files are visible before training starts.
failed_files = []
for root, dirnames, filenames in os.walk(source):
    for filename in tqdm(filenames, desc=root):
        try:
            # Pass the directory os.walk is currently in, so files in
            # sub-folders are found as well.
            trim(root, filename)
        except Exception as err:
            failed_files.append((os.path.join(root, filename), err))

if failed_files:
    print("Could not trim {} file(s):".format(len(failed_files)))
    for bad_path, err in failed_files[:10]:
        print("  {}: {!r}".format(bad_path, err))

# Tacotron2

In [None]:
# Work from /content: the following cells use paths relative to it.
%cd /content/

In [None]:
%tensorflow_version 1.x
import os
from os.path import exists, join, basename, splitext
git_repo_url = 'https://github.com/Prim9000/tacotron2.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # clone and install
  !git clone -q --recursive {git_repo_url}
  !cd {project_name}/waveglow && git checkout 9168aea
  !pip install -q librosa unidecode
  
import sys
sys.path.append(join(project_name, 'waveglow/'))
sys.path.append(project_name)
import time
import matplotlib
import matplotlib.pylab as plt
plt.rcParams["axes.grid"] = False

In [None]:
def download_from_google_drive(file_id, file_name):
  # download a file from the Google Drive link
  !rm -f ./cookie
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id={file_id}" > /dev/null
  confirm_text = !awk '/download/ {print $NF}' ./cookie
  confirm_text = confirm_text[0]
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm={confirm_text}&id={file_id}" -o {file_name}

tacotron2_pretrained_model = 'tacotron2_statedict.pt'
waveglow_pretrained_model = 'waveglow_old.pt'
# Pairs of (Google Drive file id, local file name): the Tacotron2 pretrained
# model first, then the Waveglow pretrained model. A file that is already
# present is not downloaded again.
for drive_id, local_name in (
    ('1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA', tacotron2_pretrained_model),
    ('1WsibBTsuRg_SF2Z6L6NFRTT-NjEy1oTx', waveglow_pretrained_model),
):
  if exists(local_name):
    continue
  download_from_google_drive(drive_id, local_name)

In [None]:
import IPython.display as ipd
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT
from audio_processing import griffin_lim
from text import text_to_sequence
from denoiser import Denoiser

def plot_data(data, figsize=(16, 4)):
    """Show every array in ``data`` as an image, side by side in one row.

    Args:
        data: Sequence of arrays accepted by ``Axes.imshow``; one panel is
            drawn per element. An empty sequence draws nothing.
        figsize: Figure size passed to ``plt.subplots``.
    """
    if len(data) == 0:
        return
    # squeeze=False always returns a 2-D array of Axes, so a single panel
    # works the same way as several.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        # 'lower' puts row 0 at the bottom; 'bottom' is not an accepted value
        # in current Matplotlib releases.
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none', cmap='viridis')

# Disable autograd for this kernel: the models below are put in eval mode.
torch.set_grad_enabled(False)

# initialize Tacotron2 with the pretrained model
hparams = create_hparams()
hparams.sampling_rate = 22050
model = Tacotron2(hparams)
model.load_state_dict(torch.load(tacotron2_pretrained_model)['state_dict'])
_ = model.cuda().eval()#.half()

# initialize Waveglow with the pretrained model
# waveglow = torch.load(waveglow_pretrained_model)['model']
# WORKAROUND for: https://github.com/NVIDIA/tacotron2/issues/182
# Build a fresh WaveGlow from the repo's config and copy the weights from the
# saved model object, instead of using the saved object directly.
import json
from glow import WaveGlow
# Read the config inside a context manager so the file handle is closed.
with open('%s/waveglow/config.json' % project_name) as config_file:
    waveglow_config = json.load(config_file)['waveglow_config']
waveglow = WaveGlow(**waveglow_config)
waveglow.load_state_dict(torch.load(waveglow_pretrained_model)['model'].state_dict())
_ = waveglow.cuda().eval()#.half()
# Cast each module in waveglow.convinv to float32.
for k in waveglow.convinv:
    k.float()
denoiser = Denoiser(waveglow)

In [None]:
# Switch into the cloned repository; train.py is launched from here.
%cd /content/tacotron2

In [None]:
# Put the pretrained checkpoint inside the repository folder, where the
# training command looks for it. Guarded so that re-running this cell after
# the file has already been moved does not raise.
statedict_src = '/content/tacotron2_statedict.pt'
statedict_dst = '/content/tacotron2/tacotron2_statedict.pt'
if os.path.exists(statedict_src):
  shutil.move(statedict_src, statedict_dst)
elif not os.path.exists(statedict_dst):
  raise FileNotFoundError(
      "Pretrained checkpoint not found at {} or {}".format(statedict_src, statedict_dst))

In [None]:
# Read one corpus file with SciPy; `samplerate` and `data` are not used by any
# later cell visible in this notebook.
# NOTE(review): presumably a spot check that the trimmed audio is at
# 22050 Hz — confirm.
from scipy.io import wavfile
samplerate, data = wavfile.read('/content/wav/tsync2_noon_0_1228.wav')

In [None]:
# Launch train.py as a separate process, with `outdir` as the output directory
# and `logdir` as the log directory, passing tacotron2_statedict.pt via -c
# together with --warm_start.
# NOTE(review): presumably --warm_start initialises from the checkpoint weights
# rather than resuming a run — confirm against train.py.
!python train.py --output_directory=outdir --log_directory=logdir -c tacotron2_statedict.pt --warm_start