## Tacotron 2 inference code
Edit the variables **checkpoint_path**, **waveglow_path** and **text** to match yours and run the entire code to generate plots of mel outputs and alignments, and to synthesize audio from the generated mel-spectrogram using WaveGlow.

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

#### Import libraries and setup matplotlib

In [None]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
from denoiser import Denoiser

In [None]:
# !pip install hparams
# !pip install tacotron2
# !pip install Unidecode

In [None]:
def plot_data(data, figsize=(16, 4)):
    """Draw every array in ``data`` as an image, side by side in one row.

    Args:
        data: Sequence of 2-D arrays that ``Axes.imshow`` can render; one
            panel is created per element.
        figsize: Figure size forwarded unchanged to ``plt.subplots``.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single panel;
    # without it plt.subplots returns a bare Axes that cannot be indexed.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        # Matplotlib only accepts 'upper' or 'lower' for `origin`; 'lower'
        # draws row 0 of the array at the bottom of the panel.
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none')

#### Setup hparams

In [None]:
# Build the hyperparameter object used by the model loader below
# (called without overrides, so presumably the project defaults — confirm
# in hparams.py).
hparams = create_hparams()
# Pin the sampling rate; the same value is later handed to the audio players
# as their `rate` argument (presumably in Hz — TODO confirm).
hparams.sampling_rate = 22050

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

#### Load model from checkpoint

In [None]:

# Location of the Tacotron 2 checkpoint; edit this to match your own setup.
checkpoint_path = "/content/drive/MyDrive/tacotron2_statedict.pt"

# Build the network from hparams, then restore the weights stored under the
# checkpoint's 'state_dict' key. The checkpoint tensors are mapped to the CPU
# while being read from disk.
model = load_model(hparams)
model.load_state_dict(
    torch.load(
        checkpoint_path,
        map_location=torch.device('cpu'),
    )['state_dict']
)

# Switch to evaluation mode; binding the result to `_` keeps the notebook
# from echoing the returned value as the cell output.
_ = model.eval()

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# !nvidia-smi
# !pip install glow

#### Load WaveGlow for mel2audio synthesis and denoiser

In [None]:
# import torch
# waveglow_path = '/content/drive/MyDrive/waveglow_256channels_universal_v5 .pt'
# waveglow = torch.load(waveglow_path)['model']
# waveglow = torch.load(waveglow_path, map_location=torch.device('cpu'))['model']
# # waveglow = torch.load(waveglow_path, map_location=torch.device('cpu'))['model']
# waveglow.cuda().eval().half()
# for k in waveglow.convinv:
#     k.float()
# denoiser = Denoiser(waveglow)

import glow

# Location of the pretrained WaveGlow checkpoint; edit to match your setup.
waveglow_path = '/content/drive/MyDrive/waveglow_256channels_universal_v5 .pt'

# The checkpoint stores the whole vocoder object under its 'model' key.
waveglow = torch.load(waveglow_path)['model']

# Move the vocoder to the GPU, put it in evaluation mode and convert it to
# half precision, one step at a time.
waveglow.cuda()
waveglow.eval()
waveglow.half()

# The entries of `convinv` are converted back to float32 after the
# half-precision cast above.
for k in waveglow.convinv:
    k.float()

# Denoiser built from the vocoder; used in the optional bias-removal step.
denoiser = Denoiser(waveglow)


In [None]:
# !pip install glow
# !pip install torch
# !pip install torch torchvision
# !pip install glow
# !pip show glow




#### Prepare text input

In [None]:
# Sentence to synthesize; edit this to match your own input.
text = "Viluppuram GNU/Linux Users Group (VGLUG) is a non profit organization started in 2013, Impression of worldwide Free and Open Source Software movement."

# Encode the text with the 'english_cleaners' pipeline and add a leading axis
# of length 1 via [None, :] (presumably the batch dimension the model
# expects — TODO confirm against model.inference).
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]

# Tensors no longer need the deprecated torch.autograd.Variable wrapper, so
# the array is converted directly, moved to the GPU and cast to int64.
sequence = torch.from_numpy(sequence).cuda().long()

#### Decode text input and plot results

In [None]:
# mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)

# plot_data((mel_outputs.float().data.cpu().numpy()[0],
#            mel_outputs_postnet.float().data.cpu().numpy()[0],
#            alignments.float().data.cpu().numpy()[0].T))

# Run Tacotron 2 on the encoded text; the third return value is not used here.
mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)


def plot_data(data, origin='upper', figsize=(16, 4)):
    """Draw every array in ``data`` as an image, side by side in one row.

    This definition replaces any ``plot_data`` defined earlier in the
    notebook once the cell has run.

    Args:
        data: Sequence of 2-D arrays that ``Axes.imshow`` can render; one
            panel is created per element.
        origin: Forwarded to ``imshow``; Matplotlib accepts 'upper' or
            'lower'.
        figsize: Figure size forwarded unchanged to ``plt.subplots``.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        ax.imshow(image, aspect='auto', origin=origin, interpolation='none')


# Actually invoke the helper (a definition alone draws nothing).
# origin='lower' draws row 0 of each array at the bottom of its panel.
plot_data((mel_outputs.float().data.cpu().numpy()[0],
           mel_outputs_postnet.float().data.cpu().numpy()[0],
           alignments.float().data.cpu().numpy()[0].T),
          origin='lower')






#### Synthesize audio from spectrogram using WaveGlow

In [None]:
# with torch.no_grad():
#   audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
#   ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

# Cast the mel-spectrogram to half precision so it matches the WaveGlow
# model, which was converted with .half() when it was loaded.
mel_outputs_postnet = mel_outputs_postnet.half()

# Vocoder inference only: no gradients are needed.
with torch.no_grad():
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)

# The player must be the final top-level expression of the cell: a notebook
# only auto-renders that value, so an expression nested inside the `with`
# block above would be evaluated and silently discarded.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

#### (Optional) Remove WaveGlow bias

In [None]:
# Pass the synthesized audio through the denoiser built from the WaveGlow
# model. [:, 0] keeps index 0 along the second axis of the result.
# NOTE(review): the meaning of `strength` is defined in denoiser.py — confirm
# there before tuning the value.
audio_denoised = denoiser(audio, strength=0.01)[:, 0]
# Final top-level expression of the cell, so the notebook renders the player.
ipd.Audio(audio_denoised.cpu().numpy(), rate=hparams.sampling_rate)