## Tacotron 2 inference code 
Edit the variables **checkpoint_path** and **text** to match your setup, then run all cells to plot the mel outputs and alignments and to synthesize audio from the generated mel-spectrogram using Griffin-Lim.

#### Import libraries

In [None]:
import matplotlib.pyplot as plt
import IPython.display as ipd

import numpy as np
import torch

from hparams import create_hparams
from train import load_model
from text import text_to_sequence

In [None]:
%matplotlib inline

def plot_data(data, figsize=(16, 4)):
    """Show each array in ``data`` as an image in its own subplot column.

    Args:
        data: Sequence of 2-D arrays; one subplot is created per element.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        The ``(fig, axes)`` pair, where ``axes`` is a 1-D array holding one
        Axes per element of ``data``.
    """
    # squeeze=False makes subplots always return a 2-D Axes array; without it
    # a single-panel call returns a bare Axes and indexing it fails. Row 0 is
    # the same 1-D array multi-panel callers received before.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    axes = axes[0]
    for ax, image in zip(axes, data):
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none')
    return fig, axes

#### Setup hparams

In [None]:
# Build the hyperparameter object with the project's create_hparams().
hparams = create_hparams()
# Override the sampling rate; this value is later passed to mel_to_audio as
# `sr` and to the audio player as `rate`.
hparams["sampling_rate"] = 22050

#### Load model from checkpoint

In [None]:
# Path to the trained checkpoint; edit this to point at your own file.
checkpoint_path = "tacotron2_ljspeech_50k.pt"
# Build the model from hparams, then restore the weights stored under the
# checkpoint's 'state_dict' key.
model = load_model(hparams)
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
# Move the model to the GPU, switch it to eval mode and cast it to half
# precision. Assigning to `_` presumably just keeps the notebook from echoing
# the returned module.
_ = model.cuda().eval().half()

#### Prepare text input

In [None]:
# Sentence to synthesize; edit this to your own text.
text = "Hello, my name is Marvin"
# Encode the text with the 'english_cleaners' cleaner and prepend an axis of
# size 1 so the array has a leading batch dimension.
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
# torch.autograd.Variable is deprecated: tensors carry autograd state
# themselves, so the tensor is moved to the GPU and cast to int64 directly.
sequence = torch.from_numpy(sequence).cuda().long()

#### Decode text input and plot results

In [None]:
# No gradients are needed for inference, so run the model under no_grad to
# avoid building (and holding in GPU memory) an autograd graph.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)

# Convert each output to a float32 NumPy array on the CPU and take the first
# batch item ([0]); the alignment matrix is transposed before plotting.
fig, axes = plot_data((mel_outputs.float().data.cpu().numpy()[0],
                       mel_outputs_postnet.float().data.cpu().numpy()[0],
                       alignments.float().data.cpu().numpy()[0].T))

# plt.savefig("generated_spectrograms.png")
plt.show()

#### Synthesize audio using Griffin-Lim algorithm

In [None]:
from librosa.feature.inverse import mel_to_audio
from scipy.io.wavfile import write

# Post-net mel output of the first batch item, as a float32 NumPy array.
mel_spectrogram = mel_outputs_postnet.float().data.cpu().numpy()[0]

# Invert the mel spectrogram to a waveform with librosa's mel_to_audio
# (Griffin-Lim phase reconstruction).
# NOTE(review): by default mel_to_audio treats M as a linear-scale power mel
# spectrogram (power=2.0). If the model emits log-compressed mels, they would
# need to be decompressed first -- confirm against the training pipeline.
# NOTE(review): with n_fft commented out, librosa's default FFT size is used
# instead of hparams["filter_length"] -- confirm this is intentional.
audio = mel_to_audio(
    M = mel_spectrogram,
    sr = hparams["sampling_rate"],
    # n_fft = hparams["filter_length"],
    hop_length = hparams["hop_length"],
    win_length = hparams["win_length"],
    fmin = hparams["mel_fmin"],
    fmax = hparams["mel_fmax"]
)

# Build an inline audio player for the waveform; as the last expression in the
# cell it is what the notebook displays.
ipd.Audio(audio, rate=hparams["sampling_rate"])
# Uncomment to also save the waveform as a WAV file:
# write(filename="generated_audio.wav", data=audio, rate=hparams["sampling_rate"])