In [1]:
# Clone NVIDIA's Tacotron 2 repository into ./tacotron2 (relative to the kernel's working directory).
!git clone https://github.com/NVIDIA/tacotron2.git

Cloning into 'tacotron2'...
remote: Enumerating objects: 403, done.[K
remote: Total 403 (delta 0), reused 0 (delta 0), pack-reused 403[K
Receiving objects: 100% (403/403), 2.69 MiB | 4.76 MiB/s, done.
Resolving deltas: 100% (204/204), done.


In [3]:
%%bash
cd tacotron2
git submodule init; git submodule update

Couldn't find program: 'bash'


In [4]:
!pip install gdown

Collecting gdown
  Downloading gdown-3.12.2.tar.gz (8.2 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Building wheels for collected packages: gdown
  Building wheel for gdown (PEP 517): started
  Building wheel for gdown (PEP 517): finished with status 'done'
  Created wheel for gdown: filename=gdown-3.12.2-py3-none-any.whl size=9684 sha256=24a2fc3fa590d218c268363a00863ed70b07e7f998c589d12b68aaf798f90985
  Stored in directory: c:\users\sebbi\appdata\local\pip\cache\wheels\e2\62\1e\926d1ebe7b1e733c78d627fd288d01b83feaf67efc06e0e4c3
Successfully built gdown
Installing collected packages: gdown
Successfully installed gdown-3.12.2


In [1]:
import os

# Root of the tacotron2 checkout -- CHANGE THIS to match your machine.
datadir = '/datadir/tacotron2'
# Work from inside the checkout so the relative paths used later
# (e.g. filelists/*.txt) resolve. Reusing `datadir` means the path only has
# to be edited in one place.
os.chdir(datadir)
outdir = datadir + '/outdir'    # later used as the checkpoint output directory
logsdirtemp = outdir + '/logs'  # logs sub-directory of outdir

In [13]:
# Replace the DUMMY placeholder with `wavs` in every filelists/*.txt file
# (relative to the current working directory). Done in Python instead of
# `sed -i` so the cell does not require a Unix shell; the files are handled
# as raw bytes so encodings and line endings are left exactly as they were.
import glob

for filelist_path in glob.glob('filelists/*.txt'):
    with open(filelist_path, 'rb') as filelist:
        contents = filelist.read()
    with open(filelist_path, 'wb') as filelist:
        filelist.write(contents.replace(b'DUMMY', b'wavs'))

In [2]:
# Sanity check: show lines 6-10 of the training filelist.
# Requires `datadir` from the path configuration cell to be defined.
import itertools

train_filelist_path = datadir + "/filelists/filelist_train.txt"
with open(train_filelist_path) as f:
    lines = list(itertools.islice(f, 5, 10))
    print(lines)

NameError: name 'datadir' is not defined

In [12]:
%%bash
pip install  numpy==1.17.2

Collecting numpy==1.17.2
  Downloading numpy-1.17.2-cp36-cp36m-manylinux1_x86_64.whl (20.4 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.17.0
    Uninstalling numpy-1.17.0:
      Successfully uninstalled numpy-1.17.0
Successfully installed numpy-1.17.2


In [2]:
import os
os.chdir('/datadir/tacotron2') # you might need/want to change this
import time
import argparse
import math
from numpy import finfo

import torch
import random
import matplotlib.pylab as plt

from distributed import apply_gradient_allreduce
import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader

from model import Tacotron2
from data_utils import TextMelLoader, TextMelCollate
from loss_function import Tacotron2Loss
from logger import Tacotron2Logger
from hparams import create_hparams

def reduce_tensor(tensor, n_gpus):
    """Sum a tensor across the default process group and divide by n_gpus.

    Params
    ------
    tensor (torch.Tensor): value to reduce; a clone is reduced, so the
        argument itself is left unmodified.
    n_gpus (int): divisor applied to the summed tensor.

    Returns
    -------
    torch.Tensor: new tensor holding the all-reduced sum divided by n_gpus.
    """
    rt = tensor.clone()
    # `dist.ReduceOp` is the supported spelling; `dist.reduce_op` is a
    # deprecated alias for it.
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= n_gpus
    return rt

def init_distributed(hparams, n_gpus, rank, group_name):
    """Select this process's GPU and join the distributed process group.

    Params
    ------
    hparams (object): provides `dist_backend` and `dist_url`.
    n_gpus (int): passed to the process group as its world size.
    rank (int): rank of this process; also used to pick the CUDA device.
    group_name: forwarded unchanged to `init_process_group`.

    Raises
    ------
    AssertionError: if CUDA is not available.
    """
    assert torch.cuda.is_available(), "Distributed mode requires CUDA."
    print("Initializing Distributed")

    # Device index is the rank modulo the number of visible CUDA devices.
    device_index = rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)

    # Join the process group described by the hyperparameters.
    dist.init_process_group(
        backend=hparams.dist_backend,
        init_method=hparams.dist_url,
        world_size=n_gpus,
        rank=rank,
        group_name=group_name,
    )

    print("Done initializing distributed")
    
    
def prepare_dataloaders(hparams):
    """Build the training DataLoader and the objects validation needs.

    Params
    ------
    hparams (object): read for `training_files`, `validation_files`,
        `n_frames_per_step`, `distributed_run` and `batch_size`.

    Returns
    -------
    (train_loader, valset, collate_fn): the training DataLoader, the
    validation dataset (not yet wrapped in a loader) and the collate
    function shared by both.
    """
    trainset = TextMelLoader(hparams.training_files, hparams)
    valset = TextMelLoader(hparams.validation_files, hparams)
    collate_fn = TextMelCollate(hparams.n_frames_per_step)

    # A DistributedSampler is used for distributed runs, in which case the
    # loader itself must not shuffle; otherwise the loader shuffles.
    distributed = hparams.distributed_run
    train_sampler = DistributedSampler(trainset) if distributed else None
    shuffle = not distributed

    loader_options = dict(
        num_workers=1,
        shuffle=shuffle,
        sampler=train_sampler,
        batch_size=hparams.batch_size,
        pin_memory=False,
        drop_last=True,
        collate_fn=collate_fn,
    )
    train_loader = DataLoader(trainset, **loader_options)
    return train_loader, valset, collate_fn


def prepare_directories_and_logger(output_directory, log_directory, rank):
    """Create the output directory and logger on rank 0 only.

    Params
    ------
    output_directory (string): created (with mode 0o775) if it is missing.
    log_directory (string): joined onto output_directory for the logger.
    rank (int): only rank 0 touches the filesystem and gets a logger.

    Returns
    -------
    Tacotron2Logger on rank 0, None on every other rank.
    """
    if rank != 0:
        return None

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)

    log_path = os.path.join(output_directory, log_directory)
    return Tacotron2Logger(log_path)

def load_model(hparams):
    """Instantiate Tacotron2 on the GPU according to the hyperparameters.

    Params
    ------
    hparams (object): passed to the Tacotron2 constructor; `fp16_run` and
        `distributed_run` are also read here.

    Returns
    -------
    The model, wrapped by `apply_gradient_allreduce` when
    `hparams.distributed_run` is set.
    """
    model = Tacotron2(hparams).cuda()

    if hparams.fp16_run:
        # Use the smallest float16 value as the attention score mask value.
        fp16_min = finfo('float16').min
        model.decoder.attention_layer.score_mask_value = fp16_min

    if not hparams.distributed_run:
        return model
    return apply_gradient_allreduce(model)


def warm_start_model(checkpoint_path, model, ignore_layers):
    """Load model weights from a checkpoint, optionally skipping some layers.

    Params
    ------
    checkpoint_path (string): file holding a dict with a 'state_dict' entry.
    model: module whose weights are overwritten in place.
    ignore_layers (sequence of string): state-dict keys that are NOT taken
        from the checkpoint; the model keeps its current values for them.

    Returns
    -------
    The same `model` object, after loading.

    Raises
    ------
    AssertionError: if checkpoint_path is not an existing file.
    """
    assert os.path.isfile(checkpoint_path)
    print("Warm starting model from checkpoint '{}'".format(checkpoint_path))
    weights = torch.load(checkpoint_path, map_location='cpu')['state_dict']

    if len(ignore_layers) > 0:
        # Start from the model's current weights and overlay every
        # checkpoint entry that is not explicitly ignored.
        merged = model.state_dict()
        for name, tensor in weights.items():
            if name not in ignore_layers:
                merged[name] = tensor
        weights = merged

    model.load_state_dict(weights)
    return model


def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore model and optimizer state from a training checkpoint.

    Params
    ------
    checkpoint_path (string): file holding a dict with the keys
        'state_dict', 'optimizer', 'learning_rate' and 'iteration'.
    model: module to load the saved weights into (in place).
    optimizer: optimizer to load the saved state into (in place).

    Returns
    -------
    (model, optimizer, learning_rate, iteration)

    Raises
    ------
    AssertionError: if checkpoint_path is not an existing file.
    """
    assert os.path.isfile(checkpoint_path)
    print("Loading checkpoint '{}'".format(checkpoint_path))

    saved = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(saved['state_dict'])
    optimizer.load_state_dict(saved['optimizer'])
    learning_rate, iteration = saved['learning_rate'], saved['iteration']

    message = "Loaded checkpoint '{}' from iteration {}"
    print(message.format(checkpoint_path, iteration))
    return model, optimizer, learning_rate, iteration


def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    """Write model and optimizer state plus training progress to filepath.

    The saved object is a dict with the keys 'iteration', 'state_dict',
    'optimizer' and 'learning_rate'.
    """
    announcement = "Saving model and optimizer state at iteration {} to {}"
    print(announcement.format(iteration, filepath))

    snapshot = {
        'iteration': iteration,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'learning_rate': learning_rate,
    }
    torch.save(snapshot, filepath)
    
    
def validate(model, criterion, valset, iteration, batch_size, n_gpus,
             collate_fn, logger, distributed_run, rank):
    """Handles all the validation scoring and printing

    Params
    ------
    model: network under training; needs `parse_batch` and is switched to
        eval mode for scoring and back to train mode afterwards.
    criterion (callable): loss called as criterion(y_pred, y).
    valset: validation dataset, wrapped in a DataLoader here.
    iteration (int): current training iteration, used for printing/logging.
    batch_size (int): validation batch size.
    n_gpus (int): divisor used when averaging the loss across processes.
    collate_fn (callable): collate function for the DataLoader.
    logger: object with `log_validation`; only used on rank 0.
    distributed_run (bool): use a DistributedSampler and all-reduce the loss.
    rank (int): only rank 0 prints, logs and plots.

    If the validation loader yields no batches, nothing is logged or
    plotted (there is no loss to average and no prediction to draw).
    """
    model.eval()
    with torch.no_grad():
        val_sampler = DistributedSampler(valset) if distributed_run else None
        val_loader = DataLoader(valset, sampler=val_sampler, num_workers=1,
                                shuffle=False, batch_size=batch_size,
                                pin_memory=False, collate_fn=collate_fn)

        val_loss = 0.0
        num_batches = 0
        for batch in val_loader:
            x, y = model.parse_batch(batch)
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if distributed_run:
                reduced_val_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_val_loss = loss.item()
            val_loss += reduced_val_loss
            num_batches += 1

    model.train()

    if num_batches == 0:
        # Averaging over zero batches is impossible and `y` / `y_pred` were
        # never assigned, so bail out instead of raising UnboundLocalError.
        if rank == 0:
            print("Validation skipped at iteration {}: "
                  "no validation batches".format(iteration))
        return
    val_loss = val_loss / num_batches

    if rank == 0:
        print("Validation loss {}: {:9f}  ".format(iteration, val_loss))
        # `y` and `y_pred` are those of the last validation batch.
        logger.log_validation(val_loss, model, y, y_pred, iteration)
        # Draw the alignment of one randomly chosen sample of that batch.
        # plot_alignment activates the inline backend itself before drawing,
        # so no line magic is needed in this function.
        _, _, _, alignments = y_pred
        idx = random.randint(0, alignments.size(0) - 1)
        plot_alignment(alignments[idx].data.cpu().numpy().T)
        
def plot_alignment(alignment, info=None):
    """Render an alignment matrix as a heat map in the notebook.

    Params
    ------
    alignment: 2-D array handed to `imshow`.
    info (string, optional): extra text appended below the x-axis label.

    The figure size comes from the module-level `alignment_graph_width` and
    `alignment_graph_height` values, each divided by 100 and truncated to int.
    """
    # Plain-Python equivalent of the `%matplotlib inline` line magic, so the
    # function body is valid Python. `get_ipython` only exists under IPython;
    # elsewhere the current backend is left unchanged.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    fig, ax = plt.subplots(figsize=(int(alignment_graph_width/100), int(alignment_graph_height/100)))
    im = ax.imshow(alignment, cmap='inferno', aspect='auto', origin='lower',
                   interpolation='none')
    ax.autoscale(enable=True, axis="y", tight=True)
    fig.colorbar(im, ax=ax)
    xlabel = 'Decoder timestep'
    if info is not None:
        xlabel += '\n\n' + info
    plt.xlabel(xlabel)
    plt.ylabel('Encoder timestep')
    plt.tight_layout()
    fig.canvas.draw()
    plt.show()
    # This function is called on every validation pass; release the figure
    # explicitly so figures do not pile up when the backend's show() does
    # not close them.
    plt.close(fig)
def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus,
          rank, group_name, hparams):
    """Training and validation logging results to tensorboard and stdout

    Params
    ------
    output_directory (string): directory to save checkpoints
    log_directory (string): directory to save tensorboard logs
    checkpoint_path (string or None): checkpoint to start from; None trains
        from scratch
    warm_start (bool): if true, only model weights are loaded from the
        checkpoint (minus hparams.ignore_layers); otherwise model, optimizer
        and iteration counter are all restored
    n_gpus (int): number of gpus
    rank (int): rank of current gpu
    group_name: forwarded to init_distributed for distributed runs
    hparams (object): hyperparameter container; attributes such as
        distributed_run, fp16_run, seed, learning_rate, weight_decay,
        epochs, grad_clip_thresh and iters_per_checkpoint are read here
    """
    if hparams.distributed_run:
        init_distributed(hparams, n_gpus, rank, group_name)

    # Seed both the CPU and CUDA generators.
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    model = load_model(hparams)
    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=hparams.weight_decay)

    if hparams.fp16_run:
        # apex is only imported when mixed precision is requested; `amp` is
        # then also used further down inside the training loop.
        from apex import amp
        model, optimizer = amp.initialize(
            model, optimizer, opt_level='O2')

    # NOTE(review): load_model() already applies apply_gradient_allreduce
    # when distributed_run is set, so the model appears to be wrapped twice
    # here -- confirm this is intended.
    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    criterion = Tacotron2Loss()

    # logger is None on every rank other than 0.
    logger = prepare_directories_and_logger(
        output_directory, log_directory, rank)

    train_loader, valset, collate_fn = prepare_dataloaders(hparams)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0
    if checkpoint_path is not None:
        if warm_start:
            # Weights only: iteration and epoch_offset stay at 0.
            model = warm_start_model(
                checkpoint_path, model, hparams.ignore_layers)
        else:
            model, optimizer, _learning_rate, iteration = load_checkpoint(
                checkpoint_path, model, optimizer)
            if hparams.use_saved_learning_rate:
                learning_rate = _learning_rate
            iteration += 1  # next iteration is iteration + 1
            # Resume at the epoch the restored iteration falls into.
            epoch_offset = max(0, int(iteration / len(train_loader)))

    model.train()
    is_overflow = False
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            start = time.perf_counter()
            # Re-apply the learning rate to every parameter group each step.
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            model.zero_grad()
            x, y = model.parse_batch(batch)
            y_pred = model(x)

            loss = criterion(y_pred, y)
            # reduced_loss is a Python float used only for reporting.
            if hparams.distributed_run:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()
            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Clip gradients; in fp16 mode a NaN gradient norm is treated as
            # an overflow and suppresses logging/checkpointing for this step.
            if hparams.fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), hparams.grad_clip_thresh)
                is_overflow = math.isnan(grad_norm)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hparams.grad_clip_thresh)

            # NOTE(review): the optimizer step is taken even when
            # is_overflow is true -- confirm this is intended.
            optimizer.step()

            if not is_overflow and rank == 0:
                duration = time.perf_counter() - start
                print("Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".format(
                    iteration, reduced_loss, grad_norm, duration))
                logger.log_training(
                    reduced_loss, grad_norm, learning_rate, duration, iteration)

            # Validate and checkpoint every iters_per_checkpoint iterations;
            # this condition is also true at iteration 0.
            if not is_overflow and (iteration % hparams.iters_per_checkpoint == 0):
                validate(model, criterion, valset, iteration,
                         hparams.batch_size, n_gpus, collate_fn, logger,
                         hparams.distributed_run, rank)
                if rank == 0:
                    # Rebinds the checkpoint_path parameter to the file
                    # written for this iteration.
                    checkpoint_path = os.path.join(
                        output_directory, "checkpoint_{}".format(iteration))
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
            
            
# Size settings for the alignment plot: plot_alignment() divides each value
# by 100 and passes the results as the figsize of the figure it creates.
alignment_graph_height = 600
alignment_graph_width = 1000

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()
  File "/opt/conda/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_fore

In [4]:
# Hyperparameter configuration: start from the repository defaults returned
# by create_hparams() and override individual fields below.
from text import symbols
# Run settings; these four names are assigned again in the cell that
# launches training.
warm_start=True
n_gpus=1
rank=0
group_name=None
hparams = create_hparams()
hparams.epochs=5000
hparams.iters_per_checkpoint=500
hparams.seed=1234
hparams.dynamic_loss_scaling=True
hparams.fp16_run=False
hparams.distributed_run=False
hparams.dist_backend="nccl"
hparams.dist_url="tcp://localhost:54321"
hparams.cudnn_enabled=True
hparams.cudnn_benchmark=False
# State-dict keys that warm_start_model() does NOT take from the checkpoint.
# Use an empty list (hparams.ignore_layers=[]) to load every layer.
hparams.ignore_layers=['embedding.weight']

################################
# Data Parameters              #
################################
hparams.load_mel_from_disk=False
# Paths are relative to the current working directory.
hparams.training_files = "filelists/filelist_train.txt"  # CHANGE THIS
hparams.validation_files = "filelists/filelist_validation.txt" # CHANGE THIS
# CHANGE THIS if training on a non-English dataset. Note that this alone is
# not sufficient: training on non-English data requires further changes;
# see the issues in the NVIDIA/tacotron2 GitHub repository for details.
hparams.text_cleaners=['english_cleaners']

################################
# Audio Parameters             #
################################
hparams.max_wav_value=32768.0
hparams.sampling_rate=22050
hparams.filter_length=1024
hparams.hop_length=256
hparams.win_length=1024
hparams.n_mel_channels=80
hparams.mel_fmin=0.0
hparams.mel_fmax=8000.0

################################
# Model Parameters             #
################################
hparams.n_symbols=len(symbols)
hparams.symbols_embedding_dim=512

# Encoder parameters
hparams.encoder_kernel_size=5
hparams.encoder_n_convolutions=3
hparams.encoder_embedding_dim=512

# Decoder parameters
hparams.n_frames_per_step=1  # currently only 1 is supported
hparams.decoder_rnn_dim=1024
hparams.prenet_dim=256
hparams.max_decoder_steps=1000
hparams.gate_threshold=0.5
hparams.p_attention_dropout=0.1
hparams.p_decoder_dropout=0.1

# Attention parameters
hparams.attention_rnn_dim=1024
hparams.attention_dim=128

# Location Layer parameters
hparams.attention_location_n_filters=32
hparams.attention_location_kernel_size=31

# Mel-post processing network parameters
hparams.postnet_embedding_dim=512
hparams.postnet_kernel_size=5
hparams.postnet_n_convolutions=5

################################
# Optimization Hyperparameters #
################################
hparams.use_saved_learning_rate=False
hparams.learning_rate=1e-3
hparams.weight_decay=1e-6
hparams.grad_clip_thresh=1.0
hparams.batch_size=32 # CHANGE THIS, you might need to change this depending on your GPU model
hparams.mask_padding=True  # set model's padded outputs to padded values



In [28]:
import scipy.io.wavfile as wav
# Sanity check of one training wav. scipy.io.wavfile.read returns
# (sample_rate, data) in that order, so unpack rate first and samples second.
rate, sig = wav.read("PLACEHOLDER") # CHANGE THIS, replace with path to one of your downsampled wavs to confirm everything looks good
print(rate) # must be 22050

22050.0
22050
15699
15699


In [1]:
# Apply the cuDNN switches chosen in hparams.
torch.backends.cudnn.enabled = hparams.cudnn_enabled
torch.backends.cudnn.benchmark = hparams.cudnn_benchmark

# Echo the run configuration, one "label value" pair per line.
print("\n".join(
    "{} {}".format(label, value) for label, value in (
        ("FP16 Run:", hparams.fp16_run),
        ("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling),
        ("Distributed Run:", hparams.distributed_run),
        ("cuDNN Enabled:", hparams.cudnn_enabled),
        ("cuDNN Benchmark:", hparams.cudnn_benchmark),
    )
))

# Requires `outdir` from the path configuration cell.
output_directory = outdir  # Location to save Checkpoints
log_directory = "{}/logs".format(outdir)  # Location to save Log files locally
model_filename = "nameless_hero_model"
# Checkpoint used to initialise the model before training starts.
checkpoint_path = "{}/{}".format(output_directory, model_filename)
warm_start, n_gpus, rank, group_name = True, 1, 0, None

train(
    output_directory=output_directory,
    log_directory=log_directory,
    checkpoint_path=checkpoint_path,
    warm_start=warm_start,
    n_gpus=n_gpus,
    rank=rank,
    group_name=group_name,
    hparams=hparams,
)

NameError: name 'hparams' is not defined