In [84]:
from data_utils_2 import TextMelLoader, TextMelCollate
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader

In [85]:

# Hyperparameters for this notebook. prepare_dataloaders reads the data/audio
# keys, 'distributed_run' and 'batch_size'; the remaining optimization keys are
# stored here but are not read by the code in this cell.
hparams = dict(
    # ---- Data / audio front-end ----
    # Training and validation point at the same filelist.
    training_files='filelists/5_wavs.txt',
    validation_files='filelists/5_wavs.txt',
    text_cleaners=['english_cleaners'],
    max_wav_value=32768.0,
    sampling_rate=22050,
    load_mel_from_disk=False,
    filter_length=1024,
    hop_length=256,
    win_length=1024,
    n_mel_channels=80,
    mel_fmin=0.0,
    mel_fmax=8000.0,
    seed=20,
    distributed_run=False,
    # ---- Optimization hyperparameters ----
    use_saved_learning_rate=False,
    learning_rate=1e-3,
    weight_decay=1e-6,
    grad_clip_thresh=1.0,
    # NOTE(review): the filelist name suggests only 5 utterances; because
    # prepare_dataloaders builds its DataLoader with drop_last=True, a batch
    # size above the dataset size yields zero training batches -- confirm.
    batch_size=64,
    mask_padding=True,  # set model's padded outputs to padded values
)


def _build_text_mel_dataset(filelist_key, hparams):
    """Construct a TextMelLoader for the filelist stored under `filelist_key`."""
    return TextMelLoader(hparams[filelist_key], hparams['text_cleaners'],
                         hparams['max_wav_value'], hparams['sampling_rate'],
                         hparams['load_mel_from_disk'], hparams['filter_length'],
                         hparams['hop_length'], hparams['win_length'],
                         hparams['n_mel_channels'], hparams['mel_fmin'],
                         hparams['mel_fmax'], hparams['seed'])


def prepare_dataloaders(hparams):
    """Build the training DataLoader, the validation dataset and the collate fn.

    Args:
        hparams: mapping indexed by string key. Reads 'training_files',
            'validation_files', 'text_cleaners', 'max_wav_value',
            'sampling_rate', 'load_mel_from_disk', 'filter_length',
            'hop_length', 'win_length', 'n_mel_channels', 'mel_fmin',
            'mel_fmax', 'seed', 'distributed_run' and 'batch_size'.

    Returns:
        A ``(train_loader, valset, collate_fn)`` tuple. Note that the second
        element is the validation *dataset* (a TextMelLoader), not a
        DataLoader; callers wrap it themselves, typically with ``collate_fn``.

    Raises:
        KeyError: if ``hparams`` is a dict and one of the keys above is missing.

    Warns:
        UserWarning: when the training loader would produce no batches at all,
            i.e. the (per-replica) number of samples is smaller than
            ``batch_size`` while ``drop_last=True``.
    """
    import warnings  # local import keeps this notebook cell self-contained

    # Both datasets share every setting except the filelist they read.
    trainset = _build_text_mel_dataset('training_files', hparams)
    valset = _build_text_mel_dataset('validation_files', hparams)
    collate_fn = TextMelCollate(n_frames_per_step=1)

    # DataLoader rejects shuffle=True together with an explicit sampler, so in
    # the distributed case ordering is delegated to DistributedSampler.
    if hparams['distributed_run']:
        train_sampler = DistributedSampler(trainset)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True

    train_loader = DataLoader(trainset, num_workers=1, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=hparams['batch_size'], pin_memory=False,
                              drop_last=True, collate_fn=collate_fn)

    # drop_last=True discards the trailing partial batch. If the dataset is
    # smaller than one batch, that partial batch is the only one, so a training
    # loop over this loader would silently do nothing. Surface it here.
    if len(train_loader) == 0:
        warnings.warn(
            "prepare_dataloaders: training DataLoader will yield no batches "
            "(batch_size={} exceeds the {} available training samples and "
            "drop_last=True); lower hparams['batch_size'] or add data.".format(
                hparams['batch_size'], len(trainset)),
            stacklevel=2)

    return train_loader, valset, collate_fn

In [86]:
# Unpack the (train_loader, valset, collate_fn) triple returned by
# prepare_dataloaders into the notebook-level names t, v and c.
t,v,c =prepare_dataloaders(hparams)
# Sanity check: process the first entry of the validation filelist. Per the
# recorded output this displays a pair of tensors (an int32 tensor followed by
# a float tensor) -- presumably the encoded text and its mel spectrogram.
v.get_mel_text_pair(v.audiopaths_and_text[0])

(tensor([57, 45, 42, 11, 46, 51, 59, 42, 51, 57, 46, 52, 51, 11, 52, 43, 11, 50,
         52, 59, 38, 39, 49, 42, 11, 50, 42, 57, 38, 49, 11, 49, 42, 57, 57, 42,
         55, 56, 11, 46, 51, 11, 57, 45, 42, 11, 50, 46, 41, 41, 49, 42, 11, 52,
         43, 11, 57, 45, 42, 11, 43, 46, 43, 57, 42, 42, 51, 57, 45, 11, 40, 42,
         51, 57, 58, 55, 62, 11, 50, 38, 62, 11, 47, 58, 56, 57, 49, 62, 11, 39,
         42, 11, 40, 52, 51, 56, 46, 41, 42, 55, 42, 41, 11, 38, 56, 11, 57, 45,
         42, 11, 46, 51, 59, 42, 51, 57, 46, 52, 51, 11, 52, 43, 11, 57, 45, 42,
         11, 38, 55, 57, 11, 52, 43, 11, 53, 55, 46, 51, 57, 46, 51, 44,  7],
        dtype=torch.int32),
 tensor([[-9.1835, -6.4719, -6.0837,  ..., -7.4548, -7.7138, -7.3676],
         [-6.7287, -6.1699, -6.1468,  ..., -6.8450, -7.2991, -6.9986],
         [-5.8363, -5.4046, -5.2813,  ..., -6.3080, -6.4373, -6.1320],
         ...,
         [-6.1044, -5.9024, -5.3051,  ..., -9.3646, -9.5347, -9.8833],
         [-5.6057, -5.5987, -