In [113]:
from data_utils import TextMelLoader, TextMelCollate

from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader
from utils import load_filepaths_and_text
from layers import TacotronSTFT
from text import text_to_sequence
from text import cleaners as cl
import stft
import random
import torch
import numpy as np

In [28]:
# Hyper-parameters shared by the cells below; they are read with hparams['<key>'].
hparams = dict(
    # text
    text_cleaners=['english_cleaners'],
    # audio
    max_wav_value=32768.0,
    sampling_rate=22050,
    load_mel_from_disk=False,
    # STFT / mel settings (passed to TacotronSTFT further down)
    filter_length=1024,
    hop_length=256,
    win_length=1024,
    n_mel_channels=80,
    mel_fmin=0.0,
    mel_fmax=8000.0,
    # seed used for random.seed() before shuffling the filelist
    seed=123,
)

# Filelists with the training / validation entries (paths relative to the working directory).
training_files = 'filelists/ljs_audio_text_train_filelist.txt'
validation_files = 'filelists/ljs_audio_text_val_filelist.txt'

def _get_hparam(hparams, name, *default):
    """Look up hyper-parameter `name` on a dict or on an attribute-style object.

    At most one extra positional argument may be given; it is returned when
    the setting is absent. Without it, a missing setting raises KeyError
    (dict) or AttributeError (object).
    """
    if isinstance(hparams, dict):
        if default:
            return hparams.get(name, default[0])
        return hparams[name]
    return getattr(hparams, name, *default)


def prepare_dataloaders(training_files,validation_files,hparams):
    """Build the training DataLoader, the validation dataset and the collate function.

    Args:
        training_files: filelist handed to TextMelLoader for the training set.
        validation_files: filelist handed to TextMelLoader for the validation set.
        hparams: hyper-parameters, either a dict (as defined in this notebook)
            or an object exposing the same settings as attributes. The
            settings read here are `distributed_run` (treated as False when
            absent) and `batch_size` (required). The object is also passed
            unchanged to TextMelLoader and TextMelCollate.

    Returns:
        (train_loader, valset, collate_fn) -- note that the validation set is
        returned as a dataset, not wrapped in a DataLoader.

    Raises:
        KeyError / AttributeError: if `batch_size` is missing from `hparams`.
    """
    # Get data, data loaders and collate function ready
    trainset = TextMelLoader(training_files, hparams)
    valset = TextMelLoader(validation_files, hparams)
    collate_fn = TextMelCollate(hparams)

    # DataLoader does not allow shuffle=True together with a sampler, so
    # shuffling is only requested when no DistributedSampler is used.
    if _get_hparam(hparams, 'distributed_run', False):
        train_sampler = DistributedSampler(trainset)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True

    train_loader = DataLoader(trainset, num_workers=1, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=_get_hparam(hparams, 'batch_size'),
                              pin_memory=False,
                              drop_last=True, collate_fn=collate_fn)
    return train_loader, valset, collate_fn

In [115]:
"""
    1) loads audio,text pairs
    2) normalizes text and converts them to sequences of one-hot vectors
    3) computes mel-spectrograms from audio files.
"""

input_pat_mels = "E:\Github\F-voice\F-VOICE\Mel_creator\wavs_mel\LJ001-0001.npy"

audiopaths_and_text = load_filepaths_and_text(training_files)
stft = TacotronSTFT(
            hparams['filter_length'], hparams['hop_length'], hparams['win_length'],
            hparams['n_mel_channels'], hparams['sampling_rate'], hparams['mel_fmin'],
            hparams['mel_fmax'])  
random.seed(hparams['seed'])
random.shuffle(audiopaths_and_text)
# extract the text and wav paths
archives_wav = [wav[0] for wav in audiopaths_and_text]
text = [text[1] for text in audiopaths_and_text]
audiopaths_and_text = [archives_wav] + [text]
text_norm  = torch.IntTensor(text_to_sequence(audiopaths_and_text[1][0], ['english_cleaners']))
#mel = torch.from_numpy(np.load(audiopaths_and_text[0][0]))
mel = torch.from_numpy(np.load(input_pat_mels))
#assert mel.size(0) == hparams['n_mel_channels'], (
#                'Mel dimension mismatch: given {}, expected {}'.format(
#                    mel.size(0), hparams['n_mel_channels']))


  input_pat_mels = "E:\Github\F-voice\F-VOICE\Mel_creator\wavs_mel\LJ001-0001.npy"


In [96]:
# Toy data: each entry is a [wav path, transcript] pair.
lista_original = [
    ['DUMMY/LJ021-0092.wav', 'first, the legislative or policy making function;'],
    ['DUMMY/LJ017-0124.wav', 'He frequently declared before and during the trial that it would be impossible to find him guilty.'],
]

# Extract the .wav files and the text: transpose the pairs into two parallel lists.
archivos_wav, texto = (list(columna) for columna in zip(*lista_original))

print("Archivos .wav:", archivos_wav)
print("Texto:", texto)

# Concatenate both lists (paths first, then texts) and show the second item.
a = [*archivos_wav, *texto]
a[1]

Archivos .wav: ['DUMMY/LJ021-0092.wav', 'DUMMY/LJ017-0124.wav']
Texto: ['first, the legislative or policy making function;', 'He frequently declared before and during the trial that it would be impossible to find him guilty.']


'DUMMY/LJ017-0124.wav'

In [None]:
lista_original = audiopaths_and_text

# Walk every inner list in order and wrap each of its items in its own
# one-element list, producing a single flat list of singletons.
lista_dividida = [
    [valor]
    for grupo in lista_original
    for valor in grupo
]

print(lista_dividida)