In [6]:
from pathlib import Path
from tqdm import tqdm

from common.text.unidecoder import _homoglyphs, _replacements


def check_text(s, homoglyphs=False) -> bool:
    """Tell whether every character of ``s`` is plain or can be mapped.

    A character whose code point is below 127 is accepted unchanged. Any
    other character is looked up in the ``_homoglyphs`` and ``_replacements``
    tables and must resolve to something other than ``None``.

    Args:
        s: Text to validate; it is walked character by character.
        homoglyphs: If true, ``_homoglyphs`` is consulted first and
            ``_replacements`` serves as the fallback; otherwise the two
            tables swap roles.

    Returns:
        ``False`` if any character resolves to ``None``, ``True`` otherwise
        (an empty ``s`` yields ``True``).
    """
    def _resolve(char):
        # Code points below 127 never touch the lookup tables.
        if ord(char) < 127:
            return char
        if homoglyphs:
            return _homoglyphs.get(char, _replacements.get(char, None))
        return _replacements.get(char, _homoglyphs.get(char, None))

    # all() stops at the first unmappable character, like the early return
    # of an explicit loop would.
    return all(_resolve(char) is not None for char in s)


# Build per-speaker file lists from meta.txt.
#
# Each non-empty line of meta.txt is split on '|'. The first field is used as
# the wav path, the second-to-last as the text and the last as the speaker tag.
# Two files are written per speaker into `filelists/`:
#   <speaker>_audio_text.txt        wav path made relative to `hifi_path`,
#                                   speaker tag replaced by the numeric id
#   <speaker>_audio_pitch_text.txt  same, with a pitch path inserted as the
#                                   second field

# Speaker tag -> (speaker name, numeric speaker id).
# 'lj' only carries a bare id string and has no list in `speakers_data`, so a
# line tagged 'lj' cannot be routed by this cell; such lines are reported and
# skipped below instead of crashing on tuple unpacking.
speakers = {
    'lj': "0",
    '9017': ('John_Van_Stan', 1),
    '6097': ('Phil_Benson', 2),
    '92': ('Cori_Samuel', 3)
}


speakers_data = {'John_Van_Stan': [], 'Phil_Benson': [], 'Cori_Samuel': []}

hifi_path = Path('/root/datasets/')
filelists_path = Path('filelists')
audio_folder_path = Path('hi_fi_tts_v0') / 'audio'
pitch_folder_path = Path('hi_fi_tts_v0/pitch')

meta = []
# Explicit UTF-8: check_text() inspects non-ASCII characters, so the decoding
# must not depend on the platform's default encoding.
with open('meta.txt', encoding='utf-8') as meta_file:
    meta_lines = meta_file.readlines()

for line in tqdm(meta_lines):
    line = line.strip()
    if len(line) == 0:
        continue
    parts = line.split('|')
    if len(parts) < 2:
        # Without a text and a speaker column parts[-2] would raise.
        print(f'Malformed line: {line}')
        continue
    text = parts[-2]
    if not check_text(text):
        print(f'Problem text: {text}')
        continue
    speaker_str = parts[-1]
    speaker_entry = speakers.get(speaker_str)
    if not isinstance(speaker_entry, tuple):
        # Unknown tag, or an entry (like 'lj') that is not a (name, id) pair.
        print(f'Unsupported speaker: {speaker_str}')
        continue
    speaker_name, speaker_id = speaker_entry
    # relative_to() raises ValueError when the wav is not under hifi_path.
    wav_path = Path(parts[0]).relative_to(hifi_path)
    parts[0] = str(wav_path)
    parts[-1] = str(speaker_id)
    speakers_data[speaker_name].append('|'.join(parts) + '\n')

# The output folder may not exist on a fresh checkout.
filelists_path.mkdir(parents=True, exist_ok=True)

for speaker_name, speaker_lines in speakers_data.items():
    list_path = filelists_path / f'{speaker_name}_audio_text.txt'
    with open(list_path, 'w', encoding='utf-8') as list_file:
        list_file.writelines(speaker_lines)

for speaker_name, speaker_lines in speakers_data.items():
    new_lines = []
    for line in speaker_lines:
        parts = line.strip().split('|')
        audio_path = Path(parts[0])
        # Doubles as a sanity check: raises ValueError if the audio file is
        # not located under `audio_folder_path`.
        rel_path = audio_path.relative_to(audio_folder_path)
        # NOTE(review): only the file stem is kept, so any sub-folders of
        # rel_path are dropped and equal stems in different folders map to
        # the same pitch file - confirm the pitch folder really is flat.
        pitch_path = pitch_folder_path / f'{rel_path.stem}.pt'
        parts.insert(1, str(pitch_path))
        new_lines.append('|'.join(parts) + '\n')

    list_path = filelists_path / f'{speaker_name}_audio_pitch_text.txt'
    with open(list_path, 'w', encoding='utf-8') as list_file:
        list_file.writelines(new_lines)

  0%|          | 0/126439 [00:00<?, ?it/s]

100%|██████████| 126439/126439 [00:00<00:00, 127579.59it/s]


In [7]:
import numpy as np

# Split every per-speaker list into a validation part (the first
# HIFI_VAL_SIZE lines after shuffling) and a training part (the rest). The
# results are written next to the input as <stem>_val.txt / <stem>_train.txt.
list_paths = [
    'filelists/Cori_Samuel_audio_pitch_text.txt',
    'filelists/John_Van_Stan_audio_pitch_text.txt',
    'filelists/Phil_Benson_audio_pitch_text.txt'
]

# Number of lines held out for validation per list.
HIFI_VAL_SIZE = 100
# Fixed seed: without it every run of the cell produces a different split, so
# validation lines of one run end up in the training list of the next.
SPLIT_SEED = 1234

for list_path in list_paths:
    list_path = Path(list_path)
    with open(list_path, 'r', encoding='utf-8') as list_file:
        # Drop blank lines and make every kept line end with exactly one '\n'.
        lines = [line.strip() + '\n' for line in list_file if len(line.strip()) > 0]

    # A fresh generator per file makes each split depend only on that file's
    # content, not on how many other lists were processed before it.
    np.random.default_rng(SPLIT_SEED).shuffle(lines)
    val_lines = lines[:HIFI_VAL_SIZE]
    train_lines = lines[HIFI_VAL_SIZE:]

    # The input handle is closed at this point; distinct handle names avoid
    # shadowing it while writing.
    train_list_path = list_path.parent / f'{list_path.stem}_train.txt'
    with open(train_list_path, 'w', encoding='utf-8') as train_file:
        train_file.writelines(train_lines)

    val_list_path = list_path.parent / f'{list_path.stem}_val.txt'
    with open(val_list_path, 'w', encoding='utf-8') as val_file:
        val_file.writelines(val_lines)

In [3]:
from pathlib import Path
import shutil

# Copy the Stephan metadata file into the file-list folder as
# 'stephan_audio_text.txt'. The copy call stays the last expression of the
# cell so the destination path is still displayed as the cell output.
folder = Path('filelists')
stephan_list_path = folder / 'stephan_audio_text.txt'

shutil.copy('/home/server2/datasets/Stephan/meta.txt', stephan_list_path)

PosixPath('filelists/stephan_audio_text.txt')

In [3]:
import numpy as np

# Shuffle the Stephan list and cut it into validation, test and training
# parts: the first STEPHAN_VAL_SIZE lines, the next STEPHAN_TEST_SIZE lines,
# and everything that is left.
STEPHAN_VAL_SIZE = 50
STEPHAN_TEST_SIZE = 250
# Fixed seed so that re-running the cell reproduces the same three lists.
SPLIT_SEED = 1234

with open('filelists/stephan_audio_text.txt', 'r', encoding='utf-8') as file:
    # Blank lines are dropped. Every kept line is forced to end with '\n':
    # if the last line of the file has no trailing newline it would otherwise
    # be glued to its neighbour once the shuffled lines are written back.
    data = [line.rstrip('\n') + '\n' for line in file if len(line.strip()) > 0]
np.random.default_rng(SPLIT_SEED).shuffle(data)

val_data = data[:STEPHAN_VAL_SIZE]
test_data = data[STEPHAN_VAL_SIZE:STEPHAN_VAL_SIZE + STEPHAN_TEST_SIZE]
train_data = data[STEPHAN_VAL_SIZE + STEPHAN_TEST_SIZE:]

with open('filelists/stephan_audio_text_val.txt', 'w', encoding='utf-8') as file:
    file.writelines(val_data)
with open('filelists/stephan_audio_text_test.txt', 'w', encoding='utf-8') as file:
    file.writelines(test_data)
with open('filelists/stephan_audio_text_train.txt', 'w', encoding='utf-8') as file:
    file.writelines(train_data)

In [4]:
from pathlib import Path

# (input list, output list) pairs. Each output repeats its input with a pitch
# path inserted as the second '|'-separated field.
l_files = [
    ('filelists/stephan_audio_text_test.txt', 'filelists/stephan_audio_pitch_text_test.txt'),
    ('filelists/stephan_audio_text_val.txt', 'filelists/stephan_audio_pitch_text_val.txt'),
    ('filelists/stephan_audio_text_train.txt', 'filelists/stephan_audio_pitch_text_train.txt')
]

for src_list, dst_list in l_files:
    with open(src_list, 'r') as src:
        raw_lines = src.readlines()

    augmented = []
    for raw in raw_lines:
        stripped = raw.strip()
        if not stripped:
            # Blank lines are not carried over.
            continue
        fields = stripped.split('|')
        # The pitch file shares the wav file's stem and lives in 'pitch/'.
        pitch_file = Path('pitch') / f'{Path(fields[0]).stem}.pt'
        fields[1:1] = [str(pitch_file)]
        augmented.append('|'.join(fields) + '\n')

    with open(dst_list, 'w') as dst:
        dst.writelines(augmented)


In [8]:
# Output list -> input lists to concatenate into it. For every input, 'root'
# is prepended to the first two '|'-separated fields of each line (read below
# as the wav path and the pitch path).
merge_data = {
    'filelists/train.txt': [
        {
            'root': 'LJSpeech-1.1',
            'list_file': 'filelists/ljs_audio_pitch_text_train_v3.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/John_Van_Stan_audio_pitch_text_train.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/Phil_Benson_audio_pitch_text_train.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/Cori_Samuel_audio_pitch_text_train.txt'
        }
    ],
    'filelists/val.txt': [
        {
            'root': 'LJSpeech-1.1',
            'list_file': 'filelists/ljs_audio_pitch_text_val.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/John_Van_Stan_audio_pitch_text_val.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/Phil_Benson_audio_pitch_text_val.txt'
        },
        {
            'root': '.',
            'list_file': 'filelists/Cori_Samuel_audio_pitch_text_val.txt'
        }
    ],
}

for file_name, m_data in merge_data.items():
    new_data = []
    for sub_m_data in m_data:
        root_folder = Path(sub_m_data['root'])
        with open(sub_m_data['list_file'], 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if len(line) == 0:
                    # A blank (e.g. trailing) line splits into [''] and would
                    # make the parts[1] access below raise IndexError.
                    continue
                parts = line.split('|')
                parts[0] = str(root_folder / Path(parts[0]))
                parts[1] = str(root_folder / Path(parts[1]))
                new_data.append('|'.join(parts) + '\n')
    with open(file_name, 'w', encoding='utf-8') as file:
        file.writelines(new_data)
        