In [8]:
from pathlib import Path

import torch
import torchaudio

from model import commons
from params import Params
from text.convert import text_to_sequence
from model.synthesizer import SynthesizerTrn
from utils.checkpoint import load_checkpoint

In [5]:
def read_file(path: str):
    """Lazily yield the cleaned lines of a UTF-8 text file.

    Each line has its surrounding whitespace stripped first, and then every
    '.' character is removed. Blank lines are not filtered out; they are
    yielded as empty strings.

    Args:
        path: Location of the text file to read.

    Yields:
        One cleaned string per line, in file order.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        # The file stays open for as long as the generator is being consumed.
        yield from (raw.strip().replace('.', '') for raw in handle)

def get_text(text: str, text_cleaners: list[str], language: str):
    """Turn a piece of text into a tensor of symbol ids for the synthesizer.

    The text is converted with ``text_to_sequence``, the resulting sequence is
    passed through ``commons.intersperse`` with the value 0, and the outcome is
    wrapped in a ``torch.LongTensor``.

    Args:
        text: Input text to convert.
        text_cleaners: Cleaner names forwarded to ``text_to_sequence``.
        language: Language identifier forwarded to ``text_to_sequence``.

    Returns:
        A ``torch.LongTensor`` built from the interspersed sequence.
    """
    token_ids = text_to_sequence(text, text_cleaners, language)
    # NOTE(review): 0 is presumably the blank/padding symbol id — confirm
    # against the symbol table used by text_to_sequence.
    return torch.LongTensor(commons.intersperse(token_ids, 0))

In [6]:
# Locations of the model configuration and the generator checkpoint, kept as
# named constants so they can be changed in one place.
CONFIG_PATH = 'files/configs/lt.json'
CHECKPOINT_PATH = 'G_105000.pth'

# Parse the configuration; later cells read params.data.* from it.
params = Params.parse_file(CONFIG_PATH)

# Build the synthesizer from the parsed configuration and call eval() on it.
# The result is bound to `_` so the cell does not echo it.
net_g = SynthesizerTrn.from_params(params)
_ = net_g.eval()

# Load the checkpoint into net_g. The third argument is None
# (presumably the optimizer slot, unused for inference — TODO confirm).
_ = load_checkpoint(CHECKPOINT_PATH, net_g, None)

In [2]:
# Materialize every cleaned transcript line up front; the synthesis loop
# numbers its output files by position in this list.
lines = [*read_file('files/datasets/sakinukai/Sakinukai_stressed.txt')]

In [18]:
# Directory that receives one numbered WAV file per input line. Create it up
# front so saving does not depend on the directory already existing.
OUTPUT_DIR = Path('sakinukai')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for i, text in enumerate(lines, start=1):
    # Convert the line into the tensor of symbol ids expected by the model.
    stn_tst = get_text(text, params.data.text_cleaners, params.data.language)

    with torch.inference_mode():
        x_tst = stn_tst.unsqueeze(0)  # add a leading batch dimension of size 1
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        # Take the first element of infer()'s result, then its first entry
        # (presumably the waveform of the single batch item — TODO confirm).
        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=0.667, noise_scale_w=0.5, length_scale=1.0)[0][0]

    # Files are numbered from 001 in the order of `lines`.
    torchaudio.save(str(OUTPUT_DIR / f'{i:03d}.wav'), audio, params.data.sampling_rate)