In [1]:
from pathlib import Path

import librosa
import numpy
import pysptk
import pyworld

from become_yukarin.config import create_from_json as create_config
from become_yukarin.data_struct import Wave
from become_yukarin.dataset.dataset import *

from IPython.display import Audio
import matplotlib.pyplot as plt
%matplotlib inline

vecLib, which is a part of Accelerate, is known not to work correctly with Chainer.
We recommend using other BLAS libraries such as OpenBLAS.
For details of the issue, please see
https://docs.chainer.org/en/stable/tips.html#mnist-example-does-not-converge-in-cpu-mode-on-mac-os-x.

Also note that Chainer does not officially support Mac OS X.
Please use it at your own risk.

  ''')  # NOQA


In [23]:
# Locations of the demo utterance and of the stored mean / variance statistics.
path_base = Path('~/dataset/yukari-pause-atr503-subset').expanduser()
path_wave = path_base.joinpath('yukari-pause-atr-A01.wav')
path_mean = path_base.joinpath('mean.npy')
path_var = path_base.joinpath('var.npy')

# Waveform loader (sample_rate=24000, no top_db setting, 0.01 s of padding).
wave_file_load = WaveFileLoadProcess(sample_rate=24000, top_db=None, pad_second=0.01)

# Feature extractor; `alpha` is derived from the loader's sample rate so the
# two stages always agree with each other.
acoustic_feature = AcousticFeatureProcess(
    frame_period=5,
    order=25,
    alpha=pysptk.util.mcepalpha(wave_file_load._sample_rate),
)

# The normaliser and the denormaliser each receive their own freshly loaded
# copy of the mean / variance features.
load_feature = AcousticFeatureLoadProcess()
normalize = AcousticFeatureNormalizeProcess(
    mean=load_feature(path_mean),
    var=load_feature(path_var),
)
denormalize = AcousticFeatureDenormalizeProcess(
    mean=load_feature(path_mean),
    var=load_feature(path_var),
)

In [24]:
# Load the source utterance with the configured loader, then show an inline
# player for it (the bare last expression is what the cell displays).
w = wave_file_load(path_wave, test=True)
source_player = Audio(w.wave, rate=w.sampling_rate)
source_player

In [33]:
# Round trip: extract features -> normalise -> add small Gaussian noise to f0
# and mfcc -> denormalise -> rebuild the spectrogram from mfcc -> resynthesise.

# A dedicated, seeded generator makes the injected noise (and therefore the
# synthesised audio) identical on every "Restart & Run All", and avoids
# touching NumPy's global random state.
NOISE_SEED = 0
NOISE_SCALE = 0.01  # standard deviation of the noise added in normalised space
noise_rng = numpy.random.RandomState(NOISE_SEED)

f_in = acoustic_feature(w, test=True)
f_norm = normalize(f_in)

noise_f0 = noise_rng.randn(*f_norm.f0.shape) * NOISE_SCALE
noise_mfcc = noise_rng.randn(*f_norm.mfcc.shape) * NOISE_SCALE

# Only f0 and mfcc are perturbed here; spectrogram / aperiodicity are NaN
# placeholders because they are replaced further down before synthesis.
f_noisy_norm = AcousticFeature(
    f0=f_norm.f0 + noise_f0,
    spectrogram=numpy.nan,
    aperiodicity=numpy.nan,
    mfcc=f_norm.mfcc + noise_mfcc,
    voiced=f_in.voiced,
)
f_noisy = denormalize(f_noisy_norm)

# Convert the (noisy) mel-cepstrum back to a spectrogram, using the same alpha
# as the extractor and the FFT size pyworld reports for this sample rate.
spectrogram = pysptk.mc2sp(
    f_noisy.mfcc,
    alpha=acoustic_feature._alpha,
    fftlen=pyworld.get_cheaptrick_fft_size(wave_file_load._sample_rate),
)

# Final feature set: noisy f0 / mfcc, rebuilt spectrogram, and the original
# aperiodicity; everything is cast to float64 before being handed to pyworld.
f_out = AcousticFeature(
    f0=f_noisy.f0,
    spectrogram=spectrogram,
    aperiodicity=f_in.aperiodicity,
    mfcc=f_noisy.mfcc,
    voiced=f_in.voiced,
).astype(numpy.float64)

out = pyworld.synthesize(
    f0=f_out.f0.ravel(),  # flattened to 1-D for the synthesiser
    spectrogram=f_out.spectrogram,
    aperiodicity=f_out.aperiodicity,
    fs=wave_file_load._sample_rate,
    frame_period=acoustic_feature._frame_period,
)

Audio(data=out, rate=wave_file_load._sample_rate)