# Todo

* Add visuals
* Synthesize Audio

## Visualize different data transform packages

* [pyworld](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder)
* [pysptk](https://github.com/r9y9/pysptk)
* [librosa](https://github.com/librosa/librosa)
* [torchaudio](https://github.com/pytorch/audio)

## Load data

Import dependencies for loading a datafile and visualizing data.

In [None]:
import os
from glob import glob
import numpy as np
from scipy.io.wavfile import read
import matplotlib.pyplot as plt
from librosa.display import specshow, waveplot

Load demo data and split the two channels

In [None]:
# Read the demo recording: `read` returns the sample rate and the sample array.
sr, wav = read('data/demo.wav')

# Summarise what was loaded. The doubled space after each colon reproduces
# the output of print(label, value) with a label that ends in a space.
print('sample rate:  {}'.format(sr))
print('wav shape:  {}'.format(wav.shape))
print('wav dtype:  {}'.format(wav.dtype))

# Column 0 and column 1 of the sample array are kept as separate signals.
x, y = wav[:, 0], wav[:, 1]

## Settings

In [None]:
# --- Framing -----------------------------------------------------------
# fft_length is handed to pyworld as fft_size and to librosa as n_fft;
# hop_length is the hop size (in samples) for pysptk and librosa.
fft_length = 1024
hop_length = 256
# Frame period in milliseconds, passed to pyworld.dio.
# hop_length / sr = 0.0128 s = 12.8 ms, which implies sr == 20000 Hz
# -- TODO confirm against the sample rate of the loaded file.
frame_period = 12.8

# --- F0 search range ---------------------------------------------------
# Lower / upper bounds given to the pysptk and pyworld pitch trackers.
f0_floor = 71.0
f0_ceil = 800.0

# --- Spectral feature options ------------------------------------------
# alpha and order appear only in the disabled pysptk calls; the remaining
# names are not referenced by the cells visible alongside this one.
alpha = 0.441
order = 40
n_mfcc = 20
norm_mfcc = True
use_mel = False
n_mels = 128
bc_threshold = 0.33

### pysptk

Start with pysptk: estimate the pitch / F0 contour of the first channel with the SWIPE and RAPT algorithms.

In [None]:
import pysptk

# Work on contiguous float64 copies of both channels; the SWIPE calls below
# consume `x` in this form.
x = np.ascontiguousarray(x).astype(np.float64)
y = np.ascontiguousarray(y).astype(np.float64)

# SWIPE run twice on the same signal, differing only in the output type.
pitch = pysptk.swipe(x, fs=sr, hopsize=hop_length,
                     min=f0_floor, max=f0_ceil, otype="pitch")
f0_swipe = pysptk.swipe(x, fs=sr, hopsize=hop_length,
                        min=f0_floor, max=f0_ceil, otype="f0")
# RAPT is fed a float32 copy of the signal rather than the float64 array.
f0_rapt = pysptk.rapt(x.astype(np.float32), fs=sr, hopsize=hop_length,
                      min=f0_floor, max=f0_ceil, otype="f0")

# Disabled experiments, kept for reference:
# mel_spec = pysptk.sp2mc(sp, order=order, alpha=alpha)
# mfcc = pysptk.mfcc(mel_spec, fs=sr, alpha=alpha, order=80, num_filterbanks=100)
# energy = pysptk.mc2e(mel_spec, alpha=alpha)

# Label each summary line with the algorithm that produced it (both lines
# previously read "swap", making the two estimates indistinguishable).
print('Pysptk')
print('F0 (swipe): {}, mean: {}'.format(f0_swipe.shape, f0_swipe.mean()))
print('F0 (rapt): {}, mean: {}'.format(f0_rapt.shape, f0_rapt.mean()))

In [None]:
import pyworld

# NOTE(review): `x` is used as left by the pysptk cell (float64, contiguous)
# -- confirm that cell has been run before this one.

# Coarse F0 track plus the time axis of its frames, then a refinement pass.
f0, timeaxis = pyworld.dio(x, fs=sr, f0_floor=f0_floor,
                           f0_ceil=f0_ceil, frame_period=frame_period)
f0 = pyworld.stonemask(x, f0, timeaxis, sr)
# Spectrogram and aperiodicity are computed on the same frames and FFT size.
sp = pyworld.cheaptrick(x, f0, timeaxis, sr, fft_size=fft_length)
ap = pyworld.d4c(x, f0, timeaxis, sr, fft_size=fft_length)  # Aperiodicity

print('PyWorld')
print('F0: {}, mean: {}'.format(f0.shape, f0.mean()))
print('Spectrogram: {}, mean: {}'.format(sp.shape, sp.mean()))
# The format string previously had no placeholders, so the shape and mean
# were computed but never printed.
print('Aperiodicity: {}, mean: {}'.format(ap.shape, ap.mean()))

In [None]:
import librosa

# Magnitude STFT and its dB-scaled version (referenced to the peak value).
# This rebinds `sp`, which the pyworld cell also assigns.
# NOTE(review): the STFT runs with librosa's default n_fft / hop_length, not
# the notebook's fft_length / hop_length -- confirm that is intended.
sp = np.abs(librosa.stft(x))
spec = librosa.amplitude_to_db(sp, ref=np.max)

# Pass the signal by keyword: recent librosa releases made the arguments of
# melspectrogram keyword-only, and `y=` is accepted by older releases too.
# n_mels is taken from the settings cell (128, same as the library default).
mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_fft=fft_length,
                                          hop_length=hop_length,
                                          n_mels=n_mels)

print('Librosa')
print('sp: {}, mean: {}'.format(sp.shape, sp.mean()))
print('Spectrogram: {}, mean: {}'.format(spec.shape, spec.mean()))
print('MelSpectrogram: {}, mean: {}'.format(mel_spec.shape, mel_spec.mean()))

In [None]:
import torchaudio