## Alternative Model (Preprocessing)
You need to run this before you run notebook 4b.

Each wav in your dataset is converted to a quantized waveform (9-bit linear, or mu-law when `use_mu_law` is enabled) and an 80-band mel spectrogram.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import math, pickle, os, glob
from nnmnkwii import preprocessing as P
import numpy as np
from utils.display import *
from tqdm import tqdm

In [None]:
# Bit depth used by the linear (non mu-law) quantization and its inverse.
bits = 9
# Sub-folder of data/ that receives everything this notebook writes.
notebook_name = 'nb4'
# Chooses which helper module is wildcard-imported below.
use_deepvoice_preprocessing = True
# When True, waveforms are mu-law quantized instead of linearly quantized.
use_mu_law = True

if use_mu_law:
    print("using mu_law")

# NOTE(review): later cells call names that are not defined in this notebook
# (e.g. load_wav, melspectrogram, sample_rate); presumably they come from
# whichever wildcard import is selected here -- confirm both modules export them.
if use_deepvoice_preprocessing:
    print("using deepvoice")
    from utils.audio import *
else:
    print("using original")
    from utils.dsp import *

In [None]:
# Point SEG_PATH to a folder containing your training wavs.
# It doesn't matter whether it's LJSpeech, CMU Arctic, etc. -- it should work fine.
SEG_PATH = '/media/gary/more_storage/audio_dataset/LJSpeech-1.1/wavs/'
# All outputs of this notebook are written below this folder.
DATA_PATH = f'data/{notebook_name}/'
# os.makedirs replaces the `%mkdir -p` magic so the cell does not depend on
# IPython or a POSIX shell; exist_ok keeps re-running the cell harmless.
os.makedirs(DATA_PATH, exist_ok=True)

In [None]:
def get_files(path, extension='.wav'):
    """Recursively collect the files under ``path`` whose names end in ``extension``.

    Args:
        path: Root directory to search; sub-directories are searched as well.
        extension: File-name suffix to match, including the leading dot.

    Returns:
        A sorted list of matching paths. Sorting makes the order independent
        of the filesystem's directory listing order, so index-based picks and
        the order of saved ids are reproducible across machines.
    """
    return sorted(glob.iglob(f'{path}/**/*{extension}', recursive=True))

In [None]:
wav_files = get_files(SEG_PATH)
# Fail fast with a clear message: a wrong SEG_PATH would otherwise surface
# later as an unrelated-looking IndexError when a sample file is picked.
if not wav_files:
    raise FileNotFoundError(f'No wav files found under {SEG_PATH!r}; check SEG_PATH')

In [None]:
def convert_file(path):
    """Load one wav file and produce its mel spectrogram and quantized samples.

    Args:
        path: Path of the wav file to convert.

    Returns:
        A ``(mel, quant)`` tuple: the mel spectrogram cast to float32 and the
        quantized waveform cast to an integer array.
    """
    wav = load_wav(path)
    mel = melspectrogram(wav)
    if use_mu_law:
        # NOTE(review): called without a `mu` argument, so the library default
        # is used and `bits` has no effect in this branch -- confirm intended.
        quant = P.mulaw_quantize(wav)
    else:
        # Linear quantization: scale by (2**bits - 1) / 2 after shifting by +1;
        # presumably wav is in [-1, 1], giving [0, 2**bits - 1] -- confirm.
        quant = (wav + 1.) * (2**bits - 1) / 2
    # `np.int` was only an alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so use `int` to get the same dtype.
    return mel.astype(np.float32), quant.astype(int)

In [None]:
# Convert a single file (the second one found) as a quick check before the full run;
# m is the mel spectrogram and x the quantized waveform returned by convert_file.
m, x = convert_file(wav_files[1])

In [None]:
# Display the sample's mel spectrogram (plot_spec is not defined in this
# notebook; presumably it comes from the utils.display wildcard import).
plot_spec(m)

In [None]:
# Display the sample's quantized waveform (plot is not defined in this
# notebook; presumably it comes from the utils.display wildcard import).
plot(x)

In [None]:
# Map the quantized sample back to a float waveform so it can be listened to.
# The inverse must match the encoding used in convert_file: mu-law class
# indices need the mu-law inverse, not the linear rescaling.
# Note: this overwrites x, so re-run the sample conversion cell before
# running this cell a second time.
if use_mu_law:
    x = P.inv_mulaw_quantize(x)
else:
    x = 2 * x / (2**bits - 1) - 1

In [None]:
# Write the decoded sample to disk for a listening check.
# librosa.output.write_wav was removed in librosa 0.8.0, so write through
# scipy instead; float32 is used because 64-bit float wavs are poorly supported.
from scipy.io import wavfile
wavfile.write(DATA_PATH + 'test_quant.wav', sample_rate, np.asarray(x, dtype=np.float32))

In [None]:
# Output folders for the per-file quantized waveforms and mel spectrograms.
QUANT_PATH = DATA_PATH + 'quant/'
MEL_PATH = DATA_PATH + 'mel/'
# os.makedirs replaces the `%mkdir -p` magic so the cell does not depend on
# IPython or a POSIX shell; exist_ok keeps re-running the cell harmless.
os.makedirs(QUANT_PATH, exist_ok=True)
os.makedirs(MEL_PATH, exist_ok=True)

In [None]:
# Preview of the id derived for a file: the last '/'-separated path component
# with its final four characters (the '.wav' suffix) removed.
wav_files[0].rsplit('/', 1)[-1][:-4]

In [None]:
# This will take a while depending on the size of the dataset.
# Every wav is converted and saved as <id>.npy in both MEL_PATH and QUANT_PATH.
# NOTE(review): ids are bare file names, so two files with the same name in
# different sub-folders would overwrite each other -- confirm this cannot happen.
dataset_ids = []
for path in tqdm(wav_files):
    # File name without directory or extension; os.path handles any path
    # separator and any extension length, and `file_id` avoids shadowing the
    # builtin `id` for the rest of the kernel session.
    file_id = os.path.splitext(os.path.basename(path))[0]
    dataset_ids.append(file_id)
    m, x = convert_file(path)
    np.save(f'{MEL_PATH}{file_id}.npy', m)
    np.save(f'{QUANT_PATH}{file_id}.npy', x)

In [None]:
# Persist the list of converted file ids next to the mel/quant folders.
ids_path = DATA_PATH + 'dataset_ids.pkl'
with open(ids_path, 'wb') as ids_file:
    pickle.dump(dataset_ids, ids_file)