In [8]:
import os
import random

TRAIN = 160 # in paper - 9k
VALID = 20 # in paper - 80
TEST  = 20 # in paper - 80

LENGTH = 1248
SOURCE_SHAPE = [LENGTH, 129]
TARGET_SHAPE = [LENGTH, 257]

ROOT = os.path.expanduser("~")

PATH = os.path.join(ROOT, "DAE-libri")
CACHE_PATH = os.path.join(ROOT, "cache")
DATASETPATH = os.path.join(ROOT, ".dataset")

do_i_noise = True
NOISEPATH = os.path.join(ROOT, "NoiseDb")
SNR = 5

files = [os.path.join(PATH, x) for x in os.listdir(PATH) if x.endswith(".wav") and not x.endswith(".gsm.wav")]
train_files = random.sample(files, TRAIN)
valid_files = random.sample(list(set(files) - set(train_files)), VALID)
test_files = random.sample(list(set(files) - set(train_files) - set(valid_files)), TEST)
noise_files = [os.path.join(NOISEPATH, x) for x in os.listdir(NOISEPATH)]

In [9]:
### PREPARE AND LOAD

import numpy as np
import scipy.io.wavfile as sio
import tempfile
import subprocess

train_source, valid_source, test_source, train_target, valid_target, test_target, test_phase = [None] * 7
tempnam = tempfile.mktemp

def ennoise(data):
    SNR_ln = SNR / 10 * np.log(10)
    noise = sio.read(random.choice(noise_files))[1].astype(np.float32)
    data = data[:176000].astype(np.float32)
    if len(noise) <= len(data):
        print("NOISE TOO SHORT:", len(noise))
        noise = np.pad(noise, ((0, len(data) - len(noise)),),'constant')
    else:
        start = random.randint(0, len(noise) - len(data) - 1)
        noise = noise[start:start + len(data)]
    log_power_of_signal = np.log((data ** 2).mean())
    log_power_of_noise = np.log((noise ** 2).mean())
    gain_of_noise = np.exp(log_power_of_signal - SNR_ln - log_power_of_noise)   # SNR was miscalculated...
    if np.isnan(gain_of_noise):
        print("NAN gain - generating some random white noise")
        noise = np.random.normal(0, 1500, size=data.shape)
    else:
        noise = noise * gain_of_noise
    noised = data + noise
    if np.abs(noised).max() >= 2**15 - 1:
        print("Clipping noised by", np.abs(noised).max() - 2**15 - 1)
        noised = np.clip(noised, -1 * 2**15, 2**15 - 1)
    noised = noised.astype(np.int16)
    # print(gain_of_noise, log_power_of_signal, log_power_of_noise)
    # print("Mean amp of data: ", np.abs(data).mean())
    # print("Mean amp of noise: ", np.abs(noise).mean())
    # print("Mean amp of noised signal: ", np.abs(noised).mean())
    return noised

def prepare_data(filename):
    print(filename)
    data = sio.read(filename)[1] # .astype(np.float32) to generate noise in the experiment...
    if do_i_noise:
        data = ennoise(data)
    oldname = tempnam() + '.oldwav'
    sio.write(oldname, 16000, data)
    tmpname = tempnam() + '.amr-nb'
    newname = filename + ".gsm.wav"
    subprocess.Popen(['sox', oldname, '-C', '7', '-r', '8000', tmpname]).communicate()
    subprocess.Popen(['sox', tmpname, '-r', '16000', "-e", "signed", '-b', '16',  newname]).communicate()
    list(map(os.remove, [oldname, tmpname]))

[prepare_data(x) for x in train_files]
[prepare_data(x) for x in valid_files]
[prepare_data(x) for x in test_files]

train_source = np.zeros([TRAIN] + SOURCE_SHAPE, np.float32)
valid_source = np.zeros([VALID] + SOURCE_SHAPE, np.float32)
test_source  = np.zeros([TEST]  + SOURCE_SHAPE, np.float32)
train_target = np.zeros([TRAIN] + TARGET_SHAPE, np.float32)
valid_target = np.zeros([VALID] + TARGET_SHAPE, np.float32)
test_target  = np.zeros([TEST]  + TARGET_SHAPE, np.float32)
test_phase   = np.zeros([TEST]  + TARGET_SHAPE, np.float32)

window = np.hamming(512)

def get_data(source, target, index, filename, true_phase=None):
    print(filename)
    recording = sio.read(filename + ".gsm.wav")[1].astype(np.float32)
    recording /= 2**15
    for time in range(LENGTH):
        win = recording[128 * time : 128 * time + 512]
        if len(win) != 512:
            break
        fft = np.fft.rfft(window * win) / 512
        source[index, time, :] = -np.log(np.abs(fft) ** 2 + 2e-12)[:129]
    recording = sio.read(filename)[1].astype(np.float32)
    recording /= 2**15
    for time in range(LENGTH):
        win = recording[128 * time : 128 * time + 512]
        if len(win) != 512:
            break
        fft = np.fft.rfft(window * win) / 512
        target[index, time, :] = -np.log(np.abs(fft) ** 2 + 2e-12)
    if true_phase is not None:
        true_phase = np.angle(fft)


[get_data(train_source, train_target, ix, x) for ix, x in enumerate(train_files)]
[get_data(valid_source, valid_target, ix, x) for ix, x in enumerate(valid_files)]
[get_data(test_source,  test_target,  ix, x, true_phase=test_phase) for ix, x in enumerate(test_files)]


/home/zantyr/DAE-libri/3299.wav
Clipping noised by 6254.01953125
/home/zantyr/DAE-libri/19841.wav
/home/zantyr/DAE-libri/5577.wav
Clipping noised by 25469.8671875
/home/zantyr/DAE-libri/15442.wav
Clipping noised by 12686.6484375
/home/zantyr/DAE-libri/17585.wav
/home/zantyr/DAE-libri/4180.wav
/home/zantyr/DAE-libri/13932.wav
Clipping noised by 57837.5625
/home/zantyr/DAE-libri/27133.wav
Clipping noised by 148755.5
/home/zantyr/DAE-libri/3014.wav
/home/zantyr/DAE-libri/15982.wav
/home/zantyr/DAE-libri/18769.wav
/home/zantyr/DAE-libri/12133.wav
/home/zantyr/DAE-libri/25380.wav
/home/zantyr/DAE-libri/1314.wav
Clipping noised by 23474.44140625
/home/zantyr/DAE-libri/13876.wav
/home/zantyr/DAE-libri/6630.wav
Clipping noised by 4787.09765625
/home/zantyr/DAE-libri/27345.wav
Clipping noised by 133932.390625
/home/zantyr/DAE-libri/2838.wav
Clipping noised by 69982.21875
/home/zantyr/DAE-libri/14460.wav
/home/zantyr/DAE-libri/9113.wav
/home/zantyr/DAE-libri/25376.wav
/home/zantyr/DAE-libri/2650

/home/zantyr/DAE-libri/11416.wav
/home/zantyr/DAE-libri/11958.wav
/home/zantyr/DAE-libri/165.wav
Clipping noised by 61439.1171875
/home/zantyr/DAE-libri/9461.wav
/home/zantyr/DAE-libri/14353.wav
/home/zantyr/DAE-libri/12717.wav
/home/zantyr/DAE-libri/22977.wav
Clipping noised by 181893.90625
/home/zantyr/DAE-libri/25379.wav
/home/zantyr/DAE-libri/27394.wav
Clipping noised by 53.15625
/home/zantyr/DAE-libri/8586.wav
/home/zantyr/DAE-libri/20551.wav
Clipping noised by 11481.2421875
/home/zantyr/DAE-libri/24158.wav
Clipping noised by 82806.984375
/home/zantyr/DAE-libri/3757.wav
/home/zantyr/DAE-libri/14403.wav
Clipping noised by 50208.921875
/home/zantyr/DAE-libri/3299.wav
/home/zantyr/DAE-libri/19841.wav
/home/zantyr/DAE-libri/5577.wav
/home/zantyr/DAE-libri/15442.wav
/home/zantyr/DAE-libri/17585.wav
/home/zantyr/DAE-libri/4180.wav
/home/zantyr/DAE-libri/13932.wav
/home/zantyr/DAE-libri/27133.wav
/home/zantyr/DAE-libri/3014.wav
/home/zantyr/DAE-libri/15982.wav
/home/zantyr/DAE-libri/1876

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [10]:
if not os.path.exists(DATASETPATH):
    os.mkdir(DATASETPATH)
np.save(os.path.join(DATASETPATH, "train_source.bin"), train_source)
np.save(os.path.join(DATASETPATH, "valid_source.bin"), valid_source)
np.save(os.path.join(DATASETPATH, "test_source.bin"), test_source)
np.save(os.path.join(DATASETPATH, "train_target.bin"), train_target)
np.save(os.path.join(DATASETPATH, "valid_target.bin"), valid_target)
np.save(os.path.join(DATASETPATH, "test_target.bin"), test_target)
np.save(os.path.join(DATASETPATH, "test_phase.bin"), test_phase)