In [6]:
# List the recordings in the dataset directory that the cells below load from.
!ls ../data/30measures

LP_backing_clean.wav
LP_backing_crunch.wav
LP_backing_distorted.wav
LP_backing_raw.wav
LP_lead_clean.wav
LP_lead_crunch.wav
LP_lead_distorted.wav
LP_lead_raw.wav
ST_backing_clean.wav
ST_backing_crunch.wav
ST_backing_distorted.wav
ST_backing_raw.wav
ST_lead_clean.wav
ST_lead_crunch.wav
ST_lead_distorted.wav
ST_lead_raw.wav


In [9]:
# develop 20200121
#generate datasets

from pydub import AudioSegment
import matplotlib.pyplot as plt
import numpy as np
import pathlib

# Framing parameters, in samples: every training example pairs SEQUENCE_LENGTH
# raw samples (model input) with OUTPUT_LENGTH distorted samples (model target).
SEQUENCE_LENGTH = 4851
OUTPUT_LENGTH = 441

file_names = []
input_wave = []
output_wave = []

# Zero padding prepended to each raw recording.
# generate_lstm() keeps only the LAST OUTPUT_LENGTH timesteps of its output, so
# the last OUTPUT_LENGTH samples of input frame j must be the raw samples
# [j*OUTPUT_LENGTH, (j+1)*OUTPUT_LENGTH) -- the same span as target frame j.
# Padding with the full SEQUENCE_LENGTH (as before) made every input frame end
# exactly where its target frame started, i.e. the targets were shifted one
# whole hop into the future. Padding with SEQUENCE_LENGTH - OUTPUT_LENGTH lines
# the two up while still giving the first frame a full window of context.
pad_length = SEQUENCE_LENGTH - OUTPUT_LENGTH

# Raw files come first and distorted files second, in the same recording order,
# so the k-th input frame and the k-th output frame belong to the same recording
# and position.
for f_name in ['ST_backing_raw.wav', 'ST_lead_raw.wav', 'LP_lead_raw.wav', 'ST_backing_distorted.wav', 'ST_lead_distorted.wav', 'LP_lead_distorted.wav']:

    # input -> raw, output -> distorted, anything else is skipped
    if("raw" not in f_name and "distorted" not in f_name): continue

    print(str(f_name) + ' ... now processing')
    file_names.append(f_name)

    # [::2] keeps every second sample -- presumably one channel of an
    # interleaved stereo recording; TODO confirm the files are stereo.
    wave_data = np.array(AudioSegment.from_file('../data/30measures/' + f_name, 'wav').get_array_of_samples())[::2]
    # Peak normalization alternative (disabled): wave_data = wave_data / wave_data.max()
    d_mean = wave_data.mean()
    d_std = wave_data.std()
    wave_data = (wave_data - d_mean) / d_std # standardization (per file)

    # Number of hops is computed BEFORE padding so raw and distorted recordings
    # of equal length yield the same number of frames.
    num_proc = wave_data.shape[0] // OUTPUT_LENGTH

    is_raw = "raw" in f_name
    if is_raw:
        wave_data = np.concatenate([np.zeros(pad_length), wave_data])

    for j in range(num_proc):
        start = j * OUTPUT_LENGTH

        if is_raw:
            wave_frag = wave_data[start: start + SEQUENCE_LENGTH]

            # input -> raw (-1, SEQUENCE_LENGTH)
            if(wave_frag.shape[0] == SEQUENCE_LENGTH): input_wave.append(wave_frag)

        elif("distorted" in f_name):
            wave_frag = wave_data[start: start + OUTPUT_LENGTH]

            # output -> distorted (-1, OUTPUT_LENGTH)
            if(wave_frag.shape[0] == OUTPUT_LENGTH): output_wave.append(wave_frag)

        else:
            # Unreachable because of the filter at the top of the loop; an
            # explicit error replaces the former bare `raise`, which has no
            # active exception to re-raise here.
            raise ValueError('unexpected file name (neither raw nor distorted): ' + str(f_name))


input_wave = np.array(input_wave)
output_wave = np.array(output_wave)

# Frames are paired purely by position, so a count mismatch means the raw and
# distorted recordings do not line up and the dataset would be silently wrong.
if input_wave.shape[0] != output_wave.shape[0]:
    raise ValueError(
        'raw/distorted frame count mismatch: '
        + str(input_wave.shape[0]) + ' input frames vs '
        + str(output_wave.shape[0]) + ' output frames'
    )

input_wave.shape, output_wave.shape

ST_backing_raw.wav ... now processing
ST_lead_raw.wav ... now processing
LP_lead_raw.wav ... now processing
ST_backing_distorted.wav ... now processing
ST_lead_distorted.wav ... now processing
LP_lead_distorted.wav ... now processing


((17832, 4851), (17832, 441))

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the (input, target) frame pairs for validation; the fixed
# random_state keeps the shuffle reproducible.
# NOTE(review): input frames are cut with a hop of OUTPUT_LENGTH but a window of
# SEQUENCE_LENGTH, so neighbouring frames share most of their samples. A random
# split therefore places heavily overlapping inputs on both sides, and the
# validation loss is probably optimistic -- consider a contiguous or per-file
# hold-out instead.
x_train, x_valid, y_train, y_valid = train_test_split(input_wave, output_wave, test_size = 0.2, random_state = 42)

print(x_train.shape, x_valid.shape, y_train.shape, y_valid.shape)

# Append a trailing channel axis of size 1 so the arrays match the model's
# (timesteps, 1) input/output shape. The lengths come from the dataset
# constants rather than repeated literals, so they cannot drift apart.
x_train = x_train.reshape(-1, SEQUENCE_LENGTH, 1)
x_valid = x_valid.reshape(-1, SEQUENCE_LENGTH, 1)
y_train = y_train.reshape(-1, OUTPUT_LENGTH, 1)
y_valid = y_valid.reshape(-1, OUTPUT_LENGTH, 1)

x_train.shape, x_valid.shape, y_train.shape, y_valid.shape

(14265, 4851) (3567, 4851) (14265, 441) (3567, 441)


((14265, 4851, 1), (3567, 4851, 1), (14265, 441, 1), (3567, 441, 1))

In [14]:
from keras import backend as K
from keras.layers import Input, CuDNNLSTM, Lambda
from keras.optimizers import Adam
from keras.models import Model

def generate_lstm(sequence_length=None, output_length=None, units=64):
    """Build the stacked CuDNNLSTM model that maps a raw frame to a distorted frame.

    The network reads a ``(sequence_length, 1)`` sequence, passes it through two
    ``units``-wide recurrent layers and a final single-unit recurrent layer
    (all returning full sequences), and keeps only the last ``output_length``
    timesteps as its prediction.

    Args:
        sequence_length: Number of input timesteps. Defaults to the notebook's
            ``SEQUENCE_LENGTH`` constant.
        output_length: Number of trailing timesteps returned. Defaults to the
            notebook's ``OUTPUT_LENGTH`` constant.
        units: Width of the two hidden recurrent layers.

    Returns:
        An uncompiled ``keras.models.Model`` with input shape
        ``(sequence_length, 1)`` and output shape ``(output_length, 1)``.

    Raises:
        ValueError: If ``output_length`` is not in ``1..sequence_length``.
    """
    # Resolve defaults at call time so the model always follows the constants
    # the dataset was framed with, instead of a duplicated literal.
    if sequence_length is None:
        sequence_length = SEQUENCE_LENGTH
    if output_length is None:
        output_length = OUTPUT_LENGTH

    # A zero length would make the slice `[-0:]` return the WHOLE sequence, and
    # a length beyond the input would silently return fewer steps than asked.
    if not 0 < output_length <= sequence_length:
        raise ValueError(
            'output_length must be between 1 and sequence_length, got '
            + str(output_length) + ' (sequence_length=' + str(sequence_length) + ')'
        )

    ip = Input(shape=(sequence_length, 1))

    x = CuDNNLSTM(units, return_sequences=True)(ip)
    x = CuDNNLSTM(units, return_sequences=True)(x)
    # NOTE(review): the prediction comes straight from a single-unit recurrent
    # layer; if its activation bounds outputs to [-1, 1] (confirm), standardized
    # targets outside that range cannot be reached.
    x = CuDNNLSTM(1, return_sequences=True)(x)
    # Keep only the trailing `output_length` timesteps. The length is handed to
    # the layer via `arguments` so it is stored with the layer rather than being
    # looked up from a global when the function runs.
    x = Lambda(lambda seq, n: seq[:, -n:], arguments={'n': output_length})(x)

    model = Model(ip, x)

    return model

In [None]:
# Reset the Keras backend session before (re)building the model.
K.clear_session()

model = generate_lstm()

# Mean squared error is the training objective; MAE and MSE are also reported
# as metrics.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mae', 'mse'],
)

# Train on the training frames, evaluating on the held-out frames each epoch.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    epochs=170,
    batch_size=16,
    verbose=1,
)

Train on 14265 samples, validate on 3567 samples
Epoch 1/170
Epoch 2/170
Epoch 3/170
Epoch 4/170
Epoch 5/170
Epoch 6/170
Epoch 7/170
Epoch 8/170
Epoch 9/170
Epoch 10/170
Epoch 11/170