# Filtering Piano Instrument - Sound Wave & Feedforward Neural Network (FNN)
![Getting Started](./PianoSoundWaveFNN/images/SoundWaveFNN.jpg)

****
## Goal
* What types of sounds are generated?
* What are the features used to train the system?
* What is the DL architecture employed?
* What are the inputs for generation?

****

In [None]:
import os
import librosa
import librosa.display
import IPython.display as ipd
import numpy as np
import soundfile as sf
import keras
from keras.layers import Dense, Dropout, PReLU
from keras.optimizers import Adam

# Root of the project data; every other location is derived from it.
DATASET_PATH = "./PianoSoundWaveFNN/data/"
AUDIO_DATASET_PATH = os.path.join(DATASET_PATH, "audio")    # source .wav recordings
SAMPLE_DATASET_PATH = os.path.join(DATASET_PATH, "sample")  # per-file segment caches (.npy)
TRAIN_DATASET_PATH = os.path.join(DATASET_PATH, "train")    # merged input/target arrays
MODEL_PATH = os.path.join(DATASET_PATH, "model")            # saved Keras model

WAVE_SAMPLE_RATE = 44100  # sampling rate passed to librosa.load
NUM_SAMPLES = 1024        # number of samples in one training segment

# Create the output directories up front. MODEL_PATH is deliberately NOT
# created here: the training cell uses its existence to decide whether a
# model has already been trained.
os.makedirs(SAMPLE_DATASET_PATH, exist_ok=True)
os.makedirs(TRAIN_DATASET_PATH, exist_ok=True)

## Functions to extract training dataset
****

In [None]:
def get_wave_samples(audio_file_path, sample_dataset_path, num_samples=NUM_SAMPLES, sr=WAVE_SAMPLE_RATE):
    """Split an audio file into fixed-length segments, caching the result.

    The segments are stored as ``<basename of audio file>.npy`` inside
    ``sample_dataset_path``. When that file already exists and its segment
    length matches ``num_samples`` it is returned without decoding the audio.

    Args:
        audio_file_path: Path of the audio file to load.
        sample_dataset_path: Directory holding the ``.npy`` segment caches.
        num_samples: Number of samples per segment.
        sr: Sampling rate handed to ``librosa.load``.

    Returns:
        A 2-D array of shape ``(num_segments, num_samples)``. Trailing samples
        that do not fill a whole segment are dropped.

    Raises:
        ValueError: If ``num_samples`` is not positive.
    """
    if num_samples <= 0:
        raise ValueError(f"num_samples must be positive, got {num_samples}")

    filename = os.path.basename(audio_file_path)
    dataset_path = os.path.join(sample_dataset_path, filename + ".npy")

    if os.path.exists(dataset_path):
        print(f"----- getting sample dataset from {dataset_path}")
        w_signal = np.load(dataset_path)
        # The cache is keyed by file name only, so a cache written with a
        # different segment length must not be handed back to the caller.
        if w_signal.ndim == 2 and w_signal.shape[1] == num_samples:
            return w_signal
        print(f"----- cached segment length differs from {num_samples}, regenerating")

    print(f"----- getting sample dataset from {audio_file_path}")
    # The returned rate is discarded so the ``sr`` argument is not shadowed.
    signal, _ = librosa.load(audio_file_path, sr=sr)
    num_segments = len(signal) // num_samples

    # Drop the incomplete tail, then cut all segments with a single reshape.
    w_signal = signal[:num_segments * num_samples].reshape(-1, num_samples)
    print(w_signal.shape)

    os.makedirs(sample_dataset_path, exist_ok=True)
    np.save(dataset_path, w_signal)
    return w_signal
    
def create_wave_samples(audio_dataset_path, sample_dataset_path, num_samples=NUM_SAMPLES, sr=WAVE_SAMPLE_RATE):
    """Create cached segment arrays for every mix recording and its target.

    Walks ``audio_dataset_path`` and, for each file whose name contains
    ``"mix"``, generates the segment cache for that file and for the target
    recording named by the second ``_``-separated token of the file name
    (``mix_piano1_violin1.wav`` -> ``piano1.wav``).

    Args:
        audio_dataset_path: Directory tree containing the audio files.
        sample_dataset_path: Directory receiving the ``.npy`` segment caches.
        num_samples: Number of samples per segment.
        sr: Sampling rate forwarded to ``get_wave_samples``.

    Returns:
        None. The work is done for its side effect of writing cache files.
    """
    for dirpath, _, filenames in os.walk(audio_dataset_path):
        for f in filenames:
            if "mix" not in f:
                continue

            # os.walk descends into sub-directories, so a file has to be
            # resolved against the directory it was found in, not the root.
            m_wave_file_path = os.path.join(dirpath, f)
            get_wave_samples(m_wave_file_path, sample_dataset_path, num_samples=num_samples, sr=sr)

            # The target recording is looked up next to the mix file.
            t_file = f.split('_')[1] + '.wav'
            t_wave_file_path = os.path.join(dirpath, t_file)
            get_wave_samples(t_wave_file_path, sample_dataset_path, num_samples=num_samples, sr=sr)

def create_traing_dataset(train_dataset_path, sample_dataset_path, dataset_files):
    """Assemble the training input/target arrays from cached segment files.

    If both ``input.npy`` and ``target.npy`` already exist in
    ``train_dataset_path`` they are loaded and returned. Otherwise every file
    in ``dataset_files`` is loaded from ``sample_dataset_path`` together with
    its target file (``<second '_' token>.wav.npy``), all segments are stacked
    row-wise, and the two resulting arrays are saved and returned.

    Args:
        train_dataset_path: Directory holding ``input.npy`` / ``target.npy``.
        sample_dataset_path: Directory holding the per-file segment caches.
        dataset_files: Names of the mix segment files, e.g.
            ``'mix_piano1_violin1.wav.npy'``.

    Returns:
        Tuple ``(input_Dataset, target_Dataset)`` of 2-D arrays with one
        segment per row. The arrays keep the dtype stored in the sample files.

    Raises:
        ValueError: If ``dataset_files`` is empty, or a mix file and its
            target file do not hold the same number of segments.
    """
    F_input_file_path = os.path.join(train_dataset_path, 'input.npy')
    F_target_file_path = os.path.join(train_dataset_path, 'target.npy')

    # Both files must be present; a lone input.npy cannot be used on its own.
    if os.path.exists(F_input_file_path) and os.path.exists(F_target_file_path):
        print(f"----- getting input dataset from {F_input_file_path}")
        input_Dataset = np.load(F_input_file_path)

        print(f"----- getting target dataset from {F_target_file_path}")
        target_Dataset = np.load(F_target_file_path)

        return input_Dataset, target_Dataset

    if not dataset_files:
        raise ValueError("dataset_files must contain at least one sample file")

    input_parts = []
    target_parts = []

    for f in dataset_files:
        input_file_path = os.path.join(sample_dataset_path, f)
        print(f"----- getting input dataset from {input_file_path}")
        mix_segments = np.load(input_file_path)

        t_token = f.split('_')[1]
        target_file_path = os.path.join(sample_dataset_path, t_token + '.wav.npy')
        print(f"----- getting target dataset from {target_file_path}")
        target_segments = np.load(target_file_path)

        # Row i of the input must describe the same time span as row i of the
        # target, so every pair has to contribute equally many segments.
        if mix_segments.shape != target_segments.shape:
            raise ValueError(
                f"segment shape mismatch: {input_file_path} has {mix_segments.shape}, "
                f"{target_file_path} has {target_segments.shape}"
            )

        input_parts.append(mix_segments)
        target_parts.append(target_segments)

    # Stack along the segment axis; files may differ in their segment count.
    input_Dataset = np.concatenate(input_parts, axis=0)
    target_Dataset = np.concatenate(target_parts, axis=0)

    os.makedirs(train_dataset_path, exist_ok=True)

    # save as .npy files
    print(f"----- generating input dataset {F_input_file_path}")
    np.save(F_input_file_path, input_Dataset)

    print(f"----- generating target dataset {F_target_file_path}")
    np.save(F_target_file_path, target_Dataset)
    return input_Dataset, target_Dataset

## Train Model with Dataset
****

In [None]:
# Reuse the stored model when one exists at MODEL_PATH; otherwise build the
# dataset, train a fresh network and save it.
if os.path.exists(MODEL_PATH):
    model = keras.models.load_model(MODEL_PATH)
else:
    # cache fixed-length segments for every mix recording and its target
    create_wave_samples(AUDIO_DATASET_PATH, SAMPLE_DATASET_PATH, num_samples=NUM_SAMPLES, sr=WAVE_SAMPLE_RATE)

    input_list = [
        'mix_piano1_violin1.wav.npy',
        'mix_piano1_violin2.wav.npy',
        'mix_piano1_violin3.wav.npy',
        'mix_piano2_violin1.wav.npy',
        'mix_piano2_violin2.wav.npy',
        'mix_piano2_violin3.wav.npy',
    ]

    input, target = create_traing_dataset(TRAIN_DATASET_PATH, SAMPLE_DATASET_PATH, input_list)

    # layer widths are taken from the length of one segment
    input_shape = input[0].shape
    input_size = input_shape[0]
    output_size = target[0].shape[0]

    # fully connected network: Dense -> PReLU -> Dense(512) -> PReLU -> Dense
    model = keras.models.Sequential([
        Dense(input_size, input_shape=input_shape),
        PReLU(),
        Dense(512),
        PReLU(),
        Dense(output_size),
    ])
    model.compile(optimizer=Adam(), loss='mse')
    model.summary()
    model.fit(input, target, epochs=5)
    model.save(MODEL_PATH)

In [None]:
def x_fade_profile(batch_dim):
    """Return a triangular cross-fade window of length ``batch_dim``.

    The weight is 0 at index 0, rises linearly to 1 at index
    ``batch_dim / 2`` and falls off linearly again after it.
    """
    x = np.arange(batch_dim)
    half = batch_dim / 2
    return 1 - np.abs(x - half) / half


def model_predict(model, input_track, num_samples):
    """Run ``model`` over a whole track using 50 %-overlapping windows.

    The track is cut into segments of ``num_samples`` twice: once starting at
    sample 0 and once starting half a segment later. Both sets of predictions
    are weighted with the triangular window from ``x_fade_profile`` and
    overlap-added, each at the position it was taken from, so the window
    weights of the two sets sum to 1 after the first half segment.

    Args:
        model: Object with a ``predict(batches)`` method that maps an array of
            shape ``(n, num_samples)`` to an array of the same shape.
        input_track: 1-D NumPy array with the audio samples.
        num_samples: Segment length the model works on.

    Returns:
        1-D array of length ``(len(input_track) // num_samples - 1) *
        num_samples``. It is empty when the track is shorter than two
        segments.
    """
    dim = num_samples
    hop = dim // 2
    n_batches = len(input_track) // dim - 1
    if n_batches <= 0:
        # Not enough audio for even one pair of overlapping segments.
        return np.zeros(0)

    total = n_batches * dim
    pred_batches = input_track[:total].reshape((-1, dim))
    pred_batches_shifted = input_track[hop:total + hop].reshape((-1, dim))

    # The window broadcasts over the rows, weighting every predicted segment.
    xfp = x_fade_profile(dim)
    x0 = (xfp * np.asarray(model.predict(pred_batches))).reshape(-1)
    x1 = (xfp * np.asarray(model.predict(pred_batches_shifted))).reshape(-1)

    # Overlap-add: the shifted predictions describe the samples starting at
    # ``hop``, so they must be added at that offset rather than at index 0.
    out = np.zeros(total + hop, dtype=np.result_type(x0, x1))
    out[:total] += x0
    out[hop:] += x1

    # Trim the trailing half segment so the length stays at ``total``.
    return out[:total]

## Test Model
****

### Prepare Test Data

In [None]:
# Test pair taken from the audio directory: the mix that is fed to the model
# and the piano recording it is compared against.
input_test_file, target_test_file = (
    os.path.join(AUDIO_DATASET_PATH, wav_name)
    for wav_name in ('mix_piano2_violin2.wav', 'piano2.wav')
)

# Decode both recordings at the sampling rate used for training.
input_test_signal, sr = librosa.load(input_test_file, sr=WAVE_SAMPLE_RATE)
target_test_signal, sr = librosa.load(target_test_file, sr=WAVE_SAMPLE_RATE)

### Input Track

In [None]:
# Audio player for the mixed test recording loaded above.
ipd.Audio(input_test_signal, rate=WAVE_SAMPLE_RATE)

### Target Track

In [None]:
# Audio player for the target test recording loaded above.
ipd.Audio(target_test_signal, rate=WAVE_SAMPLE_RATE)

### Prediction Track

In [None]:
# Run the model over the mixed test signal in NUM_SAMPLES-long segments and
# play the result.
ipd.Audio(model_predict(model, input_test_signal, num_samples=NUM_SAMPLES), rate=WAVE_SAMPLE_RATE)