## Description

##### Implementation of the paper "Speech enhancement by LSTM-based noise suppression followed by CNN-based speech restoration" - https://link.springer.com/article/10.1186/s13634-020-00707-1

##### Implementation of training

### Libraries

In [None]:
import tensorflow as tf
import numpy as np
import typing

In [None]:
from ipynb.fs.full.speech_enhancement_noise_suppression_module import NoiseSuppressor
from ipynb.fs.full.speech_enhancement_speech_restoration_module import SpeechRestorationNetwork

In [None]:
# Restrict TensorFlow's v1-compat logger to ERROR-level messages so that
# lower-severity log output does not clutter the notebook.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

### Training Functions

In [None]:
# Maximum number of epochs a training run iterates over; train_model may stop
# earlier once the learning rate falls to its configured minimum.
EPOCHS = 50

In [None]:
def loss_fun(y_true, y_pred):
    """
        Scalar mean-squared-error loss between ``y_true`` and ``y_pred``.

        The Keras MSE helper averages the squared error over the last axis;
        the remaining values are then averaged down to a single scalar.
    """
    mse_over_last_axis = tf.keras.metrics.mean_squared_error(y_true, y_pred)
    return tf.math.reduce_mean(mse_over_last_axis)

In [None]:
def adam_optimizer(learning_rate):
    """
        Build a Keras Adam optimizer with this notebook's fixed hyperparameters.

        Only the learning rate is configurable; every other setting is pinned
        below.
    """
    adam_settings = {
        "learning_rate": learning_rate,
        "beta_1": 0.9,
        # NOTE(review): Keras' documented default for beta_2 is 0.999. A value
        # of 0.0002 makes the second-moment estimate track almost only the
        # latest squared gradient - confirm this is intended.
        "beta_2": 0.0002,
        "epsilon": 1e-07,
        "amsgrad": False,
        "name": 'Adam',
    }
    return tf.keras.optimizers.Adam(**adam_settings)

In [None]:
def train_step(model, optimizer, x_batch):
    """
        Run one optimisation step on a single training batch.

        The batch serves both as the model input and as the target of the
        loss. Gradients of the loss with respect to the model's trainable
        variables are applied through ``optimizer``.

        Returns the loss value computed for this batch.
    """
    with tf.GradientTape() as gradient_tape:
        prediction = model(x_in=x_batch)
        batch_loss = loss_fun(y_true=x_batch, y_pred=prediction)

    # Read the variables only after the forward pass has run.
    trainable_vars = model.trainable_variables
    gradients = gradient_tape.gradient(batch_loss, trainable_vars)
    optimizer.apply_gradients(zip(gradients, trainable_vars))
    return batch_loss

In [None]:
def test_step(model, x_batch):
    """
        Evaluate the model on a single test batch without updating weights.

        The batch is used both as the model input and as the loss target.
        Returns the loss value computed for this batch.
    """
    return loss_fun(y_true=x_batch, y_pred=model(x_in=x_batch))

In [None]:
def test_seq_desc_order(seq):
    """
        Return True when ``seq`` is in non-increasing order.

        Every element must be greater than or equal to its successor; equal
        neighbours are allowed. Empty and single-element inputs are trivially
        ordered and yield True.

        ``seq`` may be any iterable (list, tuple, generator, ...); it is
        consumed in a single pass without slicing or copying.
    """
    values = iter(seq)
    try:
        previous = next(values)
    except StopIteration:
        # Nothing to compare.
        return True

    for current in values:
        # Written as ``not >=`` (rather than ``<``) so that an incomparable
        # pair such as one involving NaN counts as "not descending".
        if not previous >= current:
            return False
        previous = current
    return True


# The name matches pytest's default ``test_*`` pattern although this is a plain
# helper; opt out so it is never collected as a test case if it gets imported
# into a test module.
test_seq_desc_order.__test__ = False

In [None]:
def train_model(model, learning_rate: float, min_learning_rate: float, lr_factor: float, nr_epochs_for_lr_change: int):
    """
        Train ``model`` for up to ``EPOCHS`` epochs with learning-rate decay.

        Reads the module-level ``train_data`` and ``test_data`` datasets (and
        ``EPOCHS``), so those must be defined before this function is called.

        Per epoch:
          1. run ``train_step`` on every batch of ``train_data``;
          2. run ``test_step`` on every batch of ``test_data`` and record the
             mean test loss;
          3. stop if ``learning_rate`` is at or below ``min_learning_rate``;
          4. once more than ``nr_epochs_for_lr_change`` epoch results exist,
             multiply the learning rate by ``lr_factor`` unless the last
             ``nr_epochs_for_lr_change`` mean test losses are in
             non-increasing order.

        Args:
            model: callable model accepted by ``train_step`` / ``test_step``.
            learning_rate: initial learning rate for the Adam optimizer.
            min_learning_rate: training stops once the rate is <= this value.
            lr_factor: multiplicative factor applied when the rate is reduced.
            nr_epochs_for_lr_change: number of most recent epochs inspected
                when deciding whether to reduce the learning rate.
    """
    # Build the optimizer once per run. Re-creating it for every batch would
    # reset Adam's moment estimates and step counter on each step, so the
    # optimizer would never accumulate any state.
    optimizer = adam_optimizer(learning_rate=learning_rate)

    mean_epoch_losses_test = []
    for epoch in range(EPOCHS):
        print(f"working on epoch: {epoch}...")
        losses_train = []
        for train_idx, x_batch_train in enumerate(train_data):
            loss_train_value = train_step(model=model, optimizer=optimizer, x_batch=x_batch_train)
            losses_train.append(loss_train_value.numpy())
            print(f"Training step is {train_idx}")

            if train_idx % 100 == 0:
                print(f"Loss at step {train_idx} is {loss_train_value.numpy():.2f}.")

        print(f"Finished epoch training nr: {epoch}. Running test inference...")

        losses_test = []
        for test_idx, x_batch_test in enumerate(test_data):
            loss_test_value = test_step(model=model, x_batch=x_batch_test)
            losses_test.append(loss_test_value.numpy())
            print(f"Test step is {test_idx}")

        mean_test_loss = np.mean(losses_test)
        print(f"Test set loss: {mean_test_loss:.2f}.")
        mean_epoch_losses_test.append(mean_test_loss)

        if learning_rate <= min_learning_rate:
            print(f"learning rate has reach a value equal to or less than {min_learning_rate}, aborting training")
            break
        if len(mean_epoch_losses_test) > nr_epochs_for_lr_change:
            # Inspect the most recent nr_epochs_for_lr_change epoch results.
            recent_losses = mean_epoch_losses_test[-nr_epochs_for_lr_change:]
            print(f"last {nr_epochs_for_lr_change} mean epoch losses test results are: {recent_losses}")
            # Reduce the rate unless the recent test losses are non-increasing.
            if not test_seq_desc_order(seq=recent_losses):
                learning_rate = learning_rate*lr_factor
                # Update the existing optimizer in place so its accumulated
                # Adam state survives the learning-rate change.
                optimizer.learning_rate = learning_rate
                print(f"updated learning rate is: {learning_rate}")

### Noise Suppressor Dummy Training

In [None]:
# Instantiate the noise-suppression network imported from the companion notebook.
# NOTE(review): output_size=2 presumably matches the last dimension of the
# dummy data created below - confirm against the NoiseSuppressor definition.
noise_suppressor = NoiseSuppressor(output_size=2)

In [None]:
# Random dummy data for a smoke-test run of the noise suppressor. The first
# 80% of entries along the first axis are reserved for training.
NOISE_SUPPRESSOR_BATCH_SIZE = 25
input_shape = (200, 1024, 2)
train_size = int(0.80 * input_shape[0])
data = tf.constant(tf.random.normal(input_shape), dtype=tf.float32)

In [None]:
# Notebook display only: show how many leading entries of the dummy data are used for training.
train_size

In [None]:
# Split the dummy tensor at train_size and wrap each part in a batched
# tf.data pipeline; train_model reads these two globals.
train_data = tf.data.Dataset.from_tensor_slices(data[:train_size]).batch(NOISE_SUPPRESSOR_BATCH_SIZE)
test_data = tf.data.Dataset.from_tensor_slices(data[train_size:]).batch(NOISE_SUPPRESSOR_BATCH_SIZE)

In [None]:
# Dummy training run of the noise suppressor: start at a learning rate of 1e-3,
# halve it when the last 4 epoch test losses are not in non-increasing order,
# and stop once the rate is at or below 1e-4.
train_model(model=noise_suppressor, learning_rate=0.001, min_learning_rate=0.0001, lr_factor=0.5, nr_epochs_for_lr_change=4)

### Speech Restoration Dummy Training

In [None]:
# Instantiate the speech-restoration network imported from the companion notebook.
# NOTE(review): the filter count (88) and filter size (24) are presumably the
# values used in the referenced paper - confirm.
speech_restoration_network = SpeechRestorationNetwork(nr_conv_filters=88, conv_filter_size=24)

In [None]:
# Random dummy data for a smoke-test run of the speech-restoration network.
# The first 80% of entries along the first axis are reserved for training.
SPEECH_RESTORATION_BATCH_SIZE = 16
input_shape = (200, 1024, 2)
train_size = int(0.80 * input_shape[0])
data = tf.constant(tf.random.normal(input_shape), dtype=tf.float32)

In [None]:
# Split the dummy tensor at train_size and wrap each part in a batched
# tf.data pipeline; these rebind the globals that train_model reads.
train_data = tf.data.Dataset.from_tensor_slices(data[:train_size]).batch(SPEECH_RESTORATION_BATCH_SIZE)
test_data = tf.data.Dataset.from_tensor_slices(data[train_size:]).batch(SPEECH_RESTORATION_BATCH_SIZE)

In [None]:
# Dummy training run of the speech-restoration network: start at a learning
# rate of 1e-4, multiply it by 0.6 when the last 3 epoch test losses are not in
# non-increasing order, and stop once the rate is at or below 1e-5.
train_model(model=speech_restoration_network, learning_rate=0.0001, min_learning_rate=0.00001, lr_factor=0.6, nr_epochs_for_lr_change=3)