In [None]:
# Import dependencies
import os as os
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras import regularizers
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
from tqdm import tqdm
from util import config

# Offline Data Augmentation

## Step 1: Apply the selected augmentations from the audiomentations library

In [None]:
# How many augmented copies to generate
n_copies = 5

# Waveform-level augmentation chain built from audiomentations transforms.
# Every transform is applied independently with probability p=0.5; the values
# below are the ones the notebook author describes as the library's default
# recommended parameters.
# NOTE(review): newer audiomentations releases may use different keyword names
# for Shift's range (min_fraction / max_fraction) - verify against the
# installed version.
waveform_transforms = [
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
]

# Wrap the composed chain in the config dict under the "augmentations" key
audiomentations_config = dict(augmentations=Compose(waveform_transforms))

# Offline spec-augment settings, nested under the "configuration" key.
# NOTE(review): the mask_fraction_* entries are presumably fractions of the
# frequency / time axis to mask, and num_*_lines the number of masks per axis -
# confirm against the spec-augment helper that consumes this dict.
specaugment_config = dict(
    configuration=dict(
        mask_fraction_min=0.01,
        mask_fraction_max=0.02,
        mask_fraction_min_time=0.04,
        mask_fraction_max_time=0.04,
        p_spec_augment=0.5,
        fill_val=0,
        num_freq_lines=5,
        num_time_lines=3,
    ),
)



## Step 2: Apply the remaining augmentations with the audiomentations spec_augment_offline function

In [None]:
# Flat settings dict for the offline spec-augment step.
# The values mirror the default example the notebook author refers to.
# NOTE(review): the mask_fraction_* entries are presumably fractions of the
# frequency / time axis to mask - confirm against the function that reads them.
spec_augment_offline_configuration = dict(
    mask_fraction_min=0.01,
    mask_fraction_max=0.02,
    mask_fraction_min_time=0.04,
    mask_fraction_max_time=0.04,
    p_spec_augment=0.5,
    fill_val=0,
    num_freq_lines=5,
    num_time_lines=3,
)

End of data augmentation demo

In [None]:
# Split X_seg / Y_seg into training, validation and test sets using the
# fractions 0.5 / 0.25 / 0.25 (train / val / test).
# NOTE(review): `featextract`, `X_seg` and `Y_seg` are not imported or defined
# in the cells visible here - confirm an earlier cell provides them.
split_fractions = [0.5, 0.25, 0.25]
X_tr, X_val, X_te, Y_tr, Y_val, Y_te = featextract.create_tr_val_te(X_seg, Y_seg, split_fractions)

# Add a channel axis to each of the three feature sets
X_tr, X_val, X_te = featextract.add_channel_axis(X_tr, X_val, X_te)

# Build and train the model

In [None]:
# Define model: three Conv2D -> ReLU -> MaxPooling2D -> Dropout stages, then a
# flatten layer, one small dense layer and a softmax output layer.
# Every Conv2D / Dense kernel carries an L2 penalty of 0.01.
# NOTE(review): `X_tr` and `datasetconf` must already exist when this cell runs;
# `datasetconf` is not created in the cells visible here - confirm.
model = keras.Sequential()

# First convolutional layer
# The input shape is taken from axes 1 and 2 of X_tr plus a single channel.
model.add(layers.Conv2D(
    64,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    kernel_regularizer=regularizers.L2(0.01),
    input_shape=(np.shape(X_tr)[1], np.shape(X_tr)[2], 1),
    name='conv1',
))
model.add(layers.Activation('relu', name='relu1'))
# Pooling window (1, 4): only the second spatial axis is downsampled
model.add(layers.MaxPooling2D(pool_size=(1, 4), name='pool1'))
model.add(layers.Dropout(0.5, name='drop1'))

# Second convolutional layer
model.add(layers.Conv2D(
    32,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    kernel_regularizer=regularizers.L2(0.01),
    name='conv2',
))
model.add(layers.Activation('relu', name='relu2'))
model.add(layers.MaxPooling2D(pool_size=(1, 4), name='pool2'))
model.add(layers.Dropout(0.5, name='drop2'))

# Third convolutional layer
model.add(layers.Conv2D(
    16,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    kernel_regularizer=regularizers.L2(0.01),
    name='conv3',
))
model.add(layers.Activation('relu', name='relu3'))
model.add(layers.MaxPooling2D(pool_size=(1, 4), name='pool3'))
model.add(layers.Dropout(0.5, name='drop3'))

# Flatten layer
model.add(layers.Flatten(name='flatten1'))

# First dense layer
model.add(layers.Dense(16, kernel_regularizer=regularizers.L2(0.01), name='fc1'))
model.add(layers.Activation('relu', name='relu4'))
model.add(layers.Dropout(0.2, name='drop4'))

# Output layer: one unit per entry in datasetconf['classNames']
model.add(layers.Dense(
    len(datasetconf['classNames']),
    kernel_regularizer=regularizers.L2(0.01),
    name='main_out',
))
model.add(layers.Activation('softmax', name='softmax1'))

# Define the optimizer.
# The former `decay=0.0` argument is intentionally omitted: a decay of zero had
# no effect, and newer Keras optimizers no longer accept that legacy argument.
adamOwn = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)

# Compile the model
# NOTE(review): the sparse categorical loss assumes the labels are integer
# class indices rather than one-hot vectors - confirm against Y_tr.
model.compile(optimizer=adamOwn, loss='SparseCategoricalCrossentropy', metrics=['accuracy'])

# Print the model summary.
# summary() prints the table itself and returns None, so it is not wrapped in
# print(), which would emit a trailing "None" line.
model.summary()

In [None]:
# Fit the model on the training set for 50 epochs with mini-batches of 32.
# The validation set is passed along so the returned history also records
# the validation metrics.
model_log = model.fit(
    x=X_tr,
    y=Y_tr,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val, Y_val),
)

In [None]:
# Plot the training and validation loss curves recorded in the fit history
fig, ax = plt.subplots()
for history_key in ('loss', 'val_loss'):
    # One line per recorded series, in the same order as the legend labels
    ax.plot(model_log.history[history_key])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Val'], loc='upper left')