In [None]:
# Written by Gabriel Sarch
# gabrielsarch@gmail.com
#
# Last edited: 4/20/2020

In [1]:
# This file trains the Convolutional Neural Network
## Uses siren, car, and background noises (split into 3-second chunks)


In [None]:
RATE = 22050 #Hz - sample rate of training files; also used below for filter design and MFCC extraction

import numpy as np

# Paths to the pre-built numpy array files, one file per sound category.
# Each file contains a 2-D numpy array in format (nSoundBytes x nSamples): one row per audio clip.
# nSamples is usually = RATE*seconds (66150 samples for the 3-second chunks used here).
# See generateTrainingData for how these files are produced.
# NOTE(review): these are absolute paths on the original author's machine;
# point them at your own copies of the .npy files before running.
sirenFile = r'C:\Users\Gabe\Documents\SeniorDesign\Data\Senior Design data training\sirenNoise.npy'
backFile = r'C:\Users\Gabe\Documents\SeniorDesign\Data\Senior Design data training\backNoise.npy'
carFile = r'C:\Users\Gabe\Documents\SeniorDesign\Data\Senior Design data training\carNoise.npy'

In [1]:
# Load the numpy arrays containing the data and split each one into a training
# set and a validation set (the validation clips are meant for ROC curves).

def _split_holdout(clips, fraction):
    """Randomly move a fraction of the rows of `clips` into a held-out set.

    Parameters
    ----------
    clips : numpy array with one clip per row.
    fraction : share of rows (0..1) to hold out; the row count is rounded.

    Returns
    -------
    (held_out, remaining) : two new arrays that never share a source row.
    """
    n_held = round(fraction * clips.shape[0])
    held_idx = np.random.permutation(len(clips))[0:n_held]
    return clips[held_idx], np.delete(clips, held_idx, axis=0)


sirenNoise = np.load(sirenFile)
backNoise = np.load(backFile)
carNoise = np.load(carFile)

print('Array shapes before:')
print(sirenNoise.shape)
print(backNoise.shape)
print(carNoise.shape)
print(' ')

percentHeldOut = 0.1

# Same order as before (siren, background, car) so the random draws line up.
sirenNoiseVal, sirenNoise = _split_holdout(sirenNoise, percentHeldOut)
backNoiseVal, backNoise = _split_holdout(backNoise, percentHeldOut)
carNoiseVal, carNoise = _split_holdout(carNoise, percentHeldOut)

print('Array shape after (valid, train):')
print(sirenNoiseVal.shape)
print(sirenNoise.shape)
print(backNoiseVal.shape)
print(backNoise.shape)
print(carNoiseVal.shape)
print(carNoise.shape)

Array shapes before:
(1839, 66150)
(6566, 66150)
(613, 66150)
 
Array shape after (valid, train):
(184, 66150)
(1655, 66150)
(657, 66150)
(5909, 66150)
(61, 66150)
(552, 66150)


In [2]:
from scipy import signal
import pandas as pd
import os
import librosa

# 5th-order Butterworth band-pass (50 Hz - 5 kHz) as second-order sections,
# designed once for the training sample rate and reused for every clip.
sos = signal.butter(5, [50, 5000], 'bandpass', fs=RATE, output='sos')

def get_mfccs(audio):
    """Normalize, band-pass filter and convert one audio clip to MFCCs.

    Parameters
    ----------
    audio : 1-D array of audio samples (assumed to be sampled at RATE).

    Returns
    -------
    The array returned by librosa.feature.mfcc with n_mfcc=40 (40 rows, one
    column per analysis frame), or None when the clip cannot be processed.
    The reason for a failure is printed.
    """
    try:
        audio = np.asarray(audio)

        # ndarray.min()/max() run in C; the builtin min()/max() walked the
        # samples one by one in Python and were each evaluated twice.
        lowest = audio.min()
        highest = audio.max()
        if highest == lowest:
            # A constant clip has no range to rescale. Without this guard the
            # division yields NaNs and the failure only surfaces later, inside
            # librosa, with an unrelated message.
            raise ValueError("audio clip is constant; cannot min-max normalize")

        # Min-max rescale to the range [-1, 1]
        audio = 2*((audio-lowest)/(highest-lowest))-1

        #Filter
        audio = signal.sosfilt(sos, audio)

        mfccs = librosa.feature.mfcc(y=audio, sr=RATE, n_mfcc=40)

    except Exception as e:
        # Best-effort contract kept: report the cause and return None so the
        # caller decides what to do with the unusable clip.
        print("Error encountered for a file:", e)
        return None

    return mfccs

In [3]:
## Fit #1

from keras.callbacks import ModelCheckpoint 
from datetime import datetime 
import math
import random
from sklearn.model_selection import train_test_split 

# Sampling parameters used by generateData
num_samples = 999  # number of samples used for training from each array
num_samp_noise = 4 * (num_samples // 3)  # need more noise samples than siren samples (4 groups of num_samples//3)

Db5gain = 1.778279  # amplitude gain for a 5dB increase (approx. 10**(5/20))



def _draw_clips(pool, count):
    """Return `count` rows of `pool` in random order, as a new array.

    Rows are drawn without repetition while the pool lasts. When `count`
    exceeds len(pool) the pool is reshuffled and drawn from again, as many
    times as needed, so every clip is used before any clip repeats.
    """
    if count <= 0:
        return pool[0:0].copy()
    if len(pool) == 0:
        raise ValueError("cannot draw clips from an empty array")

    picks = []
    remaining = count
    while remaining > 0:
        batch = np.random.permutation(len(pool))[:remaining]
        picks.append(batch)
        remaining -= len(batch)
    # Fancy indexing copies, so the caller may scale the result in place
    # without touching the source array.
    return pool[np.concatenate(picks)]


def _apply_gain_ladder(clips, step):
    """Scale consecutive groups of `step` clips in place to four levels.

    Group 1: 1/(2*Db5gain), group 2: 1/Db5gain, group 3: unchanged,
    group 4 (everything from 3*step on): Db5gain.
    """
    # NOTE(review): 1/(2*Db5gain) is about -11 dB, not -10 dB; if an even 5 dB
    # ladder was intended the factor would be 1/Db5gain**2 - confirm.
    clips[0:step] = clips[0:step]*(1/(2*Db5gain))
    clips[step:step*2] = clips[step:step*2]*(1/Db5gain)
    clips[step*3:] = clips[step*3:]*Db5gain


def _mfccs_for(clips):
    """Run get_mfccs on every row of `clips`, dropping clips that fail."""
    extracted = []
    for clip in clips:
        mfcc = get_mfccs(clip)
        if mfcc is not None:
            extracted.append(mfcc)
    dropped = len(clips) - len(extracted)
    if dropped:
        print('Skipped', dropped, 'clips whose MFCCs could not be computed')
    return extracted


def generateData(carNoise, backNoise, sirenNoise):
    """Build one randomized train/test data set of MFCC features.

    Steps:
    1) Draw num_samp_noise car clips, num_samp_noise background clips and
       num_samples siren clips at random (short arrays are oversampled).
    2) Scale the noise clips to four gain levels and mix them into two groups:
       siren group:    siren + car, siren + environment, siren + car + environment
       no-siren group: car, environment, car + environment
    3) Normalize and filter each mixed clip and extract its MFCCs (get_mfccs).
    4) One-hot encode the labels and split 80/20 into train and test sets.

    Parameters
    ----------
    carNoise, backNoise, sirenNoise : 2-D arrays, one clip per row. All three
        must share the same clip length. They are not modified.

    Returns
    -------
    x_train, x_test : arrays of shape (n_clips, n_mfcc, n_frames, 1).
    y_train, y_test : one-hot labels; LabelEncoder sorts the class names, so
        column 0 is 'noise' and column 1 is 'siren'.

    Raises
    ------
    ValueError : if an input array is empty, or if no MFCCs could be
        extracted for one of the two classes.
    """
    from sklearn.preprocessing import LabelEncoder
    from keras.utils import to_categorical

    start = datetime.now()

    # Get the desired number of samples from each array. One shared helper
    # replaces three hand-written variants, two of which came up short when an
    # array held fewer than half the requested clips.
    carX = _draw_clips(carNoise, num_samp_noise)
    envX = _draw_clips(backNoise, num_samp_noise)
    sirenX = _draw_clips(sirenNoise, num_samples)

    # Add gain to background noise
    splitback1 = math.floor(num_samples/3)
    _apply_gain_ladder(carX, splitback1)
    _apply_gain_ladder(envX, splitback1)

    # Siren-present mixes (siren clips are left at their original level)
    sirencar = sirenX[0:splitback1]+carX[0:splitback1]
    sirenenv = sirenX[splitback1:splitback1*2]+envX[0:splitback1]
    sirencarenv = sirenX[splitback1*2:splitback1*3]+carX[splitback1:splitback1*2]+envX[splitback1:splitback1*2]

    sirenSet = np.concatenate((sirencar, sirenenv, sirencarenv))
    noiseSet = np.concatenate((carX[splitback1*2:splitback1*3], envX[splitback1*2:splitback1*3], carX[splitback1*3:splitback1*4]+envX[splitback1*3:splitback1*4]))

    print('Finished scrambling data set')
    print('Extracting MFCCs...')

    mfccSiren = _mfccs_for(sirenSet)
    mfccNoise = _mfccs_for(noiseSet)
    if not mfccSiren or not mfccNoise:
        raise ValueError("MFCC extraction produced no usable clips for at least one class")

    # Combine MFCCs and labels. No explicit shuffle is needed here because
    # train_test_split shuffles by default.
    X = np.array(mfccSiren + mfccNoise)
    y = np.array(['siren']*len(mfccSiren) + ['noise']*len(mfccNoise))

    # Encode the classification labels
    le = LabelEncoder()
    yy = to_categorical(le.fit_transform(y))

    # split the dataset
    x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2)#, random_state = 42)

    # Take the feature-map size from the data instead of hard-coding 40 x 130,
    # which only held for 3-second clips at 22050 Hz.
    num_rows, num_columns = X.shape[1], X.shape[2]
    num_channels = 1

    x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
    x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

    duration = datetime.now() - start
    print("Generating training and test data took: ", duration)

    return x_train, x_test, y_train, y_test


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
# Create model architecture

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# The network is sized from a generated data set, and the pre-training check
# below needs test data. Generate one here if none exists yet, so this cell
# also works on a fresh kernel (Restart & Run All); previously it relied on
# x_train/x_test left over from running the training cell first.
try:
    x_train, x_test, y_train, y_test
except NameError:
    x_train, x_test, y_train, y_test = generateData(carNoise, backNoise, sirenNoise)

# Input dimensions: (MFCC coefficients, frames, channels)
num_rows, num_columns, num_channels = x_train.shape[1:]

num_labels = y_train.shape[1]  # one output unit per one-hot label column
filter_size = 2

# Construct model: four identical Conv -> MaxPool -> Dropout stages with a
# doubling filter count, then global average pooling and a softmax classifier.
model = Sequential()
for stage, n_filters in enumerate((16, 32, 64, 128)):
    # Only the first layer of a Sequential model declares the input shape.
    first_layer_args = {'input_shape': (num_rows, num_columns, num_channels)} if stage == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=filter_size, activation='relu', **first_layer_args))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax'))

# Compile the model.
# One-hot labels with a softmax output call for categorical cross-entropy.
# With exactly two classes it gives the same loss and accuracy values as the
# previous binary_crossentropy setting, and it stays correct if more classes
# are added.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')


# Summary of model

# Display model architecture summary
model.summary()

# Calculate pre-training accuracy (untrained weights, so roughly chance level is expected)
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

In [None]:
# Fit the model num_fits times, regenerating the training data before each fit.
# On each iteration a new combination of siren and noise clips is generated from the arrays
# and the model weights are fit with epochs = num_epochs and batch size = num_batch_size

from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

import os

num_fits = 4 # number of times new training data is generated and model trained

# For each fit (i.e. for iterations = num_fits):
num_epochs = 25
num_batch_size = 32

saveModel = 'C:/Users/Gabe/Documents/SeniorDesign/CNNModels/weights.best.basic_cnn.hdf5'
# Create the checkpoint folder up front; otherwise a missing directory only
# shows up as a save error after the first epoch has already been trained.
os.makedirs(os.path.dirname(saveModel), exist_ok=True)

# Keep only the model with the lowest validation loss seen so far.
# The former period=1 argument equals the default and is deprecated or removed
# in newer Keras releases, so it is omitted.
checkpointer = ModelCheckpoint(filepath=saveModel, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')

# NOTE(review): every fit draws a fresh random train/test split from the same
# source arrays, so clips validated in one fit may have been trained on in an
# earlier fit, and the checkpoint compares val_loss across different validation
# sets. The held-out *Val arrays created when the data was loaded are not used
# here - consider validating on those instead.
for i in range(0, num_fits):

    print('Generating data for fit #', i+1)

    x_train, x_test, y_train, y_test = generateData(carNoise, backNoise, sirenNoise)

    start = datetime.now()

    # Training continues from the weights reached in the previous fit.
    model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)

    duration = datetime.now() - start
    print("Training completed in time: ", duration)

    # Evaluating the model on the training and testing set

    print('Results from fit #', i+1)
    score = model.evaluate(x_train, y_train, verbose=0)
    print("Training Accuracy: ", score[1])

    score = model.evaluate(x_test, y_test, verbose=0)
    print("Testing Accuracy: ", score[1])

print('Finished model fitting')