# Library Imports

### File Directory Libraries

In [None]:
import os

### Math Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Data Pre-Processing Libraries

In [None]:
import pandas as pd
import librosa
import soundfile
import re
import cv2
from array import array
from sklearn.preprocessing import LabelBinarizer

### Visualization Libraries

In [None]:
import IPython.display as ipd

### Deep Learning Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

### Configuration of Imported Libraries

In [None]:
%matplotlib inline

# Initialization of Variables

In [None]:
# Cutoff compared against the peak absolute amplitude of a slice when labelling
# gunshot clips. The (misspelled) name is kept because other cells reference it.
GUNSHOT_FREQUENCY_THESHOLD = 0.25
SAMPLE_RATE_PER_SECOND = 22050
# Length, in samples, of the two-second windows every clip is padded or cut to.
SAMPLE_RATE_PER_TWO_SECONDS = 2 * SAMPLE_RATE_PER_SECOND
SOUND_FILE_ID = 0
# The dataset root can be overridden with the GUNSHOT_BASE_DIRECTORY environment
# variable; the original hard-coded location remains the default. Joining with ""
# guarantees a trailing separator, which the string concatenations below rely on.
BASE_DIRECTORY = os.path.join(os.environ.get("GUNSHOT_BASE_DIRECTORY", "/home/alexm/Datasets/"), "")
DATA_DIRECTORY = BASE_DIRECTORY + "REU_Samples_and_Labels/"
SOUND_DATA_DIRECTORY = DATA_DIRECTORY + "Samples/"
# Accumulators filled by the parsing cell (one entry per two-second sample).
samples = []
labels = []
sound_file_names = []
sample_weights = []

# Data Pre-Processing

## Reading in the CSV file of descriptors for many kinds of sounds

In [None]:
# Metadata table for the clips; later cells look rows up by "ID" and read "Class".
labels_csv_path = DATA_DIRECTORY + "labels.csv"
sound_types = pd.read_csv(labels_csv_path)

## Reading in all of the sound data WAV files

In [None]:
print("...Parsing sound data...")

# Turn every WAV file into one or more two-second samples with a binary label
# (1 = gunshot, 0 = anything else) and remember which file each sample came from.
for file in os.listdir(SOUND_DATA_DIRECTORY):
    if not file.endswith(".wav"):
        continue

    # Only the steps that can legitimately fail are guarded: extracting the ID from
    # the file name, decoding the audio, and finding the ID in the CSV. A failing
    # file is reported and skipped; Ctrl-C is no longer swallowed.
    try:
        SOUND_FILE_ID = int(re.search(r'\d+', file).group())
        sample, sample_rate = librosa.load(SOUND_DATA_DIRECTORY + file)
        prescribed_label = sound_types.loc[sound_types["ID"] == SOUND_FILE_ID, "Class"].values[0]
    except Exception as error:
        print("Sound(s) not recognized by Librosa:", file, "-", repr(error))
        continue

    is_gunshot = prescribed_label == "gun_shot"

    if len(sample) <= SAMPLE_RATE_PER_TWO_SECONDS:
        # Short clip: zero-pad on the right up to exactly two seconds.
        # The padded array is float64, as it was when built from a Python list.
        number_of_missing_hertz = SAMPLE_RATE_PER_TWO_SECONDS - len(sample)
        padded_sample = np.concatenate((sample.astype(np.float64), np.zeros(number_of_missing_hertz)))

        samples.append(padded_sample)
        labels.append(1 if is_gunshot else 0)
        sound_file_names.append(file)

    else:
        # Long clip: cut into consecutive, non-overlapping two-second windows.
        # NOTE(review): because the range stops at size - window (exclusive), a clip whose
        # length is an exact multiple of the window loses its last full window. Left
        # unchanged on purpose: later cells load fixed index files and hard-code a
        # sample count, so changing the number of samples here would misalign them.
        for i in range(0, sample.size - SAMPLE_RATE_PER_TWO_SECONDS, SAMPLE_RATE_PER_TWO_SECONDS):
            sample_slice = sample[i : i + SAMPLE_RATE_PER_TWO_SECONDS]

            # A window of a gunshot clip only counts as a gunshot when its peak
            # absolute amplitude reaches the threshold.
            if is_gunshot and not (np.max(abs(sample_slice)) < GUNSHOT_FREQUENCY_THESHOLD):
                label = 1
            else:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            sound_file_names.append(file)

print("The number of samples available for training is currently " + str(len(samples)) + '.')
print("The number of labels available for training is currently " + str(len(labels)) + '.')

## Caching NumPy arrays as NumPy files

In [None]:
# Persist the parsed data so the slow parsing cell can be skipped on later runs.
for cache_name, cached_values in (("gunshot_sound_samples.npy", samples),
                                  ("gunshot_sound_labels.npy", labels),
                                  ("gunshot_sound_file_names.npy", sound_file_names)):
    np.save(BASE_DIRECTORY + cache_name, cached_values)

## Loading NumPy files as NumPy arrays

In [None]:
# Reload the cached arrays in the same order they were written.
samples, labels, sound_file_names = (
    np.load(BASE_DIRECTORY + cache_name)
    for cache_name in ("gunshot_sound_samples.npy",
                       "gunshot_sound_labels.npy",
                       "gunshot_sound_file_names.npy")
)

## Data augmentation functions

In [None]:
def time_shift(wav):
    """Shift a waveform left or right by a random offset, keeping its length.

    The offset is drawn uniformly from (-7000, 7000) and truncated to an int.
    A non-negative offset drops that many leading samples and appends low-level
    uniform noise at the end; a negative offset prepends the noise and drops
    trailing samples instead.

    Parameters:
        wav: 1-D sequence of audio samples.

    Returns:
        A 1-D NumPy array containing the shifted waveform.
    """
    offset = int(np.random.uniform(-7000, 7000))
    # Noise in [-0.001, 0.001) fills the gap left by the shift.
    filler = np.random.uniform(-0.001, 0.001, abs(offset))

    if offset < 0:
        return np.concatenate((filler, wav[:offset]))
    return np.concatenate((wav[offset:], filler))
    
def change_pitch(wav, sample_rate):
    """Shift the pitch of a waveform by a small random amount.

    The shift is drawn uniformly from (-0.1, 0.1) and handed to librosa as
    ``n_steps``.

    Parameters:
        wav: 1-D array of audio samples.
        sample_rate: Sampling rate of ``wav``, forwarded to librosa as ``sr``.

    Returns:
        The pitch-shifted waveform produced by ``librosa.effects.pitch_shift``.
    """
    magnitude = np.random.uniform(-0.1, 0.1)
    # Keyword arguments are required by newer librosa releases (where everything
    # after the signal is keyword-only) and are accepted by older ones as well.
    wav_pitch_change = librosa.effects.pitch_shift(wav, sr = sample_rate, n_steps = magnitude)
    return wav_pitch_change
    
def speed_change(wav):
    """Stretch or compress a waveform by a random factor, keeping its length.

    A rate is drawn uniformly from (0.7, 1.3) and the signal is resized with
    ``cv2.resize`` to ``int(len(wav) * rate)`` samples. A result that is shorter
    than the input is padded on both sides with very low-level uniform noise;
    otherwise a centred window of the original length is cut out of it.

    Parameters:
        wav: 1-D NumPy array of audio samples.

    Returns:
        A 1-D NumPy array with the same length as ``wav``.
    """
    original_length = len(wav)
    rate = np.random.uniform(0.7, 1.3)
    resampled = cv2.resize(wav, (1, int(original_length * rate))).squeeze()

    if len(resampled) < original_length:
        missing = original_length - len(resampled)
        # Split the padding as evenly as possible; the extra sample goes to the end.
        leading_noise = np.random.uniform(-0.0001, 0.0001, missing // 2)
        trailing_noise = np.random.uniform(-0.0001, 0.0001, (missing + 1) // 2)
        return np.r_[leading_noise, resampled, trailing_noise]

    surplus = len(resampled) - original_length
    window_start = surplus // 2
    return resampled[window_start : window_start + original_length]
    
def change_volume(wav, magnitude):
    """Scale a waveform by a constant gain.

    Parameters:
        wav: Array of audio samples.
        magnitude: Gain factor. Values between 0 and 1 make the signal quieter,
            1 leaves it unchanged, and values above 1 make it louder.

    Returns:
        A NumPy array holding ``wav`` multiplied by ``magnitude``.
    """
    gain = np.array([magnitude])
    return gain * wav
    
def add_background(wav, file, data_directory, label_to_avoid):
    """Mix a randomly chosen recording into a waveform as background sound.

    A background file is picked at random from ``data_directory + "Samples/"``.
    Only ``.wav`` files other than ``file`` are considered, and only those whose
    numeric ID is listed in ``labels.csv`` with a class different from
    ``label_to_avoid``. A random stretch of that recording, as long as ``wav``,
    is scaled by a factor in [0, 0.5) and added to ``wav`` scaled by a factor
    in [0.8, 1.2).

    Parameters:
        wav: 1-D NumPy array of audio samples to augment.
        file: Name of the file ``wav`` came from; never used as background.
        data_directory: Directory (with trailing separator) containing
            ``labels.csv`` and the ``Samples/`` folder.
        label_to_avoid: Class name that must not be used as background; pass
            ``""`` to allow every class.

    Returns:
        A 1-D NumPy array of the same length as ``wav``.

    Raises:
        ValueError: If no eligible background file exists.
    """
    sound_types = pd.read_csv(data_directory + "labels.csv")
    sound_directory = data_directory + "Samples/"

    # ID -> class lookup; the first row wins when an ID appears more than once.
    class_by_id = {}
    for sound_id, sound_class in zip(sound_types["ID"], sound_types["Class"]):
        class_by_id.setdefault(sound_id, sound_class)

    # Build the list of eligible files up front instead of re-drawing until an
    # acceptable one turns up, which could loop forever when none exists and
    # crashed on directory entries that are not numbered WAV files.
    candidates = []
    for name in os.listdir(sound_directory):
        if name == file or not name.endswith(".wav"):
            continue
        id_match = re.search(r'\d+', name)  # same ID extraction as the parsing cell
        if id_match is None:
            continue
        sound_class = class_by_id.get(int(id_match.group()))
        if sound_class is None or sound_class == label_to_avoid:
            continue
        candidates.append(name)

    if not candidates:
        raise ValueError("No background file available in " + sound_directory
                         + " that avoids the class '" + str(label_to_avoid) + "'.")

    chosen_bg_file = candidates[np.random.randint(len(candidates))]
    bg, _ = librosa.load(sound_directory + chosen_bg_file)

    # Pick a random window of the background; a background shorter than the
    # waveform always starts at 0 and is padded below.
    latest_start = max(bg.shape[0] - wav.shape[0], 1)
    start_ = np.random.randint(latest_start)
    bg_slice = bg[start_ : start_ + wav.shape[0]]

    if bg_slice.shape[0] < wav.shape[0]:
        pad_len = wav.shape[0] - bg_slice.shape[0]
        bg_slice = np.r_[np.random.uniform(-0.001, 0.001, pad_len // 2),
                         bg_slice,
                         np.random.uniform(-0.001, 0.001, (pad_len + 1) // 2)]

    wav_with_bg = wav * np.random.uniform(0.8, 1.2) + bg_slice * np.random.uniform(0, 0.5)
    return wav_with_bg

## Augmenting data (i.e. time shifting, speed changing, etc.)

In [None]:
samples = np.array(samples)
labels = np.array(labels)
number_of_augmentations = 5
# Every original sample is stored together with its augmented variants.
copies_per_sample = number_of_augmentations + 1
augmented_samples = np.zeros((samples.shape[0] * copies_per_sample, samples.shape[1]))
augmented_labels = np.zeros((labels.shape[0] * copies_per_sample,))
augmented_sound_file_names = []

# Row i holds the untouched sample j; rows i+1 .. i+5 hold its five augmentations.
for j, i in enumerate(range(0, len(augmented_samples), copies_per_sample)):
    file = sound_file_names[j]
    original = samples[j, :]
    # Gunshot samples may receive any background; other samples must not get a
    # gunshot mixed into them.
    background_label_to_avoid = "" if labels[j] == 1 else "gun_shot"

    augmented_samples[i, :] = original
    augmented_samples[i + 1, :] = time_shift(original)
    augmented_samples[i + 2, :] = change_pitch(original, SAMPLE_RATE_PER_SECOND)
    augmented_samples[i + 3, :] = speed_change(original)
    augmented_samples[i + 4, :] = change_volume(original, np.random.uniform())
    augmented_samples[i + 5, :] = add_background(original, file, DATA_DIRECTORY, background_label_to_avoid)

    # All variants inherit the label and the source file name of the original.
    augmented_labels[i : i + copies_per_sample] = labels[j]
    augmented_sound_file_names.extend([file] * copies_per_sample)

samples = augmented_samples
labels = augmented_labels
sound_file_names = np.array(augmented_sound_file_names)


print("The number of samples available for training is currently ", len(samples), '.', sep = "")
print("The number of labels available for training is currently ", len(labels), '.', sep = "")

## Saving augmented NumPy arrays as NumPy files

In [None]:
# Persist the augmented data so the augmentation cell can be skipped on later runs.
for cache_name, cached_values in (("gunshot_augmented_sound_samples.npy", samples),
                                  ("gunshot_augmented_sound_labels.npy", labels),
                                  ("gunshot_augmented_sound_file_names.npy", sound_file_names)):
    np.save(BASE_DIRECTORY + cache_name, cached_values)

## Loading augmented NumPy files as NumPy arrays

In [None]:
# Reload the augmented samples and labels; the file names are not needed from here on.
samples, labels = (
    np.load(BASE_DIRECTORY + cache_name)
    for cache_name in ("gunshot_augmented_sound_samples.npy",
                       "gunshot_augmented_sound_labels.npy")
)
# sound_file_names = np.load(BASE_DIRECTORY + "gunshot_augmented_sound_file_names.npy")

## Instantiating a sample weights NumPy array

In [None]:
# The final 660 samples get weight 20 and everything before them weight 1.
# NOTE(review): presumably those last rows are the Raspberry Pi recordings - confirm
# that the sample ordering actually guarantees this.
raspberry_pi_sample_count = 660
normal_sample_count = max(len(samples) - raspberry_pi_sample_count, 0)
sample_weights = np.concatenate((np.full(normal_sample_count, 1),
                                 np.full(raspberry_pi_sample_count, 20)))
print("Shape of samples weights before splitting:", sample_weights.shape)

### Optional debugging after processing the data

In [None]:
i = 0  # You can change the value of 'i' to adjust which sample is being inspected.
sample = samples[i]
peak_value = np.max(abs(sample))
print("The number of samples available to the model for training is ", len(samples), '.', sep = "")
print("The maximum frequency value in sample slice #", i, " is ", peak_value, '.', sep = "")
print("The label associated with sample slice #", i, " is ", labels[i], '.', sep = "")
# Leave the audio widget as the last expression so the notebook renders it.
ipd.Audio(sample, rate = SAMPLE_RATE_PER_SECOND)

## Establishing index values for the data

In [None]:
# Training and testing rows come from saved index files; every remaining row
# becomes the validation set.
train_index = np.load("../../../raspberry_pi/indexes/training_set_indexes.npy")
test_index = np.load("../../../raspberry_pi/indexes/testing_set_indexes.npy")
all_index = np.arange(len(samples))
already_assigned = [*train_index, *test_index]
valid_index = np.delete(all_index, already_assigned)

## Restructuring the label data

In [None]:
# Map the numeric labels to class names, binarize them into a single column and
# append the complementary column so that every row has two entries.
labels = np.where(np.asarray(labels) == 1, "gun_shot", "other")
label_binarizer = LabelBinarizer()
labels = label_binarizer.fit_transform(labels)
complement = 1 - labels
labels = np.concatenate((labels, complement), axis = 1)

### Debugging of the sample and label data's shape (optional)

In [None]:
# Quick sanity check: both arrays should have the same number of rows.
for caption, array in (("Shape of samples array:", samples),
                       ("Shape of labels array:", labels)):
    print(caption, array.shape)

## Arranging the data

In [None]:
# Select waveforms, labels and weights for each split with the same index arrays.
train_wav, train_label, train_weights = samples[train_index], labels[train_index], sample_weights[train_index]
test_wav, test_label, test_weights = samples[test_index], labels[test_index], sample_weights[test_index]
valid_wav, valid_label, valid_weights = samples[valid_index], labels[valid_index], sample_weights[valid_index]

## Reshaping the sound data

In [None]:
# Add a trailing channel axis so every split has the (samples, 44100, 1) shape the
# Conv1D model takes as input. The validation split is reshaped as well because it
# is later passed to model.predict.
train_wav = train_wav.reshape(-1, SAMPLE_RATE_PER_TWO_SECONDS, 1)
test_wav = test_wav.reshape(-1, SAMPLE_RATE_PER_TWO_SECONDS, 1)
valid_wav = valid_wav.reshape(-1, SAMPLE_RATE_PER_TWO_SECONDS, 1)

# Model

## Loading previous model

In [None]:
# The saved model was compiled with the custom `auc` metric, which is only defined
# in a later cell, so restoring the compile state here would fail with an unknown
# metric. Loading with compile = False restores architecture and weights only,
# which is enough for prediction; call model.compile again before training.
model = load_model(BASE_DIRECTORY + "gunshot_sound_model.h5", compile = False)

## ROC (AUC) metric - Uses the import "from tensorflow.keras import backend as K"

In [None]:
def auc(y_true, y_pred):
    """Keras-compatible AUC metric built on ``tf.metrics.auc``.

    Parameters:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predictions.

    Returns:
        The second element of the pair returned by ``tf.metrics.auc``.
    """
    _, second_output = tf.metrics.auc(y_true, y_pred)
    # tf.metrics.auc creates local variables; initialize them in the Keras session.
    keras_session = K.get_session()
    keras_session.run(tf.local_variables_initializer())
    return second_output

## Model Parameters

In [None]:
# Training schedule
number_of_epochs = 100
batch_size = 32

# Adam with a small per-update learning-rate decay
initial_learning_rate = 0.001
optimizer = optimizers.Adam(lr = initial_learning_rate, decay = initial_learning_rate / 100)

# One two-second waveform with a single channel per example
input_tensor = Input(shape = (SAMPLE_RATE_PER_TWO_SECONDS, 1))

## Configuration of GPU for training (optional)

In [None]:
# Expose only GPU "0" to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Build a TF1 session configuration whose GPU options have allow_growth enabled.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Create a session with that configuration and register it as the Keras session.
# NOTE(review): a session may already exist if a model was loaded earlier in the
# notebook - confirm this cell runs before anything that touches the GPU.
session = tf.Session(config = config)
K.set_session(session)

## Model Architecture

In [None]:
# First Layer
x = layers.Conv1D(16, 9, activation = "relu", padding = "same")(input_tensor)
x = layers.Conv1D(16, 9, activation = "relu", padding = "same")(x)
x = layers.MaxPool1D(16)(x)
x = layers.Dropout(rate = 0.25)(x)

# Second Layer
x = layers.Conv1D(32, 3, activation = "relu", padding = "same")(x)
x = layers.Conv1D(32, 3, activation = "relu", padding = "same")(x)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate = 0.25)(x)

# Third Layer
x = layers.Conv1D(32, 3, activation = "relu", padding = "same")(x)
x = layers.Conv1D(32, 3, activation = "relu", padding = "same")(x)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate = 0.25)(x)

# Fourth Layer
x = layers.Conv1D(256, 3, activation = "relu", padding = "same")(x)
x = layers.Conv1D(256, 3, activation = "relu", padding = "same")(x)
x = layers.GlobalMaxPool1D()(x)
x = layers.Dropout(rate = (0.5))(x) # Increasing drop-out rate here to prevent overfitting

x = layers.Dense(64, activation = "relu")(x)
x = layers.Dense(1028, activation = "relu")(x)
output_tensor = layers.Dense(2, activation = "softmax")(x)

model = tf.keras.Model(input_tensor, output_tensor)
model.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = [auc, "accuracy"])

## Configuring model properties

In [None]:
# NOTE(review): ModelCheckpoint writes a Keras model file here, not a pickle, despite
# the ".pkl" extension - consider renaming once nothing else depends on this path.
model_filename = BASE_DIRECTORY + "gunshot_sound_model.pkl"
monitored_metric = 'val_acc'

# Stop once the monitored metric has not improved for 15 epochs.
early_stopping = EarlyStopping(monitor = monitored_metric,
                               patience = 15,
                               verbose = 1,
                               mode = 'max')

# Save the model whenever the monitored metric reaches a new maximum.
best_model_checkpoint = ModelCheckpoint(model_filename,
                                        monitor = monitored_metric,
                                        verbose = 1,
                                        save_best_only = True,
                                        mode = 'max')

model_callbacks = [early_stopping, best_model_checkpoint]

### Debugging of the model's architecture (optional)

In [None]:
# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

## Training & caching the model

In [None]:
# Train with per-sample weights; the testing split serves as validation data for
# the early-stopping and checkpoint callbacks. validation_data is passed as the
# (inputs, targets) tuple that Keras documents, not as a list.
History = model.fit(train_wav, train_label,
          validation_data = (test_wav, test_label),
          epochs = number_of_epochs,
          callbacks = model_callbacks,
          verbose = 1,
          batch_size = batch_size,
          sample_weight = train_weights,
          shuffle = True)

# Save the model as it is when training stops.
model.save(BASE_DIRECTORY + "gunshot_sound_model.h5")

## Summarizing history for accuracy

In [None]:
# Training curve first, validation curve second, matching the legend order.
for history_key in ('acc', 'val_acc'):
    plt.plot(History.history[history_key])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Test'], loc = 'upper left')
plt.show()

## Summarizing history for loss

In [None]:
# Training curve first, validation curve second, matching the legend order.
for history_key in ('loss', 'val_loss'):
    plt.plot(History.history[history_key])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc = 'upper left')
plt.show()

### Debugging of incorrectly-labeled examples (optional)

In [None]:
# The model takes (samples, 44100, 1) input; reshape here so prediction works even
# when the validation split still has its original two-dimensional shape.
y_val_pred = model.predict(valid_wav.reshape(-1, SAMPLE_RATE_PER_TWO_SECONDS, 1))
# Compare the most probable predicted column with the column set in the labels.
y_predicted_classes_val = y_val_pred.argmax(axis=-1)
y_actual_classes_val = valid_label.argmax(axis=-1)
wrong_examples = np.nonzero(y_predicted_classes_val != y_actual_classes_val)
print("Validation samples labeled incorrectly:", wrong_examples)
print("Validation accuracy of the current model:", 100 - (len(wrong_examples[0]) / len(valid_wav)) * 100)

### Debugging of an individual incorrectly-labeled example (optional)

In [None]:
i = 0  # Index into the validation split; pick a value from wrong_examples[0] to hear a misclassified sample.
# The validation waveforms are stored in `valid_wav`; flatten the chosen one to 1-D for playback.
sample = np.reshape(valid_wav[i], SAMPLE_RATE_PER_TWO_SECONDS)
print(y_actual_classes_val[i], y_predicted_classes_val[i])
ipd.Audio(sample, rate = SAMPLE_RATE_PER_SECOND)

### Converting labels back into strings

In [None]:
# The first column is the one produced by the binarizer, so it can be mapped back.
binarized_column = labels[:, 0]
print(label_binarizer.inverse_transform(binarized_column))

## Converting model to TensorFlow Lite format

In [None]:
model_name = BASE_DIRECTORY + "gunshot_sound_model"
converter = tf.lite.TFLiteConverter.from_keras_model_file(model_name + ".h5", custom_objects = {"auc" : auc})
tflite_model = converter.convert()
open(model_name + ".tflite", "wb").write(tflite_model)