In [39]:
import os

import keras_tuner
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D
from tensorflow.keras.models import Sequential




## Get the data 

In [2]:
# Data from the elephant listening project 
general_path = os.path.join('data', 'Clips')

# To ensure that both classes have same of samples and to increase the number of gunshots, 
# I extracted extra data from: https://data.mendeley.com/datasets/x48cwz364j/3 
background_path = os.path.join('data', 'Sounds_background')
guns_path = os.path.join('data', 'Sounds_gunshots')

gunshot_files = [os.path.join(general_path, 'pnnn*'), os.path.join(general_path, 'ecoguns*'), os.path.join(guns_path, '*\.wav')]

no_gunshot_files = [os.path.join(general_path, 'other*'), os.path.join(background_path, '*\.wav')] 
gunshot = tf.data.Dataset.list_files(gunshot_files) 
no_gunshot = tf.data.Dataset.list_files(no_gunshot_files) 

#to see how many files are in each group: 
#num_elements = tf.data.experimental.cardinality(no_gunshot).numpy()


Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



## 1. Load data and return wave 

In [3]:
def load_data(file_name): 
    file_contents = tf.io.read_file(file_name) #retuns a string 
    wave, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1) # transforms string into actual wav
    wave = wave - tf.reduce_mean(wave) # remove the mean 
    wave = wave / tf.reduce_max(tf.abs(wave)) #normalize 
    wave = tf.squeeze(wave, axis= -1) #removes axis 
    #wave = tf.cast(wave * 32768, tf.float32) # value is scaled to look like int16, however, type is kept as float32 for compatibility issues

    return wave, sample_rate

## 2. Add labels
1: gunshot 
0: no gunshot

In [4]:
gunshot = tf.data.Dataset.zip((gunshot, tf.data.Dataset.from_tensor_slices(tf.ones(len(gunshot)))))
no_gunshot= tf.data.Dataset.zip((no_gunshot, tf.data.Dataset.from_tensor_slices(tf.zeros(len(gunshot)))))

## 3. Concatenate gunshots and no_gunshots into one data set 

In [5]:
data = gunshot.concatenate(no_gunshot)
data.as_numpy_iterator().next() # see how it looks like 

(b'data/Clips/ecoguns953.wav', 1.0)

## 4. Convert data into Spectrogram 
Time frequency compromise: 
https://www.tensorflow.org/tutorials/audio/simple_audio <br>
https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe 



In [6]:
def preprocess(file_path, label): 
    # Load data
    wave, sr = load_data(file_path)
    max_lenght = 80000 # = 10* 8000, this means 10 seconds 

    # Padding 
    wave = wave[:max_lenght] #grab first elements up to max(lengths)
    zero_padding = tf.zeros(max_lenght - tf.shape(wave), dtype=tf.float32) # pad with zeros what doesn't meet full length 
    wave = tf.concat([zero_padding, wave],0) 

    # Create spectogram 
    # 1. Fast fourier transform 
    spectrogram = tf.signal.stft(wave, frame_length=256, frame_step=128)  # Paper: 'Automated detection of gunshots in tropical forests using CNN' 
    # frame_length =  window length in samples
    # frame_step = number of samples to step
    # 'Time frequency compromise' 
    # if window size is small: you get good time resolution in exchange of poor frequency resolution 

    # 2. Obtain the magnitude of the STFT
    spectrogram = tf.abs(spectrogram)

    # 3. Tranform it into appropiate format for deep learning model by adding the channel dimension (in this case 1)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram, label


## 5. Shuffle the data such that not all gunshots are followed by gunshots, and similarly with no gunshots. 

In [7]:
data = data.map(preprocess) # calling preprocess method which generates spectograms
data = data.cache()
data = data.shuffle(buffer_size=1000) # mixing training samples 1000 at the time  

## 6. Extract samples and labels 

In [8]:
iterator = data.as_numpy_iterator()
x = []
y = []
while True:
    try: 
        x_temp, y_temp = iterator.next()
        x.append(x_temp)
        y.append(y_temp)
    except Exception:
        break 

x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.30, random_state=123)


2023-05-03 17:20:08.238979: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


## 7. Build model with hyperparameter tuning 
https://keras.io/guides/keras_tuner/getting_started/ <br>
https://www.youtube.com/watch?v=6Nf1x7qThR8&ab_channel=GregHogg 

In [10]:
def build_model(hp):
    input = (624, 129,1)
    model = keras.Sequential()
    
    # Add input layer 
    #matching samples.shape
    model.add(
        Conv2D(
            filters= hp.Int("conv_filters_0", min_value=16, max_value=128, step=16), 
            activation= hp.Choice("conv_activation_0", ["relu", "tanh"]),
            kernel_size = (3,3), 
            input_shape=input
        )
    ) 
    model.add(MaxPool2D(pool_size= (2,2)))

    # Tune the number of Conv layers 
    for i in range(hp.Int("num_conv_layers", 1, 3)):
        model.add(
            Sequential([
                layers.Conv2D(
                    filters=hp.Int(f"conv_filters_{i}", min_value=16, max_value=128, step=16),
                    activation=hp.Choice(f"conv_activation_{i}", ["relu", "tanh"]),
                    kernel_size=(3,3),
                ), 
                layers.MaxPool2D(pool_size=(2,2)),
            ])
        )

    model.add(layers.Flatten())

    # Tune the number of Dense layers and Tune whether to use dropout layer
    for i in range(hp.Int("num_dense_layers", 1, 3)):
            model.add(
                Sequential([
                    layers.Dense(
                        # Tune number of units separately.
                        units=hp.Int(f"dense_units_{i}", min_value=50, max_value=600, step=50),
                        activation=hp.Choice(f"dense_activation_{i}", ["relu", "tanh"]),
                    ), 
                    layers.Dropout(
                        rate=hp.Float(f"dense_dropout_{i}", min_value = 0, max_value = 1)
                    )
                ]) 
            )

    model.add(
        layers.Dense(
        units=1, #because we have 2 classes 
        activation=hp.Choice("activatio_last_layer", ["softmax", "sigmoid"]), 
        )
    )

    # Define the optimizer learning rate as a hyperparameter.
    # sampling="log", the step is multiplied between samples.
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-1, sampling="log")
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate), 
        loss="BinaryCrossentropy", 
        metrics=["accuracy"],
    )
    
    return model

In [11]:
build_model(keras_tuner.HyperParameters())



<keras.engine.sequential.Sequential at 0x171248760>

### Initialize tuner by specifying different arguments 

In [12]:
tuner = keras_tuner.BayesianOptimization(
    hypermodel=build_model,
    objective="val_accuracy", # we want maximize accuracy 
    max_trials= 10, #10 is default
    overwrite=True,
    directory="param_optimization",
    project_name="first_try",
)




In [10]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor= 'val_loss', patience=4) 
# patience refers to number of epochs: if the val loss is not improving fter 4 ephocs, we stop it. 

### During the search, the model is called with different hyperparameters 

In [14]:
tuner.search_space_summary()
# Default search space size: number of hyper parameters that we are tunning 

Search space summary
Default search space size: 9
conv_filters_0 (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 128, 'step': 16, 'sampling': 'linear'}
conv_activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
num_conv_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
num_dense_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
dense_units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 50, 'max_value': 600, 'step': 50, 'sampling': 'linear'}
dense_activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
dense_dropout_0 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 1.0, 'step': None, 'sampling': 'linear'}
activatio_last_layer (Choice)
{'default': 'softmax', 'conditions': [], 'values': ['softmax', 'sigm

In [16]:
tuner.search(np.stack(x_train), np.stack(y_train), epochs=20, validation_data=(np.stack(x_valid), np.stack(y_valid)), callbacks=[stop_early]) #similar to fit 

Trial 10 Complete [00h 02m 38s]
val_accuracy: 0.868297278881073

Best val_accuracy So Far: 0.940733790397644
Total elapsed time: 01h 27m 41s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


In [17]:
tuner.results_summary()

Results summary
Results in param_optimization/first_try
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 01 summary
Hyperparameters:
conv_filters_0: 96
conv_activation_0: tanh
num_conv_layers: 3
num_dense_layers: 2
dense_units_0: 100
dense_activation_0: relu
dense_dropout_0: 0.6282929702246306
activatio_last_layer: sigmoid
learning_rate: 0.02661877777328162
conv_filters_1: 16
conv_activation_1: relu
conv_filters_2: 16
conv_activation_2: relu
dense_units_1: 50
dense_activation_1: relu
dense_dropout_1: 0.0
Score: 0.940733790397644

Trial 07 summary
Hyperparameters:
conv_filters_0: 96
conv_activation_0: tanh
num_conv_layers: 1
num_dense_layers: 3
dense_units_0: 450
dense_activation_0: relu
dense_dropout_0: 0.7524207305138727
activatio_last_layer: sigmoid
learning_rate: 0.0006059003024667506
conv_filters_1: 96
conv_activation_1: relu
conv_filters_2: 64
conv_activation_2: relu
dense_units_1: 150
dense_activation_1: tanh
dense_dropout_1: 0.687365422259995
dense_u

### After all of that we don't have a model yet but rather a set of hyper parameters. Let's query the results and create a model:

In [18]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

{'conv_filters_0': 96, 'conv_activation_0': 'tanh', 'num_conv_layers': 3, 'num_dense_layers': 2, 'dense_units_0': 100, 'dense_activation_0': 'relu', 'dense_dropout_0': 0.6282929702246306, 'activatio_last_layer': 'sigmoid', 'learning_rate': 0.02661877777328162, 'conv_filters_1': 16, 'conv_activation_1': 'relu', 'conv_filters_2': 16, 'conv_activation_2': 'relu', 'dense_units_1': 50, 'dense_activation_1': 'relu', 'dense_dropout_1': 0.0}


In [19]:
model = tuner.hypermodel.build(best_hps) #saving model 



In [21]:
location = 'data/models'
model.save(location)



INFO:tensorflow:Assets written to: data/models/assets


INFO:tensorflow:Assets written to: data/models/assets


In [None]:
# Not used: 
# Get the best model
best_model = tuner.get_best_models()[0]

# Build the model.
# Needed for `Sequential` without specified `input_shape`.
input = (624, 129, 1) 


best_model.build(input_shape=input)
best_model.summary()

# 8. Evaluate model performance 

In [27]:
# evaluate the model on a validation set
loss, accuracy = model.evaluate(np.stack(x_valid), np.stack(y_valid))

# print the evaluation results
print(f'Validation loss: {loss:.4f}')
print(f'Validation accuracy: {accuracy:.4f}')

Validation loss: 0.6930
Validation accuracy: 0.4570


In [31]:
from sklearn.metrics import accuracy_score
predictions = model.predict(np.stack(x_valid))


[[0.4972491 ]
 [0.49556306]
 [0.48432872]
 ...
 [0.49068755]
 [0.5039698 ]
 [0.49622053]]


In [35]:

y_pred = [1 if prediction > 0.5 else 0 for prediction in predictions]
accuracy = accuracy_score(np.stack(y_valid), y_pred)
accuracy



0.4524929444967074

In [36]:
from sklearn.metrics import precision_score, recall_score, f1_score
precision = precision_score(np.stack(y_valid), y_pred)
recall = recall_score(np.stack(y_valid), y_pred)
f1 = f1_score(np.stack(y_valid), y_pred)

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

Precision: 0.4118
Recall: 0.2097
F1-score: 0.2779
