In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import keras_tuner
import tensorflow as tf
import matplotlib.pyplot as plt
import os 
import numpy as np
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D
from tensorflow import keras
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split



caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ["dlopen(/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so, 0x0006): tried: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so' (no such file)"]


## Get the data 

In [None]:
# Data from the elephant listening project 
general_path = os.path.join('data', 'Clips')

# To ensure that both classes have same of samples and to increase the number of gunshots, 
# I extracted extra data from: https://data.mendeley.com/datasets/x48cwz364j/3 
background_path = os.path.join('data', 'Sounds_background')
guns_path = os.path.join('data', 'Sounds_gunshots')

gunshot_files = [os.path.join(general_path, 'pnnn*'), os.path.join(general_path, 'ecoguns*'), os.path.join(guns_path, '*\.wav')]

no_gunshot_files = [os.path.join(general_path, 'other*'), os.path.join(background_path, '*\.wav')] 
gunshot = tf.data.Dataset.list_files(gunshot_files) 
no_gunshot = tf.data.Dataset.list_files(no_gunshot_files) 

#to see how many files are in each group: 
#num_elements = tf.data.experimental.cardinality(no_gunshot).numpy()


## 1. Load data and return wave 

In [None]:
def load_data(file_name): 
    file_contents = tf.io.read_file(file_name) #retuns a string 
    wave, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1) # transforms string into actual wav
    wave = wave - tf.reduce_mean(wave) # remove the mean 
    wave = wave / tf.reduce_max(tf.abs(wave)) #normalize 
    wave = tf.squeeze(wave, axis= -1) #removes axis 
    #wave = tf.cast(wave * 32768, tf.float32) # value is scaled to look like int16, however, type is kept as float32 for compatibility issues

    return wave, sample_rate

## 2. Add labels
1: gunshot 
0: no gunshot

In [None]:
gunshot = tf.data.Dataset.zip((gunshot, tf.data.Dataset.from_tensor_slices(tf.ones(len(gunshot)))))
no_gunshot= tf.data.Dataset.zip((no_gunshot, tf.data.Dataset.from_tensor_slices(tf.zeros(len(gunshot)))))

## 3. Concatenate gunshots and no_gunshots into one data set 

In [None]:
data = gunshot.concatenate(no_gunshot)
data.as_numpy_iterator().next() # see how it looks like 

## 4. Convert data into Spectogram 
Time frequency compromise: 
https://www.tensorflow.org/tutorials/audio/simple_audio <br>
https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe 



In [None]:
def preprocess(file_path, label): 
    # Load data
    wave, sr = load_data(file_path)
    max_lenght = 80000 # = 10* 8000, this means 10 seconds 

    # Padding 
    wave = wave[:max_lenght] #grab first elements up to max(lengths)
    zero_padding = tf.zeros(max_lenght - tf.shape(wave), dtype=tf.float32) # pad with zeros what doesn't meet full length 
    wave = tf.concat([zero_padding, wave],0) 

    # Create spectogram 
    # 1. Fast fourier transform 
    spectrogram = tf.signal.stft(wave, frame_length=256, frame_step=128)  # Paper: 'Automated detection of gunshots in tropical forests using CNN' 
    # frame_length =  window length in samples
    # frame_step = number of samples to step
    # 'Time frequency compromise' 
    # if window size is small: you get good time resolution in exchange of poor frequency resolution 

    # 2. Obtain the magnitude of the STFT
    spectrogram = tf.abs(spectrogram)

    # 3. Tranform it into appropiate format for deep learning model by adding the channel dimension (in this case 1)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram, label


## 5. Shuffle the data such that not all gunshots are followed by gunshots, and similarly with no gunshots. 

In [None]:
data = data.map(preprocess) # calling preprocess method which generates spectograms
data = data.cache()
data = data.shuffle(buffer_size=1000) # mixing training samples 1000 at the time  

## 6. Extract samples and labels 

In [None]:
iterator = data.as_numpy_iterator()
x = []
y = []
while True:
    try: 
        x_temp, y_temp = iterator.next()
        x.append(x_temp)
        y.append(y_temp)
    except Exception:
        break 

x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.30, random_state=123)


## 7. Build model with hyperparameter tunning 
https://keras.io/guides/keras_tuner/getting_started/

In [2]:
def build_model(hp):
    model = keras.Sequential()

    # Tune the number of Conv layers 
    for i in range(hp.Int("num_conv_layers", 1, 3)):
        model.add(
            Sequential([
                layers.Conv2D(
                    filters=hp.Int(f"filters_{i}", min_value=16, max_value=128, step=16),
                    activation=hp.Choice(f"activation_{i}", ["relu", "tanh"]),
                    kernel_size=(3,3),
                ), 
                layers.MaxPool2D(pool_size=(2,2)),
            ])
        )

    model.add(layers.Flatten())

    # Tune the number of Dense layers and Tune whether to use dropout layer
    for i in range(hp.Int("num_dense_layers", 1, 3)):
            model.add(
                Sequential([
                    layers.Dense(
                        # Tune number of units separately.
                        units=hp.Int(f"units_{i}", min_value=50, max_value=600, step=50),
                        activation=hp.Choice(f"activation_{i}", ["relu", "tanh"]),
                    ), 
                    layers.Dropout(
                        rate=hp.Float("dropout", min_value = 0, max_value = 1)
                    )
                ]) 
            )


    model.add(
        layers.Dense(
        units=hp.Choice("units",[1,10]),
        activation=hp.Choice("activation", ["softmax", "sigmoid"])
        )
    )

    # Define the optimizer learning rate as a hyperparameter.
    # sampling="log", the step is multiplied between samples.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-1, sampling="log")
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate), 
        loss="BinaryCrossentropy", 
        metrics=["accuracy", "Recall", "Precision"],
    )
    
    return model

In [3]:
build_model(keras_tuner.HyperParameters())



Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



<keras.engine.sequential.Sequential at 0x295f64e80>

### Initialize tuner by specifying different arguments 

In [4]:
tuner = keras_tuner.BayesianOptimization(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=3, #10,  #default
    executions_per_trial=2,
    overwrite=True,
    directory="param_optimization",
    project_name="first_try",
)

#  executions_per_trial: The purpose of having multiple executions per trial is to reduce results variance and therefore 
# be able to more accurately assess the performance of a model.



### During the search, the model is called with different hyperparameters 

In [5]:
tuner.search_space_summary()

Search space summary
Default search space size: 9
num_conv_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
filters_0 (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 128, 'step': 16, 'sampling': 'linear'}
activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
num_dense_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 50, 'max_value': 600, 'step': 50, 'sampling': 'linear'}
dropout (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 1.0, 'step': None, 'sampling': 'linear'}
units (Choice)
{'default': 1, 'conditions': [], 'values': [1, 10], 'ordered': True}
activation (Choice)
{'default': 'softmax', 'conditions': [], 'values': ['softmax', 'sigmoid'], 'ordered': False}
lr (Float)
{'default': 0.0001, 'condi

In [6]:
from tensorflow import keras
import numpy as np

(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x[:-10000]
x_valid = x[-10000:]
y_train = y[:-10000]
y_valid = y[-10000:]

x_train = np.expand_dims(x_train, -1).astype("float32") / 255.0
x_valid = np.expand_dims(x_valid, -1).astype("float32") / 255.0
x_test = np.expand_dims(x_test, -1).astype("float32") / 255.0

num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

I was getting the error: "valueerror: 'logits' and 'labels' must have the same shape, received ((none, 10) vs (none, 1))."
Solution: https://stackoverflow.com/questions/48851558/tensorflow-estimator-valueerror-logits-and-labels-must-have-the-same-shape

y_train2 = np.asarray(y_train).astype('float32').reshape((-1,1)) <br>
y_valid2 = np.asarray(y_valid).astype('float32').reshape((-1,1))

This solved the issue initially, however for some reason the error dissapeared.

In [None]:
y_train = np.asarray(y_train).astype('float32').reshape((-1,1)) 
y_valid = np.asarray(y_valid).astype('float32').reshape((-1,1))

In [None]:
len(y_train)

In [7]:
tuner.search(x_train, y_train, epochs=2, validation_data=(x_valid, y_valid))

Trial 3 Complete [00h 01m 22s]
val_accuracy: 0.9561500549316406

Best val_accuracy So Far: 0.9561500549316406
Total elapsed time: 00h 01m 23s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


### Query the results: 

In [17]:
# Get the top 2 models.
print('hello')
best_model = tuner.get_best_models()[0]

# Build the model.
# Needed for `Sequential` without specified `input_shape`.
input = (624, 129, 1) 
input = (28,28,1)

best_model.build(input_shape=input)
best_model.summary()


hello




































































ValueError: Exception encountered when calling layer 'sequential_1' (type Sequential).

Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (28, 28, 1)

Call arguments received by layer 'sequential_1' (type Sequential):
  • inputs=tf.Tensor(shape=(28, 28, 1), dtype=float32)
  • training=None
  • mask=None

## TODO: Not sure where to tune for number of batches? 

## 8. Retrain the model with entire dataset 

## 8. Evaluate it's performance by doing k-Fold Cross Validation and improve the weights from the model 
https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/ <br>
https://repository.tudelft.nl/islandora/object/uuid%3A6f4f3def-f8e0-4820-8b4f-75b0254dadcd <br>
https://stackoverflow.com/questions/50997928/typeerror-only-integer-scalar-arrays-can-be-converted-to-a-scalar-index-with-1d


In [None]:
epoch = 40 
batch = 8
splits = 10
input = (624, 129, 1) 
# input matches with the size of data, which can be obtained as: samples,labels = data.as_numpy_iterator().next()

kfold = StratifiedKFold(n_splits=splits, shuffle=True, random_state=123)
acc_scores = []
histories = []
confusion_matrices = []
for train, test in kfold.split(x, y):
    # 1. Create model
    model = Sequential()

    model.add(Conv2D(filters= 32, kernel_size = (3,3), activation='relu', input_shape=input)) #matching samples.shape
    model.add(MaxPool2D(pool_size= (2,2)))

    model.add(Conv2D(filters = 16, kernel_size = (3,3), activation='relu'))
    model.add(MaxPool2D(pool_size= (2,2)))


    model.add(Flatten())

    model.add(Dense(500, activation='relu'))
    model.add(Dense(250, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # 2. Compile model
    model.compile(loss="BinaryCrossentropy", optimizer=keras.optimizers.legacy.SGD(learning_rate=0.01), metrics = ['accuracy', 'Recall', 'Precision']) 
    
    # 3. Fit the model
    x_train = np.array(x)[train.astype(int)]
    y_train = np.array(y)[train.astype(int)]
    x_test = np.array(x)[test.astype(int)]
    y_test = np.array(y)[test.astype(int)]
    
    hist = model.fit(x_train, y_train, epochs=epoch, batch_size=batch, verbose=0, validation_data = (x_test, y_test))
    
    # Save information about model 
    histories.append(hist)
    
    # Display accuracy of validation set 
    print("%s: %.2f%%" % (model.metrics_names[1], hist.history['val_accuracy'][epoch-1] *100))
    acc_scores.append(hist.history['val_accuracy'][epoch-1] * 100)

    # Store confusion matrix 
    y_pred = model.predict(x_test)
    y_pred = [1 if prediction > 0.5 else 0 for prediction in y_pred]
    confusion_mtx = tf.math.confusion_matrix(y_test, y_pred)
    confusion_matrices.append(confusion_mtx)
 
print("%.2f%% (+/- %.2f%%)" % (np.mean(acc_scores), np.std(acc_scores)))