In [1]:
import tensorflow as tf
import os
import numpy as np

from helper_functions import (
    create_spectrogram_features,
    lite_model_from_file_predicts_dataset,
    get_file_size, 
    convert_bytes,
    evaluate_prediction
)

from sklearn.metrics import confusion_matrix



In [2]:
# Audio preprocessing settings shared by the evaluation cells below.
# 48000 matches the "sr_48000" suffix carried by every model file name.
sample_rate = 48_000
# Equals 3 * sample_rate; presumably a clip length in samples — TODO confirm.
desired_length_of_audio = 144_000

### CNN

##### CNN initial model in Keras format

In [3]:
# Original (unconverted) CNN model stored in Keras format.
cnn_initial_model_path = 'spectrogram_models_to_test_with_RIOT_ML/cnn_spectrogram_sr_48000.keras'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
cnn_initial_model_file_size = get_file_size(cnn_initial_model_path)
convert_bytes(cnn_initial_model_file_size, "KB")

File size: 444.997 Kilobytes


In [4]:
# Evaluate the original Keras CNN model on every recording found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error inside the model call.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

cnn_initial_model = tf.keras.models.load_model(cnn_initial_model_path)
# The model expects a single numpy array, not a list of per-file features.
y_pred_prob = cnn_initial_model.predict(np.array(x_data), verbose=0)
# The predicted class is the index of the highest score in each output row.
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data, y_pred)

Accuracy: 99.43%
Recall: 99.41%
Precision: 98.91%
F1-score: 99.13%


##### CNN TensorFlow Lite model (without any additional quantization techniques)

In [5]:
# CNN model converted to the .tflite format.
cnn_tflite_model_path = 'spectrogram_models_to_test_with_RIOT_ML/cnn_spectrogram_sr_48000.tflite'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
cnn_tflite_model_file_size = get_file_size(cnn_tflite_model_path)
convert_bytes(cnn_tflite_model_file_size, "KB")

File size: 141.172 Kilobytes


In [6]:
# Evaluate the CNN .tflite model on every recording found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error further down.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

# Judging by the recorded output, the helper also prints accuracy, recall,
# precision and F1-score itself — TODO confirm in helper_functions.
y_pred = lite_model_from_file_predicts_dataset(cnn_tflite_model_path, x_data, y_data)
if all(x == y for x, y in zip(y_data, y_pred)):
    print("All data points were predicted correctly!")
else:
    print("Prediction was not correct for some points.")

# labels=[0, 1] forces a 2x2 matrix even when only one class occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_data, y_pred, labels=[0, 1]).ravel()
# Specificity is undefined without negative samples; report nan in that case.
negatives = tn + fp
specificity = tn / negatives if negatives else float("nan")
print(f'Specificity: {specificity * 100:.2f}%')

Accuracy: 99.43%
Recall: 99.41%
Precision: 98.91%
F1-score: 99.13%
Prediction was not correct for some points.
Specificity: 99.47%


##### CNN TensorFlow Lite model + post-training dynamic range quantization

In [7]:
# CNN .tflite model produced with dynamic range quantization.
cnn_tflite_drq_model_path = 'spectrogram_models_to_test_with_RIOT_ML/cnn_spectrogram_sr_48000_dynamic_range_quantization.tflite'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
cnn_tflite_drq_model_file_size = get_file_size(cnn_tflite_drq_model_path)
convert_bytes(cnn_tflite_drq_model_file_size, "KB")

File size: 39.469 Kilobytes


In [8]:
# Evaluate the dynamic-range-quantized CNN .tflite model on every recording
# found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error further down.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

# Judging by the recorded output, the helper also prints accuracy, recall,
# precision and F1-score itself — TODO confirm in helper_functions.
y_pred = lite_model_from_file_predicts_dataset(cnn_tflite_drq_model_path, x_data, y_data)
if all(x == y for x, y in zip(y_data, y_pred)):
    print("All data points were predicted correctly!")
else:
    print("Prediction was not correct for some points.")

# labels=[0, 1] forces a 2x2 matrix even when only one class occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_data, y_pred, labels=[0, 1]).ravel()
# Specificity is undefined without negative samples; report nan in that case.
negatives = tn + fp
specificity = tn / negatives if negatives else float("nan")
print(f'Specificity: {specificity * 100:.2f}%')

Accuracy: 99.43%
Recall: 99.41%
Precision: 98.91%
F1-score: 99.13%
Prediction was not correct for some points.
Specificity: 99.47%


### SqueezeNet

##### SqueezeNet initial model in Keras format

In [14]:
# Original (unconverted) SqueezeNet model stored in Keras format.
squeezenet_initial_model_path = 'spectrogram_models_to_test_with_RIOT_ML/squeezenet_spectrogram_sr_48000.keras'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
squeezenet_initial_model_file_size = get_file_size(squeezenet_initial_model_path)
convert_bytes(squeezenet_initial_model_file_size, "KB")

File size: 8688.45 Kilobytes


In [15]:
# Evaluate the original Keras SqueezeNet model on every recording found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error inside the model call.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

squeezenet_initial_model = tf.keras.models.load_model(squeezenet_initial_model_path)
# The model expects a single numpy array, not a list of per-file features.
y_pred_prob = squeezenet_initial_model.predict(np.array(x_data), verbose=0)
# The predicted class is the index of the highest score in each output row.
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data, y_pred)

Accuracy: 98.06%
Recall: 98.56%
Precision: 94.43%
F1-score: 97.14%


##### SqueezeNet TensorFlow Lite model (without any additional quantization techniques)

In [18]:
# SqueezeNet model converted to the .tflite format.
squeezenet_tflite_model_path = 'spectrogram_models_to_test_with_RIOT_ML/squeezenet_spectrogram_sr_48000.tflite'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
squeezenet_tflite_model_file_size = get_file_size(squeezenet_tflite_model_path)
convert_bytes(squeezenet_tflite_model_file_size, "KB")

File size: 2855.73 Kilobytes


In [19]:
# Evaluate the SqueezeNet .tflite model on every recording found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error further down.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

# Judging by the recorded output, the helper also prints accuracy, recall,
# precision and F1-score itself — TODO confirm in helper_functions.
y_pred = lite_model_from_file_predicts_dataset(squeezenet_tflite_model_path, x_data, y_data)
if all(x == y for x, y in zip(y_data, y_pred)):
    print("All data points were predicted correctly!")
else:
    print("Prediction was not correct for some points.")

# labels=[0, 1] forces a 2x2 matrix even when only one class occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_data, y_pred, labels=[0, 1]).ravel()
# Specificity is undefined without negative samples; report nan in that case.
negatives = tn + fp
specificity = tn / negatives if negatives else float("nan")
print(f'Specificity: {specificity * 100:.2f}%')

Accuracy: 98.06%
Recall: 98.56%
Precision: 94.43%
F1-score: 97.14%
Prediction was not correct for some points.
Specificity: 97.11%


##### SqueezeNet TensorFlow Lite model + post-training dynamic range quantization

In [13]:
# SqueezeNet .tflite model produced with dynamic range quantization.
squeezenet_tflite_drq_model_path = 'spectrogram_models_to_test_with_RIOT_ML/squeezenet_spectrogram_sr_48000_dynamic_range_quantization.tflite'

# Look up the file size first, then hand it to the formatter with the "KB" unit.
squeezenet_tflite_drq_model_file_size = get_file_size(squeezenet_tflite_drq_model_path)
convert_bytes(squeezenet_tflite_drq_model_file_size, "KB")

File size: 769.844 Kilobytes


In [13]:
# Evaluate the dynamic-range-quantized SqueezeNet .tflite model on every
# recording found below `directory`.
# NOTE(review): absolute, machine-specific path; adjust it to your local copy of the dataset.
directory = 'C:/Users/polin/Bird_song_detection/dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        # Infer the label only from the part of the path below `directory`,
        # so the location of the dataset on disk can never influence it.
        relative_file_name = os.path.relpath(full_file_name, directory)
        # "non_target" must be tested first: it contains "target" as a substring.
        if "non_target" in relative_file_name:
            class_encoded = 0
        elif "target" in relative_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised for the very first file).
            raise ValueError(
                f"Cannot infer the class of '{full_file_name}': expected "
                f"'target' or 'non_target' in its path below '{directory}'."
            )

        # `sr` (the sample rate stored in the file) is not used afterwards.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        # Prepare the spectrogram feature from the audio, using the notebook-wide
        # `sample_rate` setting instead of repeating the literal value.
        spectrogram_feature = create_spectrogram_features(audio, desired_length_of_audio, sample_rate=sample_rate)

        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail with a clear
# message here instead of an obscure error further down.
if not x_data:
    raise FileNotFoundError(f"No audio files found under '{directory}'.")

# Judging by the recorded output, the helper also prints accuracy, recall,
# precision and F1-score itself — TODO confirm in helper_functions.
y_pred = lite_model_from_file_predicts_dataset(squeezenet_tflite_drq_model_path, x_data, y_data)
if all(x == y for x, y in zip(y_data, y_pred)):
    print("All data points were predicted correctly!")
else:
    print("Prediction was not correct for some points.")

# labels=[0, 1] forces a 2x2 matrix even when only one class occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_data, y_pred, labels=[0, 1]).ravel()
# Specificity is undefined without negative samples; report nan in that case.
negatives = tn + fp
specificity = tn / negatives if negatives else float("nan")
print(f'Specificity: {specificity * 100:.2f}%')

Accuracy: 98.06%
Recall: 98.56%
Precision: 94.43%
F1-score: 97.14%
Prediction was not correct for some points.
Specificity: 97.11%
