# **1. Prepare Data**

## **1.1 Mount Google Drive**

In [1]:
# Attach Google Drive so files stored under /content/Drive/ can be read and written
from google.colab import drive

drive.mount(
    "/content/Drive/",
    force_remount=True,
)

Mounted at /content/Drive/


## **1.2 Basic Imports**

In [2]:
# Basic python imports
import os
import shutil
!pip install tabulate --quiet
from tabulate import tabulate

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

## **1.3 Extract the Audio Data**

In [3]:
# Location of the zipped dataset on the mounted Drive
dataset_path = os.path.normpath(
    "/content/Drive/MyDrive/Deep-Audio-Classification/archive.zip"
)

In [4]:
# Unpack the archive only once: skip when /content/ already has a "data" entry
data_folder_missing = "data" not in os.listdir("/content/")
if data_folder_missing:
    # Create the target folder, then extract the zipped dataset into it
    os.makedirs("/content/data/")
    shutil.unpack_archive(dataset_path, "/content/data/")

## **1.4 View the Audio Composition**

In [5]:
# Base path to dataset
dataset_path = os.path.normpath("/content/data/")

# Table headers
table_headers = ["Folder", "No. of Audio Files"]

# One row per sub-folder: [folder path, number of entries inside it].
# Plain files sitting next to the sub-folders are skipped, because calling
# os.listdir() on a file raises NotADirectoryError.
table_data = []
for folder in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder)
    if not os.path.isdir(folder_path):
        continue
    table_data.append([folder_path, len(os.listdir(folder_path))])

# Print the table
print(tabulate(table_data, table_headers, tablefmt="grid"))

+---------------------------------------------+----------------------+
| Folder                                      |   No. of Audio Files |
| /content/data/Parsed_Not_Capuchinbird_Clips |                  593 |
+---------------------------------------------+----------------------+
| /content/data/Forest Recordings             |                  100 |
+---------------------------------------------+----------------------+
| /content/data/Parsed_Capuchinbird_Clips     |                  217 |
+---------------------------------------------+----------------------+


# **2. Install and Import Dependencies**

## **2.1 Install Dependencies**

In [6]:
# Install packages
!pip uninstall tensorflow --quiet --yes
!pip uninstall tensorflow-io --quiet --yes

!pip install pip install tensorflow==2.10.0 tensorflow-io==0.27.0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m578.0/578.0 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m25.0/25.0 MB[0m [31m60.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m69.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.7/438.7 kB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m96.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

## **2.2 Load Dependencies**

In [7]:
# Import required packages
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio

# **3. Load Model**

## **3.1 Load Pretrained Model**

In [8]:
# Restore the saved Keras model from its .h5 file on Drive
model_path = "/content/Drive/MyDrive/Deep-Audio-Classification/model_05-0.90.h5"
model = tf.keras.models.load_model(model_path)

# Show the layer-by-layer architecture
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 1739, 255, 32)     320       
                                                                 
 batch_normalization (BatchN  (None, 1739, 255, 32)    128       
 ormalization)                                                   
                                                                 
 spatial_dropout2d (SpatialD  (None, 1739, 255, 32)    0         
 ropout2D)                                                       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 869, 127, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 867, 125, 64)      18496     
                                                        

# **4. Build Forest Parsing Functions**

## **4.1 Load Up MP3s**

In [9]:
# Function to load the MP3 audio files
def load_mp3_16k_mono(filename):
    """Load an audio file and return it as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path to the audio file, handed to `tfio.audio.AudioIOTensor`.

    Returns:
        1-D float32 tensor with the channel-averaged samples at 16 kHz.
    """
    # Open the audio file
    res = tfio.audio.AudioIOTensor(filename)

    # Work in float32 so the channel average is not truncated for integer
    # sample formats
    audio = tf.cast(res.to_tensor(), dtype=tf.float32)

    # Average over the channel axis. Dividing a channel sum by a fixed 2 is
    # only right for stereo input and would halve mono recordings; the mean
    # is correct for any channel count and identical for two channels.
    tensor = tf.math.reduce_mean(audio, axis=1)

    # Extract sample rate and cast to the integer type used for resampling
    sample_rate = tf.cast(res.rate, dtype=tf.int64)

    # Resample to 16 kHz
    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)

    # Return the wav
    return wav

In [10]:
# Decode one forest recording as a quick check of the loader
sample_recording = "/content/data/Forest Recordings/recording_00.mp3"
wav = load_mp3_16k_mono(sample_recording)

# Display the resulting waveform tensor
wav

<tf.Tensor: shape=(2880666,), dtype=float32, numpy=
array([ 8.1433272e-12, -5.7019250e-12, -5.3486417e-12, ...,
       -1.1291276e-02, -1.4230422e-02, -3.0555837e-03], dtype=float32)>

## **4.2 Slice the Sample Audio**

In [11]:
# Cut the waveform into back-to-back windows of 56000 samples, one window per batch
audio_slices = tf.keras.utils.timeseries_dataset_from_array(
    data=wav,
    targets=wav,
    sequence_length=56000,
    sequence_stride=56000,
    batch_size=1,
)

# Pull the first (window, target) pair from the slices
sample, idx = next(audio_slices.as_numpy_iterator())

In [12]:
# Show the shape of the first slice together with the number of slices
# produced for this recording
sample.shape, len(audio_slices)

((1, 56000), 51)

## **4.3 Build Function to Convert Clips into Windowed Spectrograms**

In [13]:
# Function to preprocess and get the spectrogram
def preprocess_mp3(sample, idx):
    """Turn one sliced audio window into a magnitude spectrogram.

    Args:
        sample: Batched window; only its first element (`sample[0]`) is used.
        idx: Second element of the dataset pair; accepted so the function can
            be passed to `Dataset.map`, but not used.

    Returns:
        The absolute STFT of the window (frame_length=320, frame_step=32)
        with an extra axis appended at position 2.
    """
    # Take the single window out of its batch
    clip = sample[0]

    # Zeros that bring the clip up to 56000 samples
    pad_length = [56000] - tf.shape(clip)
    leading_zeros = tf.zeros(pad_length, dtype=tf.float32)

    # Zeros go in front of the audio
    wav = tf.concat([leading_zeros, clip], 0)

    # Short-time Fourier transform, keeping only the magnitude
    stft = tf.signal.stft(wav, frame_length=320, frame_step=32)
    magnitude = tf.abs(stft)

    # Append the extra axis and hand back the spectrogram
    return tf.expand_dims(magnitude, axis=2)

## **4.4 Convert Longer Clips into Windows**

In [14]:
# Slice the waveform into 56000-sample windows, convert each window to a
# spectrogram, then group the spectrograms into batches of 64
audio_slices = (
    tf.keras.utils.timeseries_dataset_from_array(
        wav,
        wav,
        sequence_length=56000,
        sequence_stride=56000,
        batch_size=1,
    )
    .map(preprocess_mp3)
    .batch(64)
)

# **5. Prediction on Sample**

## **5.1 Make Predictions on Sample**

In [15]:
# Imports
import numpy as np

# Run the loaded model over the batched spectrogram windows of the sample
# recording; the result is rounded to labels in the next cell
pred_labels = model.predict(audio_slices)



In [16]:
# Flatten the predictions to 1-D and round each score
pred_labels = pred_labels.flatten().round()

# Stricter alternative: only treat scores above 0.8 as detections
# pred_labels = np.where(pred_labels.flatten() > 0.8, 1, 0)

In [17]:
# Show how many labels were produced, followed by the labels themselves
len(pred_labels), pred_labels

(51,
 array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       dtype=float32))

In [18]:
# Sum the rounded labels: each window labelled 1 counts as one detection of
# the bird sound (consecutive detections are not merged yet)
np.sum(pred_labels)

5.0

## **5.2 Group Consecutive Detections**

In [19]:
# Imports
from itertools import groupby

# Collapse every run of identical consecutive labels into a single entry
pred_labels = [label for label, _run in groupby(pred_labels)]

# Show the collapsed sequence
pred_labels

[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]

In [20]:
# Sum the grouped labels: every remaining 1 stands for one run of
# consecutive detections, so the total is the final number of calls
np.sum(pred_labels)

5.0

# **6. Prediction on All Forest Recordings**

## **6.1 Get the Number of Calls in Each Recording**

In [21]:
# Import: tqdm.auto selects the notebook progress bar when one is available
# (the older `tqdm_notebook` alias is deprecated)
from tqdm.auto import tqdm

# Folder holding the forest recordings
recordings_dir = "/content/data/Forest Recordings"

# List to store one (file name, number of calls) tuple per recording
results = []

# Traverse over the folder for files
for file in tqdm(os.listdir(recordings_dir)):
    # Get the path to file
    file_path = os.path.join(recordings_dir, file)

    # Get the wave for the file
    wav = load_mp3_16k_mono(file_path)

    # Slice into 56000-sample windows, convert each window to a spectrogram
    # and batch the spectrograms
    audio_slices = tf.keras.utils.timeseries_dataset_from_array(
        wav, wav,
        sequence_length=56000,
        sequence_stride=56000,
        batch_size=1
    ).map(preprocess_mp3).batch(64)

    # Get the predictions (verbose=0 stops Keras from printing a progress bar
    # for every recording, which would bury the tqdm bar)
    pred_labels = model.predict(audio_slices, verbose=0)

    # Round the predictions
    pred_labels = np.round(pred_labels.flatten())

    # Group consecutive calls so a run of detections counts once
    pred_labels = [key for key, _ in groupby(pred_labels)]

    # Get the sum to get the final number of calls
    total_calls = np.sum(pred_labels)

    # Store the result for this recording
    results.append((file, total_calls))

  0%|          | 0/100 [00:00<?, ?it/s]











## **6.3 Convert the List to Pandas DataFrame**

In [24]:
# Imports
import pandas as pd

# Build a two-column table from the collected (file name, call count) tuples
result_columns = ["recording", "capuchin_calls"]
result_df = pd.DataFrame(data=results, columns=result_columns)

In [25]:
# Display the per-recording call counts (rows are still in the order the
# files were listed)
result_df

Unnamed: 0,recording,capuchin_calls
0,recording_17.mp3,3.0
1,recording_55.mp3,0.0
2,recording_74.mp3,3.0
3,recording_66.mp3,0.0
4,recording_79.mp3,0.0
...,...,...
95,recording_61.mp3,2.0
96,recording_50.mp3,0.0
97,recording_38.mp3,1.0
98,recording_58.mp3,0.0


In [27]:
# Order the rows by file name and renumber the index from zero
result_df = (
    result_df
    .sort_values(by="recording")
    .reset_index(drop=True)
)

# Show the sorted table
result_df

Unnamed: 0,recording,capuchin_calls
0,recording_00.mp3,5.0
1,recording_01.mp3,0.0
2,recording_02.mp3,0.0
3,recording_03.mp3,0.0
4,recording_04.mp3,4.0
...,...,...
95,recording_95.mp3,4.0
96,recording_96.mp3,1.0
97,recording_97.mp3,3.0
98,recording_98.mp3,20.0


In [28]:
# Write the results to a CSV file in the current working directory,
# leaving out the DataFrame index column
result_df.to_csv("capuchinbird_results.csv", index=False)

In [29]:
# Copy the CSV into the project folder on the mounted Drive;
# shutil.copy returns the destination path, which the cell displays
shutil.copy("capuchinbird_results.csv", "/content/Drive/MyDrive/Deep-Audio-Classification/")

'/content/Drive/MyDrive/Deep-Audio-Classification/capuchinbird_results.csv'