# 1. Import and Install Dependencies

## 1.1 Install Dependencies

In [1]:
!pip install tensorflow-io



In [2]:
!pip install tensorflow



## 1.2 Load Dependencies

In [3]:
!pip install matplotlib



In [4]:
import os
from matplotlib import pyplot as plt

In [6]:
import tensorflow as tf 
import tensorflow_io as tfio

# 2. Build Data Loading Function

## 2.1 Define Paths to Files

In [7]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
os.chdir('/content/drive/MyDrive/data')

In [None]:
# sub = os.chdir('/content/drive/MyDrive/Rajeev/data/Parsed_Capuchinbird_Clips')

In [None]:
# sub2 = os.chdir('/content/drive/MyDrive/Rajeev/data/Parsed_Not_Capuchinbird_Clips')

In [None]:
dir_list = os.listdir()
print(dir_list)

In [None]:
# sub = os.listdir()
# print(sub)

In [None]:
# sub2 = os.listdir()
# print(sub2)

In [None]:
CAPUCHIN_FILE  = os.path.join('Parsed_Capuchinbird_Clips', 'XC3776-3.wav')

In [None]:
NOT_CAPUCHIN_FILE = os.path.join('Parsed_Not_Capuchinbird_Clips', 'afternoon-birds-song-in-forest-0.wav')

In [None]:
# CAPUCHIN_FILE = os.path.join('data', 'Parsed_Capuchinbird_Clips', 'sub')
# NOT_CAPUCHIN_FILE = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips', 'sub2')

In [None]:
# CAPUCHIN_FILE = os.path.join('dir_list', 'sub', 'XC3776-3.wav')
# NOT_CAPUCHIN_FILE = os.path.join('dir_list', 'sub2', 'afternoon-birds-song-in-forest-0.wav')

## 2.2 Build Dataloading Function

In [None]:
def load_wav_16k_mono(filename):
    # Load encoded wav file
    file_contents = tf.io.read_file(filename)
    # Decode wav (tensors by channels) 
    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
    # Removes trailing axis
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    # Goes from 44100Hz to 16000hz - amplitude of the audio signal
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav

## 2.3 Plot Wave

In [None]:
!ls /content/drive/MyDrive/Rajeev/Parsed_Capuchinbird_Clips/

In [None]:
wave = load_wav_16k_mono(CAPUCHIN_FILE )

In [None]:

nwave = load_wav_16k_mono(NOT_CAPUCHIN_FILE)

In [None]:
plt.plot(wave)
plt.plot(nwave)
plt.show()

# 3. Create Tensorflow Dataset

## 3.1 Define Paths to Positive and Negative Data

In [None]:
POS = os.path.join('/content/drive/MyDrive/Rajeev/Parsed_Capuchinbird_Clips', '*.wav')
NEG = os.path.join('/content/drive/MyDrive/Rajeev/Parsed_Not_Capuchinbird_Clips', '*.wav')

In [None]:
# POS = os.path.join('/content/drive/MyDrive/Rajeev/data/Parsed_Capuchinbird_Clips', '*.wav')
# NEG = os.path.join('/content/drive/MyDrive/Rajeev/data/Parsed_Not_Capuchinbird_Clips', '*.wav')


In [None]:
# POS = os.path.join('data', 'Parsed_Capuchinbird_Clips')
# NEG = os.path.join('data', 'Parsed_Not_Capuchinbird_Clips')

## 3.2 Create Tensorflow Datasets

In [None]:
pos = tf.data.Dataset.list_files(POS)
neg = tf.data.Dataset.list_files(NEG)

In [None]:
# pos = tf.data.Dataset.list_files(POS+'\*.wav')
# neg = tf.data.Dataset.list_files(NEG+'\*.wav')

## 3.3 Add labels and Combine Positive and Negative Samples

In [None]:
positives = tf.data.Dataset.zip((pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))))
negatives = tf.data.Dataset.zip((neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))))
data = positives.concatenate(negatives)

# 4. Determine Average Length of a Capuchin Call

## 4.1 Calculate Wave Cycle Length

In [None]:
lengths = []
for file in os.listdir(os.path.join('Parsed_Capuchinbird_Clips')):
    tensor_wave = load_wav_16k_mono(os.path.join('Parsed_Capuchinbird_Clips', file))
    lengths.append(len(tensor_wave))

## 4.2 Calculate Mean, Min and Max

In [None]:
tf.math.reduce_mean(lengths)

In [None]:
tf.math.reduce_min(lengths)

In [None]:
tf.math.reduce_max(lengths)

# 5. Build Preprocessing Function to Convert to Spectrogram

## 5.1 Build Preprocessing Function

In [None]:
def preprocess(file_path, label): 
    wav = load_wav_16k_mono(file_path)
    wav = wav[:48000]
    zero_padding = tf.zeros([48000] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav],0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram, label

## 5.2 Test Out the Function and Viz the Spectrogram

In [None]:
filepath, label = positives.shuffle(buffer_size=10000).as_numpy_iterator().next()

In [None]:
spectrogram, label = preprocess(filepath, label)

In [None]:
plt.figure(figsize=(30,20))
plt.imshow(tf.transpose(spectrogram)[0])
plt.show()

# 6. Create Training and Testing Partitions

## 6.1 Create a Tensorflow Data Pipeline

In [None]:
data = data.map(preprocess)
data = data.cache()
data = data.shuffle(buffer_size=1000)
data = data.batch(16)#reducing the batch size to 4 first and if it works then to 8
data = data.prefetch(8)

## 6.2 Split into Training and Testing Partitions

In [None]:
train = data.take(36)
test = data.skip(36).take(15)

## 6.3 Test One Batch

In [None]:
samples, labels = train.as_numpy_iterator().next()

In [None]:
samples.shape

# 7. Build Deep Learning Model

## 7.1 Load Tensorflow Dependencies

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten

## 7.2 Build Sequential Model, Compile and View Summary

In [None]:
model = Sequential()
model.add(Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257,1)))
model.add(Conv2D(16, (3,3), activation='relu')) #reduced the complexity of the code, need more RAM to run it!
model.add(Flatten())
model.add(Dense(32, activation='relu')) #reduced the CNN tree density from 128 to 32
model.add(Dense(1, activation='sigmoid'))

In [None]:
model.compile('Adam', loss='BinaryCrossentropy', metrics=[tf.keras.metrics.Recall(),tf.keras.metrics.Precision()])

In [None]:
model.summary()

## 7.3 Fit Model, View Loss and KPI Plots

In [None]:
hist = model.fit(train, epochs=4, validation_data=test)

In [None]:
plt.title('Loss')
plt.plot(hist.history['loss'], 'r')
plt.plot(hist.history['val_loss'], 'b')
plt.show()

In [None]:
plt.title('Precision')
plt.plot(hist.history['precision'], 'r')
plt.plot(hist.history['val_precision'], 'b')
plt.show()

In [None]:
plt.title('Recall')
plt.plot(hist.history['recall'], 'r')
plt.plot(hist.history['val_recall'], 'b')
plt.show()

# 8. Make a Prediction on a Single Clip

## 8.1 Get One Batch and Make a Prediction

In [None]:
X_test, y_test = test.as_numpy_iterator().next()

In [None]:
yhat = model.predict(X_test)

## 8.2 Convert Logits to Classes 

In [None]:
yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]

# 9. Build Forest Parsing Functions

## 9.1 Load up MP3s

In [None]:
def load_mp3_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    res = tfio.audio.AudioIOTensor(filename)
    # Convert to tensor and combine channels 
    tensor = res.to_tensor()
    tensor = tf.math.reduce_sum(tensor, axis=1) / 2 
    # Extract sample rate and cast
    sample_rate = res.rate
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    # Resample to 16 kHz
    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)
    return wav

# def load_mp3_16k_mono(filename):
#     """ Load an MP3 file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
#     file_contents = tf.io.read_file(filename)
#     # Decode MP3 (tensors by channels) 
#     mp3, _ = tf.audio.decode_mp3(file_contents)
#     # Convert to float and combine channels 
#     mp3 = tf.cast(mp3, dtype=tf.float32) / 32768.0
#     mp3 = (mp3[:, 0] + mp3[:, 1]) / 2
#     # Resample to 16 kHz
#     wav = tfio.audio.resample(mp3, rate_in=44100, rate_out=16000)
#     return wav








In [None]:
mp3 = os.path.join('Forest Recordings', 'recording_00.mp3')

In [None]:
wav = load_mp3_16k_mono(mp3)

In [None]:
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)

In [None]:
samples, index = audio_slices.as_numpy_iterator().next()

## 9.2 Build Function to Convert Clips into Windowed Spectrograms

In [None]:
def preprocess_mp3(sample, index):
    sample = sample[0]
    zero_padding = tf.zeros([48000] - tf.shape(sample), dtype=tf.float32)
    wav = tf.concat([zero_padding, sample],0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram

## 9.3 Convert Longer Clips into Windows and Make Predictions

In [None]:
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=16000, sequence_stride=16000, batch_size=1)
audio_slices = audio_slices.map(preprocess_mp3)
audio_slices = audio_slices.batch(64)

In [None]:
yhat = model.predict(audio_slices)
yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]

## 9.4 Group Consecutive Detections

In [None]:
from itertools import groupby

In [None]:
yhat = [key for key, group in groupby(yhat)]
calls = tf.math.reduce_sum(yhat).numpy()

In [None]:
calls

# 10. Make Predictions

## 10.1 Loop over all recordings and make predictions

In [None]:
results = {}
for file in os.listdir(os.path.join('Forest Recordings')):
    FILEPATH = os.path.join('Forest Recordings', file)
    
    wav = load_mp3_16k_mono(FILEPATH)
    audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
    audio_slices = audio_slices.map(preprocess_mp3)
    audio_slices = audio_slices.batch(64)
    
    yhat = model.predict(audio_slices)
    
    results[file] = yhat

In [None]:
results

## 10.2 Convert Predictions into Classes

In [None]:
class_preds = {}
for file, logits in results.items():
    class_preds[file] = [1 if prediction > 0.99 else 0 for prediction in logits]
class_preds

## 10.3 Group Consecutive Detections

In [None]:
postprocessed = {}
for file, scores in class_preds.items():
    postprocessed[file] = tf.math.reduce_sum([key for key, group in groupby(scores)]).numpy()
postprocessed

# 11. Export Results

In [None]:
import csv

In [None]:
with open('results_with_bird_sound.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['recording', 'capuchin_calls'])
    for key, value in postprocessed.items():
        writer.writerow([key, value])