## Import and Install dependencies

### Install dependencies

In [None]:
! pip install -q tensorflow==2.4.1 tensorflow-gpu==2.4.1 tensorflow-io matplotlib

In [None]:
#Downgrading to tensorflow==2.7.1 and tensorflow-io==0.23.1 resolves NotImplementedError issue for tf.resample
! pip install -q tensorflow-io==0.23.1

### Load dependencies

In [None]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio

## Build data loading function

### Define paths to files

In [None]:
# One example clip per class, used to try out the loading code below.
capuchin_file = os.path.join(
    "data", "Parsed_Capuchinbird_Clips", "XC114131-0.wav"
)
not_capuchin_file = os.path.join(
    "data", "Parsed_Not_Capuchinbird_Clips", "afternoon-birds-song-in-forest-0.wav"
)

In [None]:
# Display the path of the positive sample clip
capuchin_file

### Build dataloading function

In [None]:
# Adapted from the TensorFlow documentation.
def load_wav_16k_mono(filename):
    """Load a WAV file as a single-channel waveform resampled to 16 kHz.

    Args:
        filename: path of the WAV file to read.

    Returns:
        A 1-D tensor holding the resampled waveform.
    """
    # Raw file bytes
    raw_bytes = tf.io.read_file(filename)
    # Decode to (samples, 1) plus the file's own sample rate
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis
    mono = tf.squeeze(audio, axis=-1)
    # Resample from the file's sample rate to 16000 samples per second
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )

In [None]:
# Step-by-step walkthrough of load_wav_16k_mono: read the raw bytes of the sample clip
file_contents = tf.io.read_file(capuchin_file)
file_contents

In [None]:
# Decode the WAV bytes into a single-channel tensor and its sample rate.
# Expected shape=(120000, 1): 120000 amplitude samples, one channel.
wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
wav 

In [None]:
# Drop the trailing channel axis: shape=(120000, 1) -> shape=(120000,)
wav = tf.squeeze(wav, axis=-1)
wav

In [None]:
# Cast sample_rate to int64 before passing it to the resampler.
# This is the clip's sample rate (samples per second), not an amplitude.
sample_rate = tf.cast(sample_rate, dtype=tf.int64)
sample_rate # 48000Hz

In [None]:
# Resample from 48000 Hz to 16000 Hz to reduce the size of the audio tensor.
# NOTE(review): the original note said the length went from 120000 to 4444; a 3x
# downsample of 120000 samples should give about 40000 -- confirm with len(wav) below.
wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
wav

In [None]:
# Number of samples left after resampling
len(wav)

### Plot wave

In [None]:
# Load the positive and the negative sample clip as 16 kHz mono waveforms
wave=load_wav_16k_mono(capuchin_file)
nwave=load_wav_16k_mono(not_capuchin_file)

In [None]:
# Overlay the capuchin ("wave") and non-capuchin ("nwave") waveforms.
plt.plot(wave,label="wave")
plt.plot(nwave,label="nwave")
plt.legend()
# plt.show has to be called; the bare attribute reference did nothing.
plt.show()

## Create Tensorflow Dataset

### Define paths to positive and negative dataset

In [None]:
# Directories holding the capuchin (positive) and non-capuchin (negative) clips
positive, negative = (
    os.path.join("data", "Parsed_Capuchinbird_Clips"),
    os.path.join("data", "Parsed_Not_Capuchinbird_Clips"),
)

In [None]:
# Display the negative-class directory path
negative

### Create tensorflow dataset

In [None]:
# Dataset.list_files produces the matching .wav paths as strings inside a tf.data dataset
pos = tf.data.Dataset.list_files(f"{positive}/*.wav")
neg = tf.data.Dataset.list_files(f"{negative}/*.wav")

In [None]:
# Inspect the negative dataset: the dataset object, its numpy iterator,
# and the first element that iterator yields.
print(neg)
print(neg.as_numpy_iterator())
print(neg.as_numpy_iterator().next())

### Add labels and combine positive and negative samples


In [None]:
# Number of positive clip paths in the dataset
len(pos)

In [None]:
# One label of 1.0 per positive clip
tf.ones(len(pos))

In [None]:
# Pair every file path with its class label (1.0 = capuchin, 0.0 = not capuchin),
# then append the labelled negatives after the labelled positives.
positives = tf.data.Dataset.zip(
    (pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos))))
)
negatives = tf.data.Dataset.zip(
    (neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg))))
)
data = positives.concatenate(negatives)

In [None]:
# First (path, label) pair of the negatives and of the combined dataset
print(negatives.as_numpy_iterator().next())
print(data.as_numpy_iterator().next())

In [None]:
# Peek at one (path, label) pair from a shuffled view; `data` itself is not reassigned
data.shuffle(10000).as_numpy_iterator().next()

## Determine average length of capuchin calls

### Calculate wave cycle length

In [None]:
# Load every capuchin clip as a 16 kHz mono waveform and record its length in samples
lengths = [
    len(load_wav_16k_mono(os.path.join("data", "Parsed_Capuchinbird_Clips", file)))
    for file in os.listdir(os.path.join("data", "Parsed_Capuchinbird_Clips"))
]
    

In [None]:
# os.listdir returns the name of every entry in the capuchin clips directory
clip_dir = os.path.join("data", "Parsed_Capuchinbird_Clips")
for clip_name in os.listdir(clip_dir):
    print(clip_name)

In [None]:
# First ten clip lengths (in samples at 16 kHz)
lengths[:10]

### Calculate Mean, Min, Max

In [None]:
# Mean clip length in samples
mean=tf.math.reduce_mean(lengths)
mean

In [None]:
# Longest clip length in samples
max_length=tf.math.reduce_max(lengths)
max_length

In [None]:
# Shortest clip length in samples
min_length=tf.math.reduce_min(lengths)
min_length

In [None]:
# Convert the mean clip length to seconds:
# 54156 samples (the mean computed above) at 16000 samples per second.
avg_capuchinbirds_callclips = 54156 / 16000
avg_capuchinbirds_callclips

In [None]:
# Longest (80000 samples) and shortest (32000 samples) clip, converted to seconds
# at 16000 samples per second: the clips last between 2 and 5 seconds.
max_capuchinbirds_callclips, min_capuchinbirds_callclips = 80000 / 16000, 32000 / 16000
max_capuchinbirds_callclips, min_capuchinbirds_callclips

## Build preprocessing function to convert to spectrogram

### Build Preprocessing function

In [None]:
def preprocess(filepath, label):
    """Turn one WAV clip into a fixed-size magnitude spectrogram.

    Args:
        filepath: path of the clip, handed to load_wav_16k_mono.
        label: class label; returned unchanged.

    Returns:
        (spectrogram, label), where the spectrogram carries a trailing
        channel axis (expected shape (1491, 257, 1)).
    """
    # Keep at most the first 48000 samples (3 seconds at 16 kHz)
    clip = load_wav_16k_mono(filepath)[:48000]
    # Shorter clips are padded with leading zeros up to 48000 samples
    pad = tf.zeros([48000] - tf.shape(clip), dtype=tf.float32)
    padded = tf.concat([pad, clip], 0)
    # Short-time Fourier transform, keeping only the magnitude
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    # Add the channel axis expected by the Conv2D layers
    return tf.expand_dims(magnitudes, axis=2), label
    

In [None]:
# Draw one (path, label) pair from a shuffled view of the positives
filepath,label=positives.shuffle(buffer_size=2000).as_numpy_iterator().next()

In [None]:
# Convert that clip to a spectrogram with the function under test
spectrogram,label=preprocess(filepath, label)

In [None]:
# Display the spectrogram tensor
spectrogram

In [None]:
# Render the spectrogram as an image (rows are time frames here)
plt.figure(figsize=(30,20))
plt.imshow(spectrogram)
plt.show()

In [None]:
# Transposing reverses the axes so that frequency runs along the rows;
# only the last expression is displayed by the notebook.
tf.transpose(spectrogram) # shape=(1, 257, 1491)
tf.transpose(spectrogram)[0] # shape=(257, 1491)

In [None]:
# Render the transposed spectrogram: frequency on the vertical axis, time on the horizontal
plt.figure(figsize=(30,20))
plt.imshow(tf.transpose(spectrogram)[0])
plt.show()

## Create training and testing partitions

### Create a Tensorflow Data Pipeline

In [None]:
# Pipeline order (mnemonic "mchsbap"): map, cache, shuffle, batch, prefetch.
data=data.map(preprocess)
data=data.cache()
# Shuffle once and keep that order on every pass. By default shuffle() reshuffles each
# time the dataset is iterated, so the take()/skip() split below would hand different
# samples to train and test on every epoch and leak test samples into training.
data=data.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
data=data.batch(16)
data=data.prefetch(8)

In [None]:
# Number of batches (16 samples each) in the pipeline
len(data)

### Splitting into training and testing partitions

In [None]:
# First 36 batches for training, the following 15 batches for testing
train, test = data.take(36), data.skip(36).take(15)

### Test one batch

In [None]:
# Pull a single batch from the training partition as numpy arrays
samples, labels=train.as_numpy_iterator().next()

In [None]:
# Labels of that batch
labels

In [None]:
# Shape of the batch of spectrograms
samples.shape

## Build deep learning model

### Load Tensorflow Dependencies

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten

### Build Sequential Model, Compile and View Summary

In [None]:
# Two 3x3 convolution layers over the (1491, 257, 1) spectrogram, flattened into a
# dense layer, ending in a single sigmoid unit for the binary capuchin / not-capuchin call.
model = Sequential([
    Conv2D(16, (3,3), activation="relu", input_shape=(1491,257,1)),
    Conv2D(16, (3,3), activation="relu"),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid"),
])


In [None]:
# Explicit metric names keep the hist.history keys ("recall", "precision", and their
# "val_" counterparts) stable even when this cell is re-run; with auto-generated names
# a second run yields "recall_1" / "precision_1" and breaks the plots further down.
model.compile(
    loss="BinaryCrossentropy",
    metrics=[
        tf.keras.metrics.Recall(name="recall"),
        tf.keras.metrics.Precision(name="precision"),
    ],
    optimizer="Adam",
)

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

### Fit model, view loss and KPI plots

In [None]:
# Train for 4 epochs on the train partition, validating on the test partition
hist= model.fit(train, epochs=4, validation_data=test)

In [None]:
# The object returned by model.fit
hist

In [None]:
# Per-epoch values of the loss and metrics, keyed by name
hist.history

In [None]:
# Training loss in red, validation loss in blue
plt.title("Loss")
plt.plot(hist.history["loss"],"r")
plt.plot(hist.history["val_loss"],"b")
# Fixed typo: the module is imported as plt, so `pls.show()` raised NameError
plt.show()

In [None]:
# Training precision in red, validation precision in blue
plt.title("Precision")
plt.plot(hist.history["precision"],"r")
plt.plot(hist.history["val_precision"],"b")
plt.show()

In [None]:
# Training recall in red, validation recall in blue
plt.title("Recall")
plt.plot(hist.history["recall"],"r")
plt.plot(hist.history["val_recall"],"b")
plt.show()

## Make a prediction on a single clip

### Get one batch and make a prediction

In [None]:
# Pull a single batch from the test partition as numpy arrays
X_test,y_test=test.as_numpy_iterator().next()

In [None]:
# Model outputs for that batch (one sigmoid value per sample)
yhat=model.predict(X_test)

### Convert Predicted Probabilities to Classes

In [None]:
# Threshold each sigmoid output at 0.5: above -> class 1 (capuchin), otherwise class 0
yhat=[1 if prediction >0.5 else 0 for prediction in yhat]

## Build Forest Parsing Functions

### Load up MP3

In [None]:
def load_mp3_16k_mono(filename):
    """Load an MP3 file as a single-channel float waveform resampled to 16 kHz.

    Args:
        filename: path of the MP3 file to read.

    Returns:
        A 1-D tensor holding the resampled waveform.
    """
    res=tfio.audio.AudioIOTensor(filename)
    tensor=res.to_tensor()
    # Average the channels into one; dividing by 2 assumes a two-channel
    # recording -- TODO confirm for every input file.
    tensor=tf.math.reduce_sum(tensor,axis=1)/2
    sample_rate=res.rate
    # tf.cast takes `dtype`; the previous `dtypes=` keyword raised a TypeError
    sample_rate=tf.cast(sample_rate,dtype=tf.int64)
    wav=tfio.audio.resample(tensor,rate_in=sample_rate,rate_out=16000)
    return wav

In [None]:
# Path of the first forest recording. The folder name now matches the
# "Forest Recordings" directory that the batch-prediction loop lists.
mp3=os.path.join("data", "Forest Recordings", "recording_00.mp3")

In [None]:
# Load the recording and cut it into consecutive, non-overlapping windows of
# 48000 samples (3 seconds at 16 kHz), one window per batch element.
wav=load_mp3_16k_mono(mp3)
audio_slices=tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)

In [None]:
# Take the first element of the sliced dataset to inspect it
samples,index=audio_slices.as_numpy_iterator().next()

In [None]:
def preprocess_mp3(sample, index):
    """Convert one batched audio window into a magnitude spectrogram.

    Args:
        sample: batch holding a single window; only its first element is used.
        index: second element of the dataset pair; not used.

    Returns:
        The magnitude spectrogram of the window with a trailing channel axis.
    """
    window = sample[0]
    # Windows shorter than 48000 samples are padded with leading zeros
    pad = tf.zeros([48000] - tf.shape(window), dtype=tf.float32)
    padded = tf.concat([pad, window], 0)
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    return tf.expand_dims(magnitudes, axis=2)

# Cut the recording into non-overlapping 3-second windows (48000 samples at 16 kHz).
# This is the window length the model was trained on and the one used by the other
# slicing cells; the previous value of 16000 fed the model 1-second windows padded
# with 2 seconds of zeros.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
audio_slices = audio_slices.map(preprocess_mp3)
audio_slices = audio_slices.batch(64)

# Threshold the sigmoid outputs at 0.5 to get a 0/1 class per window
yhat = model.predict(audio_slices)
yhat = [1 if prediction > 0.5 else 0 for prediction in yhat]

In [None]:
from itertools import groupby

# Collapse each run of identical consecutive predictions into a single value, so a call
# spanning several windows is counted once; the sum of what remains is the call count.
yhat = [label for label, _run in groupby(yhat)]
calls = tf.math.reduce_sum(yhat).numpy()
calls

## Make predictions

In [None]:
# Run the model over every forest recording and keep the raw outputs per file
results = {}
recordings_dir = os.path.join('data', 'Forest Recordings')
for file in os.listdir(recordings_dir):
    recording_path = os.path.join('data','Forest Recordings', file)

    # Same steps as for the single recording: load, cut into 48000-sample windows,
    # convert each window to a spectrogram, batch and predict.
    wav = load_mp3_16k_mono(recording_path)
    audio_slices = (
        tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
        .map(preprocess_mp3)
        .batch(64)
    )

    yhat = model.predict(audio_slices)
    results[file] = yhat

In [None]:
# Turn the raw model outputs of every recording into 0/1 classes, using a strict
# 0.99 threshold.
class_preds = {
    file: [1 if prediction > 0.99 else 0 for prediction in logits]
    for file, logits in results.items()
}
class_preds

In [None]:
# Per recording: collapse runs of identical consecutive classes, then sum what is left
# to get the number of detected calls.
postprocessed = {
    file: tf.math.reduce_sum([key for key, group in groupby(scores)]).numpy()
    for file, scores in class_preds.items()
}
postprocessed

## Export results

In [None]:
import csv

# Write a header plus one row per recording: file name and its counted calls
with open('results.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(['recording', 'capuchin_calls'])
    writer.writerows([key, value] for key, value in postprocessed.items())