<a href="https://colab.research.google.com/github/Tfcosendey/hungry_birds/blob/main/Full_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [216]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [217]:
!pip install tensorflow_io

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [218]:
import glob as glob
import os
import io
from sklearn.model_selection import train_test_split

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio

In [219]:
directory = '/content/drive/MyDrive/Colab Notebooks/bird_songs/data/songs/'
files = glob.glob(directory + '/**/*', recursive=False)

In [220]:
df = pd.DataFrame(files)

In [221]:
df['gen_sp'] = df[0].apply(lambda x: x.split('/')[-2])
df['id'] = df[0].apply(lambda x: x.split('/')[-1][:-4])
df = df.rename(columns={df.columns[0]: 'file'})
df = df.sample(frac=1).reset_index(drop=True)
len(df['gen_sp'].unique())

121

In [222]:
df_filtered = df[df['gen_sp'].isin(df['gen_sp'].unique()[:20])]
print(len(df_filtered))
train_df, test_df = train_test_split(df_filtered, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)

2781


In [223]:
df_filtered['gen_sp'].unique()

array(['Cichlocolaptes leucophrus', 'Tolmomyias sulphurescens',
       'Myrmoderus squamosus', 'Saltator fuliginosus',
       'Syndactyla rufosuperciliata', 'Synallaxis frontalis',
       'Rhopias gularis', 'Drymophila ferruginea', 'Turdus leucomelas',
       'Trogon rufus', 'Tyrannus melancholicus', 'Patagioenas plumbea',
       'Pyriglena leucoptera', 'Merulaxis ater', 'Vireo chivi',
       'Drymophila squamata', 'Megascops choliba', 'Turdus flavipes',
       'Todirostrum poliocephalum', 'Saltator atricollis'], dtype=object)

In [224]:
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

In [225]:
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav

In [226]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

train_filenames = train_df['file']
train_targets = le.fit_transform(train_df['gen_sp'])
train_ds = tf.data.Dataset.from_tensor_slices((train_filenames, train_targets))

val_filenames = val_df['file']
val_targets = le.transform(val_df['gen_sp'])
val_ds = tf.data.Dataset.from_tensor_slices((val_filenames, val_targets))

test_filenames = test_df['file']
test_targets = le.transform(test_df['gen_sp'])
test_ds = tf.data.Dataset.from_tensor_slices((test_filenames, test_targets))

In [227]:
def load_wav_for_map(filename, label):
  return load_wav_16k_mono(filename), label

In [228]:
train_ds = train_ds.map(load_wav_for_map)
val_ds = val_ds.map(load_wav_for_map)
test_ds = test_ds.map(load_wav_for_map)



In [229]:
def extract_embedding(wav_data, label):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = yamnet_model(wav_data)
  scores = tf.reshape(scores[:,106], [-1, 1])
  embeddings =  scores * embeddings
#  top_scores, top_indices = tf.math.top_k(scores[:,106], k=3)
#  top_embeddings = tf.gather(embeddings, top_indices)
  num_embeddings = tf.shape(embeddings)[0]
  return embeddings, tf.repeat(label, num_embeddings)


In [230]:
train_ds = train_ds.map(extract_embedding).unbatch()
val_ds = val_ds.map(extract_embedding).unbatch()
test_ds = test_ds.map(extract_embedding).unbatch()

In [231]:
train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)

In [232]:
my_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024), dtype=tf.float32,
                          name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(20, activation = 'softmax')
], name='my_model')

my_model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 512)               524800    
                                                                 
 dense_7 (Dense)             (None, 20)                10260     
                                                                 
Total params: 535,060
Trainable params: 535,060
Non-trainable params: 0
_________________________________________________________________


In [233]:
top3_acc = tf.keras.metrics.TopKCategoricalAccuracy(k=3)

In [234]:
my_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                 optimizer="adam",
                 metrics=[top3_acc, 'accuracy'])

callback = tf.keras.callbacks.EarlyStopping(monitor='val_top_k_categorical_accuracy',
                                            patience=3,
                                            restore_best_weights=True)

history = my_model.fit(train_ds,
                       epochs=20,
                       validation_data=val_ds,
                       callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [235]:
class ReduceMeanLayer(tf.keras.layers.Layer):
  def __init__(self, axis=0, **kwargs):
    super(ReduceMeanLayer, self).__init__(**kwargs)
    self.axis = axis

  def call(self, input):
    return tf.math.reduce_mean(input, axis=self.axis)

In [236]:
saved_model_path = '/content/drive/MyDrive/Colab Notebooks/bird_songs/models/yamnet_bird_1'

input_segment = tf.keras.layers.Input(shape=(), dtype=tf.float32, name='audio')
embedding_extraction_layer = hub.KerasLayer('https://tfhub.dev/google/yamnet/1',
                                            trainable=False, name='yamnet')
_, embeddings_output, _ = embedding_extraction_layer(input_segment)
serving_outputs = my_model(embeddings_output)
serving_outputs = ReduceMeanLayer(axis=0, name='classifier')(serving_outputs)
serving_model = tf.keras.Model(input_segment, serving_outputs)
serving_model.save(saved_model_path, include_optimizer=False)



In [237]:
my_model.evaluate(test_ds)



[3.0254178047180176, 0.02511276863515377, 0.1812543421983719]

In [242]:
def predict(filename):
  wav = load_wav_16k_mono(filename)
  scores, embeddings, spectrogram = yamnet_model(wav)
  scores = scores[:,106]
  scores = tf.reshape(scores, [-1, 1])
  final_scores = scores * my_model(embeddings)
  final_scores = tf.reduce_sum(final_scores, axis=0)
  row_sum = tf.reduce_sum(final_scores)
  final_scores = tf.divide(final_scores, row_sum)
  final_score = pd.DataFrame(final_scores, columns = ['Probability'])
  final_score.index = le.inverse_transform(final_score.index)
  final_score = final_score.sort_values(by = 'Probability', ascending = False).applymap(lambda x: "{:.2%}".format(x))
  return final_score.head(3).index

In [243]:
sample = test_df.sample(1)
print(sample['gen_sp'])
predict(sample['file'].values[0])

6545    Tolmomyias sulphurescens
Name: gen_sp, dtype: object


Index(['Tolmomyias sulphurescens', 'Drymophila squamata', 'Trogon rufus'], dtype='object')

In [240]:
test_df['pred'] = test_df['file'].apply(predict)

KeyboardInterrupt: ignored

In [None]:
test_df['acc'] = test_df.apply(lambda row: 1 if row['gen_sp'] in row['pred'] else 0, axis=1)

In [None]:
test_df['acc'].mean()