<a href="https://colab.research.google.com/github/KuebikoSystems/birddataset/blob/main/Model_test_Spain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip uninstall tensorflow
!pip uninstall tensorflow-io
!pip install tensorflow-gpu
!pip install --no-deps tensorflow-io

Found existing installation: tensorflow 2.8.2+zzzcolab20220527125636
Uninstalling tensorflow-2.8.2+zzzcolab20220527125636:
  Would remove:
    /usr/local/bin/estimator_ckpt_converter
    /usr/local/bin/import_pb_to_tensorboard
    /usr/local/bin/saved_model_cli
    /usr/local/bin/tensorboard
    /usr/local/bin/tf_upgrade_v2
    /usr/local/bin/tflite_convert
    /usr/local/bin/toco
    /usr/local/bin/toco_from_protos
    /usr/local/lib/python3.7/dist-packages/tensorflow-2.8.2+zzzcolab20220527125636.dist-info/*
    /usr/local/lib/python3.7/dist-packages/tensorflow/*
Proceed (y/n)? y
  Successfully uninstalled tensorflow-2.8.2+zzzcolab20220527125636
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-gpu
  Downloading tensorflow_gpu-2.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (511.7 MB)
[K     |████████████████████████████████| 511.7 MB 5.5 kB/s 
Collecting tensorboard<2.10,>=2.9
  Downloading tensor

In [2]:
!pip install tensorflow-io 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import os
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow_io as tfio

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

# Set the seed value for experiment reproducibility.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

In [4]:
! git clone -b SpainTest "https://github.com/KuebikoSystems/birddataset.git"

Cloning into 'birddataset'...
remote: Enumerating objects: 290, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (43/43), done.[K
remote: Total 290 (delta 20), reused 0 (delta 0), pack-reused 247[K
Receiving objects: 100% (290/290), 222.18 MiB | 29.00 MiB/s, done.
Resolving deltas: 100% (24/24), done.
Checking out files: 100% (246/246), done.


In [5]:
DATASET_PATH = '/content/birddataset/Species'

data_dir = pathlib.Path(DATASET_PATH)
if not data_dir.exists():
  tf.keras.utils.get_file(
      'mini_speech_commands.zip',
      origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
      extract=True,
      cache_dir='.', cache_subdir='data')

In [6]:
commands = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = commands[commands != 'README.md']
print('Data Classes:', commands)

Data Classes: ['Red-rumped Swallow - Spain' 'Common Rock Thrush - Global'
 'Common Rock Thursh' 'European Crested Tit - Spain' 'Crested Tit'
 'Red-rumped Swallow']


In [7]:
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
print('Number of total examples:', num_samples)
print('Number of examples per label:',
      len(tf.io.gfile.listdir(str(data_dir/commands[0]))))
print('Example file tensor:', filenames[0])

Number of total examples: 245
Number of examples per label: 46
Example file tensor: tf.Tensor(b'/content/birddataset/Species/Red-rumped Swallow - Spain/XC278139-cecropis daurica el acebuche 26-05-2015.mp3', shape=(), dtype=string)


In [8]:
# Metrics of the dataset 

train_files = filenames[:6400]
val_files = filenames[6400: 6400 + 800]
test_files = filenames[-800:]

print('Training set size', len(train_files))
print('Validation set size', len(val_files))
print('Test set size', len(test_files))

Training set size 245
Validation set size 0
Test set size 245


In [9]:
# Test Files in the dataset

test_file = tf.io.read_file(DATASET_PATH+'/Common Rock Thrush - Global/XC653283-20210530_090841.wav') #pick any files for testing
test_audio, _ = tf.audio.decode_wav(test_file)
test_audio.shape

NotFoundError: ignored

In [10]:
def decode_audio(audio_binary):
  # Decode WAV audio files to float 33 tensors , normanlized

  audio,_ = tf.audio.decode_wav(contents = audio_binary)
  #since all the data is single channel(mono), drop the channels.
  return tf.squeeze(audio,axis=-1)

In [14]:
def get_label(file_path):
  parts = tf.strings.split(
      input =file_path,
      sep = os.path.sep)
  return parts[-2]

In [12]:
def get_waveform_and_label(filepath):
  label = get_label(filepath)
  audio_binary = tf.io.read_file(filepath)
  waveform = decode_audio(audio_binary)
  return     waveform, label

In [15]:
AUTOTUNE = tf.data.AUTOTUNE
files_ds = tf.data.Dataset.from_tensor_slices(train_files)

waveform_ds = files_ds.map(
    map_func=get_waveform_and_label,
    num_parallel_calls=AUTOTUNE)

In [None]:
rows = 3
cols = 1
n = rows * cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))

for i, (audio, label) in enumerate(waveform_ds.take(n)):
  r = i // cols
  c = i % cols
  ax = axes[r][c]
  ax.plot(audio.numpy())
  ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
  label = label.numpy().decode('utf-8')
  ax.set_title(label)

plt.show()