In [None]:
# Mount Google Drive at /content/drive (Colab-specific helper).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the Kaggle API token from the working directory into ~/.kaggle and
# restrict it to the current user. `-p` keeps the cell re-runnable: plain
# `mkdir` fails once the directory already exists.
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the birdclef-2022 competition archive into /content/
# (the recorded output shows a ~6 GB zip).
! kaggle competitions download -c birdclef-2022 -p /content/

Downloading birdclef-2022.zip to /content
100% 6.11G/6.12G [00:49<00:00, 129MB/s]
100% 6.12G/6.12G [00:49<00:00, 132MB/s]


In [None]:
# NOTE(review): this local directory is created but the archive is extracted
# to Google Drive below, not here - confirm whether it is still needed.
# `-p` keeps the cell re-runnable (plain `mkdir` fails if the directory exists).
! mkdir -p /content/birdclef-2022
# -q: do not list every extracted file (the listing was being truncated at
#     5000 lines anyway); -n: never overwrite files that already exist, so a
#     re-run neither prompts nor re-extracts.
! unzip -q -n /content/birdclef-2022.zip -d /content/drive/MyDrive/birdclef-2022

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608252.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608253.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608254.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608255.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608256.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608257.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608258.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608259.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608447.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608450.ogg  
  inflating: /content/drive/MyDrive/birdclef-2022/train_audio/normoc/XC608452.ogg  
  inflating

In [None]:
# Standard library
import mimetypes
from pathlib import Path

# Third-party
import IPython.display as ipd
import librosa as lb
import librosa.display as lbd
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tensorflow import keras
# force a channel ordering
# `backend` is taken from tensorflow.keras (not the standalone `keras` package)
# so that the data-format setting is applied to the same Keras implementation
# that later loads and runs the model.
from tensorflow.keras import backend

# Register the .ogg extension with the audio/ogg MIME type.
mimetypes.init()
mimetypes.add_type('audio/ogg', '.ogg')

Set the data, spectrogram, and model paths:

In [None]:
# Competition data extracted onto the mounted Google Drive.
DATA_ROOT = Path("/content/drive/MyDrive/birdclef-2022")
TRAIN_AUDIO_ROOT = DATA_ROOT / "train_audio"

# Directory holding the no-call detection CSV.
NO_CALL_ROOT = Path("/content/drive/MyDrive/no_call_detect")

# Where to save the mels images
TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/drive/MyDrive/audio_images")

# Saved classifier: file name and the directory that contains it.
MODEL_SAVE_NAME = 'BirdClef2022-ResNet50V2_model.h5'
MODEL_SAVE_ROOT = Path("/content/drive/MyDrive/model_save")


In [None]:
import json

# Read scored_birds.json from the data root and print its contents.
# The context manager closes the file handle, which the bare open() never did.
with open(DATA_ROOT/"scored_birds.json") as f:
    data = json.load(f)
print(data)

['akiapo', 'aniani', 'apapan', 'barpet', 'crehon', 'elepai', 'ercfra', 'hawama', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1', 'houfin', 'iiwi', 'jabwar', 'maupar', 'omao', 'puaioh', 'skylar', 'warwhe1', 'yefcan']


Choose the audio file to inspect

In [None]:
# How the recording to inspect is chosen. The selection cell tests the flags
# in the order byRandom, byFilePath, byFileNo and falls back to a fixed file
# when all three are False.
byRandom = False     # sample a random row of the CSV
byFilePath = False   # use the path hard-coded in the selection cell
byFileNo = True      # use the file_no-th row of the species hard-coded there
file_no = 0          # row index used when byFileNo is True

In [None]:
# Load the no-call detection table and choose one recording from it.
# `filename` values are split on '/' into "<primary_label>/<file name>".
df = pd.read_csv(NO_CALL_ROOT/"nocalldetection_for_shortaudio_fold0.csv")

if byRandom:
    # Random row of the table.
    file_path = df.sample().iloc[0]['filename']
    path_parts = file_path.split('/')
    primary_label = path_parts[0]
    file_name = path_parts[1]
elif byFilePath:
    # Explicit, hard-coded recording.
    file_path = "akekee/XC174954.ogg"
    path_parts = file_path.split('/')
    primary_label = path_parts[0]
    file_name = path_parts[1]
elif byFileNo:
    print(file_no)
    primary_label = "bubsan"
    # amewig/XC384955.ogg
    # Exact comparison: str.match() interprets the label as a regular
    # expression anchored only at the start, so it would also select any
    # species whose label merely begins with `primary_label`.
    temp_rows = df.loc[df['primary_label'] == primary_label]
    if file_no >= len(temp_rows):
        raise IndexError(
            f"file_no={file_no} is out of range: only {len(temp_rows)} "
            f"recordings found for primary_label '{primary_label}'"
        )
    file_path = temp_rows.iloc[file_no]['filename']
    file_name = file_path.split('/')[1]
    # Advance so that re-running this cell moves on to the next recording.
    file_no = file_no + 1
else:
    # Fallback: fixed recording.
    primary_label = "amewig"
    file_name = 'XC593011.ogg'
    file_path = primary_label + '/' + file_name

print(file_path)
print(file_name)

0
bubsan/XC435876.ogg
XC435876.ogg


In [None]:
# Look up the row for the selected recording. Exact equality is used because
# str.match() treats `file_path` as a regular expression: '.' matches any
# character and only the start of the string is anchored, so other filenames
# could be selected by mistake.
no_call_prob = df.loc[df['filename'] == file_path]
temp_str = no_call_prob.iloc[0]['nocalldetection']

# The column holds whitespace-separated numbers; parse them into floats.
# Presumably one value per spectrogram segment (indexed as call_prob[i] later).
call_prob = [float(x) for x in temp_str.split()]
print(call_prob)

[0.3790661692619324]


In [None]:
# The saved array keeps the audio file's own name and appends ".npy"
# (e.g. "<name>.ogg.npy"); build that path once and reuse it.
mels_file = (TRAIN_AUDIO_IMAGES_SAVE_ROOT / primary_label / file_name).as_posix() + ".npy"
print(mels_file)

mels = np.load(mels_file)
print(mels.shape)

/content/drive/MyDrive/audio_images/bubsan/XC435876.ogg.npy
(1, 128, 281)


Listen to the audio and view the mel spectrogram

In [None]:
# Switch Keras to channels-first image layout before the model is loaded,
# and echo the active setting as a sanity check.
backend.set_image_data_format('channels_first')
print(backend.image_data_format())

# Load the saved classifier from MODEL_SAVE_ROOT.
model_file = MODEL_SAVE_ROOT / MODEL_SAVE_NAME
model = keras.models.load_model(model_file)

channels_first


In [None]:
def normalize(image):
    """Scale an array by 1/255 and replicate it three times along a new axis 0.

    Args:
        image: NumPy array of pixel values (presumably in the 0-255 range -
            TODO confirm against the code that produced the saved arrays).

    Returns:
        A float32 array of shape ``(3, *image.shape)`` whose three slices are
        identical copies of ``image / 255``.
    """
    scaled = image.astype("float32", copy=False)
    scaled = scaled / 255.0
    return np.stack((scaled, scaled, scaled))

In [None]:
# Embed an audio player for the selected recording.
audio_file = TRAIN_AUDIO_ROOT / primary_label / file_name
ipd.display(ipd.Audio(str(audio_file)))

In [None]:
%matplotlib inline 
# Play the recording, then run the model on each saved mel segment and plot it.
ipd.display(ipd.Audio(str(TRAIN_AUDIO_ROOT/primary_label/file_name)))
for i in range(len(mels)):
  print(mels[i].shape)
  # Normalize ONE segment, not the whole stack: normalize(mels) produced a
  # (3, n_segments, H, W) array, which model.predict() rejected with a
  # ValueError. normalize(mels[i]) gives (3, H, W); the leading np.newaxis
  # adds the batch axis that predict() expects, giving (1, 3, H, W).
  im = normalize(mels[i])[np.newaxis, ...]
  print(im.shape)
  predictions = model.predict(im)
  # print(predictions)
  fig = plt.figure()
  # The title assumes consecutive 5-second segments (start time = i * 5 s).
  fig.suptitle(f"MelSpec Start {i*5//60}m {i*5%60}s Bird: {primary_label}, File: {file_name}  Call Prob {call_prob[i]:.2f}")
  lbd.specshow(mels[i], x_axis='s', y_axis='hz')


(128, 281)
[[[  0  89 111 ...   6 127 146]
  [  0  91 113 ...  60 127 146]
  [  0  92 115 ...  72 124 143]
  ...
  [  0  71 111 ... 126 126 124]
  [  0  66 103 ... 113 113 108]
  [  0   0   0 ...   0  31  50]]]
(3, 1, 128, 281)
[[[[0.         0.34901962 0.43529412 ... 0.02352941 0.49803922
    0.57254905]
   [0.         0.35686275 0.44313726 ... 0.23529412 0.49803922
    0.57254905]
   [0.         0.36078432 0.4509804  ... 0.28235295 0.4862745
    0.56078434]
   ...
   [0.         0.2784314  0.43529412 ... 0.49411765 0.49411765
    0.4862745 ]
   [0.         0.25882354 0.40392157 ... 0.44313726 0.44313726
    0.42352942]
   [0.         0.         0.         ... 0.         0.12156863
    0.19607843]]]


 [[[0.         0.34901962 0.43529412 ... 0.02352941 0.49803922
    0.57254905]
   [0.         0.35686275 0.44313726 ... 0.23529412 0.49803922
    0.57254905]
   [0.         0.36078432 0.4509804  ... 0.28235295 0.4862745
    0.56078434]
   ...
   [0.         0.2784314  0.43529412 ... 0.49

ValueError: ignored