Training Jupyter Notebook

Imports

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [20]:
# ! cp -vr /content/drive/MyDrive/audio_images/ /content/audio_images
# ! cp -vr /content/drive/MyDrive/audio_images-20220324T215740Z-001.zip /content/
! cp -vr /content/drive/MyDrive/audio_images.tar.gz /content/

'/content/drive/MyDrive/audio_images.tar.gz' -> '/content/audio_images.tar.gz'


In [None]:
! tar -czvf audio_images.tar.gz /content/audio_images
# ! unzip /content/audio_images-20220324T215740Z-001.zip


In [None]:
! du -h /content/audio_images/

Paths and Imports

In [15]:
import pandas as pd, numpy as np, gc
import librosa as lb
import librosa.display as lbd

# from kaggle_datasets import KaggleDatasets
import tensorflow as tf, re, math
import tensorflow.keras.backend as K

import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from pathlib import Path
from tqdm import tqdm

import json

In [16]:
# TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/drive/MyDrive/audio_images")
TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/audio_images")

MODEL_SAVE_ROOT = Path("/content/drive/MyDrive/model_save")

# Threshold for no-call detector
BIRD_CALL_PROB = 0.5

# No Call Label
NO_CALL = "no_call"

NUM_FOLDS = 5

Some Birds Only Have A Few Training Samples and no-call will reduce their values even more

In [None]:
# ignore these birds for no-call
NO_CALL_IGNORE = [ 'akikik', 'brnboo', 'bubsan', 'bulpet', 'coopet', 'crehon', 'ercfra', 'hawpet1', 'layalb', 'lessca', 'magpet1', 'mauala', 'pomjae', 'puaioh', 'shtsan']

Connect To TPU

In [17]:
DEVICE = "TPU" # "TPU" or "GPU"

In [18]:
# https://www.kaggle.com/code/itsuki9180/birdcall-using-tpu-train/notebook
if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except _:
            print("failed to initialize TPU")
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
    

AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

connecting to TPU...
Could not connect to TPU
Using default strategy for CPU and single GPU
Num GPUs Available:  0
REPLICAS: 1


Import Data

In [19]:
x_data = []
y_data= []

df = pd.read_csv('/content/drive/MyDrive/no_call_detect/nocalldetection_for_shortaudio_fold0.csv')
for row in tqdm(df.itertuples(False)):
    mels = np.load(str((TRAIN_AUDIO_IMAGES_SAVE_ROOT/row.filename).as_posix() + ".npy"))
    # print(mels.shape)

    # extract the calculated call probability
    temp_str = row.nocalldetection
    call_prob = [float(x) for x in temp_str.split()]

    # for each image, append each audio segment
    for i in range(len(mels)):
        x_data.append( (str((TRAIN_AUDIO_IMAGES_SAVE_ROOT/row.filename).as_posix() + ".npy"), i) )

        if (call_prob[i] >= BIRD_CALL_PROB):
            y_data.append(row.primary_label)
        else:
            y_data.append(NO_CALL)




14852it [00:43, 339.47it/s]


In [20]:
print(len(x_data))
print(len(y_data))

144843
144843


In [21]:
le = LabelEncoder()
le.classes_ = np.load(MODEL_SAVE_ROOT/"classes.npy")
y_data = le.fit_transform(y_data)
le_name_mapping = dict(zip(le.classes_.astype(str), le.transform(le.classes_)))
print(le_name_mapping)

# np.save(MODEL_SAVE_ROOT/"classes.npy", le.classes_)

{'afrsil1': 0, 'akekee': 1, 'akepa1': 2, 'akiapo': 3, 'akikik': 4, 'amewig': 5, 'aniani': 6, 'apapan': 7, 'arcter': 8, 'barpet': 9, 'bcnher': 10, 'belkin1': 11, 'bkbplo': 12, 'bknsti': 13, 'bkwpet': 14, 'blkfra': 15, 'blknod': 16, 'bongul': 17, 'brant': 18, 'brnboo': 19, 'brnnod': 20, 'brnowl': 21, 'brtcur': 22, 'bubsan': 23, 'buffle': 24, 'bulpet': 25, 'burpar': 26, 'buwtea': 27, 'cacgoo1': 28, 'calqua': 29, 'cangoo': 30, 'canvas': 31, 'caster1': 32, 'categr': 33, 'chbsan': 34, 'chemun': 35, 'chukar': 36, 'cintea': 37, 'comgal1': 38, 'commyn': 39, 'compea': 40, 'comsan': 41, 'comwax': 42, 'coopet': 43, 'crehon': 44, 'dunlin': 45, 'elepai': 46, 'ercfra': 47, 'eurwig': 48, 'fragul': 49, 'gadwal': 50, 'gamqua': 51, 'glwgul': 52, 'gnwtea': 53, 'golphe': 54, 'grbher3': 55, 'grefri': 56, 'gresca': 57, 'gryfra': 58, 'gwfgoo': 59, 'hawama': 60, 'hawcoo': 61, 'hawcre': 62, 'hawgoo': 63, 'hawhaw': 64, 'hawpet1': 65, 'hoomer': 66, 'houfin': 67, 'houspa': 68, 'hudgod': 69, 'iiwi': 70, 'incter1': 

In [22]:
for i in range(3):
    print(x_data[i])
    print(y_data[i])

('/content/audio_images/afrsil1/XC125458.ogg.npy', 0)
0
('/content/audio_images/afrsil1/XC125458.ogg.npy', 1)
0
('/content/audio_images/afrsil1/XC125458.ogg.npy', 2)
94


Split Into Train and Validation Sets

In [23]:
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.1, stratify=y_data)

ValueError: ignored

In [41]:
print(type(y_data))
counts = np.bincount(y_data)
print(counts)


y = le.inverse_transform(y_data)
print(y)
unique, counts = np.unique(y, return_counts=True)
values = dict(zip(unique, counts))
d = dict((k, v) for k, v in values.items() if v <= 20)
print(d)

<class 'numpy.ndarray'>
[   93    61   220   153    13   184    73   486  1153   103  1340   529
   847   796   110   410    54   318   439    13    26  2203    66     3
    61     3   120   172   241  1884  1774    75   979   530    26    26
   180    84   980  3210   161  2070  1626     1    20  2365   133    17
  1415   184   934  1763   203  1855    95   444    82    47   176  1410
   143    37   489    33    24     4   141  3348  7070    43   395    32
   719   278    86    39   407     2   145   265   545    19   669   370
   245    27    19  2174    34    16    50   497   538  1479 39731  5792
   160  9587   319   411   437    60   226  1509   155    46   286   217
   489  1068  1410    17     7   279   255   513   263    39   175   118
   410   116  1623   679  2226  1055    72  1211   271   310   570     3
  5158  1217    36    85    28  1062   570   576  1267    49   524  4112
   196    45   157    91    45   931    66   448   644]
['afrsil1' 'afrsil1' 'no_call' ... 'zebdov' 

In [None]:
def normalize(image):
        image = image.astype("float32", copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

In [None]:
class CustomDataGen(tf.keras.utils.Sequence):
    

In [None]:
tf.keras.applications.resnet_v2.ResNet50V2(
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax'
)