Training Jupyter Notebook

Environment Setup (Google Colab only)

In [None]:
# Execution-environment switch: True when running on Google Colab, False for a local run.
# Later cells branch on this flag to pick data/model paths and the batch size.
COLAB = False
# On Windows Run in ENSC_413 Folder

In [None]:
if COLAB:
    # Mount Google Drive at /content/drive; the dataset archive, the no-call CSV
    # and the model_save folder used by later cells are read from / written to it.
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
if COLAB:
    # Copy the compressed dataset from Drive into /content/ on the Colab VM.
    # The two commented-out commands are earlier alternatives (unpacked folder / zip export).
    # ! cp -vr /content/drive/MyDrive/audio_images/ /content/audio_images
    # ! cp -vr /content/drive/MyDrive/audio_images-20220324T215740Z-001.zip /content/
    ! cp -vr /content/drive/MyDrive/audio_images.tar.gz /content/

In [None]:
if COLAB:
    ! ls -alt /content/
    ! mkdir /content/audio_images
    ! tar -zxvf audio_images.tar.gz 
    # ! unzip /content/audio_images-20220324T215740Z-001.zip


In [None]:
if COLAB:
    # Report disk usage of the extracted dataset folder as a quick check that extraction produced files.
    ! du -h /content/audio_images/

Paths and Imports

In [None]:
import pandas as pd, numpy as np, gc
import librosa as lb
import librosa.display as lbd

# from kaggle_datasets import KaggleDatasets
import tensorflow as tf, re, math
import tensorflow.keras.backend as K
from tensorflow.keras import layers
from keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
# force a channel ordering
from keras import backend
from tensorflow import keras


import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from pathlib import Path
from tqdm import tqdm
from functools import lru_cache

import json
import random
from datetime import datetime

import pickle

In [None]:
# --- Environment-dependent locations -------------------------------------
# TRAIN_AUDIO_IMAGES_SAVE_ROOT: folder holding the per-recording .npy files.
# MODEL_SAVE_ROOT / MODEL_SAVE_NAME: where checkpoints and label classes are stored.
if COLAB:
    # TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/drive/MyDrive/audio_images")
    TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/audio_images")
    MODEL_SAVE_ROOT = Path("/content/drive/MyDrive/model_save")
    MODEL_SAVE_NAME = 'BirdClef2022-ResNet50V2_model.h5'
else:
    # TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path("/content/drive/MyDrive/audio_images")
    TRAIN_AUDIO_IMAGES_SAVE_ROOT = Path(r"C:\Users\xuewi\Desktop\SFU\ENSC_413\audio_images")
    MODEL_SAVE_ROOT = Path(r"C:\Users\xuewi\Desktop\SFU\ENSC_413\BirdCLEF2022-Project\model_save")
    MODEL_SAVE_NAME = 'Local-ResNet50V2_model.h5'

# --- Run options ----------------------------------------------------------
# When True, the training cell loads a previously saved model before fitting.
LOAD_SAVED_MODEL = False

# Threshold for no-call detector: segments whose call probability is below this
# value are relabelled as NO_CALL when the dataset index is built.
BIRD_CALL_PROB = 0.5

# No Call Label
NO_CALL = "no_call"

# NUM_FOLDS = 5

# --- Training hyper-parameters -------------------------------------------
# Larger batches on Colab, smaller ones for the local machine.
BATCH_SIZE = 128 if COLAB else 96
EPOCHS = 30

Some birds have only a few training samples; applying the no-call filter to them would reduce their sample counts even further, so they are exempt from it.

In [None]:
# Species exempt from no-call relabelling: they have too few samples to afford
# losing any segment to the no-call filter, so every segment keeps its primary label.
NO_CALL_IGNORE = [
    'akikik', 'brnboo', 'bubsan', 'bulpet', 'coopet',
    'crehon', 'ercfra', 'hawpet1', 'layalb', 'lessca',
    'magpet1', 'mauala', 'pomjae', 'puaioh', 'shtsan',
]

Connect To TPU

In [None]:
# Preferred accelerator. The next cell tries to connect to a TPU when this is "TPU"
# and switches the value to "GPU" if no TPU cluster can be resolved.
DEVICE = "TPU" # "TPU" or "GPU"

In [None]:
# https://www.kaggle.com/code/itsuki9180/birdcall-using-tpu-train/notebook
# Select the tf.distribute strategy. When DEVICE == "TPU" a TPU is tried first;
# on any failure (no TPU found, or TPU initialisation error) DEVICE is switched
# to "GPU" and the default strategy (CPU / single GPU) is used, so `strategy`
# is always defined when this cell finishes.
if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        # Raised by the resolver when no TPU cluster can be located.
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # The original `except _:` evaluated the undefined name `_`, turning any
            # initialisation failure into a NameError. Catch the failure and fall back.
            print("failed to initialize TPU")
            print(f"reason: {err!r}")
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

Import Data

In [None]:
# Build a flat index of training examples.
#   x_data: (path to .npy file, segment index) tuples, one per segment
#   y_data: the label for that segment (primary label or NO_CALL)
x_data = []
y_data = []

# The CSV has one row per recording; the loop below reads its `filename`,
# `primary_label` and `nocalldetection` columns.
if COLAB:
    csv_path = '/content/drive/MyDrive/no_call_detect/nocalldetection_for_shortaudio_fold0.csv'
else:
    csv_path = r'C:\Users\xuewi\Desktop\SFU\ENSC_413\BirdCLEF2022-Project\no_call_detect\nocalldetection_for_shortaudio_fold0.csv'
df = pd.read_csv(csv_path)

for row in tqdm(df.itertuples(False)):
    # Path of the array file for this recording; computed once and reused for every segment.
    npy_path = str((TRAIN_AUDIO_IMAGES_SAVE_ROOT/row.filename).as_posix() + ".npy")
    # Loaded only to learn how many segments (first-axis entries) the file holds.
    mels = np.load(npy_path)

    # `nocalldetection` is a whitespace-separated string of floats;
    # entry i is used as the call probability of segment i.
    call_prob = [float(x) for x in row.nocalldetection.split()]

    for i in range(len(mels)):
        x_data.append((npy_path, i))

        # Keep the bird label when a call was detected, or when the species is
        # exempt from no-call filtering; otherwise mark the segment as no-call.
        keep_label = call_prob[i] >= BIRD_CALL_PROB or row.primary_label in NO_CALL_IGNORE
        y_data.append(row.primary_label if keep_label else NO_CALL)




In [None]:
# Sanity check: inputs and labels must contain one entry per audio segment.
for collected in (x_data, y_data):
    print(len(collected))

Label Encode the Output and Save the Mappings

In [None]:
# When True, reuse the class list saved by a previous run so integer label ids
# stay identical to those an already-saved model was trained with.
LOAD_LABELS = True

le = LabelEncoder()
if LOAD_LABELS:
    le.classes_ = np.load(MODEL_SAVE_ROOT/"classes.npy")
    # transform (not fit_transform): fitting again would replace the classes
    # loaded above with ones re-derived from y_data, silently ignoring the saved
    # mapping. transform raises ValueError if y_data contains an unknown label.
    y_label = le.transform(y_data)
else:
    y_label = le.fit_transform(y_data)
    # Persist the mapping so later runs / inference can load the same class order.
    np.save(MODEL_SAVE_ROOT/"classes.npy", le.classes_)

# Human-readable mapping: class name -> integer id.
le_name_mapping = dict(zip(le.classes_.astype(str), le.transform(le.classes_)))
print(le_name_mapping)

In [None]:
# Preview the first three (file, segment) entries together with their encoded labels.
for i in (0, 1, 2):
    print(x_data[i], y_label[i], sep="\n")
    

Split Into Train and Validation Sets

In [None]:
# Fixed seed so the train/validation partition is identical on every run.
# Without it, resuming training with LOAD_SAVED_MODEL would draw a new split
# and validate on samples the saved model had already trained on.
SPLIT_SEED = 42

# 80/20 split, stratified so each label keeps the same proportion in both sets.
x_train, x_val, y_train, y_val = train_test_split(
    x_data,
    y_label,
    test_size=0.2,
    stratify=y_data,
    random_state=SPLIT_SEED,
)

Checking Train / Val Split

In [None]:
# Number of segments per encoded label id (computed over the whole dataset, before the split).
print(type(y_label))
counts = np.bincount(y_label)
print(counts)


# Same counts keyed by class name, then narrowed to the rare classes.
y = le.inverse_transform(y_label)
print(y)
unique, counts = np.unique(y, return_counts=True)
values = dict(zip(unique, counts))
# Classes with at most 20 segments.
d = {name: n for name, n in values.items() if n <= 20}
print(d)

In [None]:
def normalize(image):
    """Scale an array by 1/255 as float32 and replicate it into three channels.

    Args:
        image: numpy array (any shape) holding one segment's values.

    Returns:
        float32 array of shape ``(3, *image.shape)``: three identical copies of
        ``image / 255`` stacked along a new leading axis (channels first).
    """
    scaled = image.astype("float32", copy=False) / 255.0
    # Three identical planes: the network input declared later has 3 channels.
    return np.stack([scaled] * 3)

In [None]:

# Memoised file loader: repeated requests for the same path return the array
# object loaded the first time instead of reading the file again.
# NOTE(review): the original author remarked that caching "doesn't seem to work";
# the cache is per-process and unbounded (maxsize=None) — confirm memory use is acceptable.
@lru_cache(maxsize=None)
def load_data(im_path):
    """Load the ``.npy`` file at ``im_path`` and return its contents.

    Args:
        im_path: hashable path (string) of the file; it is the cache key.

    Returns:
        Whatever ``np.load`` returns for that file. Callers share the cached
        object, so it should not be modified in place.
    """
    loaded = np.load(im_path)
    return loaded



# https://medium.com/analytics-vidhya/write-your-own-custom-data-generator-for-tensorflow-keras-1252b64e41c3
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of normalised segments and one-hot targets.

    Args:
        x_data: sequence of ``(npy_file_path, segment_index)`` tuples.
        y_data: sequence of integer class ids aligned with ``x_data``.
        batch_size: number of samples per batch.
        shuffle: when True, samples are reshuffled at the end of every epoch.
    """

    def __init__(self, x_data, y_data, batch_size, shuffle=True):
        super().__init__()
        self.x_data = x_data
        self.y_data = y_data
        self.batch_size = batch_size
        self.shuffle = shuffle

    def on_epoch_end(self):
        """Print the wall-clock time and, if enabled, reshuffle the samples."""
        # Print Time
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print("Current Time =", current_time)

        # Shuffle Data at the End of Epoch
        if self.shuffle:
            # Shuffle one index permutation and apply it to both sequences so
            # inputs and labels stay aligned. The result is stored back on the
            # instance; the original assigned it to local variables, so the
            # data was never actually reshuffled.
            order = list(range(len(self.x_data)))
            random.shuffle(order)
            self.x_data = [self.x_data[i] for i in order]
            self.y_data = [self.y_data[i] for i in order]

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(x_images, y_labels)``."""
        start = index * self.batch_size
        stop = (index + 1) * self.batch_size
        x_batch = self.x_data[start:stop]
        y_batch = self.y_data[start:stop]

        x_images = np.array(self.__get_data(x_batch))
        y_labels = self.__get_output(y_batch)

        return x_images, y_labels

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.x_data) // self.batch_size

    def __get_data(self, x_batch):
        """Load and normalise the segment referenced by each ``(file, index)`` tuple."""
        x_im = []
        for file_name, mel_num in x_batch:
            # mels = np.load(str((TRAIN_AUDIO_IMAGES_SAVE_ROOT/file_name).as_posix()))
            # load_data is memoised, so a file shared by several segments is read once.
            mels = load_data(str((TRAIN_AUDIO_IMAGES_SAVE_ROOT/file_name).as_posix()))
            x_im.append(normalize(mels[mel_num]))
        return x_im

    def __get_output(self, y_batch):
        """One-hot encode ``y_batch`` with the positive class set to 0.99."""
        # num classes from the label encoder
        num_classes = len(le.classes_)
        # Target for 0.99 instead of 1
        return tf.keras.utils.to_categorical(y_batch, num_classes) * 0.99
        




In [None]:
# force channels-first ordering
# Set on the tf.keras backend, the same package that builds the network below,
# instead of mixing standalone `keras` objects with `tf.keras` ones.
K.set_image_data_format('channels_first')
print(K.image_data_format())

# Width of the output layer comes from the label encoder, so it always matches
# the one-hot targets produced by CustomDataGen (previously hard-coded to 153).
NUM_CLASSES = len(le.classes_)

# Build and compile inside the distribution strategy chosen earlier; with the
# default (CPU / single GPU) strategy the scope has no effect.
with strategy.scope():
    base_model = tf.keras.applications.resnet_v2.ResNet50V2(
        include_top=False,
        input_shape=(3, 128, 281),
        weights='imagenet',
    )
    x = base_model.output
    # https://cv-tricks.com/keras/understand-implement-resnets/
    # Global Average Pooling
    x = layers.GlobalAveragePooling2D()(x)
    d1 = layers.Dense(1024, activation='relu')(x)
    d1 = layers.Dropout(0.5)(d1)
    predictions = layers.Dense(NUM_CLASSES, activation='softmax')(d1)

    model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

    opt = tf.keras.optimizers.Adam(
        learning_rate=1e-3,
        epsilon=1e-07,
    )
    # The metric is given explicitly: in TF 2.x Keras the string 'accuracy' resolves
    # to binary accuracy when the loss is binary cross-entropy, which is not a
    # meaningful per-sample accuracy for 0.99-scaled one-hot targets. Naming it
    # 'accuracy' keeps the history keys ('accuracy', 'val_accuracy') unchanged.
    # NOTE(review): softmax outputs are paired with binary_crossentropy here; the
    # loss is left as-is, but confirm whether categorical_crossentropy was intended.
    model.compile(
        opt,
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')],
    )
model.summary()



Training Callbacks

In [None]:
# https://www.kaggle.com/code/enukuro/108th-place-solution-birdcall-keras-tpu/notebook
# All three callbacks watch the validation loss.

# Stop training after 6 epochs without improvement.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    verbose=1,
)

# Write the full model to disk whenever the validation loss reaches a new best.
sv = tf.keras.callbacks.ModelCheckpoint(
    MODEL_SAVE_ROOT/MODEL_SAVE_NAME,
    monitor='val_loss',
    save_best_only=True,  # , save_weights_only=True)
    verbose=1,
)

# Multiply the learning rate by 0.2 after 5 epochs without an improvement of at
# least 1e-4, wait 1 epoch before monitoring again, never go below 1e-7.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_delta=0.0001,
    cooldown=1,
    min_lr=1e-7,
    verbose=1,
)

In [None]:
# Batch generators over the split produced earlier (x_train, x_val, y_train, y_val).
# Both are created with shuffling enabled.
traingen = CustomDataGen(x_train, y_train, BATCH_SIZE, shuffle=True)
valgen = CustomDataGen(x_val, y_val, BATCH_SIZE, shuffle=True)

In [47]:
# Full batches per epoch; matches CustomDataGen.__len__, which drops a partial last batch.
STEPS_PER_EPOCH = len(x_train) // BATCH_SIZE
VALIDATION_STEP = len(x_val) // BATCH_SIZE

if LOAD_SAVED_MODEL:
    # Resume from the file the checkpoint callback writes for this environment
    # (MODEL_SAVE_NAME). The previous hard-coded Colab filename meant a local
    # run could never reload its own 'Local-...' checkpoint.
    model = keras.models.load_model(MODEL_SAVE_ROOT/MODEL_SAVE_NAME)

history = model.fit(
    traingen,
    epochs = EPOCHS,
    steps_per_epoch= STEPS_PER_EPOCH,
    callbacks = [es, sv, reduce_lr],
    validation_data=valgen,
    validation_steps = VALIDATION_STEP
)


# Persist the per-epoch metrics so the curves can be re-plotted without retraining.
# NOTE: this is a pickle file; only unpickle copies that come from a trusted source.
with open(MODEL_SAVE_ROOT/'trainHistoryDict', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)


Epoch 00007: val_loss did not improve from 0.00755
Current Time = 13:40:06
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.00755
Current Time = 13:49:25
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.00755
Current Time = 13:57:38
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.00755
Current Time = 14:05:59
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.00755

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Current Time = 14:14:47
Epoch 12/30

Epoch 00012: val_loss did not improve from 0.00755
Epoch 00012: early stopping


In [49]:
# ---- display history ----
# list all data in history
print(history.history.keys())

# One figure per metric: training curve and validation curve against the epoch
# number, saved as a PNG in the working directory, then the figure is cleared.
# NOTE(review): the output filenames mention "vgg16" although the model built in
# this notebook is ResNet50V2, and the validation curve is labelled 'test'.
for metric, y_axis_label, out_file in (
    ('accuracy', 'accuracy', 'train_test_accuracy_vgg16_augmentation.png'),
    ('loss', 'binary cross-entropy', 'train_test_loss_vgg16_augmentation.png'),
):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.ylabel(y_axis_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(out_file)
    plt.clf()  # clear figure

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'])


<Figure size 432x288 with 0 Axes>