In [1]:
import os
import warnings
import wave
from uuid import uuid4

import IPython.display as ipd
import librosa
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
from librosa.feature import melspectrogram
from PIL import Image
from scipy.io import wavfile
from sklearn.utils import class_weight
from sklearn.utils import shuffle
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Rescaling
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, LSTM, SimpleRNN, Conv1D, Input, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set()

# A single seed shared by NumPy and TensorFlow so stochastic steps start from a known state.
seed = 30
np.random.seed(seed)
tf.random.set_seed(seed)

**Read files and choose part of them**

In [18]:
# Load the competition metadata and keep only the best-rated recordings.
train_df = pd.read_csv('../input/birdclef-2021/train_metadata.csv')
train_df = train_df.query("rating>=5")

# Number of recordings per species. value_counts() keeps every label paired with its
# own count; zipping unique() (order of first appearance) with groupby().count()
# (sorted by label) would attach counts to the wrong species.
birds_count = train_df["primary_label"].value_counts().to_dict()

# Keep the species that have between 50 and 69 recordings (upper bound exclusive).
chosen_birds = [bird for bird, count in birds_count.items() if 50 <= count < 70]
train_df = train_df.query("primary_label in @chosen_birds")

# Shuffle with an explicit seed so re-running this cell yields the same order
# (and therefore the same train/validation/test split further down).
train_df = shuffle(train_df, random_state=seed)
train_df.primary_label.unique()

**Dataset Separation**

In [22]:
# Split the shuffled metadata into consecutive train / validation / test slices.
training_percentage = 0.8
validation_percentage = 0.1

# The fractions are applied through the named constants above instead of repeating
# the literals, so changing a percentage actually changes the split.
training_item_count = int(len(train_df) * training_percentage)
validation_item_count = int(len(train_df) * validation_percentage)
# The test split is whatever remains after the two truncated counts, which is
# exactly the size of the slice taken below.
test_item_count = len(train_df) - training_item_count - validation_item_count

validation_end = training_item_count + validation_item_count
training_df = train_df[:training_item_count]
validation_df = train_df[training_item_count:validation_end]
test_df = train_df[validation_end:]

# Every recording must land in exactly one split.
assert len(training_df) + len(validation_df) + len(test_df) == len(train_df)
assert len(test_df) == test_item_count

**Read an audio file as an example**

In [24]:
# Load one recording to demonstrate the augmentations below; `wav` holds the samples
# and `sr` the sampling rate returned by librosa.load.
example_audio_path = "../input/birdclef-2021/train_short_audio/amecro/XC109768.ogg"
wav, sr = librosa.load(example_audio_path)

**Function of drawing images**

In [25]:
def plot_time_series(data):
    """Draw a waveform on a 14x8 figure.

    The x axis is a normalised position in [0, 1] (not seconds), with one point
    per sample of ``data``.

    Parameters
    ----------
    data : sequence of float
        Amplitude samples to plot.
    """
    normalised_position = np.linspace(0, 1, len(data))
    plt.figure(figsize=(14, 8))
    plt.title('Raw wave ')
    plt.ylabel('Amplitude')
    plt.plot(normalised_position, data)
    plt.show()

**Time Stretch**

In [26]:
def stretch(data, rate=1, input_length=None):
    """Time-stretch a waveform and zero-pad it to a minimum length.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to stretch.
    rate : float, default 1
        Stretch factor handed to ``librosa.effects.time_stretch``.
    input_length : int, optional
        Minimum number of samples in the result. When omitted, the notebook-level
        ``sr`` variable is used, as in the original implementation, so ``sr`` must
        already be defined in that case.

    Returns
    -------
    np.ndarray
        The stretched signal, right-padded with zeros when it is shorter than
        ``input_length``. Longer signals are returned unpadded and untruncated.
    """
    if input_length is None:
        input_length = sr
    # `rate` is keyword-only in librosa >= 0.10; the keyword form works on older releases too.
    data = librosa.effects.time_stretch(data, rate=rate)
    missing = max(0, input_length - len(data))
    return np.pad(data, (0, missing), "constant")

# Demonstrate the time-stretch augmentation on the example recording.
data_stretch = stretch(wav, 1.2)
# A bare ipd.Audio(...) is only rendered when it is the last expression of a cell;
# display() shows the player even though a plot follows.
ipd.display(ipd.Audio(data_stretch, rate=sr))
plot_time_series(data_stretch)

**White noise**

In [29]:
# Demonstrate additive white noise: Gaussian noise scaled by 0.01 is added to the example recording.
wn = np.random.randn(len(wav))
wav_wn = wav + 0.01*wn
# A bare ipd.Audio(...) is only rendered when it is the last expression of a cell;
# display() shows the player even though a plot follows.
ipd.display(ipd.Audio(wav_wn, rate=sr))
plot_time_series(wav_wn)

**Pitch shifting**

In [30]:
# Demonstrate pitch shifting by 4 steps on the example recording.
# `sr` and `n_steps` are keyword-only in librosa >= 0.10; the keyword form also
# works on older releases.
wav_p = librosa.effects.pitch_shift(wav, sr=sr, n_steps=4)
# A bare ipd.Audio(...) is only rendered when it is the last expression of a cell;
# display() shows the player even though plots follow.
ipd.display(ipd.Audio(wav_p, rate=sr))
plot_time_series(wav)
plot_time_series(wav_p)

**Function for extracting image samples from an audio file**

In [None]:
def get_sample(filename, bird, output_folder, augment=True):
    """Cut one recording into 5-second chunks and save each as a mel-spectrogram JPEG.

    Parameters
    ----------
    filename : str
        Path of the audio file to load with ``librosa.load``.
    bird : str
        Label stored with every sample produced from this file.
    output_folder : str
        Existing directory that receives the generated ``<uuid4>.jpg`` images.
    augment : bool, default True
        When True (the original behaviour) the trimmed signal is time-stretched
        (rate 1.2), mixed with Gaussian noise scaled by 0.01 and pitch-shifted by
        4 steps before it is chunked. Pass False to skip the augmentation.

    Returns
    -------
    list of dict
        One dict per full 5-second chunk with the keys ``"song_sample"`` (image
        path), ``"db"`` (the uint8 spectrogram array) and ``"bird"``. A trailing
        chunk shorter than 5 seconds is dropped, so short recordings yield ``[]``.
    """
    wave_data, wave_rate = librosa.load(filename)
    wave_data, _ = librosa.effects.trim(wave_data)

    if augment:
        wave_data = stretch(wave_data, 1.2)
        noise = np.random.randn(len(wave_data))
        wave_data = wave_data + 0.01 * noise
        # `sr` / `n_steps` are keyword-only in librosa >= 0.10.
        wave_data = librosa.effects.pitch_shift(wave_data, sr=wave_rate, n_steps=4)

    sample_length = 5 * wave_rate
    n_mels = 216
    samples_from_file = []
    # Stepping only up to the last start index that still leaves a full chunk is
    # equivalent to generating every chunk and discarding the short final one.
    for start in range(0, len(wave_data) - sample_length + 1, sample_length):
        song_sample = wave_data[start:start + sample_length]
        # `y` is keyword-only in librosa >= 0.10; the sampling rate is passed
        # explicitly instead of relying on the function's default.
        mel = melspectrogram(y=song_sample, sr=wave_rate, n_mels=n_mels)
        db = librosa.power_to_db(mel)
        # NOTE(review): minmax_scale is called with its default arguments, which
        # according to the scikit-learn docs scale each column independently -
        # confirm that per-frame (rather than whole-image) normalisation is intended.
        normalised_db = sklearn.preprocessing.minmax_scale(db)
        db_array = (np.asarray(normalised_db) * 255).astype(np.uint8)
        # Replicate the single channel three times and transpose into a 3-channel image.
        db_image = Image.fromarray(np.array([db_array, db_array, db_array]).T)

        # A separate name keeps the `filename` argument intact (it used to be overwritten here).
        image_path = os.path.join(output_folder, str(uuid4()) + ".jpg")
        db_image.save(image_path)

        samples_from_file.append({"song_sample": image_path,
                                  "db": db_array, "bird": bird})
    return samples_from_file

**Transform all audio files into images**

In [None]:
# Convert every training recording into mel-spectrogram images, one sub-folder per species.
warnings.filterwarnings("ignore")
train_list = []

# makedirs(exist_ok=True) creates the parent folder as well and does not fail when
# the cell is re-run. NOTE: a re-run adds new uuid-named images next to the old
# ones; delete the folder first if a clean dataset is required.
output_folder = "/kaggle/working/melspectrogram/train/"
os.makedirs(output_folder, exist_ok=True)

for idx, row in tqdm(training_df.iterrows(), total=len(training_df)):
    # `birds_to_recognise` is not defined anywhere in this notebook; `chosen_birds`
    # is the species list that the metadata was filtered with.
    if row.primary_label not in chosen_birds:
        continue
    audio_file_path = "../input/birdclef-2021/train_short_audio/{}/{}".format(
        row.primary_label, row.filename)
    outf = output_folder + row.primary_label + "/"
    os.makedirs(outf, exist_ok=True)
    try:
        train_list += get_sample(audio_file_path, row.primary_label, outf)
    except Exception:
        # Report which file failed, then re-raise: the original handler also
        # re-raised (its print was unreachable), and swallowing the error here
        # would hide genuine bugs behind a "corrupted file" message.
        print("{} is corrupted".format(audio_file_path))
        raise

train_samples = pd.DataFrame(train_list)

In [None]:
# Convert every validation recording into mel-spectrogram images, one sub-folder per species.
# NOTE(review): get_sample applies its augmentation to this split as well - confirm that is intended.
warnings.filterwarnings("ignore")
validation_list = []

# makedirs(exist_ok=True) creates missing parent folders and does not fail when the
# cell is re-run. NOTE: a re-run adds new uuid-named images next to the old ones.
output_folder = "/kaggle/working/melspectrogram/validation/"
os.makedirs(output_folder, exist_ok=True)

for idx, row in tqdm(validation_df.iterrows(), total=len(validation_df)):
    # `birds_to_recognise` is not defined anywhere in this notebook; `chosen_birds`
    # is the species list that the metadata was filtered with.
    if row.primary_label not in chosen_birds:
        continue
    audio_file_path = "../input/birdclef-2021/train_short_audio/{}/{}".format(
        row.primary_label, row.filename)
    outf = output_folder + row.primary_label + "/"
    os.makedirs(outf, exist_ok=True)
    try:
        validation_list += get_sample(audio_file_path, row.primary_label, outf)
    except Exception:
        # Report which file failed, then re-raise: the original handler also
        # re-raised (its print was unreachable), and swallowing the error here
        # would hide genuine bugs behind a "corrupted file" message.
        print("{} is corrupted".format(audio_file_path))
        raise

validation_samples = pd.DataFrame(validation_list)

In [None]:
# Convert every test recording into mel-spectrogram images, one sub-folder per species.
# NOTE(review): get_sample applies its augmentation to this split as well - confirm that is intended.
warnings.filterwarnings("ignore")
test_list = []

# makedirs(exist_ok=True) creates missing parent folders and does not fail when the
# cell is re-run. NOTE: a re-run adds new uuid-named images next to the old ones.
output_folder = "/kaggle/working/melspectrogram/test/"
os.makedirs(output_folder, exist_ok=True)

for idx, row in tqdm(test_df.iterrows(), total=len(test_df)):
    # `birds_to_recognise` is not defined anywhere in this notebook; `chosen_birds`
    # is the species list that the metadata was filtered with.
    if row.primary_label not in chosen_birds:
        continue
    audio_file_path = "../input/birdclef-2021/train_short_audio/{}/{}".format(
        row.primary_label, row.filename)
    outf = output_folder + row.primary_label + "/"
    os.makedirs(outf, exist_ok=True)
    try:
        test_list += get_sample(audio_file_path, row.primary_label, outf)
    except Exception:
        # Report which file failed, then re-raise: the original handler also
        # re-raised (its print was unreachable), and swallowing the error here
        # would hide genuine bugs behind a "corrupted file" message.
        print("{} is corrupted".format(audio_file_path))
        raise

test_samples = pd.DataFrame(test_list)

**Read all the images as dataset**

In [None]:
# Training images: one sub-directory per species, loaded as 216x216 batches of 32.
batch_size = 32
data_dir = "/kaggle/working/melspectrogram/train/"
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    batch_size=batch_size,
    image_size=(216, 216),
    seed=123,
)

In [None]:
# Validation images, loaded with the same image size, batch size and seed as the training set.
batch_size = 32
data_dir_val = "/kaggle/working/melspectrogram/validation/"
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir_val,
    batch_size=batch_size,
    image_size=(216, 216),
    seed=123,
)

In [None]:
# Held-out test images, loaded with the same image size, batch size and seed as the training set.
batch_size = 32
data_dir_test = "/kaggle/working/melspectrogram/test/"
test_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir_test,
    batch_size=batch_size,
    image_size=(216, 216),
    seed=123,
)

In [32]:
# Take the label set from the TRAINING split: it is the split the model learns from,
# and a small validation split may not contain every species.
class_names = train_ds.class_names

# Each dataset numbers its labels from its own sub-directories, so a split with a
# different set of species folders would be scored against shifted label indices.
for split_name, split_ds in (("validation", val_ds), ("test", test_ds)):
    if split_ds.class_names != class_names:
        missing = sorted(set(class_names) - set(split_ds.class_names))
        print("WARNING: {} split has a different class list (missing: {})".format(split_name, missing))

**For training, choose one of the following 3 models**

In [None]:
## Baseline Model
# Three Conv2D/MaxPooling stages followed by dropout and a small dense head.
num_classes = len(class_names)
model = Sequential([
  # Pixel values arrive in [0, 255]; scale them to [0, 1].
  layers.Rescaling(1./255, input_shape=(216,216, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.4),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # The training cell compiles with the string loss "sparse_categorical_crossentropy",
  # which expects probabilities, so the output needs a softmax (as in the other two
  # models) rather than raw logits.
  layers.Dense(num_classes, activation='softmax')
])

In [None]:
## InceptionV3 Model
# The datasets yield 216x216 images, so the network is built for that size;
# a 299x299 input layer would reject the batches at fit time.
input_shape = (216,216,3)
effnet_layers = InceptionV3(weights=None, include_top=False, input_shape=input_shape)

# Trained from scratch (weights=None), so every backbone layer stays trainable.
for layer in effnet_layers.layers:
    layer.trainable = True

dropout_dense_layer = 0.3

model = Sequential()
model.add(Rescaling(1./255, input_shape=input_shape))
model.add(effnet_layers)
model.add(GlobalAveragePooling2D())
model.add(Dense(256, use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))
# Size the output from the data instead of a hard-coded 37, so the model follows
# whatever species were selected (same expression as the EfficientNetB0 model).
model.add(Dense(len(train_df.primary_label.unique()), activation="softmax"))

In [None]:
## EfficientNetB0 Model
input_shape = (216,216,3)
effnet_layers = EfficientNetB0(weights=None, include_top=False, input_shape=input_shape)
# Trained from scratch (weights=None), so every backbone layer stays trainable.
for layer in effnet_layers.layers:
    layer.trainable = True
dropout_dense_layer = 0.3

model = Sequential()
# Keras' EfficientNet already contains its own input preprocessing and expects raw
# pixel values in [0, 255]; an extra Rescaling(1/255) in front of it would scale the
# inputs twice, so the images are fed in unscaled.
model.add(Input(shape=input_shape))
model.add(effnet_layers)
model.add(GlobalAveragePooling2D())
model.add(Dense(256, use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))
model.add(Dense(len(train_df.primary_label.unique()), activation="softmax"))

**Training Part**

In [None]:
# All three callbacks watch the validation loss: multiply the learning rate by 0.7
# after 2 epochs without improvement, stop after 5, and keep the best model on disk.
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=2, verbose=1),
    EarlyStopping(monitor='val_loss', patience=5),
    ModelCheckpoint(filepath='model.h5', monitor='val_loss', save_best_only=True),
]

# Labels are integer class indices, hence the sparse variant of the loss.
model.compile(optimizer='adam',
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

epoch = 25
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epoch,
    callbacks=callbacks,
)

**History Plot**

In [None]:
# Per-epoch curves recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(len(acc))

# Two side-by-side panels: accuracy on the left, loss on the right.
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, train_curve, train_label, val_curve, val_label, legend_loc, panel_title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
plt.show()

**Test Accuracy**

In [None]:
# Score the trained model on the held-out test set; with the compile settings used
# above the result holds the loss followed by the accuracy.
evaluate = model.evaluate(x=test_ds)
print(evaluate)