## BIRDCLEF 2024 - Rami El Wazzi & Sacha Guenoun

### Partie 3 - Training

#### Objectif du projet : entrainer un réseau de neurones permettant de reconnaitre un oiseau grâce à son cri.

In [53]:
import pandas as pd
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

### Chargement des propriétés du signal calculées en partie 2

In [54]:
X = np.load("./X.npy")
y = np.load("./y.npy")
df_train = pd.read_csv("./birdclef-2024/train_metadata.csv")[["primary_label", "filename"]]

In [55]:
X.shape

(24459, 684)

In [56]:
# concatenate X to df_train

df_train["X"] = X.tolist()

# def get_max_audio_length(file_paths):
#     max_length = 0
#     for file_path in file_paths:
#         y, sr = librosa.load(file_path, sr=None)
#         length = len(y)
#         if length > max_length:
#             max_length = length
#     return max_length

# file_paths = df_train['filename'].apply(lambda x: "./birdclef-2024/train_audio/" + x).tolist()

# max_length = get_max_audio_length(file_paths)
# print(f"The maximum length of the audio files is: {max_length} samples")
max_length = 190855200 # already calculated, takes too long to calculate


In [57]:
df_train

Unnamed: 0,primary_label,filename,X
0,asbfly,asbfly/XC134896.ogg,"[-79.97372436523438, -79.92324829101562, -79.5..."
1,asbfly,asbfly/XC164848.ogg,"[-15.896378517150879, -16.278581619262695, -16..."
2,asbfly,asbfly/XC175797.ogg,"[-58.841304779052734, -53.871551513671875, -55..."
3,asbfly,asbfly/XC207738.ogg,"[-18.35483741760254, -15.606270790100098, -16...."
4,asbfly,asbfly/XC209218.ogg,"[-14.528952598571777, -8.531251907348633, -9.6..."
...,...,...,...
24454,zitcis1,zitcis1/XC845747.ogg,"[-10.991899490356445, -12.973498344421387, -17..."
24455,zitcis1,zitcis1/XC845817.ogg,"[-9.823176383972168, -15.329643249511719, -24...."
24456,zitcis1,zitcis1/XC856176.ogg,"[-26.30453872680664, -12.45651626586914, -8.18..."
24457,zitcis1,zitcis1/XC856723.ogg,"[-24.176374435424805, -19.10907745361328, -18...."


In [58]:
len(df_train["X"]), len(df_train["X"].iloc[0])

(24459, 684)

### Après plusieurs essais tests, nous décidons d'entrainer un CNN contenant l'image du spectrogramme (partie 1) ET les propriétés du signal (partie 2) pour s'assurer la meilleure accuracy.

Chargement des images du spectrogramme

In [59]:
# function to get the rgb spectrogram of the file in filename

import librosa

def get_spectrogram(filename):
    y, sr = librosa.load(filename, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    S_dB = librosa.power_to_db(S, ref=np.max)
    return S_dB

In [60]:
class CFG:
    sample_rate = 32000
    nfft = 2028
    hop_length = 512
    fmin = 20
    fmax = 16000
    img_height = 128
    img_width = 128  # Fixed width for all spectrograms
    batch_size = 16  # Batch size for processing
    data_dir = './birdclef-2024/train_audio/'
    output_dir = './processed_spectrograms/'
    seed = 42
    patience = 10
    num_classes = 182
    epochs = 50

In [61]:
# Convert string labels to categorical codes
df_train['label_code'] = pd.Categorical(df_train['primary_label']).codes

# Save the mapping for later use (if needed)
label_mapping = dict(enumerate(pd.Categorical(df_train['primary_label']).categories))


In [62]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [63]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert labels to categorical
y = to_categorical(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X.shape, y.shape)
# Define the neural network model
def create_model(input_shape, optimizer='adam'):
    model = Sequential([
        Dense(256, input_shape=(input_shape,), activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(y_train.shape[1], activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

(24459, 684) (24459, 182)


In [64]:

# Create and train the model
model_adam = create_model(X_train.shape[1], optimizer='adam')
# model.summary()

# Train the model
history_adam = model_adam.fit(X_train, y_train, epochs=200, batch_size=256, validation_data=(X_test, y_test))


# Evaluate the model_adam
loss_adam, accuracy_adam = model_adam.evaluate(X_test, y_test)
print(f"Adam Test Accuracy: {accuracy_adam:.4f}, Adam Test Loss: {loss_adam:.4f}")

model_nadam = create_model(X_train.shape[1], optimizer='nadam')

# Train the model

history_nadam = model_nadam.fit(X_train, y_train, epochs=200, batch_size=256, validation_data=(X_test, y_test))

# Evaluate the model_nadam

loss_nadam, accuracy_nadam = model_nadam.evaluate(X_test, y_test)

print(f"Nadam Test Accuracy: {accuracy_nadam:.4f}, Nadam Test Loss: {loss_nadam:.4f}")

model_adagrad = create_model(X_train.shape[1], optimizer='adagrad')

# Train the model

history_adagrad = model_adagrad.fit(X_train, y_train, epochs=200, batch_size=256, validation_data=(X_test, y_test))

# Evaluate the model_adagrad

loss_adagrad, accuracy_adagrad = model_adagrad.evaluate(X_test, y_test)

print(f"Adagrad Test Accuracy: {accuracy_adagrad:.4f}, Adagrad Test Loss: {loss_adagrad:.4f}")

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0247 - loss: 5.0866 - val_accuracy: 0.1081 - val_loss: 4.4001
Epoch 2/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0829 - loss: 4.3458 - val_accuracy: 0.1603 - val_loss: 3.9356
Epoch 3/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1285 - loss: 3.9693 - val_accuracy: 0.1913 - val_loss: 3.6642
Epoch 4/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1461 - loss: 3.7854 - val_accuracy: 0.2124 - val_loss: 3.5014
Epoch 5/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1676 - loss: 3.6457 - val_accuracy: 0.2179 - val_loss: 3.4225
Epoch 6/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1830 - loss: 3.5479 - val_accuracy: 0.2414 - val_loss: 3.3296
Epoch 7/200
[1m77/77[0m [32m━━━━━━━━━━━━━━━

In [65]:
print(f"Adam Test Accuracy: {accuracy_adam:.4f}, Adam Test Loss: {loss_adam:.4f}")
print(f"Nadam Test Accuracy: {accuracy_nadam:.4f}, Nadam Test Loss: {loss_nadam:.4f}")
print(f"Adagrad Test Accuracy: {accuracy_adagrad:.4f}, Adagrad Test Loss: {loss_adagrad:.4f}")


Adam Test Accuracy: {accuracy_adam:.4f}, Adam Test Loss: {loss_adam:.4f}
Nadam Test Accuracy: {accuracy_nadam:.4f}, Nadam Test Loss: {loss_nadam:.4f}
Adagrad Test Accuracy: {accuracy_adagrad:.4f}, Adagrad Test Loss: {loss_adagrad:.4f}
