# In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Function to extract Mel spectrograms from audio files
def extract_mel_spectrogram(audio_file, n_mels=64, hop_length=512):
    """Compute a decibel-scaled Mel spectrogram for one audio file.

    Args:
        audio_file: Path of the audio file handed to ``librosa.load``.
        n_mels: Number of Mel bands to generate.
        hop_length: Hop length, in samples, between successive frames.

    Returns:
        The Mel power spectrogram converted to decibels, measured relative
        to its own maximum value (``ref=np.max``).
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    samples, sample_rate = librosa.load(audio_file, sr=None)
    power_spec = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_mels=n_mels,
        hop_length=hop_length,
    )
    return librosa.power_to_db(power_spec, ref=np.max)

# Path to your bird audio dataset
dataset_path = "birds_audio_dataset/"

# List of supported audio file extensions
supported_extensions = ['.wav', '.mp3', '.mp4']

# Preprocessing: extract a Mel spectrogram for every supported audio file.
# Each sub-directory of dataset_path is treated as one class, and its name
# becomes the label of the files it contains.
mel_spectrograms = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order stable across runs and machines, so the seeded train/validation
# split made later in this script is reproducible.
for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        continue  # Ignore stray files next to the class directories
    for audio_file in sorted(os.listdir(class_path)):
        # Compare extensions case-insensitively (e.g. ".WAV" is accepted)
        file_extension = os.path.splitext(audio_file)[1].lower()
        if file_extension not in supported_extensions:
            continue
        audio_path = os.path.join(class_path, audio_file)
        try:
            mel_spec = extract_mel_spectrogram(audio_path)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error processing {audio_path}: {e}")
        else:
            mel_spectrograms.append(mel_spec)
            labels.append(class_name)

# Fail early with a clear message when nothing could be loaded; otherwise
# max() below would raise a far less helpful "empty" ValueError.
if not mel_spectrograms:
    raise ValueError(
        f"No spectrograms were extracted from {dataset_path!r}; "
        "check the dataset path and the supported file extensions."
    )

# Find the maximum width (number of time frames) of all spectrograms
max_width = max(spec.shape[1] for spec in mel_spectrograms)

# Persist the width to disk - presumably so that code loading the trained
# model can pad its inputs to the same size (verify against the consumer).
with open('max_width.txt', 'w') as file:
    file.write(str(max_width))

# Right-pad each spectrogram along the time axis up to the maximum width.
# NOTE(review): mode='constant' pads with 0, but the spectrograms are in dB
# relative to their own peak (power_to_db(..., ref=np.max)), so 0 is the
# loudest possible value rather than silence. Left unchanged so that saved
# models and any inference-side padding stay consistent - confirm before
# switching to a "quiet" fill value.
padded_mel_spectrograms = [
    np.pad(spec, ((0, 0), (0, max_width - spec.shape[1])), mode='constant')
    for spec in mel_spectrograms
]

# Stack the equally sized spectrograms into one 3-D array of shape
# (num_samples, n_mels, max_width); no channels dimension is added.
padded_mel_spectrograms = np.array(padded_mel_spectrograms)

# Turn the class-name strings into integer ids
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Store the fitted class names on disk so the ids can be mapped back later
np.save("label_encoder.npy", label_encoder.classes_)

# Hold out 20% of the samples for validation, using a fixed seed
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_val, y_train, y_val = train_test_split(
    padded_mel_spectrograms,
    encoded_labels,
    **split_options,
)

# Define Multilayer Perceptron model
# The per-sample input shape is read from the training data instead of being
# hard-coded, so it stays correct if the number of Mel bands ever changes.
input_shape = X_train.shape[1:]  # (n_mels, max_width)
model_mlp = Sequential([
    Flatten(input_shape=input_shape),  # Flatten each 2-D spectrogram into a vector
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # One softmax output unit per known class
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model; the targets are integer class ids, hence the sparse loss
model_mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
# Write "mlp_birds.h5" every time the validation accuracy improves.
checkpoint = ModelCheckpoint("mlp_birds.h5", monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
# NOTE(review): training below runs for only 3 epochs, so a patience of 5
# can never be exhausted and this callback cannot stop training early.
# Increase `epochs` if early stopping is meant to take effect.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model_mlp.fit(X_train, y_train,
                         epochs=3,
                         batch_size=8,
                         validation_data=(X_val, y_val),
                         callbacks=[checkpoint, early_stopping])

# Save the model as it stands after training (separate from the checkpoint file)
model_mlp.save("mlp_model.h5")
