# In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from skimage.color import gray2rgb
import matplotlib.pyplot as plt

# Function to extract Mel spectrograms from audio files
def extract_mel_spectrogram(audio_file, n_mels=64, hop_length=512):
    """Compute a decibel-scaled Mel spectrogram for one audio file.

    Args:
        audio_file: Path of the audio file, passed straight to ``librosa.load``.
        n_mels: Number of Mel bands requested from ``melspectrogram``.
        hop_length: Hop length (in samples) forwarded to ``melspectrogram``.

    Returns:
        A ``(spectrogram_db, sampling_rate, hop_length)`` tuple. The
        spectrogram is the output of ``librosa.power_to_db`` with
        ``ref=np.max``; ``hop_length`` is echoed back unchanged so callers
        can keep it alongside the spectrogram.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    signal, sample_rate = librosa.load(audio_file, sr=None)

    # Pre-emphasis filter applied before the spectral analysis.
    emphasized_signal = librosa.effects.preemphasis(signal)

    power_spectrogram = librosa.feature.melspectrogram(
        y=emphasized_signal,
        sr=sample_rate,
        n_mels=n_mels,
        hop_length=hop_length,
    )

    # Convert power to decibels, referenced to the spectrogram's own maximum.
    spectrogram_db = librosa.power_to_db(power_spectrogram, ref=np.max)
    return spectrogram_db, sample_rate, hop_length

# Path to your bird audio dataset: one sub-directory per class, each holding
# that class's audio files.
dataset_path = "birds_audio_dataset/"

# List of supported audio file extensions (compared in lower case)
supported_extensions = ['.wav', '.mp3', '.mp4']

# Preprocessing: extract a Mel spectrogram for each audio file in the dataset.
# The four lists below are kept index-aligned: entry i of each list describes
# the same audio file.
mel_spectrograms = []
labels = []
sampling_rates = []  # Sampling rate of each file, needed for plotting
hop_lengths = []     # Hop length used for each file, needed for plotting

for class_name in os.listdir(dataset_path):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        continue  # Only directories are treated as classes
    for audio_file in os.listdir(class_path):
        file_extension = os.path.splitext(audio_file)[1].lower()
        if file_extension not in supported_extensions:
            continue
        audio_path = os.path.join(class_path, audio_file)
        # Best effort: a file that cannot be processed is reported and
        # skipped so that one bad recording does not abort the whole run.
        try:
            mel_spec, sr, hop_length = extract_mel_spectrogram(audio_path)
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")
            continue
        mel_spectrograms.append(mel_spec)
        labels.append(class_name)
        sampling_rates.append(sr)
        hop_lengths.append(hop_length)

# Find the maximum width (number of columns) over all spectrograms
if mel_spectrograms:
    max_width = max(spec.shape[1] for spec in mel_spectrograms)
else:
    max_width = 0
    print("No spectrograms found.")

# Persist the width so it can be read back later from 'max_width.txt'.
with open('max_width.txt', 'w') as file:
    file.write(str(max_width))

# Pad each spectrogram on the right to the maximum width. The padded 2-D
# spectrograms are kept in their own list because they are needed again for
# plotting after the RGB conversion below.
# NOTE(review): the pad value is 0, which on a dB scale referenced to the
# maximum is the loudest level, not silence. Left unchanged here because any
# code that pads inputs the same way elsewhere would have to change with it.
padded_db_spectrograms = []
if mel_spectrograms:
    for spec in mel_spectrograms:
        pad_width = max_width - spec.shape[1]
        padded_spec = np.pad(spec, ((0, 0), (0, pad_width)), mode='constant')
        padded_db_spectrograms.append(padded_spec)
else:
    print("No spectrograms found.")

# Convert grayscale spectrograms to RGB
rgb_mel_spectrograms = []
for spec in padded_db_spectrograms:
    # Normalize spectrogram values to range [0, 1]. A constant spectrogram
    # has a zero value range; map it to all zeros instead of dividing by
    # zero and filling the array with NaNs.
    value_range = spec.max() - spec.min()
    if value_range > 0:
        spec_normalized = (spec - spec.min()) / value_range
    else:
        spec_normalized = np.zeros_like(spec)
    # Convert grayscale spectrogram to RGB
    rgb_spec = gray2rgb(spec_normalized)
    rgb_mel_spectrograms.append(rgb_spec)

# Stack the RGB spectrograms into a single array
padded_mel_spectrograms = np.array(rgb_mel_spectrograms)

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Convert lists to numpy arrays
mel_spectrograms = np.array(padded_mel_spectrograms)
encoded_labels = np.array(encoded_labels)

# Save Label Encoder classes so the integer labels can be decoded later
np.save("label_encoder.npy", label_encoder.classes_)

# Create directories for each class
for label in label_encoder.classes_:
    os.makedirs(os.path.join('spectrogram_dataset', label), exist_ok=True)

# Save spectrograms in corresponding directories.
# - The 2-D dB spectrograms are plotted (specshow is documented for 2-D
#   input), not the normalized RGB arrays.
# - The running index makes every file name unique; without it all images of
#   a class would be written to the same path and overwrite each other.
for index, (spec, label, sr, hop_length) in enumerate(
        zip(padded_db_spectrograms, labels, sampling_rates, hop_lengths)):
    save_path = os.path.join(
        'spectrogram_dataset', label, f'spectrogram_{label}_{index:05d}.png')
    librosa.display.specshow(spec, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel')
    plt.savefig(save_path)
    plt.close()
