In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import librosa
from skimage.color import gray2rgb
from skimage.transform import resize
from pydub import AudioSegment

# Inference artefacts read from the current working directory.

# Trained Keras classifier.
model = load_model("birds_classifier_model.h5")

# Rebuild the label encoder by restoring its saved class array rather than
# re-fitting it.
# NOTE(review): allow_pickle=True unpickles objects stored in the .npy file;
# only load a label_encoder.npy that comes from a trusted source.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load('label_encoder.npy', allow_pickle=True)

# Single integer stored as text.
# NOTE(review): max_width is not referenced by the code visible in this file;
# presumably it is the spectrogram width used at training time -- confirm.
with open('max_width.txt', 'r') as width_file:
    max_width = int(width_file.read().strip())

# Turn an audio file into the batched mel spectrogram fed to the model
def preprocess_audio(audio_path):
    """Load an audio clip and return its mel spectrogram with a batch axis.

    The clip is loaded through ``librosa.load`` with a 16 kHz sample rate,
    ``mono=True`` and a 5 second ``duration`` limit, then converted with
    ``librosa.feature.melspectrogram`` using librosa's default settings.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The spectrogram array with a new leading axis of length 1.

    NOTE(review): the module-level ``max_width`` and the imported
    ``gray2rgb`` / ``resize`` helpers are not applied here. If the model was
    trained on padded, resized or RGB-converted spectrograms, this output may
    not match its expected input shape -- confirm against the training code.
    """
    sample_rate = 16000  # Hz
    clip_seconds = 5  # upper bound on how much audio is read
    waveform, _ = librosa.load(
        audio_path, sr=sample_rate, duration=clip_seconds, mono=True
    )
    mel = librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
    # Prepend the batch dimension.
    return mel[np.newaxis, ...]

# Transcode an MP3 file to WAV
def convert_mp3_to_wav(mp3_file, wav_file):
    """Decode ``mp3_file`` with pydub and export it to ``wav_file`` as WAV.

    Args:
        mp3_file: Path of the MP3 file to read.
        wav_file: Destination path for the exported WAV data.
    """
    AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav")

# Function to predict bird species from audio file
def predict_bird_species(audio_file):
    """Predict the bird species heard in an audio file.

    A path ending in ``.mp3`` (case-insensitive) is first transcoded to a
    sibling file with the extension swapped to ``.wav``. Any other path is
    handed to ``preprocess_audio`` unchanged, so an existing WAV file is never
    decoded as MP3 or overwritten.

    Args:
        audio_file: Path of the recording, as a string.

    Returns:
        The label that ``label_encoder`` maps to the highest-scoring model
        output.
    """
    mp3_suffix = '.mp3'
    if audio_file.lower().endswith(mp3_suffix):
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".mp3" that appears earlier in the path (e.g. a directory name).
        wav_file = audio_file[:-len(mp3_suffix)] + '.wav'
        convert_mp3_to_wav(audio_file, wav_file)
    else:
        # Not an MP3: running it through the MP3 decoder and exporting to the
        # same path would fail or clobber the source file, so use it as-is.
        wav_file = audio_file
    # Preprocess input audio file
    processed_input = preprocess_audio(wav_file)
    # Make prediction
    predictions = model.predict(processed_input)
    # Decode predicted label (index of the largest score -> class name)
    predicted_label = label_encoder.inverse_transform([np.argmax(predictions)])[0]
    return predicted_label

# Example usage
# Raw string so the Windows path separators stay literal backslashes instead
# of relying on "\e" and "\X" being unrecognized escape sequences.
audio_file_path = r"birds_audio_dataset\egyptian goose\XC838177 - Egyptian Goose - Alopochen aegyptiaca.wav"
predicted_species = predict_bird_species(audio_file_path)
print("Predicted bird species:", predicted_species)
