In [1]:
import tensorflow as tf

BATCH_SIZE = 32
IMG_SIZE = (224, 224)

dataset = tf.keras.utils.image_dataset_from_directory(
    r"C:\Users\ashua\Downloads\birdclef-2025\train_spectrograms",
    labels="inferred",
    label_mode="categorical",  
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42
)


Found 28564 files belonging to 206 classes.


In [7]:
import os
CLASS_NAMES = sorted(os.listdir("train_audio"))
NUM_CLASSES = len(CLASS_NAMES)


In [2]:
val_ds = dataset.take(50)
train_ds = dataset.skip(50)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)


In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3),
           kernel_regularizer=regularizers.l2(0.001)),
    MaxPooling2D(2, 2),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu',
           kernel_regularizer=regularizers.l2(0.001)),
    MaxPooling2D(2, 2),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu',
          kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.5),

    Dense(NUM_CLASSES, activation='softmax')
])


model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True)


history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stop, lr_reduce, checkpoint]
)


Epoch 1/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m939s[0m 1s/step - accuracy: 0.0240 - loss: 34.0291 - val_accuracy: 0.0394 - val_loss: 5.7137 - learning_rate: 0.0010
Epoch 2/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m901s[0m 1s/step - accuracy: 0.0341 - loss: 5.6167 - val_accuracy: 0.0406 - val_loss: 5.3528 - learning_rate: 0.0010
Epoch 3/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m909s[0m 1s/step - accuracy: 0.0340 - loss: 5.2934 - val_accuracy: 0.0413 - val_loss: 5.1321 - learning_rate: 0.0010
Epoch 4/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m922s[0m 1s/step - accuracy: 0.0345 - loss: 5.0888 - val_accuracy: 0.0400 - val_loss: 4.9782 - learning_rate: 0.0010
Epoch 5/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m887s[0m 1s/step - accuracy: 0.0337 - loss: 4.9530 - val_accuracy: 0.0388 - val_loss: 4.8825 - learning_rate: 0.0010
Epoch 6/20
[1m843/843[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [10]:

model.save("birdclef_cnn_model.keras")





In [1]:
from tensorflow.keras.models import load_model

model = load_model("birdclef_cnn_model.keras")
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [7]:

import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from warnings import filterwarnings
filterwarnings("ignore")



AUDIO_PATH = r"C:\Users\ashua\Downloads\birdclef-2025\train_audio\wbwwre1\iNat45372.ogg" 
TEMP_IMG_PATH = "temp_test.png"
MODEL_PATH = "birdclef_cnn_model.keras"
CLASS_NAMES = sorted(os.listdir(r"C:\Users\ashua\Downloads\birdclef-2025\train_spectrograms")) 



def audio_to_image(audio_path, out_path):
    try:
        y, sr = librosa.load(audio_path, sr=None, duration=60.0)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        S_DB = librosa.power_to_db(S, ref=np.max)

        fig, ax = plt.subplots(figsize=(2.24, 2.24))
        ax.axis('off')
        librosa.display.specshow(S_DB, sr=sr, ax=ax)
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0, dpi=100)
        plt.close(fig)
        return out_path
    except Exception as e:
        print(f"❌ Failed to generate spectrogram: {e}")
        return None


model = load_model(MODEL_PATH)



def predict_from_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)
    idx = np.argmax(pred)
    return CLASS_NAMES[idx]


img_path = audio_to_image(AUDIO_PATH, TEMP_IMG_PATH)

if img_path:
    predicted_label = predict_from_image(img_path)
    print(f"✅ Predicted Bird Species: {predicted_label}")
else:
    print("⚠️ Prediction skipped due to spectrogram failure.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 272ms/step
✅ Predicted Bird Species: grekis
