In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import optimizers
from sklearn.model_selection import train_test_split

In [9]:
# Function to extract Mel spectrograms from audio files
def extract_features(file_path, max_length=216):  # Define a max_length suitable for your dataset
    audio, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Pad or truncate MFCCs
    if mfccs.shape[1] < max_length:
        pad_width = max_length - mfccs.shape[1]
        mfccs = np.pad(mfccs, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_length]

    # Pad or truncate Mel spectrogram
    if mel_db.shape[1] < max_length:
        pad_width = max_length - mel_db.shape[1]
        mel_db = np.pad(mel_db, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_db = mel_db[:, :max_length]

    return mfccs, mel_db

def plot_features(mfccs, mel_db, title):
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    librosa.display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')

    plt.subplot(1, 2, 2)
    librosa.display.specshow(mel_db, x_axis='time', y_axis='mel', sr=sample_rate)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()



In [10]:
base_path = '/content/drive/MyDrive/Colab Notebooks/Birds_Sounds'
classes = ['Brown_Tinamou', 'Cinereous_Tinamou', 'Great_Tinamou']

data = []
labels = []

for label in classes:
    folder_path = os.path.join(base_path, label)
    for file in os.listdir(folder_path):
        if file.endswith('.mp3'):
            file_path = os.path.join(folder_path, file)
            mfccs, mel_db = extract_features(file_path)
            data.append((mfccs, mel_db))
            labels.append(label)



In [11]:
from sklearn.preprocessing import LabelEncoder

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Convert data to numpy arrays
mfccs_data = np.array([x[0] for x in data])
mel_data = np.array([x[1] for x in data])

# Split the dataset
X_train_mfcc, X_test_mfcc, y_train, y_test = train_test_split(mfccs_data, y, test_size=0.2, random_state=42)
X_train_mel, X_test_mel, _, _ = train_test_split(mel_data, y, test_size=0.2, random_state=42)


In [12]:
def build_model(input_shape):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')
    ])
    return model

input_shape = (mfccs_data.shape[1], mfccs_data.shape[2], 1)  # Adjust shape as needed

model = build_model(input_shape)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

X_train_mfcc = X_train_mfcc[..., np.newaxis]  # Add channel dimension
X_test_mfcc = X_test_mfcc[..., np.newaxis]

history = model.fit(X_train_mfcc, y_train, epochs=20, validation_data=(X_test_mfcc, y_test))




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [13]:
test_loss, test_acc = model.evaluate(X_test_mfcc, y_test, verbose=2)
print(f'Test accuracy: {test_acc:.2f}')


1/1 - 0s - loss: 0.5456 - accuracy: 0.7931 - 77ms/epoch - 77ms/step
Test accuracy: 0.79


In [16]:
def predict_bird_species(file_path):
    mfccs, _ = extract_features(file_path)
    mfccs = mfccs[..., np.newaxis]
    mfccs = np.expand_dims(mfccs, axis=0)  # Add batch dimension
    predictions = model.predict(mfccs)
    predicted_label = np.argmax(predictions, axis=1)
    return label_encoder.inverse_transform(predicted_label)[0]

# Example usage
file_path = '/content/drive/MyDrive/Colab Notebooks/Birds_Sounds/Cinereous_Tinamou/XC387680 - Cinereous Tinamou - Crypturellus cinereus.mp3'
print(predict_bird_species(file_path))


Cinereous_Tinamou


In [17]:
model.save('bird_sound_classification_model2.h5')

  saving_api.save_model(
