In [15]:
#Importing necessary libraries
import pandas as pd
import numpy as np 
from matplotlib import pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
import librosa
import os
import librosa.display
from skimage.io import imread
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

In [11]:
import pandas as pd

# Read the bird-song metadata CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; adjust it locally.
metadata_path = "C:/Users/19147/Downloads/archive (12)/bird_songs_metadata.csv"
df = pd.read_csv(filepath_or_buffer=metadata_path)

# Print the leading five rows as a quick check of the columns and their values
print(df.head(n=5))


       id       genus   species subspecies           name  \
0  557838  Thryomanes  bewickii        NaN  Bewick's Wren   
1  557838  Thryomanes  bewickii        NaN  Bewick's Wren   
2  557838  Thryomanes  bewickii        NaN  Bewick's Wren   
3  557838  Thryomanes  bewickii        NaN  Bewick's Wren   
4  557838  Thryomanes  bewickii        NaN  Bewick's Wren   

                recordist        country  \
0  Whitney Neufeld-Kaiser  United States   
1  Whitney Neufeld-Kaiser  United States   
2  Whitney Neufeld-Kaiser  United States   
3  Whitney Neufeld-Kaiser  United States   
4  Whitney Neufeld-Kaiser  United States   

                                  location  latitude  longitude altitude  \
0  Arlington, Snohomish County, Washington   48.0708  -122.1006      100   
1  Arlington, Snohomish County, Washington   48.0708  -122.1006      100   
2  Arlington, Snohomish County, Washington   48.0708  -122.1006      100   
3  Arlington, Snohomish County, Washington   48.0708  -122.1006 

In [12]:

def load_bird_sounds_from_csv(metadata_path, audio_files_path):
    """Load every audio clip listed in a metadata CSV together with its label.

    Parameters
    ----------
    metadata_path : str
        Path to a CSV file that has at least the columns ``filename``,
        ``genus`` and ``species``.
    audio_files_path : str
        Directory containing the audio files named in the ``filename`` column.

    Returns
    -------
    bird_sounds : list
        One waveform per clip found on disk, as returned by
        ``librosa.load(file_path, sr=None)`` (no resampling is requested).
    labels : list of str
        ``"<genus> <species>"`` for every loaded clip, aligned with
        ``bird_sounds``.
    sr : number or None
        Sampling rate of the first clip that was loaded, or ``None`` when no
        clip could be loaded.

    Notes
    -----
    Rows whose file is missing (or is not a regular file) are reported on
    stdout and skipped. A clip whose sampling rate differs from the first
    clip's is still loaded, but a warning line is printed so that mixed-rate
    data does not go unnoticed.
    """
    df = pd.read_csv(metadata_path)
    bird_sounds = []
    labels = []
    sr = None  # sampling rate of the first clip that loads successfully

    # Iterate the three needed columns directly instead of building a Series
    # per row with iterrows().
    for filename, genus, species in zip(df['filename'], df['genus'], df['species']):
        file_path = os.path.join(audio_files_path, filename)
        # isfile (not exists) so that a directory path is never handed to librosa
        if not os.path.isfile(file_path):
            print(f"File not found: {file_path}")
            continue

        sound, clip_sr = librosa.load(file_path, sr=None)
        if sr is None:
            sr = clip_sr
        elif clip_sr != sr:
            print(f"Sampling rate mismatch ({clip_sr} Hz, expected {sr} Hz): {file_path}")

        bird_sounds.append(sound)
        # Label combines genus and species for more specificity
        labels.append(f"{genus} {species}")

    return bird_sounds, labels, sr

# Dataset locations -- edit both so they point at your local copy of the data
metadata_path = "C:/Users/19147/Downloads/archive (12)/bird_songs_metadata.csv"
audio_files_path = "C:/Users/19147/Downloads/archive (12)/wavfiles"

# Load the waveforms, their "<genus> <species>" labels, and the sampling rate
# reported by the loader
bird_sounds, labels, sampling_rate = load_bird_sounds_from_csv(
    metadata_path=metadata_path,
    audio_files_path=audio_files_path,
)


In [14]:


# Assume one fixed duration for all clips for simplicity; clips are cut or
# zero-padded to this length by process_audio_clips.
fixed_length = 5  # seconds
# Use the rate the clips were actually decoded at. The loader calls
# librosa.load(..., sr=None), which keeps each file's native rate rather than
# resampling to librosa's 22050 Hz default, so hard-coding 22050 here would
# silently mis-scale both the clip length and the MFCC frequency axis for
# files recorded at another rate. 22050 is kept only as a fallback for the
# case where nothing was loaded.
sr = sampling_rate if sampling_rate is not None else 22050
audio_length = int(fixed_length * sr)  # target clip length in samples

def process_audio_clips(bird_sounds, sr, target_length=None):
    """Truncate or zero-pad every clip to the same number of samples.

    Parameters
    ----------
    bird_sounds : iterable of 1-D sequences
        The waveforms to normalise in length.
    sr : number
        Sampling rate of the clips. Kept for call compatibility; the length
        is controlled by ``target_length``.
    target_length : int, optional
        Number of samples every returned clip should have. When omitted, the
        notebook-level ``audio_length`` is used, which matches the previous
        behaviour of this function.

    Returns
    -------
    list
        One entry per input clip, in input order. Longer clips keep only
        their first ``target_length`` samples; all other clips are
        right-padded with zeros up to ``target_length``.
    """
    if target_length is None:
        # Fall back to the module-level default so existing calls keep working
        target_length = audio_length

    processed_audio = []
    for sound in bird_sounds:
        n_samples = len(sound)
        if n_samples > target_length:
            sound = sound[:target_length]
        else:
            # Pad only at the end; a clip that already has the target length
            # gets zero padding and is returned unchanged in content.
            sound = np.pad(sound, (0, target_length - n_samples), 'constant')
        processed_audio.append(sound)
    return processed_audio

def extract_features(bird_sounds, sr):
    """Summarise each clip as one averaged 13-coefficient MFCC vector.

    Parameters
    ----------
    bird_sounds : iterable
        Waveforms to describe; each one is passed to ``librosa.feature.mfcc``.
    sr : number
        Sampling rate forwarded to ``librosa.feature.mfcc``.

    Returns
    -------
    numpy.ndarray
        One row per clip. Each row is the MFCC matrix averaged over its
        second axis (the frame axis, per librosa's documented layout).
    """
    def _mean_mfcc(clip):
        # 13 coefficients per frame, collapsed to a single mean per coefficient
        coeffs = librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=13)
        return coeffs.T.mean(axis=0)

    return np.array([_mean_mfcc(clip) for clip in bird_sounds])

# Bring every clip to the common length, then compute one MFCC vector per clip
processed_audio = process_audio_clips(bird_sounds=bird_sounds, sr=sr)
features = extract_features(bird_sounds=processed_audio, sr=sr)

# Map the label strings to integer class ids
label_encoder = LabelEncoder().fit(labels)
y_encoded = label_encoder.transform(labels)

# Hold out 30% of the clips for testing (fixed seed for repeatability).
# NOTE(review): the metadata preview shows several rows sharing one recording
# id; a purely random split may put clips of the same recording in both sets.
# Consider a group-aware split -- confirm against the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    features, y_encoded, test_size=0.3, random_state=42
)


In [16]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Class predictions for the held-out split
y_pred = clf.predict(X_test)

# Report overall accuracy, then per-class precision / recall / F1
print(f"Accuracy: {accuracy_score(y_test, y_pred)}\n")
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=label_encoder.classes_))


Accuracy: 0.876459741856177

                       precision    recall  f1-score   support

Cardinalis cardinalis       0.92      0.91      0.91       311
    Melospiza melodia       0.82      0.90      0.86       372
    Mimus polyglottos       0.90      0.82      0.86       348
  Thryomanes bewickii       0.88      0.84      0.86       298
   Turdus migratorius       0.88      0.93      0.90       298

             accuracy                           0.88      1627
            macro avg       0.88      0.88      0.88      1627
         weighted avg       0.88      0.88      0.88      1627



In [17]:
def extract_mel_spectrogram_features(audio, sr, n_fft=2048, hop_length=512, n_mels=128):
    """Describe a clip by five summary statistics of its dB mel spectrogram.

    Parameters
    ----------
    audio : array-like
        Waveform passed to ``librosa.feature.melspectrogram``.
    sr : number
        Sampling rate of ``audio``.
    n_fft, hop_length, n_mels : int
        Forwarded unchanged to ``librosa.feature.melspectrogram``.

    Returns
    -------
    numpy.ndarray
        ``[mean, std, median, min, max]`` taken over the whole dB-scaled
        spectrogram.

    NOTE(review): the spectrogram is converted with ``ref=np.max``, i.e.
    relative to its own peak, so the ``max`` statistic looks like it is 0 dB
    for every clip and carries no information -- confirm before relying on it.
    """
    mel_power = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    # Apply each reducer to the full 2-D array, keeping the original order
    summary_stats = (np.mean, np.std, np.median, np.min, np.max)
    return np.array([stat(mel_db) for stat in summary_stats])

# Sanity check: mel-spectrogram summary statistics for the first loaded clip
mel_features = extract_mel_spectrogram_features(audio=bird_sounds[0], sr=sampling_rate)

In [18]:
def extract_waveform_features(audio):
    """Return two time-domain descriptors of a clip.

    Parameters
    ----------
    audio : array-like
        Waveform passed to ``librosa.feature.rms`` and
        ``librosa.feature.zero_crossing_rate``.

    Returns
    -------
    numpy.ndarray
        ``[mean RMS, mean zero-crossing rate]``, each averaged over
        everything librosa returns for the clip.
    """
    rms_frames = librosa.feature.rms(y=audio)
    zcr_frames = librosa.feature.zero_crossing_rate(y=audio)
    return np.array([np.mean(rms_frames), np.mean(zcr_frames)])


# Sanity check: RMS / zero-crossing-rate features for the first loaded clip
waveform_features = extract_waveform_features(audio=bird_sounds[0])


In [19]:

# One row per clip: the five mel-spectrogram statistics followed by the two
# waveform features (mean RMS, mean zero-crossing rate)
all_features = np.array([
    np.concatenate((
        extract_mel_spectrogram_features(sound, sampling_rate),
        extract_waveform_features(sound),
    ))
    for sound in bird_sounds
])


In [20]:

# Map the label strings to integer class ids
label_encoder = LabelEncoder().fit(labels)
y_encoded = label_encoder.transform(labels)

# Hold out 30% of the clips for testing (same seed as the MFCC experiment)
X_train, X_test, y_train, y_test = train_test_split(
    all_features, y_encoded, test_size=0.3, random_state=42
)

# Fit a 100-tree random forest with a fixed seed on the combined features
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy, then the per-class report
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=label_encoder.classes_))


Accuracy: 0.5802089735709896
                       precision    recall  f1-score   support

Cardinalis cardinalis       0.52      0.61      0.56       311
    Melospiza melodia       0.56      0.56      0.56       372
    Mimus polyglottos       0.64      0.63      0.64       348
  Thryomanes bewickii       0.66      0.47      0.55       298
   Turdus migratorius       0.56      0.61      0.58       298

             accuracy                           0.58      1627
            macro avg       0.59      0.58      0.58      1627
         weighted avg       0.59      0.58      0.58      1627

