# Bird Species Classification using TensorFlow Hub Model

This notebook classifies bird species in audio recordings using a pre-trained TensorFlow Hub bird vocalization classifier. It works on the BirdCLEF 2023 dataset and defines functions to load audio files, load the model and its labels, frame audio into fixed-size windows, resample waveforms, and make predictions.

## Functionality

- **Load Audio Files**: The `load_audio_files()` function loads and plays audio files from the specified directory.
- **Load Model**: The `load_model()` function loads the TensorFlow Hub model and its corresponding labels.
- **Class Names from CSV**: The `class_names_from_csv()` function retrieves the list of class names corresponding to the score vector.
- **Load Train Metadata**: The `load_train_metadata()` function reads the training metadata and returns the sorted list of unique primary labels (the competition classes).
- **Frame Audio**: The `frame_audio()` function frames audio data into fixed-size windows for inference.
- **Ensure Sample Rate**: The `ensure_sample_rate()` function ensures that the waveform has the desired sample rate.
- **Predict for Sample**: The `predict_for_sample()` function predicts the class probabilities for each 5-second window of an audio file and writes them into the matching rows of the submission DataFrame.
- **Main Function**: The `main()` function orchestrates the entire process, including loading audio files, making predictions, and generating the submission file.

## TensorFlow Hub Model - Bird Vocalization Classifier

The code utilizes a pre-trained TensorFlow Hub model for bird vocalization classification. The model path and label CSV file are specified to load the model and labels.

Important Parameters:
- `model_path`: The path to the TensorFlow Hub model for bird vocalization classification.
- `labels_path`: The path to the label CSV file corresponding to the model.

       +-------------------------+
       | TensorFlow Hub Model   |
       | (Bird Vocalization     |
       | Classification Model)  |
       +----------^--------------+
                  |
                  |
       +----------v--------------+
       |    Label CSV File       |
       +-------------------------+


# Import libraries


In [1]:
import csv
import functools
import glob

import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from IPython.display import Audio
from scipy.signal import resample

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


# Function to load and play audio files


In [2]:
def load_audio_files():
    """Load three example training recordings and show an audio player for each.

    Each file is decoded with ``librosa.load`` (at librosa's default sample
    rate) and wrapped in an ``IPython.display.Audio`` widget. The function
    returns nothing; its only effect is the rendered output.
    """
    # Imported here so the block does not depend on edits to the import cell.
    from IPython.display import display

    path = "/kaggle/input/birdclef-2023/train_audio/"
    bird_species = ["abethr1/XC128013.ogg", "abhori1/XC127317.ogg", "edcsun3/XC470591.ogg"]

    for bird in bird_species:
        audio, sr = librosa.load(path + bird)
        # display() renders the player widget; print() would only emit the
        # object's repr and nothing could be played.
        display(Audio(data=audio, rate=sr))

# Function to load the model and labels


In [3]:
def load_model():
    """Load the bird vocalization classifier and locate its label file.

    Returns:
        A ``(model, labels_path)`` tuple: the object returned by ``hub.load``
        and the path of ``assets/label.csv`` under the resolved model
        directory.
    """
    classifier_dir = '/kaggle/input/bird-vocalization-classifier/tensorflow2/bird-vocalization-classifier/2'
    classifier = hub.load(classifier_dir)

    # The label CSV lives under the resolved model directory.
    resolved_dir = hub.resolve(classifier_dir)
    return classifier, resolved_dir + "/assets/label.csv"

# Function to retrieve class names from a CSV file


In [4]:
def class_names_from_csv(class_map_csv_text):
    """Return the class names listed in a label CSV, in file order.

    The first row is treated as a header and skipped. The class name is taken
    from the first column of every remaining non-empty row, so the file may
    have one column or several.

    Args:
        class_map_csv_text: Path of the CSV file to read (despite the name,
            this is a path, not the CSV text itself).

    Returns:
        A list of class-name strings, one per data row.
    """
    # newline="" is what the csv module expects so that quoted fields with
    # embedded newlines are parsed correctly.
    with open(class_map_csv_text, newline="", encoding="utf-8") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row (no-op on an empty file)
        # Blank lines come back as empty lists; skip them instead of crashing.
        return [row[0] for row in csv_reader if row]

# Function to load the competition classes from the train metadata


In [5]:
def load_train_metadata(metadata_path="/kaggle/input/birdclef-2023/train_metadata.csv"):
    """Return the sorted unique primary labels from the training metadata.

    Args:
        metadata_path: Path of the metadata CSV; it must contain a
            ``primary_label`` column. Defaults to the BirdCLEF 2023 Kaggle
            input location.

    Returns:
        A sorted list of the distinct ``primary_label`` values.
    """
    train_metadata = pd.read_csv(metadata_path)
    # Drop missing labels first: sorting a mix of NaN and strings raises.
    primary_labels = train_metadata["primary_label"].dropna()
    return sorted(primary_labels.unique())

# Function to frame audio into fixed-size windows


In [6]:
def frame_audio(audio_array: np.ndarray, window_size_s: float = 5.0, hop_size_s: float = 5.0, sample_rate=32000) -> np.ndarray:
    """Split a waveform into fixed-size windows for inference.

    Args:
        audio_array: Waveform samples to frame.
        window_size_s: Window length in seconds. ``None`` or a negative value
            disables framing.
        hop_size_s: Step between consecutive window starts, in seconds.
        sample_rate: Samples per second, used to convert seconds to samples.

    Returns:
        With framing disabled, the input with one leading axis added.
        Otherwise the result of ``tf.signal.frame`` with ``pad_end=True``.
    """
    framing_disabled = window_size_s is None or window_size_s < 0
    if not framing_disabled:
        samples_per_window = int(window_size_s * sample_rate)
        samples_per_hop = int(hop_size_s * sample_rate)
        return tf.signal.frame(audio_array, samples_per_window, samples_per_hop, pad_end=True)

    # No framing requested: present the whole clip as a single window.
    return audio_array[np.newaxis, :]

# Function to ensure the desired sample rate for audio


In [7]:
def ensure_sample_rate(waveform, original_sample_rate, desired_sample_rate=32000):
    """Resample a waveform to the desired sample rate if it differs.

    Args:
        waveform: Sequence of audio samples.
        original_sample_rate: Sample rate of ``waveform`` in Hz.
        desired_sample_rate: Target sample rate in Hz.

    Returns:
        A ``(sample_rate, waveform)`` tuple. When the rates already match, the
        input waveform is returned unchanged; otherwise it is resampled with
        ``scipy.signal.resample`` and the rate is ``desired_sample_rate``.
    """
    if original_sample_rate == desired_sample_rate:
        # Nothing to do. Returning here also keeps the rate defined on this
        # path, which previously raised UnboundLocalError.
        return original_sample_rate, waveform

    # Scale the sample count by the rate ratio to keep the same duration.
    target_length = int(len(waveform) * desired_sample_rate / original_sample_rate)
    return desired_sample_rate, resample(waveform, target_length)

# Function to predict class probabilities for a sample


In [8]:
@functools.lru_cache(maxsize=1)
def _load_inference_resources():
    """Load the model and the competition-class lookup once, then reuse them.

    Returns:
        A ``(model, competition_classes, competition_class_map)`` tuple where
        ``competition_class_map[i]`` is the model output index used for
        ``competition_classes[i]``.
    """
    model, labels_path = load_model()
    classes = class_names_from_csv(labels_path)
    competition_classes = load_train_metadata()

    # Keep the first position of every label, matching list.index semantics.
    label_to_index = {}
    for position, label in enumerate(classes):
        label_to_index.setdefault(label, position)

    # NOTE(review): competition classes missing from the model's label list
    # fall back to output index 0, so they receive that class's probability.
    competition_class_map = np.array(
        [label_to_index.get(label, 0) for label in competition_classes],
        dtype=int,
    )
    return model, competition_classes, competition_class_map


def predict_for_sample(filename, sample_submission, frame_limit_secs=None):
    """Predict class probabilities per 5-second window and store them in place.

    For each window, the rows of ``sample_submission`` whose ``row_id`` equals
    ``"<file id>_<window end time in seconds>"`` get the softmax probabilities
    of the competition classes. Windows with no matching row are ignored.

    Args:
        filename: Path of the ``.ogg`` file to score.
        sample_submission: DataFrame with a ``row_id`` column and one column
            per competition class; modified in place.
        frame_limit_secs: If truthy, only windows ending at or before this
            many seconds are scored. ``None`` scores the whole file.

    Returns:
        None.
    """
    window_secs = 5  # frame_audio's default window and hop length

    # File ID is the base name without the ".ogg" extension.
    file_id = filename.split(".ogg")[0].split("/")[-1]

    # NOTE(review): librosa.load resamples to its 22050 Hz default here;
    # passing sr=None would keep the native rate. Confirm before changing,
    # since it alters the model inputs.
    audio, sample_rate = librosa.load(filename)
    sample_rate, wav_data = ensure_sample_rate(audio, sample_rate)

    fixed_tm = frame_audio(wav_data)

    # Cached: the model and label tables are identical for every file.
    model, competition_classes, competition_class_map = _load_inference_resources()

    for window_index, window in enumerate(fixed_tm):
        window_end_secs = (window_index + 1) * window_secs

        # Windows are in time order, so nothing after the limit is needed.
        if frame_limit_secs and window_end_secs > frame_limit_secs:
            break

        # The model expects a batch axis; embeddings are not used here.
        logits, _embeddings = model.infer_tf(window[np.newaxis, :])
        probabilities = tf.nn.softmax(logits[0]).numpy()

        row_mask = sample_submission.row_id == f"{file_id}_{window_end_secs}"
        sample_submission.loc[row_mask, competition_classes] = probabilities[competition_class_map]

# Main function


In [9]:
def main():
    """Run the demo playback, score every test soundscape, write the submission.

    Reads the test ``.ogg`` files and the sample submission from the BirdCLEF
    2023 Kaggle input directory, fills the submission rows through
    ``predict_for_sample``, and writes ``submission.csv`` to the working
    directory.
    """
    # Show the example training recordings.
    load_audio_files()

    test_samples = glob.glob("/kaggle/input/birdclef-2023/test_soundscapes/*.ogg")
    sample_sub = pd.read_csv("/kaggle/input/birdclef-2023/sample_submission.csv")

    # With exactly 3 submission rows, cap inference at 15 seconds per file;
    # otherwise score whole files.
    # NOTE(review): presumably the 3-row case is the placeholder submission
    # shown outside the hidden test run. Confirm.
    frame_limit_secs = 15 if sample_sub.shape[0] == 3 else None

    for sample_filename in test_samples:
        predict_for_sample(sample_filename, sample_sub, frame_limit_secs=frame_limit_secs)

    sample_sub.to_csv("submission.csv", index=False)
