In [1]:
import os
import re
import sys
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import soundfile as sf
import tensorflow as tf
from collections import defaultdict
from scipy.special import softmax
from scipy.stats import entropy
from scipy.signal import spectrogram
import gc



In [2]:
# -------------------- Load GPU Delegate --------------------
delegate = None
try:
    import tflite_runtime.interpreter as tflite
except ModuleNotFoundError:
    from tensorflow import lite as tflite

try:
    delegate = tf.lite.experimental.load_delegate("libtensorflowlite_gpu_delegate.so")
    print("GPU delegate loaded successfully.")
except Exception as e:
    print("GPU delegate not available:", e)

# -------------------- Patch Interpreter BEFORE importing wrapper --------------------
if not hasattr(tflite, "_original_interpreter"):
    tflite._original_interpreter = tflite.Interpreter

    def Interpreter_with_delegate(*args, **kwargs):
        if delegate is not None:
            kwargs["experimental_delegates"] = [delegate]
        return tflite._original_interpreter(*args, **kwargs)

    tflite.Interpreter = Interpreter_with_delegate

Exception ignored in: <function Delegate.__del__ at 0x15a279d30>
Traceback (most recent call last):
  File "/Users/rachit/Documents/Python shit/.venv/lib/python3.9/site-packages/tensorflow/lite/python/interpreter.py", line 121, in __del__
    if self._library is not None:
AttributeError: 'Delegate' object has no attribute '_library'


GPU delegate not available: dlopen(libtensorflowlite_gpu_delegate.so, 0x0006): tried: 'libtensorflowlite_gpu_delegate.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtensorflowlite_gpu_delegate.so' (no such file), '/usr/lib/libtensorflowlite_gpu_delegate.so' (no such file, not in dyld cache), 'libtensorflowlite_gpu_delegate.so' (no such file)


In [7]:
# -------------------- Setup Paths --------------------
DATASET_PATH = "/Users/rachit/Documents/Python shit/SPOT1/recordings"
STATIC_NOISE_PATH = "/Users/rachit/Documents/Python shit/StaticNoise/Untitled video - Made with Clipchamp.wav"
WRAPPER_PATH = "/Users/rachit/Documents/Python shit/BirdNET-Analyzer-main"
sys.path.append(WRAPPER_PATH)
import birdnet_analyzer__wrapper as birdnet

TARGET_SR = 48000
WINDOW_DURATION = 30.0  # seconds per prediction window
WINDOW_SAMPLES = int(TARGET_SR * WINDOW_DURATION)
OUTPUT_CSV = "classified_birdnet_per_file_results.csv"

ModuleNotFoundError: No module named 'birdnet_analyzer__wrapper'

In [4]:
def extract_year_month_date_hour_and_minute(filename):
    """Extracts hour and minute from filenames like '2MM07103_20250330_143000.wav'."""
    match_date = re.search(r'_(\d{8})_', filename)
    match = re.search(r'_(\d{6})\.wav$', filename)
    if match and match_date:
        time_str = match.group(1)
        date_str = match_date.group(1)
        year = date_str[:4]
        month = date_str[4:6]
        date = date_str[6:]
        hour = int(time_str[:2])
        minute = int(time_str[2:4])
        return year, month, date, hour, minute
    return None, None, None, None, None

def segment_audio(audio, fs=TARGET_SR, segment_duration=WINDOW_DURATION):
    """
    Splits audio into non-overlapping segments of WINDOW_DURATION seconds.
    """
    segment_samples = int(segment_duration * fs)
    n_segments = len(audio) // segment_samples
    if n_segments < 1:
        return None
    audio = audio[:n_segments * segment_samples]
    segments = audio.reshape((n_segments, segment_samples))
    return segments

def remove_static_noise(audio, noise_ref, sr=TARGET_SR, snr_db=18):
    """
    Combines time-domain noise subtraction and spectral gating for static noise removal.
    
    1. Time-Domain Subtraction:
       - If noise_ref is shorter than audio, pad it using 'wrap' mode (repeat the noise).
       - Scale the noise_ref based on the desired SNR and subtract it from the audio.
    
    2. Spectral Gating:
       - Computes the STFT of the time-domain subtracted audio.
       - Estimates a noise threshold from the noise_ref’s STFT.
       - Zeroes out frequency bins below the threshold.
       - Reconstructs the audio using inverse STFT.
    """
    # --- Time-Domain Subtraction ---
    if len(noise_ref) > len(audio):
        noise_ref = noise_ref[:len(audio)]
    else:
        noise_ref = np.pad(noise_ref, (0, len(audio) - len(noise_ref)), 'wrap')
    
    audio_power = np.mean(audio ** 2)
    noise_power = np.mean(noise_ref ** 2)
    desired_noise_power = audio_power / (10 ** (snr_db / 10))
    noise_ref_scaled = noise_ref * np.sqrt(desired_noise_power / noise_power)
    audio_td = audio - noise_ref_scaled

    # --- Spectral Gating ---
    stft = librosa.stft(audio_td, n_fft=2048, hop_length=512)
    magnitude, phase = np.abs(stft), np.angle(stft)
    noise_stft = librosa.stft(noise_ref, n_fft=2048, hop_length=512)
    noise_mag = np.abs(noise_stft)
    noise_threshold = np.mean(noise_mag, axis=1, keepdims=True) * 1.2
    gated_mag = np.where(magnitude > noise_threshold, magnitude, 0)
    cleaned_stft = gated_mag * np.exp(1j * phase)
    audio_cleaned = librosa.istft(cleaned_stft, hop_length=512)
    return audio_cleaned

def compute_acoustic_indices(y, sr):
    """
    Computes acoustic indices from the audio segment:
      - ADI (Acoustic Diversity Index): Based on Shannon entropy over frequency bins.
      - ACI (Acoustic Complexity Index): Based on spectral flux.
      - AEI (Acoustic Evenness Index): 1 - (normalized entropy).
      - NDSI (Normalized Difference Soundscape Index): Ratio between bio and anthropogenic energy.
    """
    f, t, Sxx = spectrogram(y, fs=sr, nperseg=1024, noverlap=512)
    Sxx = Sxx + 1e-8  # Avoid log(0)
    S_norm = Sxx / np.sum(Sxx, axis=0, keepdims=True)
    ADI = np.mean(entropy(S_norm, axis=0))
    AEI = 1.0 - (ADI / np.log(Sxx.shape[0]))
    delta = np.abs(np.diff(Sxx, axis=1))
    ACI_vals = np.sum(delta, axis=1) / (np.sum(Sxx[:, :-1], axis=1) + 1e-8)
    ACI_total = np.mean(ACI_vals)
    bio = np.logical_and(f >= 2000, f <= 11000)
    anthro = np.logical_and(f >= 100, f <= 2000)
    B = np.sum(Sxx[bio])
    A = np.sum(Sxx[anthro])
    NDSI = (B - A) / (B + A + 1e-8)
    return ADI, ACI_total, AEI, NDSI

def classify_audio_file(filepath, model, noise_clip):
    """
    Loads the audio file, applies combined noise removal, segments it into 3-second windows,
    gets BirdNET predictions, and computes acoustic indices per segment.
    Returns a list of dictionaries (one per segment).
    """
    audio, sr = librosa.load(filepath, sr=TARGET_SR)
    audio_denoised = remove_static_noise(audio, noise_clip)
    segments = segment_audio(audio_denoised)
    if segments is None:
        return []
    
    predictions = []
    for i, segment in enumerate(segments):
        try:
            segment_input = segment.reshape(1, -1)
            df = model.predict(segment_input, samplerate=TARGET_SR, strict=True)
            scores = df.iloc[0].values
            if np.any(scores < 0):
                scores = softmax(scores)
                df.iloc[0] = scores
            top_idx = np.argmax(df.values)
            species = df.columns[top_idx]
            confidence = df.values[0][top_idx]
            ADI, ACI, AEI, NDSI = compute_acoustic_indices(segment.flatten(), sr)
            predictions.append({
                'Bird Species': species,
                'Confidence': confidence,
                'Segment': i,
                'ADI': ADI,
                'ACI': ACI,
                'AEI': AEI,
                'NDSI': NDSI
            })
        except Exception as e:
            print(f"Prediction failed for {filepath} segment {i}:", e)
    return predictions



NameError: name 'TARGET_SR' is not defined

In [None]:
# -------------------- Main Execution --------------------
DATASET_SUBFOLDER = "/kaggle/input/rupa12/2"

all_results = []
model = birdnet.Model(class_output=True)
# Pre-load the static noise clip.
noise_clip, _ = librosa.load(STATIC_NOISE_PATH, sr=TARGET_SR)

for filename in sorted(os.listdir(DATASET_SUBFOLDER)):
    if filename.lower().endswith(".wav"):
        year, month, date, hour, minute = extract_hour_and_minute(filename)
        filepath = os.path.join(DATASET_SUBFOLDER, filename)
        print(f"Processing {filename} (Hour: {hour}, Minute: {minute}) ...")
        preds = classify_audio_file(filepath, model, noise_clip)
        for pred in preds:
            all_results.append({
                "Filename": filename,
                "Year": year,
                "Month": month,
                "Date": date,
                "Hour": hour,
                "Minute": minute,
                "Second": (pred["Segment"]+1)*WINDOW_DURATION
                "Segment": pred["Segment"],
                "Bird Species": pred["Bird Species"],
                "Confidence": pred["Confidence"],
                "ADI": pred["ADI"],
                "ACI": pred["ACI"],
                "AEI": pred["AEI"],
                "NDSI": pred["NDSI"]
            })


results_df = pd.DataFrame(all_results)
results_df.to_csv(OUTPUT_CSV, index=False)
print("Classification results saved to", OUTPUT_CSV)
print(results_df.head())

In [None]:
# -------------------- Boxplot Visualization --------------------
# Create boxplots for each index (Confidence, ADI, ACI, AEI, NDSI) by Hour.
for idx in ["Confidence", "ADI", "ACI", "AEI", "NDSI"]:
    plt.figure(figsize=(12, 6))
    results_df.boxplot(column=idx, by="Hour", grid=True)
    plt.title(f"{idx} per Hour (Boxplot)")
    plt.suptitle("")
    plt.xlabel("Hour")
    plt.ylabel(idx)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    plt.savefig(f"boxplot_{idx.lower()}_per_hour.png")
    plt.show()