# Ziel des Notebooks
* Wir validieren, ob wir die BirdNET Python Library so zum Laufen kriegen, dass sie:

    * Exakt (oder fast exakt) die gleichen Ergebnisse liefert wie deine Birdnet-Analyzer-App.

    * Auf deiner CPU schnell genug läuft.

* Relevante Funktionen aus der Dokumentation nutzen

In [3]:
PROJECT_ROOT = Path.cwd().parent   # passe an, falls du im notebooks/ Ordner bist
PROJECT_ROOT.exists()

True

In [2]:
import os, sys, platform, json, time
from pathlib import Path

print("Python:", sys.version)
print("Platform:", platform.platform())
print("CWD:", Path.cwd())

PROJECT_ROOT = Path.cwd().parent  # passe an, falls du im notebooks/ Ordner bist
DATA_DIR = Path("D:/Samsung SSD 850 EVO extern/wmv")
OUT_DIR = PROJECT_ROOT / "outputs" / "experiments" / "birdnet"
OUT_DIR.mkdir(parents=True, exist_ok=True)

print("DATA_DIR exists:", DATA_DIR.exists(), DATA_DIR)
print("OUT_DIR:", OUT_DIR)

Python: 3.11.14 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 18:30:03) [MSC v.1929 64 bit (AMD64)]
Platform: Windows-10-10.0.26200-SP0
CWD: C:\Users\mghan\OneDrive\01.Learn AI\01.BA_Birdmonitoring\project\notebooks
DATA_DIR exists: False D:\Samsung SSD 850 EVO extern\wmv
OUT_DIR: C:\Users\mghan\OneDrive\01.Learn AI\01.BA_Birdmonitoring\project\outputs\experiments\birdnet


# BirdNET importieren und Version testen

In [4]:
import birdnet
print("birdnet version:", getattr(birdnet, "__version__", "unknown"))


birdnet version: unknown


In [5]:
from importlib.metadata import version, PackageNotFoundError

for pkg in ["birdnet", "birdnet-analyzer", "birdnet_analyzer"]:
    try:
        print(pkg, "->", version(pkg))
    except PackageNotFoundError:
        print(pkg, "-> not installed (as distribution name)")


birdnet -> 0.2.11
birdnet-analyzer -> not installed (as distribution name)
birdnet_analyzer -> not installed (as distribution name)


In [6]:
print("CPU cores:", os.cpu_count())

CPU cores: 8


In [9]:
wav_files = sorted(list(DATA_DIR.glob("*.WAV")))
print(f'Anzahl WAVs ist: {len(wav_files)}')

wav_path = wav_files[10]
print(f'Beispieldatei: {wav_path}')
print(wav_path.exists())

Anzahl WAVs ist: 427
Beispieldatei: D:\Samsung SSD 850 EVO extern\wmv\2453AC0263FBD00C_20250409_055500.WAV
True


In [10]:
Path.cwd()

WindowsPath('C:/Users/mghan/OneDrive/01.Learn AI/01.BA_Birdmonitoring/project/notebooks')

# BirdNET Modell laden

In [16]:
model = birdnet.load("acoustic", "2.4", "tf")
print("Model loaded:", model)

Model loaded: <birdnet.acoustic_models.v2_4.model.AcousticModelV2_4 object at 0x00000141F44F7890>


# Predict auf eine Datei

Im folgenden sehe ich alle Parameter/default Werte für das model

In [12]:
import inspect
print(inspect.signature(model.predict))
print(inspect.getdoc(model.predict))

(inp: 'Path | str | Iterable[Path | str]', /, *, top_k: 'int | None' = 5, n_feeders: 'int' = 1, n_workers: 'int | None' = None, batch_size: 'int' = 1, prefetch_ratio: 'int' = 1, overlap_duration_s: 'float' = 0, bandpass_fmin: 'int' = 0, bandpass_fmax: 'int' = 15000, speed: 'float' = 1.0, apply_sigmoid: 'bool' = True, sigmoid_sensitivity: 'float | None' = 1.0, default_confidence_threshold: 'float | None' = 0.1, custom_confidence_thresholds: 'dict[str, float] | None' = None, custom_species_list: 'str | Path | Collection[str] | None' = None, half_precision: 'bool' = False, max_audio_duration_min: 'float | None' = None, device: 'str | list[str]' = 'CPU', show_stats: "Literal['minimal', 'progress', 'benchmark'] | None" = None, progress_callback: 'Callable[[AcousticProgressStats], None] | None' = None) -> 'AcousticPredictionResultBase'
None


In [13]:
import time

t0 = time.perf_counter()

predictions = model.predict(
    str(wav_path),
    overlap_duration_s=2.0,
    default_confidence_threshold=0.1,
    sigmoid_sensitivity=1.0
    )

t1 = time.perf_counter()
print(f"Predict-Zeit: {t1 - t0:.2f} s")

Predict-Zeit: 487.67 s


In [None]:
df = predictions.to_dataframe()
df.head(3)

# Speichern der Datei

In [20]:
teil1 = str(wav_path.stem) + (".BirdNET.results.csv")
zieldatei = OUT_DIR / teil1

In [21]:
zieldatei

WindowsPath('C:/Users/mghan/OneDrive/01.Learn AI/01.BA_Birdmonitoring/project/outputs/experiments/birdnet/2453AC0263FBD00C_20250409_055500.BirdNET.results.csv')

In [22]:
predictions.to_csv(zieldatei)

Preparing CSV export...


Writing CSV: 100%|██████████████████████████████████████████| 8667/8667 [00:00<00:00, 117679.92predictions/s, CSV=1 MB]


# Vergleichen der beiden Dateien

Die beiden Dateien sind gleich. Zwar ist die Struktur der Dtein unterschiedlich aber die Detektionen sind identisch

In [None]:
import pandas as pd
import numpy as np

# === Pfade anpassen ===
PY_CSV = r"C:\Users\mghan\OneDrive\01.Learn AI\01.BA_Birdmonitoring\project\outputs\experiments\birdnet\2453AC0263FBD00C_20250610_042001_first30min.BirdNET.results.csv"
AN_CSV = r"C:\Users\mghan\OneDrive\01.Learn AI\01.BA_Birdmonitoring\project\outputs\experiments\birdnet\2453AC0263FBD00C_20250610_042001_first30min.BirdNET_Analyzer.results.csv"

# --- Helfer ---
def hms_to_seconds(x):
    """
    Konvertiert 'HH:MM:SS.xx' -> float Sekunden.
    Falls schon numerisch, wird float(x) versucht.
    """
    if pd.isna(x):
        return np.nan
    s = str(x).strip()
    if ":" not in s:
        return float(s)
    hh, mm, ss = s.split(":")
    return int(hh) * 3600 + int(mm) * 60 + float(ss)

def extract_scientific(species_name: str) -> str:
    """
    Aus python-CSV: 'Scientific_Common' -> 'Scientific'
    """
    if pd.isna(species_name):
        return np.nan
    return str(species_name).split("_", 1)[0].strip()

def normalize_python_csv(path: str) -> pd.DataFrame:
    df = pd.read_csv(path)
    out = pd.DataFrame({
        "start_s": df["start_time"].map(hms_to_seconds),
        "end_s": df["end_time"].map(hms_to_seconds),
        "scientific": df["species_name"].map(extract_scientific),
        "confidence_py": pd.to_numeric(df["confidence"], errors="coerce"),
    })
    return out

def normalize_analyzer_csv(path: str) -> pd.DataFrame:
    df = pd.read_csv(path)
    out = pd.DataFrame({
        "start_s": pd.to_numeric(df["Start (s)"], errors="coerce"),
        "end_s": pd.to_numeric(df["End (s)"], errors="coerce"),
        "scientific": df["Scientific name"].astype(str).str.strip(),
        "confidence_an": pd.to_numeric(df["Confidence"], errors="coerce"),
    })
    return out

# --- Laden & normalisieren ---
py = normalize_python_csv(PY_CSV)
an = normalize_analyzer_csv(AN_CSV)

# (Optional) Sortieren, damit Reihenfolge egal ist
KEYS = ["start_s", "end_s", "scientific"]
py = py.sort_values(KEYS).reset_index(drop=True)
an = an.sort_values(KEYS).reset_index(drop=True)

# --- Merge: finden, was matcht / fehlt / doppelt ist ---
m = py.merge(an, on=KEYS, how="outer", indicator=True)

# Confidence-Differenz
m["conf_abs_diff"] = (m["confidence_py"] - m["confidence_an"]).abs()

# --- Toleranzen (wegen Rundung) ---
CONF_TOL = 1e-4  # falls Analyzer auf 4 Dezimalen rundet

# --- Reports ---
total_py = len(py)
total_an = len(an)

missing_in_an = m[m["_merge"] == "left_only"]
missing_in_py = m[m["_merge"] == "right_only"]
matched = m[m["_merge"] == "both"]

conf_mismatch = matched[matched["conf_abs_diff"] > CONF_TOL]

print("=== BASIC CHECKS ===")
print(f"Rows python:   {total_py}")
print(f"Rows analyzer: {total_an}")
print()

print("=== KEY MATCHING ===")
print(f"Matched rows:           {len(matched)}")
print(f"Only in python CSV:     {len(missing_in_an)}")
print(f"Only in analyzer CSV:   {len(missing_in_py)}")
print()

print("=== CONFIDENCE CHECK ===")
print(f"Confidence mismatches (> {CONF_TOL}): {len(conf_mismatch)}")

if len(conf_mismatch) > 0:
    print("\nBeispiele (max 10):")
    cols = KEYS + ["confidence_py", "confidence_an", "conf_abs_diff"]
    print(conf_mismatch[cols].head(10).to_string(index=False))

# --- Falls du eine Diff-Datei willst ---
OUT_DIFF = "birdnet_compare_diff.csv"
m.to_csv(OUT_DIFF, index=False)
print(f"\nFull comparison exported to: {OUT_DIFF}")


=== BASIC CHECKS ===
Rows python:   512
Rows analyzer: 512

=== KEY MATCHING ===
Matched rows:           512
Only in python CSV:     0
Only in analyzer CSV:   0

=== CONFIDENCE CHECK ===
Confidence mismatches (> 0.0001): 0

Full comparison exported to: birdnet_compare_diff.csv


In [None]:
python_bn = pd.read_csv(PY_CSV)
analyzer_bn = pd.read_csv(AN_CSV)

In [None]:
python_bn['confidence'].mean()

np.float64(0.19839494531250002)

In [None]:
analyzer_bn['Confidence'].mean()

np.float64(0.19839609375)

# Perch

## Perch Modell laden

In [1]:
p = Path("C:/Users/mghan/Desktop/BirdDetection_BA/Bird-Song-Detector/Data/Audios/AM1_20230510_073000.WAV")

NameError: name 'Path' is not defined

In [19]:
p.exists()

True

In [20]:
perch = birdnet.load_perch_v2("CPU")
print(perch)

<birdnet.acoustic_models.perch_v2.model.AcousticModelPerchV2 object at 0x00000141F69A27D0>


In [21]:
t0 = time.perf_counter()
preds = perch.predict(
    str(p),
    default_confidence_threshold=0.1,
    overlap_duration_s=2,
    device="CPU",
)

t1 = time.perf_counter()
print(f"Predict-Zeit: {t1 - t0:.2f} s")

Predict-Zeit: 91.57 s


In [22]:
a = preds.to_dataframe()

In [23]:
a

  has_large_values = (abs_vals > 1e6).any()


Unnamed: 0,input,start_time,end_time,species_name,confidence
0,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,0.0,5.0,Singing,7.793657
1,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,0.0,5.0,Music,6.938717
2,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,0.0,5.0,Musical_instrument,6.935538
3,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,0.0,5.0,Engine,6.811213
4,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,0.0,5.0,Strix aluco,6.620333
...,...,...,...,...,...
95,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,57.0,60.0,Emberiza hortulana,6.821391
96,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,57.0,60.0,Tyto alba,6.767152
97,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,57.0,60.0,Strix aluco,6.710098
98,C:\Users\mghan\Desktop\BirdDetection_BA\Bird-S...,57.0,60.0,Nycticorax nycticorax,6.669561
