In [46]:
import os
import csv
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime
from pprint import pprint
from pathlib import Path

from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryMultiProcessingAnalyzer

## Utils for data shaping

In [47]:
def extract_datetime_from_filename(filename):
    """Parse the recording timestamp encoded at the start of a file name.

    The stem (name without extension) is split on underscores; the first
    piece is read as a ``YYYYMMDD`` date and the second as an ``HHMMSS`` time,
    e.g. ``"20231027_142533_recording.wav"``.

    Args:
        filename: File name or path of the recording.

    Returns:
        The parsed ``datetime``, or ``None`` when the stem has fewer than two
        underscore-separated pieces or when parsing fails (a warning is
        printed in the latter case).
    """
    try:
        pieces = Path(filename).stem.split('_')
        if len(pieces) >= 2:
            # Date and time are concatenated and parsed in a single pass
            stamp = pieces[0] + pieces[1]
            return datetime.strptime(stamp, "%Y%m%d%H%M%S")
        # Unexpected naming scheme: nothing to parse
        return None
    except Exception as e:
        print(f"⚠️ Erreur pour {filename}: {e}")
        return None

# Helpers applied to the DataFrame

def set_id_from_filename(df):
    """Add an ``id`` column built from the folder and the file name.

    The id is ``<folder>_<file name up to its first dot>``. The DataFrame is
    modified in place and also returned.

    Args:
        df: DataFrame with string columns ``folder`` and ``file_name``.

    Returns:
        The same DataFrame object, now carrying the ``id`` column.
    """
    name_before_dot = df["file_name"].str.split('.').str[0]
    df["id"] = df["folder"] + "_" + name_before_dot
    return df

def set_gps_from_folder(df, gps_coord):
    """Add ``lat``, ``lon`` and ``alt`` columns looked up from the folder name.

    The integer found in the second underscore-separated piece of ``folder``
    (e.g. ``"Point_12"`` -> ``12``) is matched against ``gps_coord['folder']``.
    Folders without a match receive NaN. The DataFrame is modified in place
    and also returned.

    Args:
        df: DataFrame with a string column ``folder``.
        gps_coord: DataFrame with columns ``folder``, ``latitude``,
            ``longitude`` and ``altitude``.

    Returns:
        The same DataFrame object, now carrying the three coordinate columns.

    Raises:
        ValueError: If the second piece of a folder name cannot be converted
            to an integer (raised by ``astype(int)``).
    """
    # Compute the lookup key and the indexed coordinate table once instead of
    # once per output column.
    folder_key = df["folder"].str.split('_').str[1].astype(int)
    coords_by_folder = gps_coord.set_index('folder')

    df["lat"] = folder_key.map(coords_by_folder['latitude'])
    df["lon"] = folder_key.map(coords_by_folder['longitude'])
    df["alt"] = folder_key.map(coords_by_folder['altitude'])

    return df

### DataFrame shaping (not needed if dataframe already correct)

In [24]:
# dossier_racine = r"./Ecoacoustics_Longitudinal_Altitude_Project"

# df_detection["datetime"] = df_detection["file_name"].apply(extract_datetime_from_filename)
# df_detection = set_id_from_filename(df_detection)

# gps_coord = pd.read_csv(dossier_racine+'/gps_coord.csv')  # Load the GPS coordinates if needed

# df_detection = set_gps_from_folder(df_detection, gps_coord)

## Renaming the files so they can be read by birdnet
BirdNET only accepts ".wav" files, whereas the AudioMoth recorders produce ".WAV" files. We therefore have to rename the files so that BirdNET can read them.

In [None]:
# === Parameters ===
DATA_PATH = "./Ecoacoustics_Longitudinal_Altitude_Project/"  # root folder to process

# Recursively browse all subfolders and normalise the ".wav" extension to lowercase
for root, dirs, files in os.walk(DATA_PATH):
    for file_name in files:
        base_name, extension = os.path.splitext(file_name)
        # Only touch files whose extension is ".wav" in a non-lowercase casing.
        # Testing the real extension (rather than the end of the name) also
        # leaves a dot-file literally called ".wav" alone.
        if extension.lower() != ".wav" or extension == ".wav":
            continue

        old_path = os.path.join(root, file_name)
        new_path = os.path.join(root, base_name + ".wav")

        # On a case-sensitive filesystem a lowercase twin may already exist and
        # os.rename would silently overwrite it. On a case-insensitive one both
        # names resolve to the same file, which samefile() tells apart.
        if os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
            print(f"Skipped (target already exists): {old_path} -> {new_path}")
            continue

        os.rename(old_path, new_path)
        print(f"Renamed: {old_path} -> {new_path}")

## Main function for birdnet analysis and data formatting

In [69]:
def on_analyze_directory_complete(recordings, writer, gps_csv_path, export_dir,
                                  export_audio=True, export_spectrograms=True):
    """Log a finished batch and write one row per detection through ``writer``.

    For every recording without error, the function optionally exports the
    detections as audio and/or spectrograms, derives the timestamp from the
    file name, resolves the GPS coordinates from the parent folder's trailing
    number and writes each detection as a dict row.

    Args:
        recordings: Analyzed recording objects; each is read for ``path``,
            ``error``, ``error_message`` and ``detections``.
        writer: Object exposing ``writerow(dict)`` (e.g. ``csv.DictWriter``).
        gps_csv_path: CSV file read with pandas; its columns ``folder``,
            ``latitude``, ``longitude`` and ``altitude`` are used.
        export_dir: Directory handed to the recording's extraction methods.
        export_audio: When true, call ``extract_detections_as_audio``.
        export_spectrograms: When true, call
            ``extract_detections_as_spectrogram``.
    """
    separator = "-" * 80
    print(separator)
    print("directory_completed: recordings processed ", len(recordings))
    print(separator)

    gps_coord = pd.read_csv(gps_csv_path)
    known_folders = gps_coord['folder'].values

    for recording in recordings:
        print(recording.path)
        if not recording.error:
            # Optional exports of the detected segments
            if export_audio:
                recording.extract_detections_as_audio(directory=export_dir)
            if export_spectrograms:
                recording.extract_detections_as_spectrogram(directory=export_dir)

            # Split the path into its parent folder name and its file name
            parent_dir, file_name = os.path.split(recording.path)
            folder_name = os.path.basename(parent_dir)

            # Timestamp encoded in the file name (None when it cannot be parsed)
            dt = extract_datetime_from_filename(file_name)

            # Trailing number of the folder name (e.g. "site_12" -> 12)
            try:
                folder_num = int(folder_name.rsplit('_', 1)[-1])
            except ValueError:
                folder_num = None

            # GPS lookup: first matching row of the coordinates table, if any
            latitude = longitude = altitude = None
            if folder_num is not None and folder_num in known_folders:
                site_row = gps_coord.loc[gps_coord['folder'] == folder_num].iloc[0]
                latitude = site_row['latitude']
                longitude = site_row['longitude']
                altitude = site_row['altitude']

            # Missing coordinates are written as empty cells
            gps_cells = {
                "latitude": "" if latitude is None else latitude,
                "longitude": "" if longitude is None else longitude,
                "altitude": "" if altitude is None else altitude,
            }

            # One output row per detection
            for det in recording.detections:
                writer.writerow({
                    "folder": folder_name,
                    "file_name": file_name,
                    "datetime": dt,
                    "species": det["common_name"],
                    "confidence": det["confidence"],
                    "start_time": det["start_time"],
                    "end_time": det["end_time"],
                    **gps_cells,
                })
        else:
            print("Error: ", recording.error_message)
        print(separator)

## Applying the BirdNET algorithm to one folder or to all subfolders

In [70]:
# === Parameters ===
ROOT_DIR = "./Ecoacoustics_Longitudinal_Altitude_Project"
DATA_PATH = "./Ecoacoustics_Longitudinal_Altitude_Project/Point_1"
OUTPUT_CSV = "birdnet_detections_final.csv"
EXPORT_DIR = "extractions" # directory to export audio/spectrograms
LON = 6.07342607201401
LAT = 46.5104596567472
MIN_CONF = 0.4 # minimum confidence for detections
ANALYSIS_DATE = datetime(year=2025, month=10, day=1)
EXPORT_SPECTROGRAMS = False
EXPORT_AUDIO = False
one_folder = False # True to analyze only one folder (will use DATA_PATH), False to analyze all subfolders (will use ROOT_DIR)

# Create export directory if it doesn't exist
os.makedirs(EXPORT_DIR, exist_ok=True)

# Initialize the analyzer
analyzer = Analyzer()

# Columns of the output CSV (must match the keys written by on_analyze_directory_complete)
fieldnames = [
    "folder", "file_name", "datetime", "species", "confidence",
    "start_time", "end_time", "latitude", "longitude", "altitude"
]

# Resolve the directories to analyze BEFORE opening the CSV, so that a missing
# ROOT_DIR does not leave a freshly truncated output file behind.
if one_folder:
    directories = [DATA_PATH]
else:
    # sorted() makes the processing order (and thus the CSV row order) reproducible
    directories = [
        os.path.join(ROOT_DIR, folder)
        for folder in sorted(os.listdir(ROOT_DIR))
        if os.path.isdir(os.path.join(ROOT_DIR, folder))
    ]

# The context manager guarantees the CSV is flushed and closed even if the
# analysis raises. UTF-8 matches the default encoding of pd.read_csv.
with open(OUTPUT_CSV, mode="w", newline="", encoding="utf-8") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    def write_detections(recs):
        """Forward the finished recordings of one batch to the shared CSV writer."""
        on_analyze_directory_complete(
            recs,
            writer=writer,
            gps_csv_path=os.path.join(ROOT_DIR, 'gps_coord.csv'),
            export_dir=EXPORT_DIR,
            export_audio=EXPORT_AUDIO,
            export_spectrograms=EXPORT_SPECTROGRAMS,
        )

    for directory in directories:
        print(f"Analyse de {directory} ...")
        # One multi-processing batch per directory
        batch = DirectoryMultiProcessingAnalyzer(
            directory,
            analyzers=[analyzer],
            lon=LON,
            lat=LAT,
            date=ANALYSIS_DATE,
            min_conf=MIN_CONF,
        )
        # Replace the on_analyze_directory_complete callback
        batch.on_analyze_directory_complete = write_detections
        # Launch the analysis
        batch.process()

print(f"✅ Analysis finished ; generated CSV :  {OUTPUT_CSV}")


Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.



    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



process_from_queueprocess_from_queueprocess_from_queue

process_from_queueInitializing analyzer(s)Initializing analyzer(s)

process_from_queue

Initializing analyzer(s)Initializing analyzer(s)


Labels loaded.process_from_queueInitializing analyzer(s)


Labels loaded.load modelprocess_from_queueLabels loaded.Initializing analyzer(s)Labels loaded.
 


Labels loaded.
load modelload modelTrueload model
Initializing analyzer(s)
   
TrueTrueload modelTrue
Labels loaded.


 Labels loaded.load modelTrue
 
Trueload model
 True
Model loaded.
Labels loaded.
Model loaded.load_species_list_model

Labels loaded.
load_species_list_model
Model loaded.
Model loaded.Model loaded.Model loaded.Labels loaded.

Model loaded.


load_species_list_modelLabels loaded.
Labels loaded.Labels loaded.Labels loaded.


load_species_list_modelload_species_list_modelload_species_list_model


load_species_list_model

Meta model loaded.
read_audio_data
Meta model loaded.
read_audio_data
Meta model loaded.
Meta model load


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
read_audio_data
read_audio_data: complete, read  20 chunks.
analyze_recording 20251001_153000.wav
recording has lon/lat
set_predicted_species_list_from_position
return_predicted_species_list
37
process_from_queue
Initializing analyzer(s)
139Labels loaded. 
load modelspecies loaded.
 True



    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



process_from_queueprocess_from_queue

process_from_queueInitializing analyzer(s)Initializing analyzer(s)

Initializing analyzer(s)

Labels loaded.Labels loaded.

Labels loaded.process_from_queueload model
load model
  Initializing analyzer(s)load modelTrueTrue 


True


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    







    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



Labels loaded.


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    




Model loaded.
load modelLabels loaded. True

load_species_list_model


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    




Model loaded.Model loaded.

Labels loaded.
Labels loaded.
load_species_list_modelModel loaded.load_species_list_model


Labels loaded.
process_from_queue
Meta model loaded.
Initializing analyzer(s)read_audio_dataload_species_list_model

Model loaded.

Meta model loaded.
Labels loaded.Labels loaded.read_audio_dataMeta model loaded.



load modelload_species_list_modelread_audio_data 
True

read_audio_data: complete, read 


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



 Meta model loaded.20
 read_audio_data
chunks.
analyze_recordingread_audio_data: complete, read  20251001_141000.wav 20
 recording has lon/latchunks.
read_audio_data: complete, read 
set_predicted_species_list_from_positionMeta model loaded.analyze_recording
 
 return_predicted_species_list2020250930_171000.wav
read_audio_dataread_audio_data: complete, read 
 
 37chunks.recording has lon/lat
20

analyze_recording  chunks.set_predicted_species_list_from_positionModel loaded.
20250930_191000.wav
recording has lon/lat
analyze_recording
Labels loaded.
139  return_predicted_species_list
set_predicted_species_list_from_position20250930_220000.wav
species loaded.37
load_species_list_modelread_audio_data: complete, read  


recording has lon/lat

return_predicted_species_list20
 set_predicted_species_list_from_positionchunks.

return_predicted_species_list13937analyze_recording  
species loaded.20251001_054000.wav


recording has lon/lat37

Meta model loaded.set_predicted_species_list_from_pos

## Data analysis

### Load Data from csv if not already available

In [71]:
# Load previously exported detections into a DataFrame.
# NOTE(review): this file name differs from OUTPUT_CSV ("birdnet_detections_final.csv")
# written by the analysis cell — confirm which CSV is meant to be analyzed here.
df_detection = pd.read_csv('birdnet_detections_onef3.csv')

### Data exploration

#### A few useful lines if you want to check your dataframe

In [73]:
# Cheat-sheet of inspection expressions for the detections DataFrame.
# In a notebook only the value of the LAST expression of a cell is displayed,
# so run these one at a time (or move the one of interest to the end).
df_detection.columns # all column names

df_detection.head() # first 5 rows of the dataframe

df_detection["species"].unique() # array of the distinct species names
df_detection["species"].nunique() # number of distinct species

df_detection["confidence"].describe() # summary statistics of the confidence scores

df_detection.shape # shape of the dataframe as (rows, columns)

df_detection.size # total number of elements (rows x columns)

df_detection.groupby(['folder']).size()  # number of detections per folder

df_detection.groupby(['species']).size().sort_values(ascending=False)  # number of detections per species, most frequent first



species
Eurasian Bullfinch           17
Goldcrest                    17
Eurasian Wren                11
Common Chaffinch             10
Tawny Owl                     5
Lesser Spotted Woodpecker     5
Song Thrush                   4
Spotted Flycatcher            3
Eurasian Treecreeper          3
Eurasian Jay                  3
Long-tailed Tit               3
Willow Tit                    2
Hawfinch                      2
Common Buzzard                1
Dunnock                       1
Great Spotted Woodpecker      1
Tree Pipit                    1
dtype: int64

#### A few useful lines to shape your dataframe

In [74]:
# Order the rows by altitude first, then by datetime within each altitude
df_detection = df_detection.sort_values(by=['altitude', 'datetime'])

### Basic plot

In [75]:
# Grouped histogram of the raw detection counts over time, one colour per altitude.
# The number of bins is left to plotly (pass nbins=... to override).
fig = px.histogram(
    data_frame=df_detection,
    x='datetime',
    color='altitude',
    barmode='group',
    histnorm='',
    title='Histogramme des détections BirdNet au fil du temps',
)

fig.show()

### Compute the number of microphones per altitude (in order to be able to compare the results)

In [76]:
# Number of distinct recorders (folders) present at each altitude
mic_counts = (
    df_detection
    .groupby("altitude")["folder"]
    .nunique()
    .rename("mic_count")
)

# Detections per (altitude, datetime), joined with the recorder count of the
# altitude and divided by it to obtain a per-recorder rate
df_detec_norm = (
    df_detection
    .groupby(["altitude", "datetime"])
    .size()
    .reset_index(name="counts")
    .merge(mic_counts, on="altitude", how="left")
    .assign(norm_counts=lambda frame: frame["counts"] / frame["mic_count"])
)

### Figure

In [77]:
# Grouped histogram of the per-recorder detection rate over time, one colour per altitude
fig = px.histogram(
    data_frame=df_detec_norm,
    x="datetime",
    y="norm_counts",
    color="altitude",
    barmode='group',
    nbins=25,
    labels={"norm_counts": "Détections / micro"},
    title="Détections BirdNET normalisées par le nombre de micros",
)
fig.show()

### A few lines to analyze our birds

In [78]:
df_detection.groupby(['altitude']).size() # number of detections per altitude (only displayed when run as the last expression of a cell)


# Per-species summary: detection count plus mean AND median confidence.
# Named aggregation keeps every statistic aligned with its species by
# construction and stores the median in its own column, so it no longer
# overwrites the mean.
species_df = (
    df_detection
    .groupby('species', as_index=False)
    .agg(
        detection_count=('file_name', 'count'),
        mean_confidence=('confidence', 'mean'),
        median_confidence=('confidence', 'median'),
    )
    .sort_values(by='detection_count', ascending=False)  # most detected species first
)

# Keep only the species whose mean confidence exceeds 0.5
sorted_species_df = species_df[species_df["mean_confidence"] > 0.5]


In [79]:
# Specific richness: number of distinct species per (altitude, datetime) pair
specific_richness_df = (
    df_detection
    .groupby(['altitude', 'datetime'])['species']
    .nunique()
    .reset_index()
)

In [80]:
# Grouped histogram of the specific richness over time, one colour per altitude
fig = px.histogram(
    data_frame=specific_richness_df,
    x="datetime",
    y="species",
    color="altitude",
    barmode='group',
    nbins=25,
    title="Specific richness per altitude per datetime",
)
fig.show()