In [48]:
#@title Mount Google Drive
# Mount Google Drive to access files stored in it
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [66]:
#@title Change Directory
# Change the working directory to a specific path in your Google Drive
%cd /content/drive/MyDrive/

/content/drive/MyDrive


In [73]:
#@title Set Up Repository Path & Clone Repository
import os
# Define path in Google Drive where you want to clone the repository
repo_path = '/content/drive/MyDrive/'

# Check if directory already exists, create it if not
if not os.path.exists(repo_path):
    os.makedirs(repo_path)

# Change working directory to defined path
os.chdir(repo_path)

# Clone the repository if doesnt exist, otherwise pull latest changes
if not os.path.exists(os.path.join(repo_path, 'BirdnetProject')):
    !git clone https://github.com/Jamess200/BirdnetProject
else:
    os.chdir('BirdnetProject')
    !git pull

# Verify cloned repository
!ls

Already up to date.
Files  README.md


In [28]:
#@title Install Required Python Packages
# Install required packages
!pip3 install birdnetlib
!pip3 install tflite-runtime
!pip3 install resampy
!pip3 install ffmpeg



In [51]:
#@title Import Necessary Libraries
# Import necessary libraries
import resampy
import birdnetlib
import tflite_runtime
import ffmpeg
import pandas as pd
import numpy as np
from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from datetime import datetime

In [59]:
#@title Set Up Directory and File Paths
# Define the directory where the data is stored
dataDir = '/content/drive/MyDrive/BirdnetProject/Files/data/Ignore/My_XC_Files/'

# List all files in the directory
file_names = os.listdir(dataDir)

# Print the file names to verify the data files
print(file_names)

['XC2_20200530_171400.wav', '.ipynb_checkpoints']


In [60]:
#@title Initialise BirdNET Analyzer
# Initialise the BirdNET-Analyzer model
analyzer = Analyzer()
# Define the coordinates and date for the recordings
testamp = [49.0211,9.0244]

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.


In [61]:
#@title Analyse Audio Files and Store Results
# Create a dictionary to store the results
results = {}

# Date of recording
recording_date = datetime(year=2020, month=5, day=30)

for file_name in file_names:
    try:
        file_path = os.path.join(dataDir, file_name)

        if os.path.isdir(file_path):
            continue

        if not file_name.lower().endswith(('.wav', '.mp3', '.flac')):
            print(f"Skipping non-audio file: {file_name}")
            continue

        recording = Recording(
            analyzer,
            file_path,  # path to the recording file
            lat=testamp[0],  # latitude of the recording location
            lon=testamp[1],  # longitude of the recording location
            date=recording_date,  # the date of the recording
            min_conf=0.25,
        )
        recording.analyze()
        results[file_name] = recording.detections
    except Exception as e:
        print(f"Error processing {file_name}: {e}")

read_audio_data
read_audio_data: complete, read  682 chunks.
analyze_recording XC2_20200530_171400.wav
recording has lon/lat
set_predicted_species_list_from_position
return_predicted_species_list
20
140 species loaded.


In [62]:
#@title Convert Results to DataFrame and Save as CSV
# Flatten the results dictionary
flattened_data = []
for filename, records in results.items():
    for record in records:
        record['filename'] = filename
        flattened_data.append(record)

# Convert the flattened data to a DataFrame
df = pd.DataFrame(flattened_data)

# Save the DataFrame to a CSV file
csv_file_path = '/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data/XC/XC2_20200530_171400.csv'
df.to_csv(csv_file_path, index=False)

# Confirm that the results have been saved successfully
print(f"Results have been successfully converted to CSV and saved to {csv_file_path}")

Results have been successfully converted to CSV and saved to /content/drive/MyDrive/BirdnetProject/Files/data/CSV_data/XC/XC2_20200530_171400.csv


In [63]:
#@title Load and Verify CSV Data
# Load the saved CSV file into a DataFrame to verify the contents
csv_file_path = '/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data/XC/XC2_20200530_171400.csv'
df = pd.read_csv(csv_file_path)

# Print the DataFrame to verify the data
print(df.head)

<bound method NDFrame.head of         common_name     scientific_name  start_time  end_time  confidence  \
0         Great Tit         Parus major         9.0      12.0    0.267441   
1    European Robin  Erithacus rubecula        15.0      18.0    0.302582   
2    European Robin  Erithacus rubecula        33.0      36.0    0.313827   
3    European Robin  Erithacus rubecula        51.0      54.0    0.644889   
4    European Robin  Erithacus rubecula        63.0      66.0    0.340919   
..              ...                 ...         ...       ...         ...   
123  European Robin  Erithacus rubecula      1905.0    1908.0    0.662246   
124  European Robin  Erithacus rubecula      1944.0    1947.0    0.394734   
125  European Robin  Erithacus rubecula      2004.0    2007.0    0.289836   
126  European Robin  Erithacus rubecula      2016.0    2019.0    0.254990   
127  European Robin  Erithacus rubecula      2025.0    2028.0    0.331637   

                                 label       

In [68]:
#@title Change Working Directory
# Change working directory to defined path
os.chdir('/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data/XC')

# Verify working directory
print(os.getcwd())

/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data/XC


In [69]:
#@title Create Merged Dataframe File For All Transects

# Read in the CSV files
df_XC1 = pd.read_csv('XC1_20230615_180000.csv')
df_XC2 = pd.read_csv('XC2_20200530_171400.csv')

# Add a column to each dataframe to indicate the transect source
df_XC1['transect'] = 1
df_XC2['transect'] = 2

# Function to extract date and hour from the filename
def extract_datetime(filename):
    date_str = filename.split('_')[1]
    time_str = filename.split('_')[2].split('.')[0]
    datetime_str = date_str + ' ' + time_str
    datetime = pd.to_datetime(datetime_str, format='%Y%m%d %H%M%S')
    return datetime

# Apply the function to create a new datetime column
df_XC1['datetime'] = df_XC1['filename'].apply(extract_datetime)
df_XC2['datetime'] = df_XC2['filename'].apply(extract_datetime)

# Function to convert seconds to hour:minutes:seconds format based on the datetime
def seconds_to_hms(datetime, seconds):
    new_time = datetime + pd.to_timedelta(seconds, unit='s')
    return new_time.strftime('%-H:%M:%S')

# Apply the function to convert start_time and end_time
df_XC1['start_time'] = df_XC1.apply(lambda row: seconds_to_hms(row['datetime'], row['start_time']), axis=1)
df_XC1['end_time'] = df_XC1.apply(lambda row: seconds_to_hms(row['datetime'], row['end_time']), axis=1)
df_XC2['start_time'] = df_XC2.apply(lambda row: seconds_to_hms(row['datetime'], row['start_time']), axis=1)
df_XC2['end_time'] = df_XC2.apply(lambda row: seconds_to_hms(row['datetime'], row['end_time']), axis=1)

# Concatenate the dataframes
concatenated_df = pd.concat([df_XC1, df_XC2], ignore_index=True)

# Extract date and start hour
concatenated_df['date'] = concatenated_df['datetime'].dt.date
concatenated_df['start_hour'] = concatenated_df['datetime'].dt.hour

# Sort the dataframe by transect, date, start_time, common_name, and confidence
concatenated_df = concatenated_df.sort_values(by=['transect', 'date', 'start_time', 'common_name', 'confidence'])

# Reorder the columns to have transect, date, start_hour, time, and readable start/end times
reordered_columns = ['transect', 'date', 'start_hour', 'start_time', 'end_time', 'common_name', 'scientific_name', 'confidence', 'label', 'filename']
concatenated_df = concatenated_df[reordered_columns]

# Display the first few rows of the concatenated dataframe
print("\nNew Dataframe:")
print(concatenated_df.head())
print(concatenated_df.tail())



New Dataframe:
   transect        date  start_hour start_time  end_time         common_name  \
0         1  2023-06-15          18   18:00:03  18:00:06   Eurasian Blackcap   
1         1  2023-06-15          18   18:00:06  18:00:09   Eurasian Blackcap   
2         1  2023-06-15          18   18:00:12  18:00:15   Eurasian Blackcap   
3         1  2023-06-15          18   18:00:18  18:00:21  Eurasian Blackbird   
4         1  2023-06-15          18   18:00:21  18:00:24   Eurasian Blackcap   

      scientific_name  confidence                                 label  \
0  Sylvia atricapilla    0.490236  Sylvia atricapilla_Eurasian Blackcap   
1  Sylvia atricapilla    0.586808  Sylvia atricapilla_Eurasian Blackcap   
2  Sylvia atricapilla    0.568825  Sylvia atricapilla_Eurasian Blackcap   
3       Turdus merula    0.417446      Turdus merula_Eurasian Blackbird   
4  Sylvia atricapilla    0.774901  Sylvia atricapilla_Eurasian Blackcap   

                  filename  
0  XC1_20230615_180000.

In [70]:
#@title Save data as a CSV File
# Save the concatenated dataframe to a CSV file
output_filename = 'XC_Merged.csv'
concatenated_df.to_csv(output_filename, index=False)
print(f"The combined dataframe has been saved to {output_filename}")

The combined dataframe has been saved to XC_Merged.csv


In [71]:
#@title Save Data as a Excel File
# Save the dataframe to an Excel file
output_file = 'XC_Merged.xlsx'
concatenated_df.to_excel(output_file, index=False)
print(f"\nDataframe has been saved to {output_file}")


Dataframe has been saved to XC_Merged.xlsx
