<a href="https://colab.research.google.com/github/Jamess200/BirdnetProject/blob/main/Lost_Data_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
#@title Mount Google Drive
#Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
#@title Imports
import pandas as pd
import os
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [35]:
#@title Set Up Repository Path and Clone Repository
# Define path in Google Drive where you want to clone the repository
repo_path = '/content/drive/MyDrive/'

# Check if directory already exists
if not os.path.exists(repo_path):
    os.makedirs(repo_path)

# Change working directory to defined path
os.chdir(repo_path)

# Clone the repository if doesnt exist, otherwise pull latest changes
if not os.path.exists(os.path.join(repo_path, 'BirdnetProject')):
    !git clone https://github.com/Jamess200/BirdnetProject.git
else:
    os.chdir('BirdnetProject')
    !git pull

# Verify cloned repository
!ls

Already up to date.
Files  README.md


In [36]:
#@title Change Working Directory
# Change working directory to defined path
os.chdir('/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data')

# Verify working directory
print(os.getcwd())

/content/drive/MyDrive/BirdnetProject/Files/data/CSV_data


In [40]:
#@title Data Loading and Preprocessing
data = pd.read_csv('transect_combined.csv')
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data['datetime_start'] = pd.to_datetime(data['date'].astype(str) + ' ' + data['start_time'], format='%Y-%m-%d %H:%M:%S')
data['datetime_end'] = pd.to_datetime(data['date'].astype(str) + ' ' + data['end_time'], format='%Y-%m-%d %H:%M:%S')

data = data.sort_values(by='datetime_start')

data['detection_interval'] = data['datetime_start'].diff().shift(-1).dt.total_seconds()
average_interval = data['detection_interval'].mean()
print(f"Average detection interval: {average_interval:.2f} seconds")

Average detection interval: 13.07 seconds


In [43]:
#@title Calculate Total Detections and Segments
total_detections = len(data)
print(f"Total number of detections: {total_detections}")

# Define the recording schedule parameters
start_hour = 4
end_hour = 9
interval_minutes = 30
detection_interval = 3

# Number of 3-second slots per 30-minute interval
slots_per_session = (interval_minutes * 60) / detection_interval

# Number of sessions per day per transect (one session per hour from 4 AM to 9 AM, including the half-hour at 9)
sessions_per_day_per_transect = 6

# Number of slots per day per transect
slots_per_day_per_transect = slots_per_session * sessions_per_day_per_transect

# Number of transects
number_of_transects = 4

# Calculate total expected slots per day across all transects
expected_slots_per_day = slots_per_day_per_transect * number_of_transects

# Calculate the number of days in the dataset
data['day'] = data['date'].dt.date
total_days = data['day'].nunique()

# Calculate the total number of expected detections
expected_detections = expected_slots_per_day * total_days
print(f"Expected number of detections: {expected_detections}")

# Calculate the number of missing detections
missing_detections = expected_detections - total_detections
print(f"Number of missing detections: {missing_detections}")


Total number of detections: 54397
Expected number of detections: 129600.0
Number of missing detections: 75203.0


In [50]:
#@title Detection Analysis by 3-Second Slots and Transects
# Detection Analysis by 3-Second Slots and Transects

# Assign each detection to a 3-second slot
data['slot'] = data['datetime_start'].dt.floor('3S')

# Count the number of detections in each slot per transect
slot_counts_per_transect = data.groupby(['transect', 'slot']).size().reset_index(name='count')

# Identify slots with multiple detections within the same transect
multiple_detections_per_transect = slot_counts_per_transect[slot_counts_per_transect['count'] > 1]
print(f"Number of slots with multiple detections per transect: {multiple_detections_per_transect.shape[0]}")
print("Details of slots with multiple detections per transect:")
print(multiple_detections_per_transect)

# Extract hour from the 'slot' datetime and group by hour
multiple_detections_per_transect['hour'] = multiple_detections_per_transect['slot'].dt.hour
multiple_detections_per_hour_per_transect = multiple_detections_per_transect.groupby(['transect', 'hour']).size().reset_index(name='slots_with_multiple_detections')

# Plot the number of multiple detection slots per hour for each transect
fig = px.bar(
    multiple_detections_per_hour_per_transect,
    x='hour',
    y='slots_with_multiple_detections',
    color='transect',
    labels={'hour': 'Hour of Day', 'slots_with_multiple_detections': 'Number of Slots with Multiple Detections'},
    title='Number of Slots with Multiple Detections per Hour per Transect'
)

# Update layout to include legend and remove the color bar
fig.update_layout(
    legend_title_text='Transect',
    xaxis=dict(dtick=1),
    coloraxis_showscale=False,  # Remove the color bar
    showlegend=True  # Ensure the legend is shown
)

# Show the plot
fig.show()

Number of slots with multiple detections per transect: 3693
Details of slots with multiple detections per transect:
       transect                slot  count
10            5 2024-05-02 04:01:54      2
12            5 2024-05-02 04:02:06      2
32            5 2024-05-02 04:17:00      2
46            5 2024-05-02 04:18:39      2
195           5 2024-05-02 05:29:36      2
...         ...                 ...    ...
50561         8 2024-05-10 09:26:36      2
50564         8 2024-05-10 09:26:51      2
50569         8 2024-05-10 09:27:12      2
50573         8 2024-05-10 09:28:18      2
50584         8 2024-05-10 09:29:18      2

[3693 rows x 3 columns]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

