In [31]:
import pandas as pd
import ast
import glob

In [2]:
# Load the source table into `df`; the cells below split it by its 'Recorder' column.
# NOTE(review): absolute, machine-specific Windows path - this cell fails on any other
# machine; consider reading from a configurable data directory instead.

df = pd.read_csv(r'C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\df_with_sim_phase.csv')

In [3]:
## Split the full table into one independent copy per AudioMoth recorder (1-6).
# The generator yields the subsets in recorder order, so the unpacking binds
# df_moth1 .. df_moth6 exactly as six separate filter statements would.

(
    df_moth1,
    df_moth2,
    df_moth3,
    df_moth4,
    df_moth5,
    df_moth6,
) = (
    df[df['Recorder'] == f'Audio_Moth_{moth_no}'].copy()
    for moth_no in range(1, 7)
)


In [4]:
## Function one: group the rows of a single sim type into contiguous time blocks (sim blocks).


def get_sim_blocks(df, sim_label, gap_threshold_sec=6):
    """Collapse the rows carrying one sim label into contiguous time blocks.

    Rows whose 'Sim Type Label' equals ``sim_label`` are ordered by time; a new
    block starts whenever the gap to the previous row is strictly greater than
    ``gap_threshold_sec`` seconds.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Sim Type Label' and 'Timestamp'. 'Timestamp'
        may hold datetimes or anything ``pd.to_datetime`` can parse. The frame
        itself is not modified (a filtered copy is used).
    sim_label : str
        Label value to select.
    gap_threshold_sec : float, default 6
        Largest gap (in seconds) between consecutive rows that still counts as
        the same block.

    Returns
    -------
    pandas.DataFrame
        One row per block with 'Start_Time', 'End_Time', 'Sim Type Label' and
        'Duration_Minutes' (End_Time - Start_Time, in minutes). When no row
        matches ``sim_label`` an empty frame with those column names is returned.
    """
    df_sim = df[df['Sim Type Label'] == sim_label].copy()
    if df_sim.empty:
        return pd.DataFrame(columns=['Sim Type Label', 'Start_Time', 'End_Time', 'Duration_Minutes'])

    # Parse BEFORE sorting: sorting raw strings is lexicographic, which is not
    # chronological for formats such as MM/DD/YYYY and would produce wrong gaps
    # (negative diffs never exceed the threshold, so separate events would merge).
    df_sim['Timestamp'] = pd.to_datetime(df_sim['Timestamp'])
    df_sim = df_sim.sort_values('Timestamp')

    # Seconds since the previous row; the first row's gap is NaN, and NaN > x is
    # False, so the first row always lands in block 0.
    df_sim['Gap_sec'] = df_sim['Timestamp'].diff().dt.total_seconds()
    # Every over-threshold gap bumps the running block id by one.
    df_sim['Sim_Event_ID'] = (df_sim['Gap_sec'] > gap_threshold_sec).cumsum()

    sim_blocks = (
        df_sim.groupby('Sim_Event_ID')['Timestamp']
        .agg(Start_Time='min', End_Time='max')
        .reset_index(drop=True)
    )
    sim_blocks['Sim Type Label'] = sim_label
    sim_blocks['Duration_Minutes'] = (sim_blocks['End_Time'] - sim_blocks['Start_Time']).dt.total_seconds() / 60

    return sim_blocks




In [None]:
## Function two: process one AudioMoth's data and extract its sim event blocks.


def process_moth_data(df, moth_name):
    """Build, save and return the sim-block table for one recorder.

    Steps: parse the 'Sim Type' and 'Timestamp' columns, derive a combined
    'Sim Type Label' per row, run ``get_sim_blocks`` once per distinct non-empty
    label, concatenate the results, and write them to
    ``sim_blocks_{moth_name}.csv`` (relative path, i.e. the current working
    directory).

    Parameters
    ----------
    df : pandas.DataFrame
        Rows for a single recorder; needs the columns 'Sim Type' and
        'Timestamp'. NOTE: this frame is modified in place - 'Sim Type' and
        'Timestamp' are overwritten with their parsed values and a
        'Sim Type Label' column is added.
    moth_name : str
        Recorder name; used for the output file name and the 'Recorder' column.

    Returns
    -------
    pandas.DataFrame
        All blocks sorted by 'Start_Time', with a 'Recorder' column. Empty (but
        with the usual columns) when the recorder has no non-empty sim label.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` if a 'Sim Type' string is not a
        valid Python literal.
    """
    # correct dtypes: string cells are parsed as Python literals, everything
    # else (already-parsed values, NaN, ...) is passed through unchanged
    df['Sim Type'] = df['Sim Type'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # create combined label: sorted so the same set of sim types always yields
    # the same label; anything that is not a list becomes the empty label
    df['Sim Type Label'] = df['Sim Type'].apply(lambda x: ', '.join(sorted(x)) if isinstance(x, list) else '')

    # get unique sim types (empty label = no sim, skipped)
    sim_labels = [lbl for lbl in df['Sim Type Label'].unique() if lbl]

    # generate blocks for each sim type using get_sim_blocks
    if sim_labels:
        sim_blocks_all = pd.concat([get_sim_blocks(df, lbl) for lbl in sim_labels], ignore_index=True)
    else:
        # pd.concat([]) raises "No objects to concatenate"; a recorder without
        # any sim rows should yield an empty table rather than crash the run.
        sim_blocks_all = pd.DataFrame(columns=['Start_Time', 'End_Time', 'Sim Type Label', 'Duration_Minutes'])

    # sorting
    sim_blocks_all = sim_blocks_all.sort_values('Start_Time').reset_index(drop=True)

    sim_blocks_all['Recorder'] = moth_name

    # save it to csv
    output_name = f"sim_blocks_{moth_name}.csv"
    sim_blocks_all.to_csv(output_name, index=False)
    print(f"Saved: {output_name} ({len(sim_blocks_all)} rows)")

    return sim_blocks_all


In [36]:
## Apply process_moth_data to each of the six AudioMoth subsets.

moth_datasets = {
    "Audio_Moth_1": df_moth1,
    "Audio_Moth_2": df_moth2,
    "Audio_Moth_3": df_moth3,
    "Audio_Moth_4": df_moth4,
    "Audio_Moth_5": df_moth5,
    "Audio_Moth_6": df_moth6,
}

# recorder name -> sim-block table returned by process_moth_data
sim_blocks_all_moths = {}

# The loop variable is deliberately NOT called `df`: at notebook level that would
# rebind the raw table loaded at the top to the last recorder's subset, and
# re-running the per-recorder split cell afterwards would filter the wrong frame.
for name, moth_df in moth_datasets.items():
    print(f"\n Processing {name.upper()} ...")
    sim_blocks_all_moths[name] = process_moth_data(moth_df, name)



 Processing AUDIO_MOTH_1 ...
Saved: sim_blocks_Audio_Moth_1.csv (7 rows)

 Processing AUDIO_MOTH_2 ...
Saved: sim_blocks_Audio_Moth_2.csv (7 rows)

 Processing AUDIO_MOTH_3 ...
Saved: sim_blocks_Audio_Moth_3.csv (4 rows)

 Processing AUDIO_MOTH_4 ...
Saved: sim_blocks_Audio_Moth_4.csv (21 rows)

 Processing AUDIO_MOTH_5 ...
Saved: sim_blocks_Audio_Moth_5.csv (28 rows)

 Processing AUDIO_MOTH_6 ...
Saved: sim_blocks_Audio_Moth_6.csv (23 rows)


In [38]:
## Check the saved tables: read the Audio_Moth_1 sim-block CSV back from disk.
# NOTE(review): absolute, machine-specific path. process_moth_data writes its CSV under a
# relative name, so this presumably assumes the working directory is this folder - confirm.

df_moth1_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_1.csv")

In [39]:
df_moth1_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-18 17:14:18,2025-03-18 17:19:18,Vehicle,5.0,Audio_Moth_1
1,2025-03-19 07:25:54,2025-03-19 07:37:24,Human Presence on Trail,11.5,Audio_Moth_1
2,2025-03-19 07:37:27,2025-03-19 07:42:27,"Human Presence on Trail, Vehicle",5.0,Audio_Moth_1
3,2025-03-19 07:42:30,2025-03-19 07:45:51,Human Presence on Trail,3.35,Audio_Moth_1
4,2025-03-19 17:14:39,2025-03-19 17:25:33,Human Presence on Trail,10.9,Audio_Moth_1
5,2025-03-19 17:25:36,2025-03-19 17:30:33,"Human Presence on Trail, Vehicle",4.95,Audio_Moth_1
6,2025-03-19 17:30:36,2025-03-19 17:34:36,Human Presence on Trail,4.0,Audio_Moth_1


In [40]:
# Read the Audio_Moth_2 sim-block CSV back from disk for inspection.
df_moth2_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_2.csv")

In [41]:
df_moth2_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-18 16:50:19,2025-03-18 17:10:16,Human Presence on Trail,19.95,Audio_Moth_2
1,2025-03-19 07:41:31,2025-03-19 07:52:40,Human Presence on Trail,11.15,Audio_Moth_2
2,2025-03-19 07:52:43,2025-03-19 07:57:40,"Human Presence on Trail, Vehicle",4.95,Audio_Moth_2
3,2025-03-19 07:57:43,2025-03-19 08:01:28,Human Presence on Trail,3.75,Audio_Moth_2
4,2025-03-19 17:03:52,2025-03-19 17:14:49,Human Presence on Trail,10.95,Audio_Moth_2
5,2025-03-19 17:14:52,2025-03-19 17:19:49,"Chainsaw, Human Presence on Trail",4.95,Audio_Moth_2
6,2025-03-19 17:19:52,2025-03-19 17:23:52,Human Presence on Trail,4.0,Audio_Moth_2


In [42]:
# Read the Audio_Moth_3 sim-block CSV back from disk for inspection.
df_moth3_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_3.csv")

In [43]:
df_moth3_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-19 08:01:55,2025-03-19 08:06:52,Human Presence off Trail,4.95,Audio_Moth_3
1,2025-03-19 16:58:43,2025-03-19 17:09:28,Human Presence on Trail,10.75,Audio_Moth_3
2,2025-03-19 17:09:31,2025-03-19 17:14:28,"Gunshot, Human Presence on Trail",4.95,Audio_Moth_3
3,2025-03-19 17:14:31,2025-03-19 17:18:40,Human Presence on Trail,4.15,Audio_Moth_3


In [44]:
# Read the Audio_Moth_4 sim-block CSV back from disk for inspection.
df_moth4_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_4.csv")

In [45]:
df_moth4_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-18 16:26:40,2025-03-18 16:31:37,Gunshot,4.95,Audio_Moth_4
1,2025-03-19 08:28:54,2025-03-19 08:39:45,Human Presence on Trail,10.85,Audio_Moth_4
2,2025-03-19 08:39:48,2025-03-19 08:44:48,"Human Presence on Trail, Vehicle",5.0,Audio_Moth_4
3,2025-03-19 08:44:51,2025-03-19 08:48:51,Human Presence on Trail,4.0,Audio_Moth_4
4,2025-03-19 16:37:27,2025-03-19 16:44:09,Human Presence on Trail,6.7,Audio_Moth_4
5,2025-03-19 16:45:16,2025-03-19 16:48:19,Human Presence on Trail,3.05,Audio_Moth_4
6,2025-03-19 16:48:22,2025-03-19 16:53:19,"Human Presence on Trail, Vehicle",4.95,Audio_Moth_4
7,2025-03-19 16:53:22,2025-03-19 16:57:25,Human Presence on Trail,4.05,Audio_Moth_4
8,2025-03-20 16:30:13,2025-03-20 16:42:49,Human Presence on Trail,12.6,Audio_Moth_4
9,2025-03-20 16:42:52,2025-03-20 16:47:49,"Gunshot, Human Presence on Trail",4.95,Audio_Moth_4


In [46]:
# Read the Audio_Moth_5 sim-block CSV back from disk for inspection.
df_moth5_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_5.csv")

In [47]:
df_moth5_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-17 16:42:48,2025-03-17 16:53:54,Human Presence on Trail,11.1,Audio_Moth_5
1,2025-03-17 16:53:57,2025-03-17 16:55:54,"Gunshot, Human Presence on Trail",1.95,Audio_Moth_5
2,2025-03-17 16:55:57,2025-03-17 16:58:57,"Chainsaw, Gunshot, Human Presence on Trail",3.0,Audio_Moth_5
3,2025-03-17 16:59:00,2025-03-17 17:00:54,"Chainsaw, Human Presence on Trail",1.9,Audio_Moth_5
4,2025-03-17 17:00:57,2025-03-17 17:02:45,Human Presence on Trail,1.8,Audio_Moth_5
5,2025-03-18 16:04:31,2025-03-18 16:24:28,Human Presence on Trail,19.95,Audio_Moth_5
6,2025-03-19 08:43:47,2025-03-19 08:54:38,Human Presence on Trail,10.85,Audio_Moth_5
7,2025-03-19 08:54:41,2025-03-19 08:59:38,"Human Presence on Trail, Vehicle",4.95,Audio_Moth_5
8,2025-03-19 08:59:41,2025-03-19 09:03:47,Human Presence on Trail,4.1,Audio_Moth_5
9,2025-03-19 16:24:08,2025-03-19 16:30:02,Human Presence on Trail,5.9,Audio_Moth_5


In [48]:
# Read the Audio_Moth_6 sim-block CSV back from disk for inspection.
df_moth6_table = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_6.csv")

In [49]:
df_moth6_table

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-17 16:27:29,2025-03-17 16:39:29,Human Presence on Trail,12.0,Audio_Moth_6
1,2025-03-17 16:39:32,2025-03-17 16:44:32,"Human Presence on Trail, Vehicle",5.0,Audio_Moth_6
2,2025-03-17 16:44:35,2025-03-17 16:47:26,Human Presence on Trail,2.85,Audio_Moth_6
3,2025-03-18 15:52:19,2025-03-18 16:12:19,Human Presence on Trail,20.0,Audio_Moth_6
4,2025-03-19 08:57:16,2025-03-19 09:07:55,Human Presence on Trail,10.65,Audio_Moth_6
5,2025-03-19 09:07:58,2025-03-19 09:12:55,"Human Presence on Trail, Vehicle",4.95,Audio_Moth_6
6,2025-03-19 09:12:58,2025-03-19 09:17:13,Human Presence on Trail,4.25,Audio_Moth_6
7,2025-03-19 16:13:26,2025-03-19 16:24:23,Human Presence on Trail,10.95,Audio_Moth_6
8,2025-03-19 16:24:26,2025-03-19 16:29:26,"Gunshot, Human Presence on Trail",5.0,Audio_Moth_6
9,2025-03-19 16:29:29,2025-03-19 16:33:23,Human Presence on Trail,3.9,Audio_Moth_6


In [50]:
## Combine the per-recorder sim-block CSVs into a single table.

# Collect the per-recorder files. glob returns matches in arbitrary order, so sort
# them to make the read order deterministic.
files = sorted(glob.glob(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_Audio_Moth_*.csv"))

# pd.concat([]) fails with an unhelpful "No objects to concatenate"; stop early with
# a message that points at the real problem (wrong folder / files not generated yet).
if not files:
    raise FileNotFoundError("No sim_blocks_Audio_Moth_*.csv files found in the 'six sim block tables' folder")

# Read every file and stack them into one frame.
dfs = [pd.read_csv(f) for f in files]

sim_blocks_all = pd.concat(dfs, ignore_index=True)

# Order by recorder, then by block start. 'Start_Time' is read back as text here.
sim_blocks_all = sim_blocks_all.sort_values(["Recorder", "Start_Time"]).reset_index(drop=True)

# Save the combined table (relative path: written to the current working directory).
sim_blocks_all.to_csv('sim_blocks_all.csv', index=False)



In [51]:
## Check the combined file: read sim_blocks_all.csv back from disk.
# NOTE(review): the combining cell writes 'sim_blocks_all.csv' under a relative name, while
# this reads an absolute path - presumably the working directory is this folder; confirm.
df_combined = pd.read_csv(r"C:\Users\qfu88\Desktop\KwF\Bioacoustics-analysis\Datas\six sim block tables\sim_blocks_all.csv")

In [52]:
df_combined

Unnamed: 0,Start_Time,End_Time,Sim Type Label,Duration_Minutes,Recorder
0,2025-03-18 17:14:18,2025-03-18 17:19:18,Vehicle,5.00,Audio_Moth_1
1,2025-03-19 07:25:54,2025-03-19 07:37:24,Human Presence on Trail,11.50,Audio_Moth_1
2,2025-03-19 07:37:27,2025-03-19 07:42:27,"Human Presence on Trail, Vehicle",5.00,Audio_Moth_1
3,2025-03-19 07:42:30,2025-03-19 07:45:51,Human Presence on Trail,3.35,Audio_Moth_1
4,2025-03-19 17:14:39,2025-03-19 17:25:33,Human Presence on Trail,10.90,Audio_Moth_1
...,...,...,...,...,...
85,2025-03-21 08:53:14,2025-03-21 08:57:29,"Chainsaw, Human Presence on Trail, Vehicle",4.25,Audio_Moth_6
86,2025-03-21 08:57:32,2025-03-21 08:58:14,"Human Presence on Trail, Vehicle",0.70,Audio_Moth_6
87,2025-03-21 08:58:17,2025-03-21 09:01:47,Human Presence on Trail,3.50,Audio_Moth_6
88,2025-03-21 13:08:56,2025-03-21 13:20:47,Human Presence on Trail,11.85,Audio_Moth_6
