In [1]:
import pandas as pd
import random
import librosa
import os
from ketos.data_handling.parsing import load_audio_representation
from ketos.data_handling.database_interface import AudioWriter, create_database
from ketos.data_handling import selection_table as sl

2021-11-30 22:43:51.215915: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-30 22:43:51.621566: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10477 MB memory:  -> device: 0, name: NVIDIA TITAN V, pci bus id: 0000:a1:00.0, compute capability: 7.0
  "The `lr` argument is deprecated, use `learning_rate` instead.")


# Initialization

In [2]:
# Database-related parameters
db_name_string='CB300' # 'CB300' or 'CB50'
data_dir='/data/WCS/' # Path to the folder that contains all the audio files
dataset_name='/'+db_name_string

spec_cfg = load_audio_representation('spec_config.json', name="spectrogram")

# Load annotation files
root_path='/home/sadman/arctic_mammal/' # Root path to all code/annotation
annot_filename=db_name_string+'_train.csv'

db_save_path=root_path+'saved_database/'
db_save_filename='db_test_'+str(spec_cfg['duration'])+'.h5'

# If want to use all annotations, then keep False, otherwise if want to use annotations based on quality, then True
filter_based_on_quality_flag=False 

annot_df=pd.read_csv(root_path+'annotations/train/'+annot_filename, sep=',')

In [3]:
annot_df

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Delta Time (s),File Offset (s),Low Freq (Hz),High Freq (Hz),Peak Freq (Hz),...,AR comment,SNR NIST Quick (dB FS),SNR order,species,certainty,comment,type,WH ID,WH Comment,NEW analysis
0,4825,Spectrogram 1,1,43005.721333,43007.475333,1.754,117.248,47.6,470.2,93.8,...,,40.43,217,BH,High,,,,,y
1,2503,Spectrogram 1,1,13132.109667,13134.101667,1.992,235.314,185.6,676.6,199.2,...,,13.13,4192,BH,High,ds,ssong,,,yes
2,9342,Spectrogram 1,1,141645.687633,141646.945633,1.258,81.163,64.1,163.1,117.2,...,,2.31,5593,BH,High,,,,,
3,5491,Spectrogram 1,1,53861.496367,53863.120367,1.624,175.749,61.9,216.5,175.8,...,,26.63,3198,BH,High,,,,,y
4,11636,Spectrogram 1,1,187384.257000,187392.701000,8.444,232.162,67.2,7258.7,228.5,...,,13.17,4177,BH,High,,roar,,,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4780,11567,Spectrogram 1,1,186586.066033,186588.556033,2.490,33.841,59.4,344.6,64.5,...,,34.10,1809,BH,High,,,,,yes
4781,9162,Spectrogram 1,1,137259.443700,137260.214700,0.771,193.645,93.2,314.6,193.4,...,,37.61,758,BH,High,faint,us,,,
4782,5464,Spectrogram 1,1,53505.164967,53506.853967,1.689,119.268,61.9,364.9,199.2,...,,31.88,2503,BH,High,,,,,y
4783,6813,Spectrogram 1,1,84310.844367,84312.140367,1.296,31.579,80.2,246.2,123.0,...,,30.93,2748,BH,High,,,,,


# Annotation file pre-processing

In [4]:
def delete_unnecessary_columns(annot_df, delete_columns=['Selection', 'View']):
    """ Delete unwanted columns from the annotations dataframe
     
        Args:
            annot_df: pandas DataFrame
                Annotation table.
            delete_columns: list
                List of columns to delete from the annotations dataframe
                default values ('Selection', 'View')

        Returns:
            annot_df: pandas DataFrame
                Annotation table after removing listed columns

    """
    for column in delete_columns:
        del annot_df[column]
    return annot_df

annot_df=delete_unnecessary_columns(annot_df)

annot_df.rename({'Begin Path': 'filename',
                'File Offset (s)': 'start',
                'species': 'label'}, axis='columns', inplace =True)

# Modify filepath (Discard the drive location (e.g., D:/))
annot_df['filename']=annot_df['filename'].apply(lambda x: x[3:len(x)]) 
# Modify filepath to replace \ with / in the filename
annot_df['filename']=annot_df['filename'].apply(lambda x: x.replace("\\", "/")) 
# Modify filepath to add the data root dir (/data/WCS/)
annot_df['filename']=annot_df['filename'].apply(lambda x: data_dir+x)
# Calculate End time
annot_df['end']=annot_df['start']+annot_df['Delta Time (s)']

# Add Negative samples

In [5]:
# Change the location of 'target_wav_location' to choose random neg samples from that location
list_of_wav_files=annot_df['filename'].unique()
print("list_of_wav_files: ", len(list_of_wav_files))

# Define the ratio of files to select from the folder
add_ratio=0.1
total_files=int(len(list_of_wav_files)*add_ratio)
print(total_files)

# Now, generate list of unique file indices
file_indices_random = random.sample(range(0, len(list_of_wav_files)), total_files)

list_of_wav_files:  813
81


In [6]:
# ========= Adding Negative (no BH) samples ==========
total_wav_parsed = 0
total_number_of_negative_samples=0

for file_index in file_indices_random:
    target_wav_filename=list_of_wav_files[file_index]
    # target_wav_filename=target_wav_filename[3:len(target_wav_filename)]
    target_matched_annot_df = annot_df[annot_df['filename'].str.find(os.path.basename(target_wav_filename)) != -1]
    
    if(len(target_matched_annot_df)!=0):
        print("target_wav_filename:", target_wav_filename)
        total_time_of_file = librosa.get_duration(filename=target_wav_filename)
        target_files_with_len = pd.DataFrame({'filename':[target_wav_filename], 
                                              'duration':[total_time_of_file]})

        print("Total annotations found: ", len(target_matched_annot_df))

        #Standardize annotation table format
        annot, label_dict = sl.standardize(target_matched_annot_df, return_label_dict=True, trim_table=True)


        sel = sl.select_by_segmenting(files=target_files_with_len, 
                                   length=spec_cfg['duration'], 
                                   annotations=annot, 
                                   step=spec_cfg['duration'],
                                   keep_only_empty=True) 

        print("sel:", len(sel))
        
        # Check if there is any BH class in the selection table
        sel_only_neg=sel.loc[sel['label']==0] 
        sel_only_pos=sel.loc[sel['label']==1]
        
        print("sel_only_pos:", len(sel_only_pos))
        print("sel_only_neg:", len(sel_only_neg))
        
        print("Adding samples to "+dataset_name)
        
        create_database(output_file=db_save_path+db_save_filename, data_dir='',
                                        dataset_name=dataset_name+'/Neg', selections=sel_only_neg,
                                        audio_repres=spec_cfg)

        total_wav_parsed+=1
        total_number_of_negative_samples+=len(sel_only_neg)

target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050316235015.wav
Total annotations found:  2
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 37.30it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050109205015.wav
Total annotations found:  7
sel: 87
sel_only_pos: 0
sel_only_neg: 87
Adding samples to /CB300


100%|███████████████████████████████████████████| 87/87 [00:02<00:00, 37.81it/s]


87 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050112005015.wav
Total annotations found:  20
sel: 48
sel_only_pos: 0
sel_only_neg: 48
Adding samples to /CB300


100%|███████████████████████████████████████████| 48/48 [00:01<00:00, 36.06it/s]


48 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041221165015.wav
Total annotations found:  3
sel: 93
sel_only_pos: 0
sel_only_neg: 93
Adding samples to /CB300


100%|███████████████████████████████████████████| 93/93 [00:02<00:00, 38.09it/s]


93 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041013235015.wav
Total annotations found:  14
sel: 75
sel_only_pos: 0
sel_only_neg: 75
Adding samples to /CB300


100%|███████████████████████████████████████████| 75/75 [00:01<00:00, 37.56it/s]


75 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050108235015.wav
Total annotations found:  8
sel: 87
sel_only_pos: 0
sel_only_neg: 87
Adding samples to /CB300


100%|███████████████████████████████████████████| 87/87 [00:02<00:00, 38.38it/s]


87 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041125135015.wav
Total annotations found:  7
sel: 85
sel_only_pos: 0
sel_only_neg: 85
Adding samples to /CB300


100%|███████████████████████████████████████████| 85/85 [00:02<00:00, 36.36it/s]


85 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050317035015.wav
Total annotations found:  8
sel: 87
sel_only_pos: 0
sel_only_neg: 87
Adding samples to /CB300


100%|███████████████████████████████████████████| 87/87 [00:02<00:00, 37.03it/s]


87 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041022225015.wav
Total annotations found:  6
sel: 91
sel_only_pos: 0
sel_only_neg: 91
Adding samples to /CB300


100%|███████████████████████████████████████████| 91/91 [00:02<00:00, 37.60it/s]


91 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050109115015.wav
Total annotations found:  13
sel: 76
sel_only_pos: 0
sel_only_neg: 76
Adding samples to /CB300


100%|███████████████████████████████████████████| 76/76 [00:01<00:00, 39.50it/s]


76 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041225165015.wav
Total annotations found:  3
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 38.22it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041220125015.wav
Total annotations found:  6
sel: 91
sel_only_pos: 0
sel_only_neg: 91
Adding samples to /CB300


100%|███████████████████████████████████████████| 91/91 [00:02<00:00, 38.31it/s]


91 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041102055015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 37.64it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050104165015.wav
Total annotations found:  15
sel: 80
sel_only_pos: 0
sel_only_neg: 80
Adding samples to /CB300


100%|███████████████████████████████████████████| 80/80 [00:02<00:00, 37.70it/s]


80 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050103005015.wav
Total annotations found:  5
sel: 95
sel_only_pos: 0
sel_only_neg: 95
Adding samples to /CB300


100%|███████████████████████████████████████████| 95/95 [00:03<00:00, 29.56it/s]


95 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050108135015.wav
Total annotations found:  3
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 38.81it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041219215015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 36.46it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041101065015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 38.08it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050113225015.wav
Total annotations found:  6
sel: 89
sel_only_pos: 0
sel_only_neg: 89
Adding samples to /CB300


100%|███████████████████████████████████████████| 89/89 [00:02<00:00, 36.60it/s]


89 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041023045015.wav
Total annotations found:  9
sel: 86
sel_only_pos: 0
sel_only_neg: 86
Adding samples to /CB300


100%|███████████████████████████████████████████| 86/86 [00:02<00:00, 38.63it/s]


86 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041014125015.wav
Total annotations found:  13
sel: 82
sel_only_pos: 0
sel_only_neg: 82
Adding samples to /CB300


100%|███████████████████████████████████████████| 82/82 [00:02<00:00, 36.88it/s]


82 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050216005015.wav
Total annotations found:  3
sel: 95
sel_only_pos: 0
sel_only_neg: 95
Adding samples to /CB300


100%|███████████████████████████████████████████| 95/95 [00:02<00:00, 38.49it/s]


95 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050218165015.wav
Total annotations found:  21
sel: 68
sel_only_pos: 0
sel_only_neg: 68
Adding samples to /CB300


100%|███████████████████████████████████████████| 68/68 [00:01<00:00, 37.57it/s]


68 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041031185015.wav
Total annotations found:  17
sel: 67
sel_only_pos: 0
sel_only_neg: 67
Adding samples to /CB300


100%|███████████████████████████████████████████| 67/67 [00:01<00:00, 37.87it/s]


67 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050119175015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 37.84it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050218195015.wav
Total annotations found:  5
sel: 89
sel_only_pos: 0
sel_only_neg: 89
Adding samples to /CB300


100%|███████████████████████████████████████████| 89/89 [00:02<00:00, 37.76it/s]


89 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041125185015.wav
Total annotations found:  4
sel: 92
sel_only_pos: 0
sel_only_neg: 92
Adding samples to /CB300


100%|███████████████████████████████████████████| 92/92 [00:03<00:00, 28.73it/s]


92 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041010075015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 37.76it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050109155015.wav
Total annotations found:  18
sel: 69
sel_only_pos: 0
sel_only_neg: 69
Adding samples to /CB300


100%|███████████████████████████████████████████| 69/69 [00:01<00:00, 37.47it/s]


69 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050210035015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 38.55it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041114205015.wav
Total annotations found:  2
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 37.73it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050217005015.wav
Total annotations found:  1
sel: 99
sel_only_pos: 0
sel_only_neg: 99
Adding samples to /CB300


100%|███████████████████████████████████████████| 99/99 [00:02<00:00, 37.96it/s]


99 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050110025015.wav
Total annotations found:  9
sel: 86
sel_only_pos: 0
sel_only_neg: 86
Adding samples to /CB300


100%|███████████████████████████████████████████| 86/86 [00:02<00:00, 38.16it/s]


86 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041028185015.wav
Total annotations found:  6
sel: 89
sel_only_pos: 0
sel_only_neg: 89
Adding samples to /CB300


100%|███████████████████████████████████████████| 89/89 [00:02<00:00, 36.66it/s]


89 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050116155015.wav
Total annotations found:  10
sel: 88
sel_only_pos: 0
sel_only_neg: 88
Adding samples to /CB300


100%|███████████████████████████████████████████| 88/88 [00:02<00:00, 37.92it/s]


88 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050106205015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 37.94it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041219165015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 38.12it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050107185015.wav
Total annotations found:  6
sel: 87
sel_only_pos: 0
sel_only_neg: 87
Adding samples to /CB300


100%|███████████████████████████████████████████| 87/87 [00:02<00:00, 37.52it/s]


87 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050114025015.wav
Total annotations found:  10
sel: 84
sel_only_pos: 0
sel_only_neg: 84
Adding samples to /CB300


100%|███████████████████████████████████████████| 84/84 [00:02<00:00, 37.88it/s]


84 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041104035015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:03<00:00, 28.00it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050115055015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 37.05it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050219125015.wav
Total annotations found:  2
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 38.11it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041223025015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 38.47it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041018085015.wav
Total annotations found:  3
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 38.44it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041031075015.wav
Total annotations found:  15
sel: 77
sel_only_pos: 0
sel_only_neg: 77
Adding samples to /CB300


100%|███████████████████████████████████████████| 77/77 [00:01<00:00, 38.54it/s]


77 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050218225015.wav
Total annotations found:  5
sel: 91
sel_only_pos: 0
sel_only_neg: 91
Adding samples to /CB300


100%|███████████████████████████████████████████| 91/91 [00:02<00:00, 38.60it/s]


91 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050109005015.wav
Total annotations found:  9
sel: 84
sel_only_pos: 0
sel_only_neg: 84
Adding samples to /CB300


100%|███████████████████████████████████████████| 84/84 [00:02<00:00, 37.13it/s]


84 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041216225015.wav
Total annotations found:  3
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 38.64it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050316135015.wav
Total annotations found:  1
sel: 99
sel_only_pos: 0
sel_only_neg: 99
Adding samples to /CB300


100%|███████████████████████████████████████████| 99/99 [00:02<00:00, 36.91it/s]


99 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050309175015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 39.44it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050110015015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 37.97it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050104105015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:03<00:00, 30.14it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041126225015.wav
Total annotations found:  1
sel: 99
sel_only_pos: 0
sel_only_neg: 99
Adding samples to /CB300


100%|███████████████████████████████████████████| 99/99 [00:02<00:00, 37.83it/s]


99 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041101215015.wav
Total annotations found:  4
sel: 94
sel_only_pos: 0
sel_only_neg: 94
Adding samples to /CB300


100%|███████████████████████████████████████████| 94/94 [00:02<00:00, 38.01it/s]


94 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050114085015.wav
Total annotations found:  8
sel: 86
sel_only_pos: 0
sel_only_neg: 86
Adding samples to /CB300


100%|███████████████████████████████████████████| 86/86 [00:02<00:00, 36.36it/s]


86 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041022035015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 39.17it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041216215015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 39.31it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041125205015.wav
Total annotations found:  3
sel: 95
sel_only_pos: 0
sel_only_neg: 95
Adding samples to /CB300


100%|███████████████████████████████████████████| 95/95 [00:02<00:00, 38.71it/s]


95 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050310125015.wav
Total annotations found:  6
sel: 91
sel_only_pos: 0
sel_only_neg: 91
Adding samples to /CB300


100%|███████████████████████████████████████████| 91/91 [00:02<00:00, 37.46it/s]


91 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050310025015.wav
Total annotations found:  6
sel: 91
sel_only_pos: 0
sel_only_neg: 91
Adding samples to /CB300


100%|███████████████████████████████████████████| 91/91 [00:02<00:00, 38.66it/s]


91 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050128105015.wav
Total annotations found:  2
sel: 97
sel_only_pos: 0
sel_only_neg: 97
Adding samples to /CB300


100%|███████████████████████████████████████████| 97/97 [00:02<00:00, 37.05it/s]


97 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041115165015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 37.35it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041103045015.wav
Total annotations found:  7
sel: 92
sel_only_pos: 0
sel_only_neg: 92
Adding samples to /CB300


100%|███████████████████████████████████████████| 92/92 [00:02<00:00, 37.81it/s]


92 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041121225015.wav
Total annotations found:  5
sel: 93
sel_only_pos: 0
sel_only_neg: 93
Adding samples to /CB300


100%|███████████████████████████████████████████| 93/93 [00:03<00:00, 29.55it/s]


93 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050105115015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 36.24it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041014235015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 37.59it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050107085015.wav
Total annotations found:  17
sel: 71
sel_only_pos: 0
sel_only_neg: 71
Adding samples to /CB300


100%|███████████████████████████████████████████| 71/71 [00:01<00:00, 37.54it/s]


71 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041208025015.wav
Total annotations found:  3
sel: 94
sel_only_pos: 0
sel_only_neg: 94
Adding samples to /CB300


100%|███████████████████████████████████████████| 94/94 [00:02<00:00, 38.00it/s]


94 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041123145015.wav
Total annotations found:  1
sel: 99
sel_only_pos: 0
sel_only_neg: 99
Adding samples to /CB300


100%|███████████████████████████████████████████| 99/99 [00:02<00:00, 38.31it/s]


99 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050209005015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 38.70it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050101185015.wav
Total annotations found:  30
sel: 61
sel_only_pos: 0
sel_only_neg: 61
Adding samples to /CB300


100%|███████████████████████████████████████████| 61/61 [00:01<00:00, 35.90it/s]


61 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041031035015.wav
Total annotations found:  21
sel: 69
sel_only_pos: 0
sel_only_neg: 69
Adding samples to /CB300


100%|███████████████████████████████████████████| 69/69 [00:01<00:00, 37.91it/s]


69 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041129005015.wav
Total annotations found:  4
sel: 94
sel_only_pos: 0
sel_only_neg: 94
Adding samples to /CB300


100%|███████████████████████████████████████████| 94/94 [00:02<00:00, 37.84it/s]


94 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041118105015.wav
Total annotations found:  2
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 38.44it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041012065015.wav
Total annotations found:  6
sel: 89
sel_only_pos: 0
sel_only_neg: 89
Adding samples to /CB300


100%|███████████████████████████████████████████| 89/89 [00:02<00:00, 36.93it/s]


89 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041026205015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 38.43it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041216235015.wav
Total annotations found:  4
sel: 93
sel_only_pos: 0
sel_only_neg: 93
Adding samples to /CB300


100%|███████████████████████████████████████████| 93/93 [00:03<00:00, 29.67it/s]


93 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041217085015.wav
Total annotations found:  1
sel: 98
sel_only_pos: 0
sel_only_neg: 98
Adding samples to /CB300


100%|███████████████████████████████████████████| 98/98 [00:02<00:00, 37.55it/s]


98 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.050104125015.wav
Total annotations found:  2
sel: 96
sel_only_pos: 0
sel_only_neg: 96
Adding samples to /CB300


100%|███████████████████████████████████████████| 96/96 [00:02<00:00, 37.76it/s]


96 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041029015015.wav
Total annotations found:  8
sel: 86
sel_only_pos: 0
sel_only_neg: 86
Adding samples to /CB300


100%|███████████████████████████████████████████| 86/86 [00:02<00:00, 38.56it/s]


86 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5
target_wav_filename: /data/WCS/Cape_Bathurst_300_2018_2019/1208533023.041125115015.wav
Total annotations found:  4
sel: 92
sel_only_pos: 0
sel_only_neg: 92
Adding samples to /CB300


100%|███████████████████████████████████████████| 92/92 [00:02<00:00, 37.64it/s]

92 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5





In [7]:
# Filter the annotations with 1/2/3 values in the Tonal, Signal strength, and Background columns
if(filter_based_on_quality_flag):
    annot_df=annot_df[annot_df['Tonal'].isin(['1', '2', '3'])]

# Add Positive (BH) samples

In [8]:
# ========= Adding Positive (BH) samples ==========
map_to_ketos_annot_std ={'Begin Path': 'filename',
                          'File Offset (s)': 'start',
                          'species': 'label'
                        } 
std_annot_BH_df = sl.standardize(table=annot_df, mapper=map_to_ketos_annot_std, 
                              signal_labels=['BH'], trim_table=True)
sel_table_BH = sl.select(annotations=std_annot_BH_df, length=spec_cfg['duration'], step=0.5, min_overlap=0.8, center=False)

In [9]:
create_database(output_file=db_save_path+db_save_filename, data_dir='',
                                dataset_name=dataset_name+'/BH',selections=std_annot_BH_df,
                                audio_repres=spec_cfg)

100%|███████████████████████████████████████| 4785/4785 [02:00<00:00, 39.61it/s]

4785 items saved to /home/sadman/arctic_mammal/saved_database/db_test_3.0.h5





In [10]:
print("Total negative samples saved: ", total_number_of_negative_samples)
print("Total BH samples saved (Before stepping augmentation): ", len(std_annot_BH_df))
print("Total BH samples saved (After stepping augmentation): ", len(sel_table_BH))

Total negative samples saved:  7308
Total BH samples saved (Before stepping augmentation):  4785
Total BH samples saved (After stepping augmentation):  8205
