In [12]:
%%capture
!pip install librosa pandas

In [2]:
import sys
sys.path.append('../')
import pandas as pd
from tqdm.notebook import tqdm

from src.s3_utils import create_s3_client, download_s3_object_to_memory
from src.clip_utils import extract_and_upload_clips

## Initialize S3 Client and Define Buckets

In [3]:
# Shared S3 client, configured from the project's connection settings file.
s3 = create_s3_client('../config/connection_config.yaml')

# Names of the buckets that will be processed.
buckets = [
    'tangkahan',
    'way-kambas',
    'bukit-tiga-puluh',
    'sabah',
]

## Process Selection Tables and Extract Clips

In [4]:
# Work with a single bucket first.
bucket = 'way-kambas'

# Fetch the CSV that maps selection tables to sound files into memory,
# then parse it into a DataFrame.
slctn_tbls_to_sndfls_bytes = download_s3_object_to_memory(
    bucket,
    'labels/selection_tables_to_soundfiles.csv',
    s3,
)
slctn_tbls_to_sndfls_df = pd.read_csv(slctn_tbls_to_sndfls_bytes)

# Preview the first two rows of the mapping.
slctn_tbls_to_sndfls_df.head(2)


Unnamed: 0,selection_table,soundfile,selection_table_directory,soundfile_directory
0,SWIFT4_20210916_080000.Table.1.selections.txt,SWIFT4_20210916_080000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...
1,SWIFT4_20210916_090000.Table.1.selections.txt,SWIFT4_20210916_090000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...


In [5]:
# Take the first row of the mapping table as a single worked example.
# (This assignment used to be repeated in two consecutive cells; one copy
# is enough.)
example = slctn_tbls_to_sndfls_df.loc[0]

# Paths, as stored in the mapping table, of the sound file and of its
# selection table.
sf1 = example['soundfile_directory']
st1 = example['selection_table_directory']

In [54]:
# Display the selection-table path picked for the example row.
st1

'labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_080000.Table.1.selections.txt'

In [7]:
# Run the clip extraction/upload for the single example pair before looping
# over everything; the recorded output shows the two paths being echoed.
extract_and_upload_clips(sf1, st1, bucket, s3)

soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_080000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_080000.Table.1.selections.txt


In [8]:
# Process each specified bucket: load its selection-table -> sound-file
# mapping, then extract and upload the clips for every pair in it.
for bucket in buckets:
    # map selection_tables to soundfiles
    slctn_tbls_to_sndfls_bytes = download_s3_object_to_memory(
        bucket, 'labels/selection_tables_to_soundfiles.csv', s3
    )
    slctn_tbls_to_sndfls_df = pd.read_csv(slctn_tbls_to_sndfls_bytes)
    n = len(slctn_tbls_to_sndfls_df)

    # The context manager closes the bar even when a file fails part-way,
    # so an exception does not leave a dangling progress widget behind.
    with tqdm(total=n, desc=f"file {bucket}") as pbar:
        # Only the two path columns are needed, so iterate over them directly
        # instead of materialising a Series per row with iterrows().
        for sf1, st1 in zip(
            slctn_tbls_to_sndfls_df['soundfile_directory'],
            slctn_tbls_to_sndfls_df['selection_table_directory'],
        ):
            pbar.set_description(f"{bucket}: {sf1}")
            # Extract and write clips based on the selection table entries
            extract_and_upload_clips(sf1, st1, bucket, s3)
            # Advance only once the file is done, so the bar reflects
            # completed work rather than work that has merely started.
            pbar.update(1)
    
        
    

file tangkahan:   0%|          | 0/391 [00:00<?, ?it/s]

soundfiles/1633880400_ElocX/Eloc_2021-10-11/0mkandang_2021-10-11_15-50-16.wav
labels/selection_tables/1633880400_ElocX/0mCage_Eloc_20211011_155016.Table.1.selections.txt
soundfiles/1633880400_ElocX/Eloc_2021-10-13/0mkandang_2021-10-13_15-50-25.wav
labels/selection_tables/1633880400_ElocX/0mcage_Eloc_2021-10-13_15-50-25.Table.1.selections.txt
soundfiles/ElocX_20211110_092442/20211101_130000.WAV
labels/selection_tables/ElocX_20211110_092442/angon_20211101_130000.Table.1.selections.txt
soundfiles/1633880400_ElocX/Eloc_2021-10-10/0mkandang_2021-10-10_15-50-12.wav
labels/selection_tables/1633880400_ElocX/0mCage_2021-10-10_15-50-12.Table.1.selections.txt
soundfiles/1631374980_Swift1/Swift1_2021-09-13/Swift1_20210913_100003.wav
labels/selection_tables/1631374980_Swift1/0mCage_Swift1_20210913_100003.Table.1.selections.txt
soundfiles/1635602580_Swift2/Swift2_2021-11-01/Swift2_20211101_090100.wav
labels/selection_tables/1635602580_Swift2/100m_20211101_090100.Table.2.selections.txt
soundfiles/163

file way-kambas:   0%|          | 0/119 [00:00<?, ?it/s]

soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_080000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_080000.Table.1.selections.txt
soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_090000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_090000.Table.1.selections.txt
soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_150000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_150000.Table.1.selections.txt
soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_160000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_160000.Table.1.selections.txt
soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_190000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_190000.Table.1.selections.txt
soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_200000.wav
labels/selection_ta

file bukit-tiga-puluh:   0%|          | 0/45 [00:00<?, ?it/s]

soundfiles/ELOC15_1654513906288/ELOC15_1654513906288_2022-06-27_06-11-46.wav
labels/selection_tables/ELOC15_1654513906288/ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections.txt
soundfiles/ELOC19_1656414148027/ELOC19_1656414148027_2022-06-28_23-02-28.wav
labels/selection_tables/ELOC19_1656414148027/ELOC19_1656414148027_2022-06-28_23-02-28.txt
soundfiles/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_02-22-50.wav
labels/selection_tables/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_02-22-50.txt
soundfiles/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_03-22-50.wav
labels/selection_tables/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_03-22-50.txt
soundfiles/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_04-22-50.wav
labels/selection_tables/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_04-22-50.txt
soundfiles/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_05-22-50.wav
labels/selection_tables/ELOC1_1654435369276/ELOC1_1654435369276_2022-06-07_05-22-50

file sabah:   0%|          | 0/17 [00:00<?, ?it/s]

soundfiles/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_08-39-25.wav
labels/selection_tables/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_08-39-25.Table.1.selections.txt
soundfiles/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_09-39-25.wav
labels/selection_tables/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_09-39-25.Table.1.selections.txt
soundfiles/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_10-39-25.wav
labels/selection_tables/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_10-39-25.Table.1.selections.txt
soundfiles/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_13-39-25.wav
labels/selection_tables/ELOC26_1678094402251/ELOC26_1677321565102_2023-03-03_13-39-25.Table.1.selections.txt
soundfiles/ELOC6_1678095504743/ELOC6_1678095504743_2023-03-06_11-38-25.wav
labels/selection_tables/ELOC6_1678095504743/ELOC6_1678095504743_2023-03-06_11-38-25.Table.1.selections.txt
soundfiles/ELOC6_1678095504743/ELOC6_1678095504743_2023-03-13_01-38-25.wav