In [None]:
pip install librosa pandas

In [4]:
import sys
sys.path.append('../')

import librosa
import boto3
import os
from src.s3_utils import download_s3_object_to_memory, read_audio_fromS3, read_selection_table_fromS3, trim_audio_file
import yaml
import pandas as pd
import soundfile as sf
import io

In [5]:
# Read the AWS key pair from the YAML connection config.
with open('../config/connection_config.yaml', 'r') as config_file:
    credentials = yaml.safe_load(config_file)

# Pull both halves of the key pair out of the parsed config.
access_key, secret_access_key = (
    credentials['access_key'],
    credentials['secret_access_key'],
)

# Bucket that the rest of the notebook reads from and writes to, and the
# boto3 S3 client authenticated with the key pair above.
bucket_name = 'bukit-tiga-puluh'
s3 = boto3.client(
    's3',
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_access_key,
)

In [6]:
# Build a data frame that maps each selection table (the file containing the
# labels) to its corresponding sound file.
slctn_tbls_to_sndfls_bytes = download_s3_object_to_memory(bucket_name, 'labels/selection_tables_to_soundfiles.csv', s3)
# The CSV appears to have been saved together with its row index, which would
# otherwise surface as a spurious "Unnamed: 0" column; read it back as the index.
slctn_tbls_to_sndfls_df = pd.read_csv(slctn_tbls_to_sndfls_bytes, index_col=0)
slctn_tbls_to_sndfls_df.head()

Unnamed: 0,selection_table,soundfile,selection_table_directory,soundfile_directory
0,ELOC15_1654513906288_2022-06-27_06-11-46.Table...,ELOC15_1654513906288_2022-06-27_06-11-46.wav,labels/selection_tables/ELOC15_1654513906288/E...,soundfiles/ELOC15_1654513906288/ELOC15_1654513...
1,ELOC19_1656414148027_2022-06-28_23-02-28.txt,ELOC19_1656414148027_2022-06-28_23-02-28.wav,labels/selection_tables/ELOC19_1656414148027/E...,soundfiles/ELOC19_1656414148027/ELOC19_1656414...
2,ELOC1_1654435369276_2022-06-07_02-22-50.txt,ELOC1_1654435369276_2022-06-07_02-22-50.wav,labels/selection_tables/ELOC1_1654435369276/EL...,soundfiles/ELOC1_1654435369276/ELOC1_165443536...
3,ELOC1_1654435369276_2022-06-07_03-22-50.txt,ELOC1_1654435369276_2022-06-07_03-22-50.wav,labels/selection_tables/ELOC1_1654435369276/EL...,soundfiles/ELOC1_1654435369276/ELOC1_165443536...
4,ELOC1_1654435369276_2022-06-07_04-22-50.txt,ELOC1_1654435369276_2022-06-07_04-22-50.wav,labels/selection_tables/ELOC1_1654435369276/EL...,soundfiles/ELOC1_1654435369276/ELOC1_165443536...


In [8]:
# Alias the S3 client under the name `client`, which the clip-extraction loop
# passes to the S3 read helpers.
client = s3

In [9]:
# For every (selection table, sound file) pair: load the recording and its
# labels from S3, cut one clip per label, and upload each clip back to the
# bucket under soundfiles_trimmed/<sound_category>/<sound_type>/<label_id>.wav.
for row_idx, row in slctn_tbls_to_sndfls_df.iterrows():
    # Read the audio file.
    audio_file_path = row['soundfile_directory']
    audio, sr = read_audio_fromS3(audio_file_path, bucket_name, client)
    # Read the selection table.
    selection_table_path = row['selection_table_directory']
    selection_table = read_selection_table_fromS3(selection_table_path, bucket_name, client)

    # The inner loop uses its own index name so it does not shadow the outer one.
    for label_idx, label in selection_table.iterrows():
        start, end = label["Begin Time (s)"], label["End Time (s)"]
        clip = trim_audio_file(audio, start, end, sr)
        sound_category = label['sound_category']
        sound_type = label['sound_type']
        label_id = label["label_id"]

        # S3 object keys always use "/" as the separator, so the key is built
        # by plain string formatting rather than with a filesystem path helper.
        output_dir = f"soundfiles_trimmed/{sound_category}/{sound_type}/"
        filename = f"{label_id}.wav"  # Name the file using label_id
        output_path = f"{output_dir}{filename}"

        # Encode the clip as a 24-bit PCM WAV in memory; nothing touches local disk.
        with io.BytesIO() as audio_file:
            sf.write(audio_file, clip, sr, format='WAV', subtype='PCM_24')
            # Rewind so the upload reads the buffer from its first byte.
            audio_file.seek(0)
            print(output_path)
            # Upload the audio file to S3 with the same client used for the reads.
            client.upload_fileobj(audio_file, bucket_name, output_path, ExtraArgs={'ContentType': "audio/wav"})

soundfiles/ELOC15_1654513906288/ELOC15_1654513906288_2022-06-27_06-11-46.wav
labels/selection_tables/ELOC15_1654513906288/ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections.txt
soundfiles_trimmed/elephant_vocalization/roar/roar_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_1.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_2.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_3.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_4.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_5.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_ELOC15_1654513906288_2022-06-27_06-11-46.Table.1.selections_7.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_ELOC15_1654513906288_2022-0