In [None]:
pip install librosa pandas

In [2]:
import sys
sys.path.append('../')

import librosa
import boto3
import os
from src.s3_utils import download_s3_object_to_memory, read_audio_fromS3, read_selection_table_fromS3, trim_audio_file
import yaml
import pandas as pd
import soundfile as sf
import io

## Load S3 Credentials and Establish Connection

In [3]:
# Read the S3 credentials from the YAML connection config.
with open('../config/connection_config.yaml', 'r') as f:
    credentials = yaml.safe_load(f)

# Pull out the two keys required to authenticate against S3.
access_key, secret_access_key = (
    credentials[key_name] for key_name in ('access_key', 'secret_access_key')
)

# Name of the bucket used by this notebook, and the client used to reach it.
bucket_name = 'tangkahan'
s3 = boto3.client(
    's3',
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_access_key,
)

In [4]:
# Build a DataFrame that maps each selection table (which contains the labels)
# to its corresponding soundfile.
slctn_tbls_to_sndfls_bytes = download_s3_object_to_memory(bucket_name, 'labels/selection_tables_to_soundfiles.csv', s3)
# The first CSV column has no header and would otherwise load as a stray
# "Unnamed: 0" data column (presumably a saved row index), so use it as the
# DataFrame index instead.
slctn_tbls_to_sndfls_df = pd.read_csv(slctn_tbls_to_sndfls_bytes, index_col=0)
slctn_tbls_to_sndfls_df.head()

Unnamed: 0,selection_table,soundfile,selection_table_directory,soundfile_directory
0,300m_20210917_043000.Table.1.selections.txt,300m_2021-09-17_04-27-42.wav,labels/selection_tables/1631802420_Eloc3/300m_...,soundfiles/1631802420_Eloc3/300m_2021-09-17_04...
1,P500m_2021-10-30_14-54-40.Table.1.selections.txt,P500m_2021-10-30_14-54-40.wav,labels/selection_tables/1635605760_Eloc5/P500m...,soundfiles/1635605760_Eloc5/P500m_2021-10-30_1...
2,100m_20211101_090100.Table.2.selections.txt,Swift2_20211101_090100.wav,labels/selection_tables/1635602580_Swift2/100m...,soundfiles/1635602580_Swift2/Swift2_2021-11-01...
3,100m_20211101_130000.Table.1.selections.txt,Swift2_20211101_130100.wav,labels/selection_tables/1635602580_Swift2/100m...,soundfiles/1635602580_Swift2/Swift2_2021-11-01...
4,100m_20211101_150000.Table.1.selections.txt,Swift2_20211101_150000.wav,labels/selection_tables/1635602580_Swift2/100m...,soundfiles/1635602580_Swift2/Swift2_2021-11-01...


In [5]:
# Alias the S3 client created above; the S3 helper calls in the processing
# loop below receive it under the name `client`.
client = s3

## Process Audio Files and Upload to S3
1. Loop through each row of the `slctn_tbls_to_sndfls_df` DataFrame.
2. Read the corresponding soundfile and its selection table from S3.
3. Loop through each label in the selection table, trim the soundfile according to the label's begin and end times, and upload the resulting clip to S3.

In [6]:
# For every (selection table, soundfile) pair: load both from S3, cut one clip
# per label out of the recording, and upload each clip back to S3 under
# soundfiles_trimmed/<sound_category>/<sound_type>/<label_id>.wav.
for file_idx, row in slctn_tbls_to_sndfls_df.iterrows():
    # Read the audio file.
    audio_file_path = row['soundfile_directory']
    audio, sr = read_audio_fromS3(audio_file_path, bucket_name, client)
    # Read the selection table that holds the labels for this audio file.
    selection_table_path = row['selection_table_directory']
    selection_table = read_selection_table_fromS3(selection_table_path, bucket_name, client)

    # Trim the soundfile according to the begin and end times of each label.
    # The inner loop uses its own index name so it does not shadow the outer one.
    for label_idx, label in selection_table.iterrows():
        start, end = label["Begin Time (s)"], label["End Time (s)"]
        clip = trim_audio_file(audio, start, end, sr)
        sound_category = label['sound_category']
        sound_type = label['sound_type']
        label_id = label["label_id"]

        output_dir = f"soundfiles_trimmed/{sound_category}/{sound_type}/"

        filename = f"{label_id}.wav"  # Name the file using label_id
        # S3 object keys always use '/' as the separator. os.path.join would
        # insert '\\' on Windows, so concatenate the key explicitly
        # (output_dir already ends with '/').
        output_path = f"{output_dir}{filename}"

        # Encode the clip as a 24-bit PCM WAV in memory and upload it to S3.
        with io.BytesIO() as audio_file:
            sf.write(audio_file, clip, sr, format='WAV', subtype='PCM_24')
            # Rewind so the upload reads the buffer from the beginning.
            audio_file.seek(0)
            print(output_path)
            # Upload through the same client that was used for the reads.
            client.upload_fileobj(audio_file, bucket_name, output_path, ExtraArgs={'ContentType': "audio/wav"})

soundfiles/1631802420_Eloc3/300m_2021-09-17_04-27-42.wav
labels/selection_tables/1631802420_Eloc3/300m_20210917_043000.Table.1.selections.txt
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_13.wav
soundfiles_trimmed/elephant_vocalization/growl/growl_300m_20210917_043000_10.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_4.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_8.wav
soundfiles_trimmed/elephant_vocalization/growl/growl_300m_20210917_043000_22.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_9.wav
soundfiles_trimmed/elephant_vocalization/bark/bark_300m_20210917_043000_24.wav
soundfiles_trimmed/elephant_vocalization/growl/growl_300m_20210917_043000_11.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_12.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_300m_20210917_043000_29.wav
soundfiles_trimmed/elephant_vocalization/