In [None]:
pip install librosa pandas

In [2]:
import sys
sys.path.append('../')

import librosa
import boto3
import os
from src.s3_utils import download_s3_object_to_memory, read_audio_fromS3, read_selection_table_fromS3, trim_audio_file
import yaml
import pandas as pd
import soundfile as sf
import io

In [6]:
# Name of the S3 bucket holding the recordings and label files.
bucket_name = 'way-kambas'

# Read the AWS credentials from the local (non-versioned) YAML config.
with open('../config/connection_config.yaml', 'r') as config_file:
    credentials = yaml.safe_load(config_file)

# Pull out the key pair used to authenticate against S3.
access_key = credentials['access_key']
secret_access_key = credentials['secret_access_key']

# Build the S3 client that the cells below use for every bucket operation.
s3 = boto3.client(
    's3',
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_access_key,
)

In [8]:
# Build a data frame that maps each selection table (which contains the labels)
# to its corresponding sound file.
mapping_csv_key = 'labels/selection_tables_to_soundfiles.csv'
slctn_tbls_to_sndfls_bytes = download_s3_object_to_memory(bucket_name, mapping_csv_key, s3)
slctn_tbls_to_sndfls_df = pd.read_csv(slctn_tbls_to_sndfls_bytes)

# Preview the first rows of the mapping.
slctn_tbls_to_sndfls_df.head()

Unnamed: 0,selection_table,soundfile,selection_table_directory,soundfile_directory
0,SWIFT4_20210916_080000.Table.1.selections.txt,SWIFT4_20210916_080000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...
1,SWIFT4_20210916_090000.Table.1.selections.txt,SWIFT4_20210916_090000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...
2,SWIFT4_20210916_150000.Table.1.selections.txt,SWIFT4_20210916_150000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...
3,SWIFT4_20210916_160000.Table.1.selections.txt,SWIFT4_20210916_160000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...
4,SWIFT4_20210916_190000.Table.1.selections.txt,SWIFT4_20210916_190000.wav,labels/selection_tables/SWIFT4_20210916_075445...,soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT...


In [9]:
# Alias for the S3 client created above; the processing loop below passes
# `client` to the S3 read helpers.
client = s3

In [10]:
# For every (selection table, sound file) pair: read both from S3, cut one clip
# per label row out of the recording, and upload each clip back to the bucket
# under soundfiles_trimmed/<sound_category>/<sound_type>/<label_id>.wav.
for file_idx, row in slctn_tbls_to_sndfls_df.iterrows():
    # read the audio file
    audio_file_path = row['soundfile_directory']
    audio, sr = read_audio_fromS3(audio_file_path, bucket_name, client)
    # read the selection table
    selection_table_path = row['selection_table_directory']
    selection_table = read_selection_table_fromS3(selection_table_path, bucket_name, client)

    # A distinct index name keeps the inner loop from rebinding the outer
    # loop variable.
    for label_idx, label in selection_table.iterrows():
        start, end = label["Begin Time (s)"], label["End Time (s)"]
        # presumably returns the samples between `start` and `end` seconds;
        # verify against src.s3_utils.trim_audio_file
        clip = trim_audio_file(audio, start, end, sr)
        sound_category = label['sound_category']
        sound_type = label['sound_type']
        label_id = label["label_id"]

        # S3 object keys always use "/" as the delimiter, so build the key as a
        # plain string instead of going through the OS-specific os.path.join.
        output_path = f"soundfiles_trimmed/{sound_category}/{sound_type}/{label_id}.wav"

        # Encode the clip as 24-bit PCM WAV in memory (no temporary file on disk).
        with io.BytesIO() as audio_file:
            sf.write(audio_file, clip, sr, format='WAV', subtype='PCM_24')
            # Rewind so the upload reads the buffer from its first byte.
            audio_file.seek(0)
            print(output_path)
            # Upload the audio file to S3 with the same client used for the reads.
            client.upload_fileobj(audio_file, bucket_name, output_path, ExtraArgs={'ContentType': "audio/wav"})

soundfiles/Swift4/SWIFT4_20210916_075445/SWIFT4_2021-09-16/SWIFT4_20210916_080000.wav
labels/selection_tables/SWIFT4_20210916_075445/SWIFT4_20210916_080000.Table.1.selections.txt
soundfiles_trimmed/elephant_vocalization/roar/roar_SWIFT4_20210916_080000.Table.1.selections_1.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_SWIFT4_20210916_080000.Table.1.selections_2.wav
soundfiles_trimmed/elephant_vocalization/rumble/rumble_SWIFT4_20210916_080000.Table.1.selections_4.wav
soundfiles_trimmed/elephant_vocalization/roar_rumble/roar_rumble_SWIFT4_20210916_080000.Table.1.selections_5.wav
soundfiles_trimmed/elephant_vocalization/roar/roar_SWIFT4_20210916_080000.Table.1.selections_6.wav
soundfiles_trimmed/elephant_vocalization/trumpet/trumpet_SWIFT4_20210916_080000.Table.1.selections_7.wav
soundfiles_trimmed/elephant_vocalization/roar_rumble/roar_rumble_SWIFT4_20210916_080000.Table.1.selections_8.wav
soundfiles_trimmed/elephant_vocalization/roar_rumble/roar_rumble_SWIFT4_20210916_08000