# Setup

In [1]:
# Only run once
#!pip install librosa
#!pip install tensorflow

In [1]:
# Package Imports
import sys
# sys.path.append('/Users/sucheen/anaconda3/lib/python3.11/site-packages')
sys.path.append('/Users/sophiapchung/anaconda3/lib/python3.10/site-packages')
import pandas as pd
import numpy as np
import librosa
from scipy.io import wavfile
import soundfile as sf
import glob
import os
import boto3
import tensorflow as tf
#from awsKeys import awsKeys
import matplotlib.pyplot as plt
import matplotlib.image
import warnings
from os import path
warnings.filterwarnings('ignore')



KEYS = "ssundar_accessKeys.csv" # change to your path
# KEYS = '/Users/sophiapchung/Desktop/Bioacoustics/spchung_accessKeys.csv'
    
#aws_access_key_id, aws_secret_access_key = awsKeys(KEYS)

# Create a boto3 resource with your credentials
s3 = boto3.resource(
    's3',
    region_name='us-west-2'
)

# S3 Bucket for Professor's Account is 'whale-recordings'
# S3 Bucket for our free tier Account is 'test-whale-preprocessing'
bucket_name = 'test-whale-preprocessing'
bucket = s3.Bucket(bucket_name)


## CONSTANTS
# Window size (n_fft) in seconds
WINDOW_SIZE_SEC = 0.15175
# Hop Length in seconds
HOP_LEN_SEC = 0.05

2024-07-17 22:32:12.719702: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-17 22:32:12.981418: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-17 22:32:12.982993: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-17 22:32:13.399925: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def pull_wav_files(bucket, holding_directory, local_store_directory):
    """
    Compiles a list of all decimated wav files.
    
    PARAMETERS
    ----------
        bucket: boto3 s3 resource
            The object made to reference a boto3 s3 resource
            
        holding_directory: string
            Diretory holding all the wanted wav files in an s3 bucket (including those in subdirectories of it)
            
        local_store_directory: string
            Directory for locally storing the wav files for processing
    ----------
    
    RETURNS
    ----------
        WAV_FILES:
            Decimated .wav files w/ their file path
    ----------
    """
    # Decimated .wav files w/ their file path
    WAV_FILES = []
    for file in bucket.objects.all():
        if holding_directory in file.key and '.wav' in file.key:
            wav = file.key.split('/')[-1][:-4]
            s3_path = file.key
            notebook_path = path.join(local_store_directory, wav)
            bucket.download_file(s3_path, notebook_path)
            WAV_FILES.append([wav, notebook_path])
    return WAV_FILES

In [3]:
def pull_annotation_files(bucket, holding_directory, local_store_directory):
    """
    Compiles a list of all decimated wav files.
    
    PARAMETERS
    ----------
        bucket: boto3 s3 resource
            The object made to reference a boto3 s3 resource
            
        holding_directory: string
            Diretory holding all the wanted wav files in an s3 bucket (including those in subdirectories of it)
            
        local_store_directory: string
            Directory for locally storing the wav files for processing
    ----------
    
    RETURNS
    ----------
        WAV_FILES:
            Decimated .wav files w/ their file path
    ----------
    """
    # Decimated .wav files w/ their file path
    WAV_FILES = []
    for file in bucket.objects.all():
        if holding_directory in file.key and '.txt' in file.key:
            wav = file.key.split('/')[-1] 
            s3_path = file.key
            notebook_path = path.join(local_store_directory, wav)
            bucket.download_file(s3_path, notebook_path)
            WAV_FILES.append([wav, notebook_path])
    return WAV_FILES

# Reading .wav Files

In [4]:
def read_wavfile(wname, verbose=False):
    """
    Reads in a decimated wav file from the S3 Bucket.
    
    PARAMETERS
    ----------
        wname: string
            Decimated WAV file's path
        verbose: boolean
            Indicates whether or not to make output excessively detailed
    ----------
    
    RETURNS
    ----------
        sr: int
            Sampling rate of WAV file
        data: numpy array
            Contains floats representing the amplitudes of the sound wave for each sample (automatically ordered from earliest to latest)
    ----------
    """
    # Reads-in the decimated WAV file's information.
    """try:
        #sr, data = wavfile.read(wname) # NOTE: Sampling rate (sr) seems to be 8000 samples per second
        # download the file with the given name
        bucket.download_file(wname, wname)
        sr, data = wavfile.read(wname)
    except Exception:
        print(f'{wname} not found')
        exit(1)"""
    # download the file with the given name
    #bucket.download_file(wname, wname)
    sr, data = wavfile.read(f'{wname}')
    if verbose:
        print(f'{wname}: {data.shape[0]} samples at {sr} samples/sec --> {data.shape[0]/sr} seconds')
    
    return sr, data

# STFT --> Mel --> PCEN Implementation

In [5]:
import librosa
import numpy as np
import tensorflow as tf

def process_wav(wname, target_shape=(608, 192), verbose=True, batch_size=30, running=False):
    """
    Processes a WAV file by converting it to a Mel-Spectrogram and then to PCEN.
    :param wname: wav file path
    :param target_shape: target shape of the spectrogram
    :param verbose: verbosity
    :param batch_size: batch size
    :param running: running mode
    :return: dataset & sampling rate
    """
    data, sr = librosa.load(wname, sr=None, mono=True)
    if verbose:
        print(f"Loaded {wname} with sample rate {sr}")

    # Convert to Mel-Spectrogram and then to PCEN
    # Change either n_fft or win_length to 1200 to match the window size the bio team used during annotations
    S = librosa.feature.melspectrogram(y=data, sr=sr, n_fft=2048, hop_length=512, n_mels=target_shape[1])
    S_DB = librosa.power_to_db(S, ref=np.max)
    S_PCEN = librosa.pcen(S * (2**31), sr=sr, hop_length=512) #Need to figure out the time scale here...
    print("duration: ", librosa.get_duration(S=S_PCEN, n_fft=2048, hop_length=512))
    print("Num pixles: ", S_PCEN.shape)
    print("Time per pixle (seconds): ", librosa.get_duration(S=S_PCEN, n_fft=2048, hop_length=512)/S_PCEN.shape[1])
    time_per_pixle = librosa.get_duration(S=S_PCEN, n_fft=2048, hop_length=512)/S_PCEN.shape[1]

    # Transpose to align time and frequency axes
    S_PCEN = np.transpose(S_PCEN, (1, 0))
    if verbose:
        print("Shape after transpose:", S_PCEN.shape)

    # Segment the long spectrogram into smaller chunks
    segmented_spectrograms = []
    time_stamps = []
    for start in range(0, S_PCEN.shape[0] - target_shape[0] + 1, target_shape[0]):
        segmented = S_PCEN[start:start + target_shape[0], :]
        if segmented.shape[0] == target_shape[0]:
            segmented_spectrograms.append(segmented.reshape(target_shape[0], target_shape[1], 1))
            time_stamps.append([time_per_pixle * start, time_per_pixle * (start+target_shape[0])])
    if verbose:
        print(f"Generated {len(segmented_spectrograms)} segments of shape {target_shape}.")

    if running:
        return np.array(segmented_spectrograms), sr, time_stamps #return a corresponding 2D array of time stamps

    dataset = tf.data.Dataset.from_tensor_slices(np.array(segmented_spectrograms))
    dataset = dataset.shuffle(buffer_size=100).batch(batch_size)
    return dataset, sr


# Copy Files to S3 Bucket

In [6]:
def copy_to_bucket(fileSource, fileDestinationTrain, fileDestinationVal, fileName, bucket, fileNum = 0):
    """
    Copies a file from this notebook's working directory to the S3 Bucket.
    :param fileSource: string
        Name of the file to be copied
    :param fileDestination: string
        Name of the file in the S3 Bucket
    """
    # NOTE: Change the following name of the S3 Bucket (in parentheses) to reflect the name of the S3 Bucket for your current AWS account.
    """WARNING: This S3 Bucket should be the one that contains SageMaker files (NOT the one with WAV files and TXT files)."""
    bucket.upload_file(fileSource + fileName + '_' + str(fileNum) + '.png', fileDestinationTrain + fileName + '_' + str(fileNum) + '.png')
    bucket.upload_file(fileSource + fileName + '_' + str(fileNum) + '.png', fileDestinationVal + fileName + '_' + str(fileNum) + '.png')
print("finished preprocessing")

finished preprocessing


In [7]:
def make_list_file(curr_list, ravenAnnotations, timeStamp, fileNum, fileDestinationTrain, fileDestinationVal, fileName):
    print((ravenAnnotations["End Time (s)"].apply(lambda x: min(x, timeStamp[1])) - ravenAnnotations["Begin Time (s)"].apply(lambda x: max(x, timeStamp[0]))).apply(lambda x: max(x, 0)).sum())
    if (ravenAnnotations["End Time (s)"].apply(lambda x: min(x, timeStamp[1])) - ravenAnnotations["Begin Time (s)"].apply(lambda x: max(x, timeStamp[0]))).apply(lambda x: max(x, 0)).sum() > .25:
        curr_list = curr_list + str(fileNum) + "\t1\t" + fileDestinationTrain + fileName + '_' + str(fileNum) + '.png\n'
        #curr_list = curr_list + str(fileNum) + "\t1\t" + fileDestinationVal + fileName + '_' + str(fileNum) + '.png\n'
    else:
        curr_list = curr_list + str(fileNum) + "\t0\t" + fileDestinationTrain + fileName + '_' + str(fileNum) + '.png\n'
        #curr_list = curr_list + str(fileNum) + "\t0\t" + fileDestinationVal + fileName + '_' + str(fileNum) + '.png\n'
    return curr_list


# Mel Spectrogram - between STFT and PCEN

In [9]:
# AUDIO_FILE = "files/6805.230201090825_processed.wav"
# samples, sample_rate = librosa.load(AUDIO_FILE, sr=None)

# # # raw frequency
# sgram = librosa.stft(samples)
# librosa.display.specshow(sgram)

In [10]:
# mel-scale instead of frequency of Hz
# sgram_mag, _ = librosa.magphase(sgram)
# mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sample_rate)
# librosa.display.specshow(mel_scale_sgram)

# Running Preprocessing

In [8]:
# Create a boto3 resource with your credentials
s3 = boto3.resource(
    's3',
    region_name='us-west-2'
)

# S3 Bucket for Professor's Account is 'whale-recordings'
# S3 Bucket for our free tier Account is 'test-whale-preprocessing'
bucket_name = 'test-whale-preprocessing'
bucket = s3.Bucket(bucket_name)


## CONSTANTS
# Window size (n_fft) in seconds
WINDOW_SIZE_SEC = 0.15175
# Hop Length in seconds
HOP_LEN_SEC = 0.05

files = pull_wav_files(bucket, "decimated_wav_files", "local_store")
ravenAnnotationFiles = pull_annotation_files(bucket, "recording_annotations", "local_store")

j = 1
list_file = ""
for file in files:
    dataset, sr, time_stamps = process_wav(file[1], running = True, target_shape = (300, 192))
    print(time_stamps)
    items = zip(dataset, time_stamps)
    for item in items:
        data = item[0]
        time_stamp = item[1]
        #print(files[i][0] + '_' + str(j) + '.png')
        matplotlib.image.imsave('image_folder/' + file[0] + '_' + str(j) + '.png', np.flip(np.transpose(np.squeeze(data)), 0))
        
        for annotationFile in ravenAnnotationFiles:
            if file[0].split("_")[0] in annotationFile[1]:
                annotationPath = annotationFile[1]
        
        ravenAnnotations = pd.read_csv(annotationPath, sep = '\t')
        
        copy_to_bucket('image_folder/', "processed_wav_files/train/images/", "processed_wav_files/validation/images/", file[0], bucket, j)
        list_file = make_list_file(list_file, ravenAnnotations, time_stamp, j, "images/", "images/", file[0])
        
        #bucket.upload_file('image_folder/' + files[i][0] + '_' + str(j) + '.png', "processed_wav_files/" + files[i][0] + '_' + str(j) + '.png')
        j = j+1
with open('result.txt', 'w') as fp:
    fp.write(list_file)
bucket.upload_file("result.txt", "processed_wav_files/train_lst.lst")
bucket.upload_file("result.txt", "processed_wav_files/validation_lst.lst")

Loaded local_store/671658014.180928183606_processed with sample rate 8000
duration:  3917.601088435374
Num pixles:  (192, 168718)
Time per pixle (seconds):  0.02321981702269689
Shape after transpose: (168718, 192)
Generated 562 segments of shape (300, 192).
[[0.0, 6.965945106809067], [6.965945106809067, 13.931890213618134], [13.931890213618134, 20.897835320427202], [20.897835320427202, 27.863780427236268], [27.863780427236268, 34.82972553404534], [34.82972553404534, 41.795670640854404], [41.795670640854404, 48.76161574766347], [48.76161574766347, 55.727560854472536], [55.727560854472536, 62.6935059612816], [62.6935059612816, 69.65945106809068], [69.65945106809068, 76.62539617489973], [76.62539617489973, 83.59134128170881], [83.59134128170881, 90.55728638851787], [90.55728638851787, 97.52323149532694], [97.52323149532694, 104.489176602136], [104.489176602136, 111.45512170894507], [111.45512170894507, 118.42106681575413], [118.42106681575413, 125.3870119225632], [125.3870119225632, 132.3

4.436165106809066
3.978074893190934
3.464335320427203
4.619964679572799
4.509599999999995
2.9139706408544015
4.455429359145597
2.1998999999999995
2.034800000000004
0
3.8496999999999986
0
0
1.9799000000000007
3.8530766021359995
3.900823397864002
4.454999999999984
5.618011922563198
5.215988077436776
5.305902136181345
4.8209451068090345
3.291152757009627
3.4099999999999966
4.675000000000011
3.299999999999983
3.6065727770357228
6.745945106809103
3.6174821161551733
2.75
3.972353204272025
6.360945106809055
3.5257016889189288
3.1899999999999977
6.269133631508254
4.710945106809106
2.8799212616826537
4.893968951935506
4.6760310480644875
3.2450000000000045
4.897804272362691
5.700945106809002
0.4002506208282739
0
0
0.2965298064080457
3.0584701935919725
0
2.226365126835219
4.208634873164783
1.9800000000000182
3.738999999999976
5.287145554071515
6.36194510680906
3.115945106809079
5.308964232310416
3.2939259813077797
5.205945106809054
4.809128911883192
4.227761301734972
5.0112386982650605
4.30165151

0
0
3.2198397930515057
0.7801602069484943
0
2.1600000000003092
1.6199999999998909
0
0
0
0
0
0
0
0
0
0
1.1690163951875547
3.2959451068090857
3.6850384980029958
2.3200000000001637
2.5300000000002
2.5900000000001455
0
0
2.8099999999999454
0
2.6184674632781935
1.1015325367220612
0.41035767689618297
1.5396423231040899
0.9200000000000728
0
0
0
0
0
0
0
0
0
0
2.550000000000182
1.1400000000003274
0.8099999999999454
0
1.2400000000002365
1.5973695994593982
1.2626304005407292
0.18925981307756956
1.9107401869227942
1.9500000000002728
Loaded local_store/6805.230205000826_processed with sample rate 8000
duration:  653.0844444444444
Num pixles:  (192, 28127)
Time per pixle (seconds):  0.02321912910884362
Shape after transpose: (28127, 192)
Generated 93 segments of shape (300, 192).
[[0.0, 6.965738732653086], [6.965738732653086, 13.931477465306171], [13.931477465306171, 20.897216197959256], [20.897216197959256, 27.862954930612343], [27.862954930612343, 34.828693663265426], [34.828693663265426, 41.79443

In [None]:
print("hi\tbye")

In [None]:
fig, ax = plt.subplots()
ax.imshow(np.transpose(np.squeeze(dataset[20])), cmap="viridis")
plt.show()

In [None]:
### BELOW IS THE 2024 SUMMER TEAM WORK ###
# Only run once
#!pip install librosa
#!pip install tensorflow

In [None]:
# Package Imports
import sys
# sys.path.append('/Users/sucheen/anaconda3/lib/python3.11/site-packages')
import pandas as pd
import numpy as np
import librosa
from scipy.io import wavfile
import soundfile as sf
import glob
import os
import boto3
import tensorflow as tf
#from awsKeys import awsKeys #Causing error?
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
s3_client = boto3.client('s3')

In [None]:
## CONSTANTS
# Window size (n_fft) in seconds
WINDOW_SIZE_SEC = 0.15175
# Hop Length in seconds
HOP_LEN_SEC = 0.05
# Decimated .wav files w/ their file path
WAV_FILES = []
# BUCKET NAME
bucket_name = "test-whale-preprocessing"
input_folder = "decimated_wav_files"



#for file in bucket.objects.all():
#    if file.key.split('/')[-2] == 'decimated_files':
#        wav = file.key.split('/')[-1] 
#        path = file.key
#        if wav != '':
#            WAV_FILES.append([wav, path])

In [None]:
objects = s3_client.list_objects_v2(Bucket=bucket_name)

for obj in objects['Contents']:
    print(obj['Key'])