--------------------------------------------
# Processing daily WAV file data (MBARI)
--------------------------------------------

Download raw daily WAV files from the MBARI AWS server, calculate the Long-Term Spectral Average (LTSA) for each file, and save the LTSAs locally.

--------------------------------------------

In [1]:
#Libraries
import boto3, botocore
from botocore import UNSIGNED
from botocore.client import Config
from six.moves.urllib.request import urlopen
import io
import scipy
from scipy import signal
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import os

In [2]:
# Build the S3 client used for listing bucket contents.
# signature_version=UNSIGNED turns off request signing, so the blank
# key pair below is only a placeholder.
unsigned_config = Config(signature_version=UNSIGNED)
s3 = boto3.client(
    's3',
    aws_access_key_id='',
    aws_secret_access_key='',
    config=unsigned_config,
)

### Listing All Files Within a Given Date Range

In [3]:
# Date-range configuration: which bucket to index and the inclusive
# start / end (year, month) of the period of interest.

bucket = 'pacific-sound-2khz'

# Start of the range (inclusive)
year1 = "2018"
month1 = "01"

# End of the range (inclusive)
year2 = '2021'
month2 = '10'

# Expand the range into a chronological list of (year, zero-padded month)
# string pairs. The first year starts at month1, the last year stops at
# month2, and every year in between contributes all twelve months.
dates = []
for yr in range(int(year1), int(year2) + 1):
    first_month = int(month1) if str(yr) == year1 else 1
    last_month = int(month2) if str(yr) == year2 else 12
    dates.extend(
        (str(yr), f'{mon:02d}') for mon in range(first_month, last_month + 1)
    )
        
#----------------------------------------------------------------------------------------
requested_files = []  # Keys of every object found under the requested year/month prefixes

# A single list_objects_v2 call returns at most 1000 keys, and its response
# has no 'Contents' entry when a prefix matches nothing. Paginating and
# defaulting to an empty list means busy months are not truncated and
# empty months do not raise KeyError.
paginator = s3.get_paginator('list_objects_v2')
for year, month in dates:
    for page in paginator.paginate(Bucket=bucket, Prefix=f'{year}/{month}'):
        requested_files.extend(obj['Key'] for obj in page.get('Contents', []))

print("Indexed {} files within {}/{} and {}/{}".format(len(requested_files), month1, year1, month2, year2))
#----------------------------------------------------------------------------------------

Indexed 1367 files within 01/2018 and 10/2021


### Loading Listed Files, Calculating LTSAs, Saving LTSAs Locally

In [7]:
saving_path = r'./Downloaded Spectral Data '

START_INDEX = 1000        # Offset into requested_files at which this run starts
VOLTS_PER_UNIT = 3        # Converts the scaled voltage returned by soundfile to volts
SECONDS_PER_SEGMENT = 60  # Each LTSA column summarises one minute of audio
DB_OFFSET = 177.9         # Added to 10*log10(PSD); presumably the hydrophone sensitivity correction -- TODO confirm


def compute_ltsa(volts, sample_rate, seconds_per_segment=SECONDS_PER_SEGMENT, db_offset=DB_OFFSET):
    """Compute a long-term spectral average (LTSA) from a voltage time series.

    The signal is cut into consecutive segments of ``seconds_per_segment``
    seconds. Each segment's power spectral density is estimated with Welch's
    method, using a Hann window of ``sample_rate`` samples and
    ``nfft=sample_rate``, then converted to decibels and shifted by
    ``db_offset``. Samples after the last complete segment are discarded.

    Parameters
    ----------
    volts : numpy.ndarray
        Voltage samples (assumed to be single-channel -- TODO confirm).
    sample_rate : int
        Sampling rate in Hz.
    seconds_per_segment : int
        Length of each averaged segment in seconds.
    db_offset : float
        Constant added to ``10*log10(psd)``.

    Returns
    -------
    freqs : numpy.ndarray or None
        Frequency axis returned by ``scipy.signal.welch``; ``None`` when the
        signal is shorter than one segment.
    ltsa : numpy.ndarray
        Array of shape ``(sample_rate // 2 + 1, number_of_segments)``.
    """
    samples_per_segment = seconds_per_segment * sample_rate
    nseg = volts.size // samples_per_segment  # number of complete segments
    nfreq = sample_rate // 2 + 1              # one-sided bin count for nfft == sample_rate
    ltsa = np.empty((nfreq, nseg), float)
    window = scipy.signal.get_window('hann', sample_rate)
    freqs = None
    for seg in range(nseg):
        start = seg * samples_per_segment
        freqs, psd = scipy.signal.welch(volts[start:start + samples_per_segment],
                                        fs=sample_rate, window=window, nfft=sample_rate)
        ltsa[:, seg] = 10 * np.log10(psd) + db_offset
    return freqs, ltsa


# np.save does not create missing directories, so make sure the target exists.
os.makedirs(saving_path, exist_ok=True)

for filename in tqdm(requested_files[START_INDEX:]):
    # Skip non-audio objects before trying to parse a day out of their name.
    if not filename.endswith('.wav'):
        print('Ignored {}'.format(filename))
        continue

    year, month = filename.split('/')[:2]
    day = filename.split('/')[2][11:13]  # characters 11-12 of the file name hold the day
    path = os.path.join(saving_path, year+'-'+month+'-'+day+'.npy')
    if os.path.isfile(path):
        continue  # already processed in an earlier run

    url = f'https://{bucket}.s3.amazonaws.com/{filename}'  # Create URL from the file path (key)
    with urlopen(url) as response:  # close the HTTP connection once the bytes are read
        raw, sample_rate = sf.read(io.BytesIO(response.read()), dtype='float32')

    v = raw * VOLTS_PER_UNIT  # convert scaled voltage to volts
    del raw                   # release the unscaled copy before the spectral computation

    f, LTSA = compute_ltsa(v, sample_rate)

    np.save(path, LTSA)
    print('Downloaded {}-{}-{}'.format(year, month, day))
    
#8/2

  0%|          | 0/367 [00:00<?, ?it/s]

Ignored 2020/10/_PAM_Archive___2020_10_wav_mcache.hdf
Ignored 2020/11/_PAM_Archive___2020_11_wav_mcache.hdf
Downloaded 2021-08-14
Downloaded 2021-08-15
Downloaded 2021-08-16
Downloaded 2021-08-17
Downloaded 2021-08-18
Downloaded 2021-08-19




Downloaded 2021-08-20
Downloaded 2021-08-21
Downloaded 2021-08-22
Downloaded 2021-08-23
Downloaded 2021-08-24
Downloaded 2021-08-25
Downloaded 2021-08-26
Downloaded 2021-08-27
Downloaded 2021-08-28
Downloaded 2021-08-29
Downloaded 2021-08-30
Downloaded 2021-08-31
Downloaded 2021-09-01
Downloaded 2021-09-02
Downloaded 2021-09-03
Downloaded 2021-09-04
Downloaded 2021-09-05
Downloaded 2021-09-06
Downloaded 2021-09-07
Downloaded 2021-09-08
Downloaded 2021-09-09
Downloaded 2021-09-10
Downloaded 2021-09-11
Downloaded 2021-09-12
Downloaded 2021-09-13
Downloaded 2021-09-14
Downloaded 2021-09-15
Downloaded 2021-09-16
Downloaded 2021-09-17
Downloaded 2021-09-18
Downloaded 2021-09-20
Downloaded 2021-09-21
Downloaded 2021-09-22
Downloaded 2021-09-23
Downloaded 2021-09-24
Downloaded 2021-09-25
Downloaded 2021-09-26
Downloaded 2021-09-27
Downloaded 2021-09-28
Downloaded 2021-09-29
Downloaded 2021-09-30
Downloaded 2021-10-01
Downloaded 2021-10-02
Downloaded 2021-10-03
Downloaded 2021-10-04
Downloaded