# This notebook allows the Bio Team to take a WAV file from the "fullsize_files/" folder in the professor's S3 Bucket, decimate it, and obtain the decimated WAV file. This decimated WAV file can then be uploaded to the "decimated_files/" folder in the S3 Bucket.

### The Data Science Team is also welcome to use this notebook, but their involvement should be discussed with the professor and the Bio Team beforehand.

# Between the [] brackets, type the name of the WAV file you want predictions for (make sure you put quotes around the name).

In [1]:
"""
EXAMPLE:
wav_fname = ["671658014.181008033412"]
"""
wav_fname = ["6805.230205000826"]

# Now you can click "Run" -> "Run All" in the menu and then scroll to the bottom of this notebook.

## Feel free to ignore everything else, the code will work as-intended.

In [2]:
# Import Statement
import boto3

In [3]:
# Creates a connection to bucket
s3 = boto3.client('s3')
# NOTE: Change the following name of the S3 Bucket (in parentheses) to reflect the name of the S3 Bucket for your current AWS account.
#bucket = s3.Bucket('test-whale-preprocessing')

In [4]:
# Import Statements
from scipy.io import wavfile
from scipy import signal
import warnings
import numpy as np
import os
from os import path
from tqdm import tqdm

In [5]:
def process_wav_file(fpath, rate=12, norm=0.5):
    """
    Decimates and normalizes a WAV file to the range [-norm, norm].
    
    PARAMETERS
    ----------
    fpath : str
        path to the .wav file to process (e.g., 'fullsize_wav_files/671658014.181008033412.wav')
    rate : int, optional (default: 12)
        decimation rate (by default reduces samples by a factor of 12)
    norm : float, optional (default: 0.5)
        absolute value of the minimum and maximum sample
    ----------
    
    RETURNS
    ----------
    sr : int
        new sample rate after decimation
    data : np.ndarray
        array of processed data
    ----------
    """
    print("Yellow")
    # Reads information from WAV file
    sr, data = wavfile.read(fpath)
    print("Bellow")
    # Decimates WAV file
    data = signal.decimate(data, rate)
    print("NOOOOO")
    # Normalizes WAV file
    #data = data.astype(float)
    #data = data - data.min()
    #data = (data / data.max() * 2.0) - 1.0
    #data = data * norm
    sr = sr // rate
        
    return sr, data


def process_directory_wav_files(
        wav_fname,
        bucket_name,
        s3_input_directory,
        local_input_directory,
        s3_output_directory,
        local_output_directory,
        rate=12,
        norm=0.5,
        dtype=np.int16,
        show_progress=True):
    """
    Decimates and normalizes wav_fname (located in input_directory) then saves to output_directory.
    
    PARAMETERS
    ----------
    wav_fname: list of strings
        The name of the WAV file(s) to download from the S3 Bucket for decimation (without the ".wav" portion)
        (Technically, you are allowed to specify multiple wav file names in the list. 
            However, I strongly recommend only specifying one due to the time investment and added complexity.)
    bucket_name: str
        The name of the s3 bucket holding the .wav files
    s3_input_directory : str
        path to the input directory containing .wav files within the s3 bucket
    local_input_directory : str
        path to the local (notebook) input directory where you want to store the .wav files during processing
    s3_output_directory : str
        path to the output directory to save processed .wav files within the s3 bucket
    local_output_directory : str
        path to the local (notebook) output directory to save processed .wav files
    rate : int, optional (default: 12)
        decimation rate (by default reduces samples by a factor of 12)
    norm : float, optional (default: 0.5)
        absolute value of the minimum and maximum sample
    dtype : integer data type, optional (default: np.int16)
        integer data type to convert wav samples to
    show_progress : bool, optional (default: True)
        flag to control whether progress bar is shown or hidden
    ----------
        
    RETURNS
    ----------
        N/A
    ----------
    """
    # Ensures that "norm" value is valid
    if norm < 0.0 or norm > 1.0:
        new_norm = np.clip(norm, 0.0, 1.0)
        warnings.warn(
            "({}) Norm must be between 0.0 and 1.0, not {:g}. " \
            "Clipping to {:g}.".format(
                "process_directory_wav_files",
                norm,
                new_norm)
        )
        norm = new_norm

    # Downloads WAV file(s) from S3 Bucket
    for fname in wav_fname:
        file_name = f"{fname}.wav"
        s3_path = path.join(s3_input_directory, file_name)
        notebook_path = path.join(local_input_directory, file_name)
        s3.download_file(bucket_name, s3_path, notebook_path)

    # Makes list of all WAV files in "input_directory"
    fnames = [
        fname for fname in os.listdir(local_input_directory) if fname.endswith(".wav")
    ]
    # Prepares progress bar if show_progress=True
    file_iter = tqdm(fnames) if show_progress else fnames
    # Calls process_wav_file() to decimate the WAV file(s) and saves to output_directory
    for fname in file_iter:
        fpath = path.join(local_input_directory, fname)
        sr, data = process_wav_file(fpath, rate=rate, norm=norm)
        data = (data * np.iinfo(dtype).max).astype(dtype)
        # Data now spans half of the dtype's span and is 0-centered.
        out_fname = "{}_processed.wav".format(path.splitext(fname)[0])
        wavfile.write(path.join(local_output_directory, out_fname), sr, data)
        
        # ! I'm noticing that this seems to just output to the notebook workspace, we could try and upload the results directly to the s3 bucket


def finish():
    """Tells the Bio Team that decimation is complete."""
    print("Done!")

In [None]:
bucket_name = "test-whale-preprocessing"
process_directory_wav_files(wav_fname = wav_fname, bucket_name = bucket_name, s3_input_directory = "fullsize_wav_files", local_input_directory = "fullsize_wav_files", s3_output_directory = "", local_output_directory = "decimated_wav_files")
finish()

  0%|          | 0/1 [00:00<?, ?it/s]

Yellow
Bellow


In [None]:
s3 = boto3.client('s3')
s3.download_file('test-whale-preprocessing', 'fullsize_wav_files/6805.230205000826.wav', '6805.230205000826.wav')

In [None]:
s3.list_buckets()

In [None]:
s3.list_objects_v2(Bucket = 'test-whale-preprocessing')

In [None]:
os.listdir("fullsize_wav_files")