In [1]:
%pip install scikit-learn tensorflow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import librosa
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

2024-07-31 10:16:25.699173: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-31 10:16:25.716171: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 10:16:25.735122: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 10:16:25.740320: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-31 10:16:25.756131: I tensorflow/core/platform/cpu_feature_guar

In [3]:
# Enable GPU memory growth on every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no GPUs the loop body simply never runs.
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # NOTE(review): presumably raised when a GPU was already initialised
    # before this cell ran - confirm against the TensorFlow GPU guide.
    print(err)

# Your other imports and code follow
from tensorflow.keras import layers, models, utils

In [7]:
# DATA PREP
# Work from inside ``data``: the CSV files and the ``wav/`` folder read by the
# following cells are addressed relative to the current directory.
# The guard keeps the cell re-runnable (a second chdir would look for data/data).
if os.path.basename(os.getcwd()) != 'data':
    os.chdir('data')

# exist_ok=True: do not fail when the output folders are already there.
os.makedirs('orca_spectrogram', exist_ok=True)
os.makedirs('noise_spectrogram', exist_ok=True)

In [8]:
# Read the orca and noise CSV tables, then display the size of the noise table.
orca_df, noise_df = (
    pd.read_csv(csv_name) for csv_name in ("orca_data.csv", "noise_data.csv")
)
noise_df.shape

(2216, 3)

In [9]:
# Keep only 1/21st of each table (~100 rows of the 2216-row noise table).
# A fixed random_state makes the subsample - and therefore the <index>.npy
# file names written by the spectrogram cells - identical on every run.
# NOTE: the tables are overwritten in place, so re-running this cell without
# reloading the CSVs subsamples the already-reduced tables again.
SAMPLE_DIVISOR = 21
SAMPLE_SEED = 42
num_orca_rows = len(orca_df)
num_noise_rows = len(noise_df)
orca_df = orca_df.sample(n=num_orca_rows // SAMPLE_DIVISOR, random_state=SAMPLE_SEED)
noise_df = noise_df.sample(n=num_noise_rows // SAMPLE_DIVISOR, random_state=SAMPLE_SEED)

In [10]:
# Create orca audio chunks, create and save a spectrogram for each
dir_path = '/home/ajm76/OrcaSeis/data'
orca_out_dir = os.path.join(dir_path, 'orca_spectrogram')
os.makedirs(orca_out_dir, exist_ok=True)  # make sure the save target exists

# Group the rows by recording so each wav file is decoded once instead of
# once per row that refers to it. sort=False keeps first-appearance order.
for filename, file_rows in orca_df.groupby('wav_filename', sort=False):
    # Load the original wav file (same librosa.load defaults as before)
    y, sr = librosa.load('wav/' + filename)

    for index, row in file_rows.iterrows():
        # Convert start and end times (seconds) to sample indices
        start_sample = int(row['start_time_s'] * sr)
        end_sample = int(row['end_time_s'] * sr)

        # Extract the chunk
        chunk = y[start_sample:end_sample]
        if chunk.size == 0:
            # An empty slice cannot be turned into a spectrogram; report and move on
            print(f"Skipping row {index}: empty audio chunk in {filename}")
            continue

        # Generate a Short-Time Fourier Transform (STFT) spectrogram
        D = librosa.stft(chunk)

        # Convert amplitude spectrogram to dB, relative to this chunk's own peak
        S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

        # Save the spectrogram as a NumPy array named after the row's index label
        np.save(os.path.join(orca_out_dir, str(index) + '.npy'), S_db)

In [11]:
# Create noise audio chunks, create and save a spectrogram for each
# NOTE(review): this mirrors the orca spectrogram cell step for step; a shared
# helper taking (dataframe, output folder) would remove the duplication.
dir_path = '/home/ajm76/OrcaSeis/data'
noise_out_dir = os.path.join(dir_path, 'noise_spectrogram')
os.makedirs(noise_out_dir, exist_ok=True)  # make sure the save target exists

# Group the rows by recording so each wav file is decoded once instead of
# once per row that refers to it. sort=False keeps first-appearance order.
for filename, file_rows in noise_df.groupby('wav_filename', sort=False):
    # Load the original wav file (same librosa.load defaults as before)
    y, sr = librosa.load('wav/' + filename)

    for index, row in file_rows.iterrows():
        # Convert start and end times (seconds) to sample indices
        start_sample = int(row['start_time_s'] * sr)
        end_sample = int(row['end_time_s'] * sr)

        # Extract the chunk
        chunk = y[start_sample:end_sample]
        if chunk.size == 0:
            # An empty slice cannot be turned into a spectrogram; report and move on
            print(f"Skipping row {index}: empty audio chunk in {filename}")
            continue

        # Generate a Short-Time Fourier Transform (STFT) spectrogram
        D = librosa.stft(chunk)

        # Convert amplitude spectrogram to dB, relative to this chunk's own peak
        S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

        # Save the spectrogram as a NumPy array named after the row's index label
        np.save(os.path.join(noise_out_dir, str(index) + '.npy'), S_db)



In [12]:
# ML
# Class names in label order (index 0 = noise, index 1 = orca call).
categories = ['noise', 'orca_call']
# Folder that holds the saved spectrogram sub-directories.
input_dir = "/home/ajm76/OrcaSeis/data"

In [13]:
# Keep every DOWNSAMPLE_STRIDE-th row and column of each spectrogram,
# i.e. downsample both axes by a factor of 100.
DOWNSAMPLE_STRIDE = 100

data = []
labels = []
for category_idx, category in enumerate(categories):
    # Folder the spectrograms of this category were saved in
    cat_path = 'noise_spectrogram' if category == 'noise' else 'orca_spectrogram'
    cat_dir = os.path.join(input_dir, cat_path)
    # sorted() gives a stable load order across runs and machines
    for file in sorted(os.listdir(cat_dir)):
        if not file.endswith('.npy'):
            # Skip stray entries (e.g. a .ipynb_checkpoints folder) that np.load cannot read
            continue
        spectrogram = np.load(os.path.join(cat_dir, file))
        spectrogram = spectrogram[::DOWNSAMPLE_STRIDE, ::DOWNSAMPLE_STRIDE]
        data.append(spectrogram)
        # The label is the category's position in `categories`
        labels.append(category_idx)

In [14]:
# Convert all elements to NumPy arrays if they aren't already
data = [np.asarray(spectrogram) for spectrogram in data]

# Determine the maximum size along each dimension across all spectrograms
max_shape = tuple(max(dim) for dim in zip(*[spectrogram.shape for spectrogram in data]))

# The spectrograms were saved in dB relative to their own peak (ref=np.max),
# so 0 dB is the loudest value a spectrogram can hold. Padding with 0 would
# therefore mark the padded region as maximum energy. Pad with the quiet end
# of the scale instead: -80 dB, the floor produced by librosa.amplitude_to_db
# with its default top_db of 80.
PAD_DB = -80.0

# Initialize a list to store the padded spectrograms
standardized_data = []

for spectrogram in data:
    # Pad only at the end of each axis, up to the maximum shape
    pad_widths = [(0, max_size - size) for size, max_size in zip(spectrogram.shape, max_shape)]
    padded_spectrogram = np.pad(spectrogram, pad_widths, mode='constant', constant_values=PAD_DB)
    standardized_data.append(padded_spectrogram)

# Convert to a single NumPy array
data = np.array(standardized_data)

print(data.shape)

(2241, 11, 741)


In [15]:
# Add a trailing channel dimension only if it is not already present, so that
# re-running this cell does not keep appending axes.
if data.ndim == 3:
    data = data[..., np.newaxis]

In [16]:
# Make sure both the features and the labels are NumPy arrays
data, labels = np.asarray(data), np.asarray(labels)

In [17]:
# Train/validation split: 20% held out, stratified so both sets keep the class
# proportions. random_state pins the shuffle so results are comparable between runs.
SPLIT_SEED = 42
x_train, x_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=SPLIT_SEED,
)
# One class per entry in `categories` (labels are indices into that list)
num_classes = len(categories)

In [18]:
# Shape of a single training sample: every axis of x_train after the first
input_shape = tuple(x_train.shape[1:])
print(input_shape)

(11, 741, 1)


In [19]:
# Ask TensorFlow to use the asynchronous CUDA allocator for GPU memory.
# NOTE(review): presumably this has to be set before TensorFlow first creates
# the GPU device (which happens when the model is built below) - confirm.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

In [20]:
# Small CNN classifier: two Conv2D + MaxPooling2D stages, then a dense head
# with dropout. The softmax output has one unit per class, taken from
# num_classes instead of a hard-coded 2 so it follows the label set.
model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv2D(8, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(16, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

2024-07-31 10:17:47.561561: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:198] Using CUDA malloc Async allocator for GPU: 0
2024-07-31 10:17:47.561966: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22461 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:3b:00.0, compute capability: 8.6


In [21]:
# Compile the model. The sparse loss takes the integer class labels directly,
# so no one-hot encoding of the labels is needed.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Display the model architecture
model.summary()

In [None]:
# Train for 10 epochs in batches of 16, evaluating on the validation split after each epoch
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=16,
    epochs=10,
)

In [None]:
# Final training and validation metrics (this cell is currently commented out)
# final_train_loss = history.history['loss'][-1]
# final_train_accuracy = history.history['accuracy'][-1]
# final_val_loss = history.history['val_loss'][-1]
# final_val_accuracy = history.history['val_accuracy'][-1]

# print(f"Final Training Loss: {final_train_loss}")
# print(f"Final Training Accuracy: {final_train_accuracy}")
# print(f"Final Validation Loss: {final_val_loss}")
# print(f"Final Validation Accuracy: {final_val_accuracy}")