# **Audio Transformer**

In [1]:
# Colab-specific setup: bring in the Google Drive helper from google.colab.
from google.colab import drive

# Mount Google Drive at /content/drive so the project folder stored there is reachable.
drive.mount('/content/drive')

# Project root on the mounted drive.
# NOTE(review): this variable is not referenced again in the visible cells; the %cd
# line below repeats the same path literally, so the two must be kept in sync by hand.
project_path = '/content/drive/My Drive/GitHub/MarineMammalSoundClassification/'
# Switch the working directory to the project root: later cells import from `utils`
# and use relative paths such as 'spectrograms/...' and 'models/Transformer'.
%cd /content/drive/My Drive/GitHub/MarineMammalSoundClassification/

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1oJSL58N419Ve8pd0wCvgXEy52hLM2tJN/MarineMammalSoundClassification


In [2]:
import os
import numpy as np
import pickle
from utils.utilities import ensure_dir
from utils.models_functions import load_data, create_Transformer_model, train_model, evaluate_model, save_model, load_model
from utils.display import display_model_evaluation

In [3]:
def extract_patches(mel_spectrogram, patch_size=(16, 16)):
    """
    Extract non-overlapping patches of the specified size from the input mel-spectrogram.

    The spectrogram is tiled starting at the top-left corner, row by row.
    Trailing rows/columns that do not fill a complete patch are dropped.

    Parameters:
    mel_spectrogram (np.ndarray): The input 2-D mel-spectrogram array (height, width).
    patch_size (tuple): The size of the patches to extract (height, width).

    Returns:
    np.ndarray: An array of shape (num_patches, patch_height, patch_width) holding
                copies of the patches in row-major order. When the spectrogram is
                smaller than a single patch the result has shape
                (0, patch_height, patch_width), so the patch dimensions are
                always present.

    Raises:
    ValueError: If the input is not 2-D or a patch dimension is not positive.
    """
    mel_spectrogram = np.asarray(mel_spectrogram)
    if mel_spectrogram.ndim != 2:
        raise ValueError(
            f"mel_spectrogram must be 2-D (height, width), got shape {mel_spectrogram.shape}"
        )

    patch_h, patch_w = patch_size
    if patch_h <= 0 or patch_w <= 0:
        raise ValueError(f"patch_size entries must be positive, got {patch_size}")

    h, w = mel_spectrogram.shape
    # Number of complete patches that fit along each axis.
    rows, cols = h // patch_h, w // patch_w

    # Drop the ragged bottom/right edge, then split each axis into
    # (patch index, offset inside the patch).
    cropped = mel_spectrogram[:rows * patch_h, :cols * patch_w]
    tiles = cropped.reshape(rows, patch_h, cols, patch_w).swapaxes(1, 2)

    # np.array(..., order='C') always copies, so the result never aliases the
    # caller's spectrogram; the final reshape is then a cheap view of that copy.
    return np.array(tiles, order='C').reshape(rows * cols, patch_h, patch_w)

def preprocess_data(trainX, patch_size=(16, 16)):
    """
    Preprocess the input mel-spectrograms by extracting patches and reshaping them for transformer input.

    Parameters:
    trainX (np.ndarray): Array (or sequence) of mel-spectrograms with shape
                         (num_samples, mel_height, mel_width). All spectrograms
                         must share the same shape.
    patch_size (tuple): The size of the patches to extract (height, width).
                        Defaults to (16, 16).

    Returns:
    tuple: A tuple containing:
        - np.ndarray: Preprocessed array of mel-spectrogram patches with shape
                      (num_samples, num_patches, patch_size_flat).
        - int: Number of patches extracted from each mel-spectrogram.
        - int: Flattened patch size.

    Raises:
    ValueError: If trainX contains no spectrograms (the number of patches per
                sample cannot be determined), or if the spectrograms yield
                differently shaped patch arrays.
    """
    if len(trainX) == 0:
        raise ValueError(
            "preprocess_data received no spectrograms; "
            "cannot determine the number of patches per sample."
        )

    # np.stack refuses ragged inputs outright instead of silently producing
    # an object array, so shape problems surface here rather than downstream.
    all_patches = np.stack(
        [extract_patches(mel_spectrogram, patch_size) for mel_spectrogram in trainX]
    )

    num_samples, num_patches = all_patches.shape[:2]
    patch_size_flat = patch_size[0] * patch_size[1]

    # Flatten each patch into a single vector for the transformer input.
    all_patches = all_patches.reshape(num_samples, num_patches, patch_size_flat)

    return all_patches, num_patches, patch_size_flat

In [8]:
def process_pipeline(model_name, specs_or_mels, seg_secs, epochs=40, batch_size=64, patience=5):
    """
    Run the whole Transformer workflow: load the data splits, train, save, reload, evaluate and display.

    Args:
    model_name (str): Base file name (without extension) of the model saved under 'models/Transformer'.
    specs_or_mels (str): Input-type tag inserted into the pickle file names (e.g. 'mels').
    seg_secs: Segment duration. It is interpolated verbatim into the
        'spectrograms/<seg_secs>_secs' directory name, so it must be formatted
        exactly like the directory on disk (e.g. '5.0').
    epochs (int, optional): The number of epochs passed to training. Defaults to 40.
    batch_size (int, optional): The batch size passed to training. Defaults to 64.
    patience (int, optional): The patience value passed to training. Defaults to 5.

    Returns:
    None
    """
    model_dir = 'models/Transformer'

    # --- Training split -------------------------------------------------
    train_specs_file = f'spectrograms/{seg_secs}_secs/train_{specs_or_mels}.pkl'
    train_labels_file = f'spectrograms/{seg_secs}_secs/train_classes.pkl'
    trainX, trainY, _ = load_data(train_specs_file, train_labels_file, True)
    train_patches, num_patches, patch_size_flat = preprocess_data(trainX)

    # --- Validation split -----------------------------------------------
    val_specs_file = f'spectrograms/{seg_secs}_secs/val_{specs_or_mels}.pkl'
    val_labels_file = f'spectrograms/{seg_secs}_secs/val_classes.pkl'
    valX, valY, _ = load_data(val_specs_file, val_labels_file, True)
    val_patches = preprocess_data(valX)[0]

    # --- Build and train --------------------------------------------------
    # The number of classes is taken from the length of the first label row.
    model = create_Transformer_model((num_patches, patch_size_flat), len(trainY[0]))
    history = train_model(model, train_patches, trainY, val_patches, valY, epochs, batch_size, patience)

    # --- Persist, then evaluate the reloaded copy -------------------------
    ensure_dir(model_dir)
    model_path = os.path.join(model_dir, model_name + '.keras')
    save_model(model, model_path)
    loaded_model = load_model(model_path)

    # --- Test split -------------------------------------------------------
    test_specs_file = f'spectrograms/{seg_secs}_secs/test_{specs_or_mels}.pkl'
    test_labels_file = f'spectrograms/{seg_secs}_secs/test_classes.pkl'

    # NOTE(review): pickle.load executes arbitrary code on untrusted files;
    # only point this at pickles produced by this project.
    with open(f'spectrograms/{seg_secs}_secs/test_segments.pkl', 'rb') as segments_file:
        testSegments = np.array(pickle.load(segments_file))

    testX, testY, encoder = load_data(test_specs_file, test_labels_file, True)
    test_patches = preprocess_data(testX)[0]

    (conf_matrix, accuracy, f1,
     majority_conf_matrix, majority_accuracy, majority_f1) = evaluate_model(
        loaded_model, test_patches, testY, testSegments
    )
    labels = list(encoder.classes_)

    # --- Report -----------------------------------------------------------
    run_description = f'input_type = {specs_or_mels} | seg_duration = {seg_secs}secs | epochs = {epochs} | batch_size = {batch_size} | patience = {patience}'
    display_model_evaluation(
        'Transformer', run_description, history.history,
        accuracy, f1, conf_matrix, labels, True,
        majority_conf_matrix, majority_accuracy, majority_f1,
    )

In [9]:
# Train and evaluate on 5.0-second mel-spectrogram segments.
process_pipeline(
    model_name='mels_5.0_80_16_20',
    specs_or_mels='mels',
    seg_secs='5.0',
    epochs=80,
    batch_size=16,
    patience=20,
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80

KeyboardInterrupt: 