# **AudioFuse**

This notebook contains the preprocessing pipeline for the PASCAL heart sounds dataset, which is used as a test set for our proposed **AudioFuse** model and its baseline models. The models themselves are pretrained on the PhysioNet 2016 Challenge dataset (heart sound classification / abnormality detection). Here, each audio file is converted to a mel spectrogram and a wavelet scalogram, which are stacked into a 2-channel `.npy` image.

In [None]:
# ===================================================================
# SCRIPT FOR PRE-PROCESSING THE PASCAL HEART SOUNDS DATASET
# ===================================================================

import os
import zipfile
import glob
import pandas as pd
import numpy as np
import librosa
import pywt
import cv2
import shutil
from tqdm.notebook import tqdm

# Register tqdm with pandas so that Series/DataFrame objects gain the
# .progress_apply() method; it is used further down to show a progress bar
# while every audio file is processed.
tqdm.pandas()

# --- 1. Configuration ---
# Use the same config as PhysioNet 2016 for consistency
class Config:
    """Feature-extraction settings shared by every processing step in this notebook.

    The values are meant to mirror the PhysioNet 2016 preprocessing so that both
    datasets yield comparable model inputs.
    """
    SAMPLE_RATE = 22050  # Hz; every file is resampled to this rate when loaded
    SIGNAL_LENGTH_SECONDS = 5  # clips are cropped or zero-padded to this duration
    N_MELS = 224  # mel bands of the spectrogram; also reused as the number of CWT scales
    N_FFT = 2048  # FFT window size for the mel spectrogram
    HOP_LENGTH = 512  # hop between successive spectrogram frames, in samples
    WAVELET = 'morl'  # wavelet name handed to pywt.cwt (Morlet)
    IMG_SIZE = 224  # spectrogram and scalogram are both resized to IMG_SIZE x IMG_SIZE

# Single shared instance read by the processing functions below
CONFIG = Config()

# --- 2. Defining Paths ---
DRIVE_FOLDER = "/content/drive/MyDrive/Multimodal Audio Fusion"
ZIP_PATH = os.path.join(DRIVE_FOLDER, "Pascal Heart Sounds.zip")
EXTRACT_PATH = "/content/PASCAL_extracted/"
LOCAL_OUTPUT_NPY_DIR = "/content/PASCAL_processed/"

# Fail fast, *before* anything is deleted, when the archive is unreachable
# (typically because Google Drive has not been mounted in this session).
# Without this check the cleanup below would wipe earlier outputs and the
# cell would only crash afterwards, at the unzip step.
if not os.path.isfile(ZIP_PATH):
    raise FileNotFoundError(
        f"PASCAL archive not found at {ZIP_PATH!r}. "
        "Mount Google Drive and check DRIVE_FOLDER before running this cell."
    )

# --- Pre-run Cleanup ---
# Remove leftovers from previous runs so stale files never leak into the new
# output, then recreate both working directories empty.
for work_dir in (EXTRACT_PATH, LOCAL_OUTPUT_NPY_DIR):
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir, exist_ok=True)

# --- 3. Unzipping the dataset ---
print("Unzipping PASCAL dataset...")
with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall(EXTRACT_PATH)
print("Unzipping complete.")

# --- 4. Discovering Files, Filtering, and Creating Labels ---
print("Discovering audio files, filtering unlabeled, and mapping labels...")

# Finding all .wav files in the extracted directory.
# The list is sorted because glob returns paths in arbitrary, filesystem-dependent
# order; sorting makes the row order of the metadata reproducible across runs.
all_files = sorted(glob.glob(os.path.join(EXTRACT_PATH, "**/*.wav"), recursive=True))

# --- Filtering out the unlabeled test files ---
labeled_files = [f for f in all_files if "unlabelledtest" not in os.path.basename(f).lower()]
print(f"Found {len(all_files)} total files, keeping {len(labeled_files)} labeled files.")

# --- Parsing filenames to get original labels and mapping to binary labels ---
# The class name is the FIRST underscore-separated part of the filename,
# e.g. 'murmur__<recording id>.wav' -> 'murmur'.
NORMAL_LABEL = 'normal'
# Every non-normal PASCAL class is folded into 'Abnormal' to match the
# PhysioNet binary system.
ABNORMAL_LABELS = {'murmur', 'extrastole', 'extrahls', 'artifact'}

records = []             # (filepath, original_label, binary_label) per kept file
unrecognized_files = []  # files whose name does not start with a known class

for f in labeled_files:
    original_label = os.path.basename(f).split('_')[0]
    # Compared case-insensitively, consistent with the unlabeled-file filter above
    label_key = original_label.lower()

    if label_key == NORMAL_LABEL:
        binary_label = 0 # Normal
    elif label_key in ABNORMAL_LABELS:
        binary_label = 1 # Abnormal
    else:
        # Do not guess: a stray or oddly named .wav must not be silently
        # counted as 'Abnormal' in the evaluation set.
        unrecognized_files.append(f)
        continue

    records.append((f, original_label, binary_label))

if unrecognized_files:
    print(f"WARNING: skipping {len(unrecognized_files)} file(s) whose name does not start with a known class label:")
    for skipped in unrecognized_files[:5]:
        print(f"  {skipped}")

# Explicit column names keep the frame well-formed even when no file was found
data_df = pd.DataFrame(records, columns=['filepath', 'original_label', 'label'])

print("\nLabel distribution in the PASCAL test set:")
print(data_df['original_label'].value_counts())
print("\nMapped binary label distribution:")
print(data_df['label'].value_counts())


# --- 5. Core Processing Functions ---
def get_spectrogram(waveform, sr):
    """Return the mel spectrogram of ``waveform`` on a decibel scale.

    The mel power spectrogram is computed with the FFT size, hop length and
    number of mel bands taken from ``CONFIG``; it is then converted to dB using
    its own maximum as the reference.

    Args:
        waveform: 1-D audio signal.
        sr: Sampling rate of ``waveform`` in Hz.

    Returns:
        2-D array of dB values with one row per mel band.
    """
    mel_params = dict(
        n_fft=CONFIG.N_FFT,
        hop_length=CONFIG.HOP_LENGTH,
        n_mels=CONFIG.N_MELS,
    )
    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, **mel_params)
    db_spec = librosa.power_to_db(power_spec, ref=np.max)
    return db_spec

def get_scalogram(waveform):
    """Return the log-compressed magnitude of the waveform's continuous wavelet transform.

    The transform uses the wavelet named in ``CONFIG.WAVELET`` and the integer
    scales 1..``CONFIG.N_MELS``, so the result has as many rows as the mel
    spectrogram has bands.

    Args:
        waveform: 1-D audio signal.

    Returns:
        2-D array ``log(1 + |coefficients|)`` with one row per scale.
    """
    n_scales = CONFIG.N_MELS
    widths = np.arange(n_scales) + 1
    # pywt.cwt returns (coefficients, frequencies); only the coefficients are needed
    cwt_coeffs = pywt.cwt(waveform, widths, CONFIG.WAVELET)[0]
    magnitude = np.abs(cwt_coeffs)
    return np.log1p(magnitude)

def _resize_and_normalize(image):
    """Resize a 2-D array to IMG_SIZE x IMG_SIZE and min-max scale it to roughly [0, 1]."""
    resized = cv2.resize(image, (CONFIG.IMG_SIZE, CONFIG.IMG_SIZE))
    lowest, highest = resized.min(), resized.max()
    # The epsilon keeps a constant image (highest == lowest) from dividing by zero
    return (resized - lowest) / (highest - lowest + 1e-6)


def process_and_save_pascal(filepath):
    """Convert one audio file into a 2-channel image and save it as a .npy file.

    The audio is loaded as mono at ``CONFIG.SAMPLE_RATE``, cropped or zero-padded
    to ``CONFIG.SIGNAL_LENGTH_SECONDS``, and turned into a mel spectrogram and a
    wavelet scalogram. Each is resized and min-max normalized, then the two are
    stacked on the last axis (channel 0 = spectrogram, channel 1 = scalogram)
    and written as float32 to ``LOCAL_OUTPUT_NPY_DIR``.

    Args:
        filepath: Path to the source audio file.

    Returns:
        Path of the saved ``.npy`` file, or ``None`` when processing failed.
        Failures are printed rather than raised so that one bad file does not
        abort the whole batch.
    """
    max_length = int(CONFIG.SIGNAL_LENGTH_SECONDS * CONFIG.SAMPLE_RATE)

    try:
        waveform, _ = librosa.load(filepath, sr=CONFIG.SAMPLE_RATE, mono=True)

        # Force a fixed duration: crop long clips, zero-pad short ones at the end
        if len(waveform) > max_length:
            waveform = waveform[:max_length]
        else:
            waveform = np.pad(waveform, (0, max_length - len(waveform)), 'constant')

        spec_norm = _resize_and_normalize(get_spectrogram(waveform, CONFIG.SAMPLE_RATE))
        scalo_norm = _resize_and_normalize(get_scalogram(waveform))

        fused_image = np.stack([spec_norm, scalo_norm], axis=-1)

        # Output name mirrors the input name: '<name>.wav' -> '<name>.npy'
        base_filename = os.path.splitext(os.path.basename(filepath))[0]
        save_path = os.path.join(LOCAL_OUTPUT_NPY_DIR, f"{base_filename}.npy")
        np.save(save_path, fused_image.astype(np.float32))
        return save_path
    except Exception as e:
        # Deliberate best-effort: report the file and let the caller drop it
        print(f"Error processing {filepath}: {e}")
        return None

# --- 6. Executing Pre-processing ---
print("\nStarting PASCAL pre-computation (saving locally)...")
data_df['npy_filepath'] = data_df['filepath'].progress_apply(process_and_save_pascal)
print("Local pre-computation complete!")

# --- 7. Saving Final Metadata and Zipping the Results ---
# process_and_save_pascal returns None for files that failed; drop those rows
cleaned_df = data_df.dropna(subset=['npy_filepath'])
print(f"\nSuccessfully processed {len(cleaned_df)} out of {len(data_df)} files.")

# Stop before touching Drive when nothing was produced: otherwise an empty CSV
# and an empty archive would overwrite earlier results and "SUCCESS!" would
# still be printed.
if cleaned_df.empty:
    raise RuntimeError(
        "No audio file was processed successfully; "
        "the metadata CSV and the zip archive on Drive were left untouched."
    )

final_csv_path = os.path.join(DRIVE_FOLDER, "pascal_metadata_binary_test.csv")
cleaned_df.to_csv(final_csv_path, index=False)
print(f"Metadata for the test set saved to {final_csv_path}")

print("\nZipping the processed files... This may take a few minutes.")
# make_archive appends the '.zip' extension to the base name itself
zip_output_path_base = os.path.join(DRIVE_FOLDER, "PASCAL_processed_binary_test")
shutil.make_archive(zip_output_path_base, 'zip', LOCAL_OUTPUT_NPY_DIR)

print("-" * 50)
print("SUCCESS!")
print(f"A zip file has been created at: {zip_output_path_base}.zip")
print("This zip file contains the PASCAL dataset, ready to be used for testing your PhysioNet model.")
print("-" * 50)

Unzipping PASCAL dataset...
Unzipping complete.
Discovering audio files, filtering unlabeled, and mapping labels...
Found 832 total files, keeping 585 labeled files.

Label distribution in the PASCAL test set:
original_label
normal        351
murmur        129
extrastole     46
artifact       40
extrahls       19
Name: count, dtype: int64

Mapped binary label distribution:
label
0    351
1    234
Name: count, dtype: int64

Starting PASCAL pre-computation (saving locally)...


  0%|          | 0/585 [00:00<?, ?it/s]

Local pre-computation complete!

Successfully processed 585 out of 585 files.
Metadata for the test set saved to /content/drive/MyDrive/Multimodal Audio Fusion/pascal_metadata_binary_test.csv

Zipping the processed files... This may take a few minutes.
--------------------------------------------------
SUCCESS!
A zip file has been created at: /content/drive/MyDrive/Multimodal Audio Fusion/PASCAL_processed_binary_test.zip
This zip file contains the PASCAL dataset, ready to be used for testing your PhysioNet model.
--------------------------------------------------
