# **AudioFuse**

This notebook contains the preprocessing pipeline for our proposed **AudioFuse** model and its baseline models. We use the PhysioNet 2016 Challenge dataset (heart sound classification / abnormality detection). Each audio file is converted into a mel spectrogram and a wavelet scalogram, which are stacked into a 2-channel image and saved as a `.npy` file. Finally, the train and validation metadata are cleaned (duplicate and co-existing entries are removed) to prevent data leakage.

## **Importing Libraries**

In [None]:
# File and Path Management
import os
import zipfile
import glob

# Data Manipulation
import pandas as pd
import numpy as np

# Audio Processing
import librosa
import pywt # PyWavelets for scalogram

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Data Splitting and Metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_auc_score,
    roc_curve,
    matthews_corrcoef,
    cohen_kappa_score
)

# Utilities
from tqdm.notebook import tqdm
tqdm.pandas()
import cv2 # OpenCV for resizing

# Ensuring reproducible results
def set_seed(seed=42):
    """Seed the random number generators used in this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed shared by all generators (default 42).
    """
    # Local import so this cell does not depend on editing the import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE: the interpreter reads PYTHONHASHSEED at start-up, so setting it here
    # does not change hashing in the running kernel; it is only inherited by
    # child processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(42)

## **Configuration**

In [None]:
class Config:
    """Central place for the paths and signal/image settings used by the pipeline."""
    # --- Paths ---
    # Zip archive of the dataset (opened with zipfile in the data-preparation cell).
    ZIP_PATH = '/content/drive/MyDrive/Multimodal Audio Fusion/PhysioNet Heart Sound Classification.zip'
    # Directory the archive is extracted into; also used as the dataset root.
    EXTRACT_PATH = '/content/physionet_data/'

    # --- Audio Settings ---
    # Passed as `sr` to librosa.load and to the mel-spectrogram computation.
    SAMPLE_RATE = 22050
    # Multiplied by SAMPLE_RATE to get the fixed number of samples every
    # waveform is truncated or zero-padded to.
    SIGNAL_LENGTH_SECONDS = 5
    # Number of mel bands; also reused as the number of CWT scales (1..N_MELS).
    N_MELS = 224
    # FFT window size and hop length passed to librosa.feature.melspectrogram.
    N_FFT = 2048
    HOP_LENGTH = 512
    # Wavelet name passed to pywt.cwt for the scalogram.
    WAVELET = 'morl'

    # --- Image/Input Settings ---
    # Spectrogram and scalogram are both resized to IMG_SIZE x IMG_SIZE.
    IMG_SIZE = 224
    # NOTE(review): not referenced in the visible part of this notebook.
    IN_CHANS = 1 # Each input stream is single-channel

# Shared instance read by the cells below.
CONFIG = Config()

## **Data Preparation**

In [None]:
# --- 1. Unzipping the dataset ---
# The extraction directory doubles as the "already done" marker: the archive is
# only unpacked when that directory does not exist yet.
if os.path.exists(CONFIG.EXTRACT_PATH):
    print("Dataset already extracted.")
else:
    print("Extracting dataset...")
    with zipfile.ZipFile(CONFIG.ZIP_PATH, mode='r') as zip_ref:
        zip_ref.extractall(path=CONFIG.EXTRACT_PATH)
    print("Extraction complete.")

# --- 2. Helper function to load data from specific folders ---
def load_data_from_folders(root_path, folder_patterns):
    """Merge the REFERENCE.csv listings of all matching folders into one DataFrame.

    Args:
        root_path: Directory that contains the dataset folders.
        folder_patterns: Iterable of glob patterns, relative to ``root_path``,
            selecting the folders whose ``REFERENCE.csv`` should be read.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns
        ``filename`` (first CSV column), ``label`` (1 where the CSV label
        equals 1, otherwise 0) and ``filepath`` (``<folder>/<filename>.wav``).

    Raises:
        ValueError: If no ``REFERENCE.csv`` matches any of the patterns.
    """
    all_ref_files = []
    for pattern in folder_patterns:
        # glob returns matches in arbitrary (filesystem-dependent) order; sort
        # them so the row order of the result is reproducible across machines.
        all_ref_files.extend(sorted(glob.glob(os.path.join(root_path, pattern, 'REFERENCE.csv'))))

    if not all_ref_files:
        raise ValueError(f"No REFERENCE.csv files found for patterns: {folder_patterns} in root: {root_path}")

    all_dfs = []
    for ref_path in all_ref_files:
        temp_df = pd.read_csv(ref_path, header=None, names=['filename', 'label'])
        # The .wav files are expected next to the REFERENCE.csv that lists them.
        parent_dir = os.path.dirname(ref_path)
        temp_df['filepath'] = [os.path.join(parent_dir, f"{fname}.wav") for fname in temp_df['filename']]
        all_dfs.append(temp_df)

    combined_df = pd.concat(all_dfs, ignore_index=True)
    # Binarise: label 1 stays 1, every other value becomes 0.
    combined_df['label'] = combined_df['label'].eq(1).astype('int64')
    return combined_df

# --- 3. Loading the Training and Validation data separately ---
train_folder_patterns = ["training-*"]
validation_folder_patterns = ["validation"]

# First attempt: the split folders sit directly under the extraction directory.
DATA_ROOT = os.path.join(CONFIG.EXTRACT_PATH)
try:
    train_df = load_data_from_folders(DATA_ROOT, train_folder_patterns)
    val_df = load_data_from_folders(DATA_ROOT, validation_folder_patterns)
except ValueError:
    # Second attempt: the archive unpacked into a versioned sub-directory.
    print("Trying alternative directory structure...")
    DATA_ROOT = os.path.join(CONFIG.EXTRACT_PATH, "physionet-cinc-challenge-2016-1.0.0")
    train_df = load_data_from_folders(DATA_ROOT, train_folder_patterns)
    val_df = load_data_from_folders(DATA_ROOT, validation_folder_patterns)

# One combined frame (train rows first, then validation rows) for pre-processing
data_df = pd.concat([train_df, val_df], ignore_index=True)

print(
    f"Total Training Samples: {len(train_df)}",
    f"Total Validation Samples: {len(val_df)}",
    sep="\n",
)

Extracting dataset...
Extraction complete.
Total Training Samples: 3240
Total Validation Samples: 301


## **Pre-Computation and Saving Spectrogram + Scalogram .npy Files**

In [None]:
# Audio processing functions with log-scaling for both
def get_spectrogram(waveform, sr):
    """Return the mel spectrogram of `waveform` converted to dB (with ref=np.max).

    The FFT size, hop length and number of mel bands come from CONFIG.
    """
    mel_settings = dict(
        n_fft=CONFIG.N_FFT,
        hop_length=CONFIG.HOP_LENGTH,
        n_mels=CONFIG.N_MELS,
    )
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sr, **mel_settings)
    return librosa.power_to_db(mel_power, ref=np.max)

def get_scalogram(waveform):
    """Return log1p(|CWT|) of `waveform` over the integer scales 1..CONFIG.N_MELS.

    The wavelet is taken from CONFIG.WAVELET; only the coefficient array of
    pywt.cwt is used, the second return value is discarded.
    """
    scale_range = np.arange(1, CONFIG.N_MELS + 1)
    cwt_coeffs = pywt.cwt(waveform, scale_range, CONFIG.WAVELET)[0]
    magnitude = np.abs(cwt_coeffs)
    return np.log1p(magnitude)

PROCESSED_DATA_DIR = "/content/drive/MyDrive/Multimodal Audio Fusion/Physionet_processed_images/"


def _min_max_normalize(image):
    """Rescale `image` with min-max normalisation; 1e-6 guards against a zero range."""
    return (image - image.min()) / (image.max() - image.min() + 1e-6)


def process_and_save(filepath):
    """Convert one audio file into a 2-channel image and save it as .npy.

    The waveform is loaded as mono at CONFIG.SAMPLE_RATE and forced to
    CONFIG.SIGNAL_LENGTH_SECONDS (truncated, or zero-padded at the end).
    Its spectrogram and scalogram are each resized to IMG_SIZE x IMG_SIZE,
    min-max normalised, and stacked on the last axis
    (channel 0 = spectrogram, channel 1 = scalogram).

    Args:
        filepath: Path of the audio file to process.

    Returns:
        Path of the written float32 .npy file inside PROCESSED_DATA_DIR.
        The file name is the audio file's base name, so two audio files with
        the same base name in different folders map to the same .npy file.
    """
    max_length = int(CONFIG.SIGNAL_LENGTH_SECONDS * CONFIG.SAMPLE_RATE)
    waveform, _ = librosa.load(filepath, sr=CONFIG.SAMPLE_RATE, mono=True)
    if len(waveform) > max_length:
        waveform = waveform[:max_length]
    else:
        waveform = np.pad(waveform, (0, max_length - len(waveform)), 'constant')

    spec = get_spectrogram(waveform, CONFIG.SAMPLE_RATE)
    spec_norm = _min_max_normalize(cv2.resize(spec, (CONFIG.IMG_SIZE, CONFIG.IMG_SIZE)))

    scalo = get_scalogram(waveform)
    scalo_norm = _min_max_normalize(cv2.resize(scalo, (CONFIG.IMG_SIZE, CONFIG.IMG_SIZE)))

    fused_image = np.stack([spec_norm, scalo_norm], axis=-1)
    save_path = os.path.join(PROCESSED_DATA_DIR, f"{os.path.splitext(os.path.basename(filepath))[0]}.npy")
    np.save(save_path, fused_image.astype(np.float32))
    return save_path


if not os.path.exists(PROCESSED_DATA_DIR):
    os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)
    print("Starting pre-computation of spectrograms and scalograms...")
    data_df['npy_filepath'] = data_df['filepath'].progress_apply(process_and_save)
    print("\nPre-computation complete!")
else:
    print("Pre-computed data found. Linking file paths...")
    data_df['npy_filepath'] = data_df['filename'].apply(lambda f: os.path.join(PROCESSED_DATA_DIR, f"{f}.npy"))

    # The directory also exists after an interrupted run, so do not assume the
    # cache is complete: recompute every .npy file that is not there.
    missing_mask = ~data_df['npy_filepath'].apply(os.path.exists).astype(bool)
    if missing_mask.any():
        print(f"{int(missing_mask.sum())} pre-computed files are missing; recomputing them...")
        data_df.loc[missing_mask, 'filepath'].progress_apply(process_and_save)

# Updating train_df and val_df with the new .npy paths.
# Rows are matched on 'filepath', which is unique per source folder. Matching on
# 'filename' mixes the splits whenever the same file name is listed in both a
# training folder and the validation folder (every such row would land in both).
train_df = data_df[data_df['filepath'].isin(train_df['filepath'])].copy()
val_df = data_df[data_df['filepath'].isin(val_df['filepath'])].copy()

Starting pre-computation of spectrograms and scalograms...


  0%|          | 0/3541 [00:00<?, ?it/s]


Pre-computation complete!


In [None]:
# --- 7. Saving Final Metadata Files ---
# train_df goes to the "train" CSV, val_df to the "test" CSV; both are written
# without the index column.
train_metadata_path, test_metadata_path = (
    os.path.join("/content/drive/MyDrive/Multimodal Audio Fusion", csv_name)
    for csv_name in ("Heart Sounds_train.csv", "Heart Sounds_test.csv")
)

train_df.to_csv(path_or_buf=train_metadata_path, index=False)
val_df.to_csv(path_or_buf=test_metadata_path, index=False)

## **Deleting Co-existing & Duplicate Entries in the Training and Validation Sets to Prevent Data Leakage**

In [None]:
import pandas as pd
import os


def remove_split_leakage(train_df, test_df, key='filename'):
    """Remove duplicate and co-existing entries from the two metadata frames.

    The test frame is de-duplicated on `key` (first occurrence kept). Every
    training row whose `key` also occurs in the test frame is dropped, and the
    remaining training rows are de-duplicated on `key` as well. The input
    frames are not modified.

    Args:
        train_df: Training metadata; must contain the column `key`.
        test_df: Test/validation metadata; must contain the column `key`.
        key: Column that identifies a recording (default 'filename').

    Returns:
        Tuple ``(cleaned_train_df, cleaned_test_df, stats)`` where ``stats`` is
        a dict with the integer counts ``duplicates_in_test``, ``leaked_rows``
        and ``duplicates_in_train`` (the latter counted after leaked rows were
        removed).
    """
    duplicates_in_test = int(test_df.duplicated(subset=[key]).sum())
    cleaned_test_df = test_df.drop_duplicates(subset=[key], keep='first')

    leaked_rows_mask = train_df[key].isin(set(cleaned_test_df[key]))
    train_without_leaks = train_df[~leaked_rows_mask]

    duplicates_in_train = int(train_without_leaks.duplicated(subset=[key]).sum())
    cleaned_train_df = train_without_leaks.drop_duplicates(subset=[key], keep='first')

    stats = {
        'duplicates_in_test': duplicates_in_test,
        'leaked_rows': int(leaked_rows_mask.sum()),
        'duplicates_in_train': duplicates_in_train,
    }
    return cleaned_train_df, cleaned_test_df, stats


# --- 1. Defining File Paths ---
DRIVE_FOLDER = "/content/drive/MyDrive/Multimodal Audio Fusion"
TRAIN_CSV_PATH = os.path.join(DRIVE_FOLDER, "Heart Sounds_train.csv")
TEST_CSV_PATH = os.path.join(DRIVE_FOLDER, "Heart Sounds_test.csv")

try:
    # --- 2. Loading the CSV files ---
    print("Loading the metadata files...")
    train_df = pd.read_csv(TRAIN_CSV_PATH)
    test_df = pd.read_csv(TEST_CSV_PATH)

    print("\n--- Initial File Counts ---")
    print(f"Original 'Heart Sounds_train.csv' rows: {len(train_df)}")
    print(f"Original 'Heart Sounds_test.csv' rows: {len(test_df)}")

    # Recordings are identified by the 'filename' column in both files.
    cleaned_train_df, cleaned_test_df, cleaning_stats = remove_split_leakage(train_df, test_df)
    duplicates_in_test = cleaning_stats['duplicates_in_test']
    leaked_rows_count = cleaning_stats['leaked_rows']
    duplicates_in_train = cleaning_stats['duplicates_in_train']

    # ================================================================
    # Step 1: Cleaning the Test/Validation File (Heart Sounds_test.csv)
    # ================================================================
    print("\n--- Cleaning 'Heart Sounds_test.csv' ---")
    print(f"Found {duplicates_in_test} duplicate rows to remove.")
    print(f"Cleaned 'Heart Sounds_test.csv' now has {len(cleaned_test_df)} unique rows.")

    # ================================================================
    # Step 2: Cleaning the Train File (Heart Sounds_train.csv)
    # ================================================================
    print("\n--- Cleaning 'Heart Sounds_train.csv' ---")
    print(f"Found {leaked_rows_count} rows in the training set that also exist in the test set. These will be removed.")
    # Duplicates inside the training file itself are removed too, so the
    # "unique rows" count below is actually guaranteed.
    print(f"Found {duplicates_in_train} duplicate rows within the training set to remove.")
    print(f"Cleaned 'Heart Sounds_train.csv' now has {len(cleaned_train_df)} unique rows.")

    # ================================================================
    # Step 3: Overwriting the original files with the cleaned versions
    # ================================================================
    print("\nSaving the cleaned files...")

    # Saving the cleaned data back to the original file paths
    cleaned_train_df.to_csv(TRAIN_CSV_PATH, index=False)
    cleaned_test_df.to_csv(TEST_CSV_PATH, index=False)

    print("-" * 50)
    print("SUCCESS: Both metadata files have been cleaned and overwritten.")
    print("-" * 50)

except FileNotFoundError as e:
    # Deliberately non-fatal: report which file is missing and leave the
    # metadata files untouched.
    print(f"\n--- ERROR ---")
    print(f"Could not find a file. Please double-check the path and filename.")
    print(f"The script was looking for: {e.filename}")
    print("-" * 50)

Loading the metadata files...

--- Initial File Counts ---
Original 'Heart Sounds_train.csv' rows: 2939
Original 'Heart Sounds_test.csv' rows: 602

--- Cleaning 'Heart Sounds_test.csv' ---
Found 301 duplicate rows to remove.
Cleaned 'Heart Sounds_test.csv' now has 301 unique rows.

--- Cleaning 'Heart Sounds_train.csv' ---
Found 0 rows in the training set that also exist in the test set. These will be removed.
Cleaned 'Heart Sounds_train.csv' now has 2939 unique rows.

Saving the cleaned files...
--------------------------------------------------
SUCCESS: Both metadata files have been cleaned and overwritten.
--------------------------------------------------
