## CWT

In [2]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.signal import cwt, morlet
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report


In [7]:
# Define paths
base_dir = "Data"
class_labels = ['AS', 'MR', 'MS', 'MVP', 'N']

In [8]:
# CWT Feature Extraction (using the magnitude of the complex values)
def extract_cwt_features(file_path, sampling_rate=4000, widths=np.arange(1, 31)):
    # Load the audio file
    signal, sr = librosa.load(file_path, sr=sampling_rate)
    
    # Normalize the signal
    signal = signal / np.max(np.abs(signal))
    
    # Perform Continuous Wavelet Transform using the Morlet wavelet
    coefficients = cwt(signal, morlet, widths)
    
    # Use the magnitude (absolute value) of the complex CWT coefficients
    coefficients_magnitude = np.abs(coefficients)
    
    # Extract statistical features from the magnitude of CWT coefficients (e.g., mean, variance, energy, entropy)
    features = []
    
    for scale in coefficients_magnitude:
        # Energy of the scale
        scale_energy = np.sum(scale**2)
        # Entropy of the scale
        scale_entropy = -np.sum(scale * np.log(scale + 1e-10))
        
        # Append both energy and entropy as features
        features.append([scale_energy, scale_entropy])
    
    # Flatten the feature array
    features = np.array(features).flatten()
    
    return features



In [12]:
# Process the dataset using CWT
def process_dataset(base_dir, class_labels):
    feature_list = []
    label_list = []
    
    for label in class_labels:
        folder_path = os.path.join(base_dir, label)
        
        # Iterate over all wav files in the folder
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.wav'):
                file_path = os.path.join(folder_path, file_name)
                # print(f"Processing: {file_path}")
                
                # Extract CWT features
                features = extract_cwt_features(file_path)
                
                # Append features and corresponding label
                feature_list.append(features)
                label_list.append(label)
    
    # Convert to numpy arrays
    X = np.array(feature_list)
    y = np.array(label_list)
    
    # Standardize features (ensure all data is real)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    return X, y


In [13]:
# 1. Plot the raw heart sound waveform with a smaller figure size
def plot_waveform(file_path, heart_sound_type, sampling_rate=4000):
    signal, sr = librosa.load(file_path, sr=sampling_rate)
    plt.figure(figsize=(6, 3))  # Smaller figure size
    librosa.display.waveshow(signal, sr=sr)
    plt.title(f'{heart_sound_type} - Heart Sound Waveform')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.tight_layout()
    plt.show()

# 2. Plot CWT coefficients (subbands) using subplots and smaller figure size
def plot_cwt_subbands(file_path, heart_sound_type, sampling_rate=4000, widths=np.arange(1, 31)):
    signal, sr = librosa.load(file_path, sr=sampling_rate)
    
    # Perform Continuous Wavelet Transform using the Morlet wavelet
    coefficients = cwt(signal, morlet, widths)
    
    # Plot each scale (sub-band)
    fig, axs = plt.subplots(len(widths), 1, figsize=(10, 17))  # Adjust size for subplots
    for i, scale in enumerate(coefficients):
        axs[i].plot(scale)
        axs[i].set_title(f'{heart_sound_type} - CWT Sub-band - Scale {i + 1}')
    
    plt.tight_layout()  # Adjust layout to prevent overlap
    plt.show()


In [15]:
# Process the dataset
X, y = process_dataset(base_dir, class_labels)


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict on test data
y_pred = clf.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

          AS       0.91      0.91      0.91        33
          MR       0.93      0.88      0.90        48
          MS       0.92      0.89      0.90        37
         MVP       0.86      0.97      0.91        38
           N       0.95      0.93      0.94        44

    accuracy                           0.92       200
   macro avg       0.91      0.92      0.91       200
weighted avg       0.92      0.92      0.91       200

