<a href="https://colab.research.google.com/github/MoyoMbongeni/ML/blob/main/Untitled7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Note: the code in this notebook was written with help from Gemini.*

In [2]:
!pip install wfdb

import numpy as np
import wfdb
import pywt
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Settings shared by every cell below
FS = 360  # sampling rate in Hz
SEGMENT_LEN = 5 * FS  # samples in one 5-second window
NOISE_TYPES = [
    'baseline_wander',
    'muscle_artifact',
    'electrode_motion',
]

def download_data():
    """Download the required PhysioNet records, skipping those already on disk.

    Two databases are fetched with ``wfdb.dl_database``:
      * ``mitdb`` records 100 and 101 (clean signals) into ``data/mitdb``
      * ``nstdb`` records bw, ma and em (noise recordings) into ``data/nstdb``

    A record counts as present when both its ``.hea`` header and ``.dat``
    signal file exist in the target directory. Only missing records are
    requested, so re-running the notebook makes no network calls once the
    data is in place. Other per-record files (e.g. annotations) are not
    checked because nothing in this notebook reads them.
    """
    sources = (
        ('mitdb', 'data/mitdb', ['100', '101']),      # clean signals
        ('nstdb', 'data/nstdb', ['bw', 'ma', 'em']),  # noise recordings
    )
    for db_name, dl_dir, records in sources:
        missing = [
            rec for rec in records
            if not all(
                os.path.exists(os.path.join(dl_dir, rec + ext))
                for ext in ('.hea', '.dat')
            )
        ]
        if missing:
            wfdb.dl_database(db_name, dl_dir=dl_dir, records=missing)

def load_and_segment(record_path, label_idx=None):
    """Load a WFDB record and cut its first channel into fixed-length windows.

    Args:
        record_path: Record path (without file extension) handed to
            ``wfdb.rdrecord``.
        label_idx: Currently unused; kept so existing callers keep working.

    Returns:
        ``np.ndarray`` of shape ``(n_segments, SEGMENT_LEN)``. Rows are
        consecutive, non-overlapping windows of the min-max scaled signal.
        Trailing samples that do not fill a whole window are dropped, and a
        record shorter than one window yields shape ``(0, SEGMENT_LEN)``.
    """
    record = wfdb.rdrecord(record_path)
    signal = record.p_signal[:, 0]  # first channel only

    # No resampling step: MIT-BIH and NSTDB are already sampled at 360 Hz.

    # Min-max scale to [0, 1] using the extrema of the whole record, so the
    # relative amplitude between windows of one record is preserved.
    scaler = MinMaxScaler()
    signal = scaler.fit_transform(signal.reshape(-1, 1)).flatten()

    # Floor division counts every complete window, including the final one
    # when the record length is an exact multiple of SEGMENT_LEN (a
    # range(0, len - SEGMENT_LEN, SEGMENT_LEN) loop stops one window early).
    n_segments = len(signal) // SEGMENT_LEN
    return signal[:n_segments * SEGMENT_LEN].reshape(n_segments, SEGMENT_LEN)

Collecting wfdb
  Downloading wfdb-4.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting pandas>=2.2.3 (from wfdb)
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Downloading wfdb-4.3.0-py3-none-any.whl (163 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/163.8 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m101.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas, wfdb
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into acco

In [1]:
def extract_features(signal):
    """Build the 15-element hand-crafted feature vector used by the SVM.

    Features, in order:
      * DWT (db4, level 9): RMS and standard deviation of the six
        lowest-frequency sub-bands, i.e. the level-9 approximation followed
        by detail levels 9 down to 5 (12 values).
      * TERMA: max, mean and standard deviation of the difference between a
        fast and a slow moving average of the squared signal (3 values).

    Args:
        signal: 1-D NumPy array holding one segment. It must contain at
            least 360 samples, otherwise the two moving averages come back
            with different lengths and cannot be subtracted.

    Returns:
        1-D ``np.ndarray`` of length 15.
    """
    # pywt.wavedec returns [cA9, cD9, cD8, ..., cD1]: lowest frequencies come
    # first. Baseline wander lives in those leading bands, so take the first
    # six entries (the trailing six would be the high-frequency details).
    # NOTE(review): for an 1800-sample segment PyWavelets' recommended maximum
    # db4 level is 8, so level=9 is expected to emit a boundary-effects
    # warning -- confirm that level 9 is intended.
    coeffs = pywt.wavedec(signal, 'db4', level=9)
    dwt_feats = []
    for band in coeffs[:6]:
        dwt_feats.append(np.sqrt(np.mean(band**2)))  # RMS amplitude
        dwt_feats.append(np.std(band))               # spread around the mean

    # TERMA: fast (18-sample) minus slow (360-sample) moving average of the
    # signal power; at the notebook's 360 Hz rate that is 50 ms versus 1 s.
    sq_sig = signal**2
    fast_ma = np.convolve(sq_sig, np.ones(18) / 18, mode='same')
    slow_ma = np.convolve(sq_sig, np.ones(360) / 360, mode='same')
    terma_sig = fast_ma - slow_ma

    terma_feats = [np.max(terma_sig), np.mean(terma_sig), np.std(terma_sig)]

    return np.concatenate([dwt_feats, terma_feats])

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_1d_cnn():
    """Build and compile the 1-D CNN that tags noise types on raw segments.

    The network takes input of shape ``(SEGMENT_LEN, 1)``, applies three
    Conv1D stages (32, 64 and 64 filters), global average pooling and a
    32-unit dense layer, and ends in a sigmoid layer with one unit per entry
    of ``NOISE_TYPES``. Each output is an independent probability, so several
    noise types can be flagged for the same segment (multi-label).

    Returns:
        A compiled ``tf.keras`` Sequential model using Adam (learning rate
        1e-3), binary cross-entropy loss and per-label binary accuracy.
    """
    model = models.Sequential([
        # Raw signal input: (SEGMENT_LEN, 1)
        layers.Input(shape=(SEGMENT_LEN, 1)),

        layers.Conv1D(32, kernel_size=15, activation='relu'),
        layers.MaxPooling1D(2),

        layers.Conv1D(64, kernel_size=10, activation='relu'),
        layers.MaxPooling1D(2),

        layers.Conv1D(64, kernel_size=5, activation='relu'),
        # Collapses the time axis without a large Flatten -> Dense weight matrix.
        layers.GlobalAveragePooling1D(),

        layers.Dense(32, activation='relu'),
        # Multi-label output: one independent sigmoid per noise type.
        layers.Dense(len(NOISE_TYPES), activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        # Spell out BinaryAccuracy: with more than one output unit, the bare
        # string 'accuracy' can resolve to categorical (argmax) accuracy
        # depending on the Keras version, which is meaningless for multi-hot
        # targets. name='accuracy' keeps the history/log keys unchanged.
        metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
    )
    return model

In [3]:
def analyze_signal(raw_segment, cnn_model, svm_model, threshold=0.5):
    """Classify the noise in one segment with the CNN and grade signal quality.

    Args:
        raw_segment: Array-like with exactly ``SEGMENT_LEN`` samples.
        cnn_model: Object whose ``predict(batch, verbose=0)`` returns one
            probability per entry of ``NOISE_TYPES`` for each batch row.
        svm_model: Currently unused; accepted so existing callers keep working.
        threshold: Probability above which a noise type counts as detected.
            With the default of 0.5 every reported severity exceeds 50, so
            the 'high' quality grade can only occur for thresholds below 0.4.

    Returns:
        dict with keys:
          * ``detected_noises``: ``{noise_name: bool}`` for every noise type.
          * ``severity_scores``: ``{noise_name: float}`` for detected types
            only; the CNN probability as a percentage rounded to 0.1.
          * ``dominant_noise``: detected type with the highest severity, or
            the string ``'None'`` when nothing was detected.
          * ``overall_quality``: ``'excellent'`` (nothing detected),
            ``'high'`` (< 40), ``'moderate'`` (< 75) or ``'low'``.
    """
    # 1. CNN prediction on a batch of one, channel-last
    cnn_input = np.asarray(raw_segment).reshape(1, SEGMENT_LEN, 1)
    cnn_probs = cnn_model.predict(cnn_input, verbose=0)[0]

    # 2. Map probabilities to the output format
    detected = {}
    severity = {}

    for i, name in enumerate(NOISE_TYPES):
        # Cast to a native float so the result holds plain Python values
        # (JSON-serialisable, no float32 artefacts), matching the bool cast.
        prob = float(cnn_probs[i])
        is_detected = prob > threshold
        detected[name] = is_detected
        if is_detected:
            # Probability is used as a proxy for severity in percent.
            severity[name] = round(prob * 100, 1)

    # 3. Quality assessment driven by the worst detected noise
    if not severity:
        dominant = 'None'
        quality = 'excellent'
    else:
        dominant = max(severity, key=severity.get)
        max_sev = severity[dominant]
        if max_sev < 40:
            quality = 'high'
        elif max_sev < 75:
            quality = 'moderate'
        else:
            quality = 'low'

    return {
        'detected_noises': detected,
        'severity_scores': severity,
        'dominant_noise': dominant,
        'overall_quality': quality
    }