In [1]:
import wfdb
import numpy as np

# List of MIT-BIH record numbers (example records)
records = [100, 101, 102, 103, 104]

# Load signals and annotations
signals = []
labels = []

for record in records:
    record_path = f'{record}'
    signal, fields = wfdb.rdsamp(record_path, channels=[0])  # Use Lead II (MLII)
    annotation = wfdb.rdann(record_path, 'atr')
    
    signals.append(signal.flatten())
    labels.extend(annotation.symbol)

In [2]:
from scipy.signal import butter, filtfilt

def bandpass_filter(signal, fs=360, lowcut=0.5, highcut=40):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(4, [low, high], btype='band')
    filtered = filtfilt(b, a, signal)
    return filtered

# Normalize signal
def normalize(signal):
    return (signal - np.min(signal)) / (np.max(signal) - np.min(signal))

# Apply preprocessing
filtered_signals = [bandpass_filter(sig) for sig in signals]
normalized_signals = [normalize(sig) for sig in filtered_signals]

In [3]:
import neurokit2 as nk

r_peaks = []
for sig in normalized_signals:
    _, rpeaks = nk.ecg_peaks(sig, sampling_rate=360)
    r_peaks.append(rpeaks['ECG_R_Peaks'])

In [4]:
def extract_features(r_peaks, fs=360):
    rr_intervals = np.diff(r_peaks) / fs * 1000  # Convert to milliseconds
    heart_rate = 60 / (np.mean(rr_intervals) / 1000)  # BPM
    hrv = np.std(rr_intervals)
    return rr_intervals, heart_rate, hrv

features = []
for peaks in r_peaks:
    rr, hr, hrv = extract_features(peaks)
    features.append([np.mean(rr), hr, hrv])

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Convert labels to binary (e.g., Normal=0, Abnormal=1)
y = np.array([0 if label in ['N', 'L', 'R', 'e', 'j'] else 1 for label in labels])

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.2)

# Train SVM
clf = SVC(kernel='rbf')
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

ValueError: Found input variables with inconsistent numbers of samples: [5, 10742]

In [None]:
signals, info = nk.ecg_process(normalized_signals[0], sampling_rate=360)
nk.ecg_plot(signals, info)