In [1]:
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import butter, filtfilt
import neurokit2 as nk

In [None]:
# ECG records to analyse, identified by record name.
record_names = [
    '100', '101', '102', '103', '104', '105', '106', '107',
    '108', '109', '111', '112', '113', '114', '115', '116',
    '117', '118', '119', '121', '122', '123', '124', '200',
    '201', '202', '203', '205', '207', '208', '209', '210',
]

sampling_rate = 360  # Hz
cutoff = 0.5  # Hz; high-pass cutoff used to remove baseline wander

ecg_signals = []       # raw first-channel signal of each record, in record_names order
filtered_signals = []  # filled further down with the filtered version of each signal

# Read every record through wfdb (pn_dir='mitdb') and keep only column 0 of
# its signal matrix, i.e. the first lead.
for record_name in record_names:
    record = wfdb.rdrecord(record_name, pn_dir='mitdb')
    first_lead = record.p_signal[:, 0]
    ecg_signals.append(first_lead)

def butter_highpass(cutoff, sampling_rate, order=5):
    """Design a digital Butterworth high-pass filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, in the same units as ``sampling_rate``.
    sampling_rate : float
        Sampling frequency of the signal the filter is meant for.
    order : int, optional
        Filter order handed to ``scipy.signal.butter`` (default 5).

    Returns
    -------
    tuple
        ``(b, a)``: numerator and denominator coefficient arrays.
    """
    # butter() takes the critical frequency as a fraction of the Nyquist rate.
    half_rate = sampling_rate * 0.5
    numerator, denominator = butter(
        order, cutoff / half_rate, btype='high', analog=False
    )
    return numerator, denominator

def highpass_filter(data, cutoff, sampling_rate, order=5):
    """High-pass filter ``data`` with a forward-backward Butterworth filter.

    The coefficients come from ``butter_highpass`` and are applied with
    ``scipy.signal.filtfilt``, which runs the filter in both directions.

    Parameters
    ----------
    data : array_like
        Signal to filter.
    cutoff : float
        Cutoff frequency, in the same units as ``sampling_rate``.
    sampling_rate : float
        Sampling frequency of ``data``.
    order : int, optional
        Butterworth filter order (default 5).

    Returns
    -------
    numpy.ndarray
        The filtered signal as returned by ``filtfilt``.
    """
    coefficients = butter_highpass(cutoff, sampling_rate, order=order)
    return filtfilt(*coefficients, data)

# High-pass filter every loaded ECG signal, keeping results in record order.
for ecg_signal in ecg_signals:
    filtered_signal = highpass_filter(ecg_signal, cutoff, sampling_rate)
    filtered_signals.append(filtered_signal)  # Save the filtered signal

# Plot one filtered record. A single index selects both the trace and the
# record name so the legend and title always describe the data being drawn
# (the trace used index 3 while the text used index 7, mislabelling the plot).
plot_idx = 3
plot_record_name = record_names[plot_idx]

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(filtered_signals[plot_idx], label=f'Filtered ECG Signal {plot_record_name}')
ax.set_xlabel('Sample')
ax.set_ylabel('Amplitude')
ax.set_title(f'Filtered ECG Signal from Record {plot_record_name}')
ax.legend()
plt.show()


In [None]:
# Duration (in seconds) of the last filtered record. Indexing the list
# explicitly avoids depending on the loop variable `filtered_signal` leaking
# out of the filtering loop in an earlier cell.
signal_duration = len(filtered_signals[-1]) / sampling_rate  # seconds

# Sliding-window parameters: 0.5 s windows advanced in 0.25 s steps.
window_size_seconds = 0.5
step_size_seconds = 0.25

# Same parameters expressed in samples (int() truncates any fractional part).
window_size = int(window_size_seconds * sampling_rate)  # 180 samples at 360 Hz
step_size = int(step_size_seconds * sampling_rate)      # 90 samples at 360 Hz

def extract_features(window):
    """Compute summary statistics for one signal window.

    Parameters
    ----------
    window : array_like
        1-D sequence of samples.

    Returns
    -------
    dict
        Keys, in order: ``mean``, ``max``, ``min``, ``std``, ``energy``
        (sum of squared samples), ``amplitude_range`` (max minus min),
        ``mean_first_derivative`` and ``mean_second_derivative`` (means of
        ``np.gradient`` applied once and twice).
    """
    average = np.mean(window)
    highest = np.max(window)
    lowest = np.min(window)
    spread = np.std(window)
    energy = np.sum(np.square(window))

    # Numerical derivatives with unit sample spacing.
    slope = np.gradient(window)
    curvature = np.gradient(slope)

    return {
        'mean': average,
        'max': highest,
        'min': lowest,
        'std': spread,
        'energy': energy,
        'amplitude_range': highest - lowest,
        'mean_first_derivative': np.mean(slope),
        'mean_second_derivative': np.mean(curvature),
    }

# Function to split an ECG signal into windows and extract features for each window
def process_ecg_signal(filtered_signal):
    """Window one filtered ECG signal, extract features and label R-peak windows.

    The signal is cut into windows of ``window_size`` samples taken every
    ``step_size`` samples (both module-level settings). R-peaks are detected
    once on the whole signal with NeuroKit2 using the module-level
    ``sampling_rate``. A window is labelled 1 when at least one detected peak
    index ``p`` satisfies ``start <= p < end``, otherwise 0.

    Parameters
    ----------
    filtered_signal : sequence
        1-D filtered ECG signal.

    Returns
    -------
    df : pandas.DataFrame
        One row per window with the ``extract_features`` values plus an
        ``R_peak_label`` column.
    windows : list
        The window slices, in signal order.
    labels : list of int
        0/1 label per window (same values as ``df['R_peak_label']``).
    """
    n_samples = len(filtered_signal)

    # Detect R-peaks using NeuroKit2
    _, r_peak_info = nk.ecg_peaks(filtered_signal, sampling_rate=sampling_rate)
    # Sort explicitly so the binary searches below are valid whatever order
    # the detector returns the indices in.
    r_peak_indices = np.sort(np.asarray(r_peak_info['ECG_R_Peaks']))

    # Start offset of every full-length window (empty when the signal is
    # shorter than one window).
    window_starts = range(0, n_samples - window_size + 1, step_size)
    starts = np.array(window_starts, dtype=np.intp)
    ends = starts + window_size

    # Number of peaks in [start, end) = (#peaks < end) - (#peaks < start).
    # Two vectorised binary searches replace rescanning the whole peak list
    # for every window.
    peaks_before_start = np.searchsorted(r_peak_indices, starts, side='left')
    peaks_before_end = np.searchsorted(r_peak_indices, ends, side='left')
    labels = (peaks_before_end > peaks_before_start).astype(int).tolist()

    # Slice the windows and compute one feature row per window.
    windows = [filtered_signal[start:start + window_size] for start in window_starts]
    rows = [extract_features(window) for window in windows]

    # Convert the list of rows to a DataFrame and attach the labels
    df = pd.DataFrame(rows)
    df['R_peak_label'] = labels
    return df, windows, labels
def process_multiple_ecg_signals(filtered_signals):
    """Run ``process_ecg_signal`` on each signal and merge the feature tables.

    Parameters
    ----------
    filtered_signals : iterable
        Filtered ECG signals, one entry per record.

    Returns
    -------
    combined_features_df : pandas.DataFrame
        Row-wise concatenation (fresh index) of every per-signal feature
        table, each tagged with a ``signal_id`` column holding the signal's
        position in ``filtered_signals``.
    all_windows : list
        Per-signal window lists, in input order.
    all_labels : list
        Per-signal label lists, in input order.
    """
    all_features, all_windows, all_labels = [], [], []

    for signal_id, signal in enumerate(filtered_signals):
        frame, signal_windows, signal_labels = process_ecg_signal(signal)

        # Tag each row with the index of the signal it came from (for tracking).
        frame['signal_id'] = signal_id

        all_features.append(frame)
        all_windows.append(signal_windows)
        all_labels.append(signal_labels)

    return pd.concat(all_features, ignore_index=True), all_windows, all_labels


# Build the combined per-window feature table for all filtered signals, along
# with the per-signal lists of windows and labels.
combined_df, all_windows, all_labels = process_multiple_ecg_signals(filtered_signals)


In [None]:
# NOTE(review): these two calls only print the literal string "salam"; they look
# like leftover scratch/debug output — confirm and consider deleting this cell.
print("salam")
print("salam")

salam
