<a href="https://colab.research.google.com/github/Room1097/ECG_Feature_Analysis_DAV_Project/blob/main/DAV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install neurokit2

Collecting neurokit2
  Downloading neurokit2-0.2.10-py2.py3-none-any.whl.metadata (37 kB)
Downloading neurokit2-0.2.10-py2.py3-none-any.whl (693 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.1/693.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neurokit2
Successfully installed neurokit2-0.2.10


In [6]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, entropy
from scipy.signal import find_peaks, welch
import neurokit2 as nk

In [2]:
# Mount Google Drive at /content/drive; the input CSV is read from, and the
# feature CSV is written to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Step 1: Read the CSV and flatten the signal
# Full path to the ECG recording inside the mounted Google Drive
file_path = '/content/drive/My Drive/a01.csv'

# Read every cell as text so stray non-numeric characters can be stripped
# before conversion; anything that still cannot be parsed becomes NaN.
df = (
    pd.read_csv(file_path, header=None, dtype=str)
    .replace(r'[^\d\.\-eE]', '', regex=True)
    .apply(pd.to_numeric, errors='coerce')
)

# Fill gaps from neighbouring samples within the same row first.
df = df.ffill(axis=1).bfill(axis=1)

# Row-wise filling cannot repair a row that has no valid value at all (which
# is every bad row when the file has a single column). Fill whatever is left
# along the flattened, row-major sample order so no NaN reaches the feature
# computations, where a single NaN would turn every statistic into NaN.
ecg_data = pd.Series(df.to_numpy().ravel()).ffill().bfill().to_numpy()

if ecg_data.size == 0 or np.isnan(ecg_data).any():
    raise ValueError(f"No numeric samples could be parsed from {file_path!r}")

# Define sampling frequency
fs = 100  # Hz

In [9]:
# Step 2: Clean ECG and extract features
# Run NeuroKit2's ECG processing on the flattened signal at the declared
# sampling rate; it returns a (signals, info) pair.
signals, info = nk.ecg_process(ecg_data, sampling_rate=fs)
# R-peak locations reported by NeuroKit2
# (presumably sample indices into ecg_data — confirm against the NeuroKit2 docs).
rpeaks = info['ECG_R_Peaks']
# Spacing between consecutive R-peaks divided by fs
# (seconds, if rpeaks are sample indices and fs is in Hz).
rr_intervals = np.diff(rpeaks) / fs
# Instantaneous heart rate implied by each interval (60 / interval).
hr = 60 / rr_intervals

In [10]:
# Time-domain features
# Quantities shared by several entries are computed once up front so the
# dictionary below reads as a plain list of statistics.
lowest = np.min(ecg_data)
highest = np.max(ecg_data)
q1 = np.percentile(ecg_data, 25)
q3 = np.percentile(ecg_data, 75)
abs_step = np.abs(np.diff(ecg_data))             # |x[i+1] - x[i]|
sign_flip = (ecg_data[:-1] * ecg_data[1:]) < 0   # neighbours with opposite sign
amplitude_hist = np.histogram(ecg_data, bins=50, density=True)[0]
midpoint = len(ecg_data) // 2

features = {
    # Location and spread
    "Mean": np.mean(ecg_data),
    "Std": np.std(ecg_data),
    "Var": np.var(ecg_data),
    "Median": np.median(ecg_data),
    "Min": lowest,
    "Max": highest,
    "Range": highest - lowest,
    "Q1": q1,
    "Q3": q3,
    "IQR": q3 - q1,
    # Distribution shape
    "Skewness": skew(ecg_data),
    "Kurtosis": kurtosis(ecg_data),
    # Amplitude and energy
    "RMS": np.sqrt(np.mean(np.square(ecg_data))),
    "Zero_Crossings": sign_flip.sum(),
    "Signal_Energy": np.sum(ecg_data ** 2),
    "Signal_Entropy": entropy(amplitude_hist),
    # Sample-to-sample variation
    "Mean_Absolute_Diff": np.mean(abs_step),
    "STD_Absolute_Diff": np.std(abs_step),
    # Mean of the first half minus mean of the second half
    "Symmetry_Index": np.mean(ecg_data[:midpoint]) - np.mean(ecg_data[midpoint:]),
}

In [12]:
# Frequency-domain features using Welch's method
#
# Welch's bin spacing is fs / nperseg. With nperseg=1024 at fs=100 Hz the
# spacing is ~0.098 Hz, which puts no bin inside the VLF band and only one
# inside the LF band, so both integrals are always exactly 0 (and LF/HF is
# always 0). The segment is therefore sized from the lowest band edge so that
# every band contains several bins, capped at the length of the recording.
#
# NOTE(review): these limits are the conventional HRV bands, applied here to
# the PSD of the raw ECG samples as before — confirm that this, rather than
# the spectrum of the RR-interval series, is what is intended.
lowest_band_edge_hz = 0.003
nperseg = min(len(ecg_data), int(np.ceil(fs / lowest_band_edge_hz)))
frequencies, power = welch(ecg_data, fs, nperseg=nperseg)

# np.trapezoid exists only on NumPy >= 2.0; older releases name it np.trapz.
_integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz


def _band_power(freqs, psd, low_hz, high_hz):
    """Integrate `psd` over the half-open band [low_hz, high_hz).

    Returns NaN when fewer than two frequency bins fall inside the band,
    because the spectrum is then too coarse to measure that band (a
    trapezoidal integral over 0 or 1 points would misleadingly report 0).
    """
    in_band = (freqs >= low_hz) & (freqs < high_hz)
    if np.count_nonzero(in_band) < 2:
        return np.nan
    return _integrate(psd[in_band], freqs[in_band])


features.update({
    "Total_Power": _integrate(power, frequencies),
    "VLF_Power": _band_power(frequencies, power, 0.003, 0.04),
    "LF_Power": _band_power(frequencies, power, 0.04, 0.15),
    "HF_Power": _band_power(frequencies, power, 0.15, 0.4),
})


In [13]:
# LF/HF Ratio
# Left as NaN when HF power is exactly zero, since the quotient is undefined.
lf, hf = features["LF_Power"], features["HF_Power"]
if hf != 0:
    features["LF_HF_Ratio"] = lf / hf
else:
    features["LF_HF_Ratio"] = np.nan

In [14]:
# HRV features
# Everything below is derived from rr_intervals; the successive differences
# and the interval spread are computed once and reused.
rr_successive_diff = np.diff(rr_intervals)
nn50_count = np.sum(np.abs(rr_successive_diff) > 0.05)   # jumps larger than 0.05
rr_std = np.std(rr_intervals)

hrv_features = {
    "Num_Beats": len(rpeaks),
    "Mean_RR": np.mean(rr_intervals),
    "STD_RR": rr_std,
    "Min_RR": np.min(rr_intervals),
    "Max_RR": np.max(rr_intervals),
    "Mean_HR": np.mean(hr),
    "SDNN": rr_std,   # same value as STD_RR
    "RMSSD": np.sqrt(np.mean(np.square(rr_successive_diff))),
    "NN50": nn50_count,
    # NN50 as a percentage of the number of RR intervals
    "pNN50": nn50_count / len(rr_intervals) * 100,
}
features.update(hrv_features)

In [16]:
# Lay the feature dictionary out as a one-row table (one column per feature)
features_df = pd.DataFrame([features])

# Write it to Google Drive without the index column
output_path = "/content/drive/My Drive/extracted_ecg_features.csv"
features_df.to_csv(path_or_buf=output_path, index=False)

print(f"✅ Features extracted and saved to: {output_path}")


✅ Features extracted and saved to: /content/drive/My Drive/extracted_ecg_features.csv
