In [1]:
import datetime
import os
import random
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.io import loadmat
from scipy import signal
from scipy.signal import medfilt
import pywt
from pywt import wavedec

In [2]:
DATASET_DIR = 'data\WFDBRecords'
print('DATASET DIR ::',DATASET_DIR)

DATASET DIR :: data\WFDBRecords


In [3]:
g_DATA = '.mat'
g_HEAD = '.hea'
g_leads = ['I','II','III','aVR','aVL','aVF','V1','V2','V3','V4','V5','V6' ]
BASIC_SRATE = 500 #Hz
print('Basic sampling rate(Hz):',BASIC_SRATE)

Basic sampling rate(Hz): 500


In [4]:
for root, dirs, files in os.walk('.'):
    for filename in files:
        if filename == 'RECORDS':
            os.rename(os.path.join(root, filename), os.path.join(root, 'RECORDS.txt'))


In [5]:
rec_count = 0 
rec_count_expected = 0  # Initialize with 0, will be calculated below
rec_name_sep = ','
rec_file = open('RECORDS.txt', 'w')
# Traverse through all subfolders and files
for root, dirs, files in os.walk(DATASET_DIR):
    for f in files:
        if f.endswith(g_HEAD):
            rec_name = os.path.splitext(f)[0]  # Extract filename without extension
            rec_file.write(rec_name_sep + rec_name)
            rec_count += 1

    # Calculate the expected count by summing up the number of files with the desired extension
    rec_count_expected += len([f for f in files if f.endswith(g_HEAD)])

rec_file.seek(0)  # Move the file pointer to the beginning of the file
rec_file.write(str(rec_count_expected))  # Write the expected count to the beginning of the file
rec_file.close()

assert rec_count_expected == rec_count  # Assert the count equality
print(rec_count)

45152


In [6]:
load_txt = np.loadtxt('./RECORDS.txt', delimiter=',', dtype='str')
rec_count = int(load_txt[0])
rec_list = load_txt[1:]
print(rec_count,len(rec_list))

45152001 45151


In [7]:
def denoise_signal(X, dwt_transform, dlevels, cutoff_low, cutoff_high):
    coeffs = wavedec(X, dwt_transform, level=dlevels)   # wavelet transform 'bior4.4'
    # scale 0 to cutoff_low 
    for ca in range(0,cutoff_low):
        coeffs[ca]=np.multiply(coeffs[ca],[0.0])
    # scale cutoff_high to end
    for ca in range(cutoff_high, len(coeffs)):
        coeffs[ca]=np.multiply(coeffs[ca],[0.0])
    Y = pywt.waverec(coeffs, dwt_transform) # inverse wavelet transform
    return Y  

def get_median_filter_width(sampling_rate, duration):
    res = int( sampling_rate*duration )
    res += ((res%2) - 1) # needs to be an odd number
    return res
# baseline fitting by filtering
# === Define Filtering Params for Baseline fitting Leads======================
ms_flt_array = [0.2,0.6]    #<-- length of baseline fitting filters (in seconds)
mfa = np.zeros(len(ms_flt_array), dtype='int')
for i in range(0, len(ms_flt_array)):
    mfa[i] = get_median_filter_width(BASIC_SRATE,ms_flt_array[i])

def filter_signal(X):
    global mfa
    X0 = X  #read orignal signal
    for mi in range(0,len(mfa)):
        X0 = medfilt(X0,mfa[mi]) # apply median filter one by one on top of each other
    X0 = np.subtract(X,X0)  # finally subtract from orignal signal
    return X0

In [8]:
rec_count = 0 
rec_count_expected = 0  # Initialize with 0, will be calculated below
rec_name_sep = ','
rec_file = open('RECORDS.txt', 'w')
# Traverse through all subfolders and files
for root, dirs, files in os.walk(DATASET_DIR):
    for f in files:
        if f.endswith(g_HEAD):
            rec_name = os.path.splitext(f)[0]  # Extract filename without extension
            rec_file.write(rec_name_sep + rec_name)
            rec_count += 1

    # Calculate the expected count by summing up the number of files with the desired extension
    rec_count_expected += len([f for f in files if f.endswith(g_HEAD)])

rec_file.seek(0)  # Move the file pointer to the beginning of the file
rec_file.write(str(rec_count_expected))  # Write the expected count to the beginning of the file
rec_file.close()

assert rec_count_expected == rec_count  # Assert the count equality
print(rec_count)

45152


In [9]:
import os
import scipy.io as sio

# Define the directory containing the original .mat files
INPUT_DIR = "data"

# Define the directory to save the modified .mat files
OUTPUT_DIR = "denoised_data"

# Define the list of ECG leads
g_leads = ['I','II','III','aVR','aVL','aVF','V1','V2','V3','V4','V5','V6']

# Create the output directory if it does not exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Gather all relevant records
rec_list = []
for root, dirs, files in os.walk(INPUT_DIR):
    for f in files:
        if f.endswith('.mat'):
            seg_name = os.path.splitext(f)[0]
            rec_list.append((seg_name, root))

# Select each record, modify its contents, and save it
for rec_name, rec_dir in rec_list:
    # Load the original .mat file
    file_path = os.path.join(rec_dir, f"{rec_name}.mat")
    try:
        original_data = sio.loadmat(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        continue
    
    # Modify the values of each lead's signal data
    for i in range(0, 12):
        _key = g_leads[i]
        original_data['val'][i, :] = filter_signal(denoise_signal(original_data['val'][i, :], 'bior4.4', 9, 1, 7))
    
    # Define the file name and path for the modified file
    modified_file_path = os.path.join(OUTPUT_DIR, f"{rec_name}.mat")
    
    # Save the modified data to a new .mat file
    sio.savemat(modified_file_path, original_data)
    
    print(f"Modified data for record '{rec_name}' saved to '{modified_file_path}'")


Modified data for record 'JS00001' saved to 'denoised_data\JS00001.mat'
Modified data for record 'JS00002' saved to 'denoised_data\JS00002.mat'
Modified data for record 'JS00004' saved to 'denoised_data\JS00004.mat'
Modified data for record 'JS00005' saved to 'denoised_data\JS00005.mat'
Modified data for record 'JS00006' saved to 'denoised_data\JS00006.mat'
Modified data for record 'JS00007' saved to 'denoised_data\JS00007.mat'
Modified data for record 'JS00008' saved to 'denoised_data\JS00008.mat'
Modified data for record 'JS00009' saved to 'denoised_data\JS00009.mat'
Modified data for record 'JS00010' saved to 'denoised_data\JS00010.mat'
Modified data for record 'JS00011' saved to 'denoised_data\JS00011.mat'
Modified data for record 'JS00012' saved to 'denoised_data\JS00012.mat'
Modified data for record 'JS00013' saved to 'denoised_data\JS00013.mat'
Modified data for record 'JS00014' saved to 'denoised_data\JS00014.mat'
Modified data for record 'JS00015' saved to 'denoised_data\JS000

In [10]:
import os
import shutil

def copy_hea_files(data_dir, destination_dir):
    # Create the destination directory if it doesn't exist
    if not os.path.exists(destination_dir):
        os.makedirs(destination_dir)

    # Recursively search for .hea files in subdirectories of data_dir
    for root, dirs, files in os.walk(data_dir):
        for filename in files:
            if filename.endswith('.hea'):
                file_path = os.path.join(root, filename)
                # Copy the .hea file to the destination directory
                shutil.copy(file_path, destination_dir)

# Provide the directory where the data is located and the destination directory
data_dir = "data\WFDBRecords"
destination_dir = "denoised_data"

copy_hea_files(data_dir, destination_dir)