In [None]:
import pandas as pd
import os
import shutil

# File paths
ecg_rhythm_file = 'ECGSignal_ID_rhythm.csv'
source_folder = '../01_Database_PhysioNet'
target_folder = '../01_Database_ECGSignal_Rhythm'

# Read the record IDs listed in the 'ID' column of ECGSignal_ID_rhythm.csv
df_rhythm = pd.read_csv(ecg_rhythm_file)
rhythm_ids = df_rhythm['ID'].values
# Hash-based lookup: testing membership against the NumPy array directly
# rescans every ID once per file in the source folder.
rhythm_id_lookup = set(rhythm_ids)

# Create the target folder if it is missing. Nothing is deleted: files that
# are already in the folder are left in place.
os.makedirs(target_folder, exist_ok=True)

# Copy every .mat / .hea file whose record ID appears in the rhythm list
for filename in os.listdir(source_folder):
    if not filename.endswith(('.mat', '.hea')):
        continue  # Not a signal (.mat) or header (.hea) file

    # Record ID is everything before the first dot, e.g. JS00001.mat -> JS00001
    file_id = filename.split('.')[0]
    if file_id in rhythm_id_lookup:
        source_path = os.path.join(source_folder, filename)
        target_path = os.path.join(target_folder, filename)
        shutil.copy(source_path, target_path)

print("Process completed.")

Process completed.


In [8]:
import os
# Target folder path
target_folder = '../01_Database_ECGSignal_Rhythm'

# Count the regular files directly inside the folder (subdirectories are not counted)
with os.scandir(target_folder) as entries:
    file_count = sum(1 for entry in entries if entry.is_file())

# Take the folder name from target_folder itself so the message always names
# the folder that was actually counted.
folder_name = os.path.basename(os.path.normpath(target_folder))
print(f"Number of files in the {folder_name} folder: {file_count}")

Number of files in the 01_Database_PhysioNet_Rhythm folder: 85604


In [2]:
import matplotlib.pyplot as plt
import scipy.io
import numpy as np
import pandas as pd

# Load a single record from the rhythm folder and pull out its signal matrix
record_path = '../01_Database_ECGSignal_Rhythm/JS00001.mat'
ecg_signal = scipy.io.loadmat(record_path)

# The samples are stored under the 'val' key of the loaded file
ecg_data = ecg_signal['val']
ecg_data

array([[-254, -254, -254, ...,  -34,   24,    5],
       [ 264,  264,  264, ...,  -68,  -49,  -34],
       [ 517,  517,  517, ...,  -34,  -73,  -39],
       ...,
       [ 810,  810,  810, ..., -205, -200, -171],
       [ 810,  810,  810, ..., -200, -195, -166],
       [ 527,  527,  527, ...,  102,   93,  112]], dtype=int16)

In [3]:
# Wrap the loaded array in a DataFrame and transpose it, so each row is one
# time point and each column is one channel
ecg_df = pd.DataFrame(ecg_data).transpose()
ecg_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-254,264,517,-5,-386,390,-98,-312,-98,810,810,527
1,-254,264,517,-5,-386,390,-98,-312,-98,810,810,527
2,-254,264,517,-5,-386,390,-98,-312,-98,810,810,527
3,-254,264,517,-5,-386,390,-98,-312,-98,810,810,527
4,-264,244,508,10,-386,376,-83,-259,-63,756,756,517
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,-44,-44,0,44,-24,-24,-29,590,151,-185,-190,122
4996,-34,-63,-29,49,-5,-49,0,620,166,-181,-176,122
4997,-34,-68,-34,54,0,-54,-24,595,137,-205,-200,102
4998,24,-49,-73,15,49,-63,-15,590,132,-200,-195,93


In [4]:
# Summarise the frame: index range, per-column non-null counts and dtypes, memory usage
ecg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   0       5000 non-null   int16
 1   1       5000 non-null   int16
 2   2       5000 non-null   int16
 3   3       5000 non-null   int16
 4   4       5000 non-null   int16
 5   5       5000 non-null   int16
 6   6       5000 non-null   int16
 7   7       5000 non-null   int16
 8   8       5000 non-null   int16
 9   9       5000 non-null   int16
 10  10      5000 non-null   int16
 11  11      5000 non-null   int16
dtypes: int16(12)
memory usage: 117.3 KB


In [6]:
# ecg_df has one row per time point and one column per channel
n_samples, n_channels = ecg_df.shape

# Emit the three report lines in a single call; sep="\n" puts each on its own line
print(
    f"Shape of ECG DataFrame: {ecg_df.shape}",
    f"Number of channels: {n_channels}",
    f"Number of samples (time points): {n_samples}",
    sep="\n",
)

Shape of ECG DataFrame: (5000, 12)
Number of channels: 12
Number of samples (time points): 5000
