#### Rhythm's .MAT and .CSV Data Separator

In [1]:
import scipy.io
import numpy as np
import pandas as pd
import os
import shutil

# File paths
ecg_rhythm_file = '../02_Diagnosis_PreProcess/DiagnosticInfo_rhythm.csv'
source_folder = '../01_Database_PhysioNet'
target_folder = '../01_Database_ECGSignal_Rhythm'

# Read the record IDs listed in the 'ID' column of DiagnosticInfo_rhythm.csv
df_rhythm = pd.read_csv(ecg_rhythm_file)
rhythm_ids = df_rhythm['ID'].values
# Hash-based lookup: testing membership directly against the NumPy array
# rescans every ID once per file found in the source folder.
rhythm_id_set = set(rhythm_ids)

# Create the target folder if it is missing; existing contents are left untouched
os.makedirs(target_folder, exist_ok=True)

# Copy the .mat and .hea files from the source folder whose ID is in the rhythm list
for filename in os.listdir(source_folder):
    if not filename.endswith(('.mat', '.hea')):
        continue  # Only signal (.mat) and header (.hea) files are of interest

    # Extract file ID from filename
    file_id = filename.split('.')[0]  # e.g., J00001.mat -> J00001

    if file_id in rhythm_id_set:  # If the file ID is in the list
        source_path = os.path.join(source_folder, filename)
        target_path = os.path.join(target_folder, filename)
        shutil.copy(source_path, target_path)  # Copy the file

print("Process completed.")

Process completed.


#### MAT to CSV Converter

In [None]:
import scipy.io
import numpy as np
import pandas as pd
import os

# Directory that holds the rhythm-subset .mat files to convert
mat_folder = '../01_Database_ECGSignal_Rhythm'

# Column names for the 12 ECG leads: I/II/III and aVR/aVL/aVF first, then V1..V6
headers = ["I", "II", "III", "aVR", "aVL", "aVF"] + [f"V{n}" for n in range(1, 7)]

# Function to convert .mat files to .csv
def mat_to_csv(mat_folder, lead_names=None):
    """Convert every ``.mat`` file in ``mat_folder`` to a CSV file beside it.

    For each ``*.mat`` file the array stored under the key ``'val'`` is
    loaded, transposed so that each lead becomes a column, and written to
    ``<same name>.csv`` in the same folder (without an index column).

    Parameters
    ----------
    mat_folder : str
        Folder that is scanned for ``.mat`` files and receives the CSV files.
    lead_names : list of str, optional
        Column names, one per row of the stored array. Defaults to the
        notebook-level ``headers`` list.

    Notes
    -----
    Files without a ``'val'`` entry, or whose array does not have exactly
    ``len(lead_names)`` rows, are reported and skipped so that one
    malformed record does not abort the whole batch.
    """
    if lead_names is None:
        lead_names = headers  # Notebook-level list of lead names

    mat_suffix = ".mat"

    # Get all .mat files in the folder
    for file_name in os.listdir(mat_folder):
        if not file_name.endswith(mat_suffix):
            continue

        mat_file_path = os.path.join(mat_folder, file_name)

        # Load the .mat file
        mat_data = scipy.io.loadmat(mat_file_path)

        # Get the ECG data; the variable name 'val' may vary depending on the .mat file
        ecg_data = mat_data.get('val')

        if ecg_data is None:
            print(f"ECGSignal not found in {file_name}.")
            continue

        # One row per lead is expected; anything else cannot be labelled with lead_names
        if ecg_data.ndim != 2 or ecg_data.shape[0] != len(lead_names):
            print(f"Skipped {file_name}: expected {len(lead_names)} leads, "
                  f"got array of shape {ecg_data.shape}.")
            continue

        ecg_data = ecg_data.T  # Transpose to (samples, leads)

        # Swap only the trailing extension; str.replace would also rewrite
        # any other ".mat" occurrence inside the file name.
        csv_name = file_name[:-len(mat_suffix)] + ".csv"
        output_csv = os.path.join(mat_folder, csv_name)

        # Save as a CSV file
        df = pd.DataFrame(ecg_data, columns=lead_names)
        df.to_csv(output_csv, index=False)
        print(f"Converted {file_name} to CSV.")
                
# Run the conversion for every .mat file in mat_folder; the CSV files are written into that same folder
mat_to_csv(mat_folder)

Converted JS33748.mat to CSV.
Converted JS13498.mat to CSV.
Converted JS34027.mat to CSV.
Converted JS22645.mat to CSV.
Converted JS33990.mat to CSV.
Converted JS35339.mat to CSV.
Converted JS02595.mat to CSV.
Converted JS32456.mat to CSV.
Converted JS03853.mat to CSV.
Converted JS12786.mat to CSV.
Converted JS26423.mat to CSV.
Converted JS29710.mat to CSV.
Converted JS17826.mat to CSV.
Converted JS08906.mat to CSV.
Converted JS36630.mat to CSV.
Converted JS39503.mat to CSV.
Converted JS00382.mat to CSV.
Converted JS20052.mat to CSV.
Converted JS10191.mat to CSV.
Converted JS30241.mat to CSV.
Converted JS01922.mat to CSV.
Converted JS28368.mat to CSV.
Converted JS30527.mat to CSV.
Converted JS37248.mat to CSV.
Converted JS20734.mat to CSV.
Converted JS17198.mat to CSV.
Converted JS36156.mat to CSV.
Converted JS16286.mat to CSV.
Converted JS39265.mat to CSV.
Converted JS26345.mat to CSV.
Converted JS31639.mat to CSV.
Converted JS06095.mat to CSV.
Converted JS29076.mat to CSV.
Converted 

In [4]:
import os
# Target folder path
target_folder = '../01_Database_ECGSignal_Rhythm'

# Count the regular files in the folder (sub-directories are not counted)
with os.scandir(target_folder) as entries:
    file_count = sum(1 for entry in entries if entry.is_file())

# Take the folder name from the path so the message always names the folder that was counted
folder_name = os.path.basename(os.path.normpath(target_folder))
print(f"Number of files in the {folder_name} folder: {file_count}")

Number of files in the 01_Database_PhysioNet_Rhythm folder: 128406


In [7]:
import matplotlib.pyplot as plt
import scipy.io
import numpy as np
import pandas as pd

# Load one converted record to inspect the CSV layout produced by the converter
sample_csv_path = '../01_Database_ECGSignal_Rhythm/JS00005.csv'
df_raw = pd.read_csv(sample_csv_path)

# Bare last expression so the notebook renders the frame as a table
df_raw

Unnamed: 0,I,II,III,aVR,aVL,aVF,V1,V2,V3,V4,V5,V6
0,5,-146,-151,73,78,-151,-273,-386,-381,-356,-220,-176
1,5,-127,-132,63,68,-132,-229,-332,-342,-317,-190,-161
2,-15,-127,-112,73,49,-122,-205,-288,-303,-288,-166,-146
3,-5,-127,-122,68,59,-127,-200,-264,-298,-298,-190,-161
4,-5,-127,-122,68,59,-127,-181,-244,-278,-278,-181,-151
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,-44,-210,-166,127,59,-190,-156,-361,-337,-249,-195,-122
4996,-24,-220,-195,122,83,-210,-146,-342,-327,-239,-195,-122
4997,5,-195,-200,98,102,-200,-146,-337,-327,-229,-195,-137
4998,-5,-161,-156,83,73,-161,-117,-298,-293,-190,-166,-107


In [8]:
# Summarize the loaded record: index range, column names, non-null counts, dtypes and memory usage
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   I       5000 non-null   int64
 1   II      5000 non-null   int64
 2   III     5000 non-null   int64
 3   aVR     5000 non-null   int64
 4   aVL     5000 non-null   int64
 5   aVF     5000 non-null   int64
 6   V1      5000 non-null   int64
 7   V2      5000 non-null   int64
 8   V3      5000 non-null   int64
 9   V4      5000 non-null   int64
 10  V5      5000 non-null   int64
 11  V6      5000 non-null   int64
dtypes: int64(12)
memory usage: 468.9 KB


In [9]:
# Unpack (rows, columns): rows are the time samples, columns are the ECG leads
n_samples, n_channels = df_raw.shape

print(f"Shape of ECG DataFrame: {df_raw.shape}")
print(f"Number of channels: {n_channels}")
print(f"Number of samples (time points): {n_samples}")

Shape of ECG DataFrame: (5000, 12)
Number of channels: 12
Number of samples (time points): 5000
