### 12-Lead ECG Signal Information
Outputs:
* ID
* Filename
* LeadCount
* Frequency
* SampleSize
* Unit
* Leads


#### All 12-Lead ECG Signal

In [None]:
import wfdb
import pandas as pd
import os
import numpy as np

def extract_signal_info(file_path):
    """Summarise one WFDB record as a single-row DataFrame.

    Args:
        file_path: Record path handed to ``wfdb.rdrecord``.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``ID``, ``Filename``,
        ``LeadCount``, ``Frequency``, ``SampleSize``, ``Unit`` and ``Leads``.
        If anything fails while reading or summarising the record, the error
        is printed (not raised) and an empty DataFrame is returned instead.
    """
    try:
        rec = wfdb.rdrecord(file_path)

        # Build the row field by field, straight from the record attributes.
        row = {'ID': rec.record_name}
        # Only the first entry of file_name / units is kept; NaN marks an
        # empty or missing list.
        row['Filename'] = rec.file_name[0] if rec.file_name else np.nan
        row['LeadCount'] = rec.n_sig
        row['Frequency'] = rec.fs
        row['SampleSize'] = rec.sig_len
        row['Unit'] = rec.units[0] if rec.units else np.nan
        # A tuple keeps the full lead list in a single cell.
        row['Leads'] = tuple(rec.sig_name)

        return pd.DataFrame([row])

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return pd.DataFrame()

# Function to process all signal info files in a directory
def process_signal_info_files(directory_path, output_folder):
    """Collect signal metadata for every WFDB header in a folder into one CSV.

    Each ``*.hea`` file found directly inside ``directory_path`` is passed,
    without its extension, to ``extract_signal_info``. The non-empty results
    are concatenated, sorted by ``ID`` and written to ``SignalInfo_all.csv``
    inside ``output_folder``.

    Args:
        directory_path: Folder that contains the ``.hea`` header files.
        output_folder: Folder that receives the CSV; created if it is missing.

    Returns:
        None. The outcome is reported with ``print``.
    """
    header_ext = '.hea'

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_folder, exist_ok=True)

    signal_info_all = []

    for filename in os.listdir(directory_path):
        if not filename.endswith(header_ext):
            continue

        # Strip only the trailing extension: str.replace would also delete a
        # '.hea' that appears earlier in the name (e.g. 'a.heart.hea').
        record_name = filename[:-len(header_ext)]
        file_path = os.path.join(directory_path, record_name)

        signal_info_df = extract_signal_info(file_path)

        # Records that failed to load come back as empty frames; skip them.
        if not signal_info_df.empty:
            signal_info_all.append(signal_info_df)

    if not signal_info_all:
        print("No signal info data to process.")
        return

    # One row per record, ordered by record ID.
    all_signal_info = pd.concat(signal_info_all, ignore_index=True)
    all_signal_info.sort_values(by='ID', ascending=True, inplace=True)

    signal_info_path = os.path.join(output_folder, 'SignalInfo_all.csv')
    all_signal_info.to_csv(signal_info_path, index=False)

    print(f"SignalInfo.csv has been saved: {signal_info_path}")

# Input and output locations for the batch run below.
# directory_path is relative, so it is resolved against the current working directory.
directory_path = "../01_Database_PyhsioNet"  # Directory where .hea files are located
# NOTE(review): output_folder is an absolute, user-specific path - adjust it before running on another machine.
output_folder = "/Users/dogukankorkut/Library/CloudStorage/OneDrive-ozyegin.edu.tr/Ozyegin_MSc_Thesis/04_Technical_Works/ECG_Datasets/01_Ningbo/02_ECGSignal_Preprocess"  # Folder to save output files

# Read every .hea record in directory_path and write SignalInfo_all.csv into output_folder
process_signal_info_files(directory_path, output_folder)

SignalInfo.csv has been saved: /Users/dogukankorkut/Library/CloudStorage/OneDrive-ozyegin.edu.tr/Ozyegin_MSc_Thesis/04_Technical_Works/ECG_Datasets/01_Ningbo/02_ECGSignal_Preprocess/SignalInfo_all.csv


### Exploratory Data Analysis

In [None]:
import pandas as pd

# Load the signal-info table from SignalInfo_all.csv.
# NOTE(review): the path is relative, so it is resolved against the current
# working directory - confirm that is where the CSV was written.
file_path = 'SignalInfo_all.csv'

try:
    # Read the SignalInfo.csv file into a DataFrame
    signal_info_df = pd.read_csv(file_path)

except FileNotFoundError:
    print(f"Error: The file {file_path} was not found. Please check the path and try again.")
    # Re-raise after the hint: continuing would either fail below with a
    # misleading NameError or silently reuse a stale signal_info_df.
    raise

# Preview the first rows of the table
signal_info_df.head()

Unnamed: 0,ID,Filename,LeadCount,Frequency,SampleSize,Unit,Leads
0,JS00001,JS00001.mat,12,500,5000,mV,"('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ..."
1,JS00002,JS00002.mat,12,500,5000,mV,"('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ..."
2,JS00004,JS00004.mat,12,500,5000,mV,"('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ..."
3,JS00005,JS00005.mat,12,500,5000,mV,"('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ..."
4,JS00006,JS00006.mat,12,500,5000,mV,"('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', ..."


In [None]:
# Shape of dataset: prints the (rows, columns) tuple of signal_info_df
print("Shape of Dataset:", signal_info_df.shape)

Shape of Dataset: (45152, 7)


In [4]:
# Build a one-column table ("Type") holding the dtype of every column
column_types = signal_info_df.dtypes.to_frame(name="Type")

column_types

Unnamed: 0,Type
ID,object
Filename,object
LeadCount,int64
Frequency,int64
SampleSize,int64
Unit,object
Leads,object


In [5]:
# Count the missing (null) entries in each column; the comprehension avoids
# leaving loop scratch variables behind in the notebook namespace.
isnull_number = [signal_info_df[col].isnull().sum() for col in signal_info_df.columns]

pd.DataFrame(isnull_number, index=signal_info_df.columns, columns=["Total Missing Values"])

Unnamed: 0,Total Missing Values
ID,0
Filename,0
LeadCount,0
Frequency,0
SampleSize,0
Unit,0
Leads,0


In [6]:
# Count the distinct non-null values in each column. nunique() gives the same
# number as value_counts().count() without building a frequency table first.
unique_number = [signal_info_df[col].nunique() for col in signal_info_df.columns]

pd.DataFrame(unique_number, index=signal_info_df.columns, columns=["Total Unique Values"])

Unnamed: 0,Total Unique Values
ID,45152
Filename,45152
LeadCount,1
Frequency,1
SampleSize,1
Unit,1
Leads,1
