In [75]:
import pyedflib
import pandas as pd
from tqdm import tqdm

# Function to read EDF file and convert to DataFrame
def edf_to_dataframe(edf_file):
    """Read every signal of an EDF file into a DataFrame, one column per label.

    Args:
        edf_file: Path of the EDF file; passed straight to
            ``pyedflib.EdfReader``.

    Returns:
        pandas.DataFrame: One column per signal, keyed by the labels returned
        by ``getSignalLabels()``, one row per sample. If two signals share a
        label, the later one replaces the earlier one.

    Raises:
        ValueError: Raised by pandas when the signals do not all have the
            same number of samples (columns of unequal length cannot be
            combined from plain arrays).
    """
    f = pyedflib.EdfReader(edf_file)
    try:
        signal_labels = f.getSignalLabels()
        signals_data = [f.readSignal(i) for i in range(f.signals_in_file)]
    finally:
        # Release the file handle even when reading a signal fails.
        f.close()

    return pd.DataFrame(dict(zip(signal_labels, signals_data)))

# Function to transform EDF data into a new DataFrame with reshaped ECG signal
def transformEdf(df_edf, chunk_size=7680):
    """Split the 'ECG' column into fixed-length chunks, one chunk per column.

    Args:
        df_edf: DataFrame containing an 'ECG' column.
        chunk_size: Number of consecutive samples per chunk. Defaults to
            7680, the value previously hard-coded here.

    Returns:
        pandas.DataFrame: Column ``k`` holds samples
        ``[k * chunk_size, (k + 1) * chunk_size)`` of the ECG signal. When the
        signal length is not a multiple of ``chunk_size`` the last column is
        padded with NaN. An empty input gives an empty DataFrame.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Slice by position so the result does not depend on df_edf's index labels.
    ecg = df_edf['ECG'].to_numpy()

    # Collect the chunks first and build the frame once; concatenating onto a
    # growing DataFrame inside the loop re-copies all earlier chunks each time.
    chunks = {}
    for start in tqdm(range(0, len(ecg), chunk_size)):
        chunks[start // chunk_size] = pd.Series(ecg[start:start + chunk_size])

    # Series of different lengths are aligned on their index, which NaN-pads a
    # short final chunk.
    return pd.DataFrame(chunks)

# Function to read stage annotations from a text file and convert them into a DataFrame
def readStage(filename, min_duration=30):
    """Parse a comma-separated stage-annotation file into a DataFrame.

    The first non-blank line supplies the column names (whitespace-stripped).
    Blank lines are skipped. Field values are kept verbatim, including any
    spaces that follow a comma; only the line terminator is removed.

    Args:
        filename: Path of the annotation text file. It must contain a
            'Duration' column whose values parse as integers.
        min_duration: Rows whose 'Duration' is below this value are dropped.
            Defaults to 30, the value previously hard-coded here.

    Returns:
        pandas.DataFrame: The remaining annotation rows, with 'Duration' as
        integers and every other column as strings. Row labels are the
        1-based position of each row among the non-blank data lines.
    """
    with open(filename) as f:
        # Skip blank lines (e.g. a trailing newline at end of file); they would
        # otherwise become rows with a missing Duration and break the int cast.
        rows = [
            line.rstrip('\r\n').split(',')
            for line in f
            if line.strip()
        ]

    df = pd.DataFrame(rows)
    df.columns = df.iloc[0].str.strip()  # header row becomes the column names
    df = df.drop(0)  # the header row is not data
    df['Duration'] = df['Duration'].astype(int)
    return df[df['Duration'] >= min_duration]

# Function to label the transformed EDF data with stage annotations
def labeling(df_trans, df_stage):
    """Attach stage annotations to the transposed chunk table.

    Args:
        df_trans: DataFrame with one chunk per column; it is transposed so
            each chunk becomes a row.
        df_stage: DataFrame with an 'Annotation' column.

    Returns:
        pandas.DataFrame: The transposed ``df_trans`` with an extra "stage"
        column. The annotations are renumbered from 0 and joined on row label,
        so rows without a matching partner are filled with NaN.
    """
    chunks_as_rows = df_trans.transpose()
    # Use the raw values so df_stage's own index does not affect alignment.
    stage_frame = pd.Series(df_stage['Annotation'].values, name="stage").to_frame()
    return pd.concat([chunks_as_rows, stage_frame], axis=1)



# Example usage: build a stage-labelled table of ECG chunks for recording SN002.
filename = 'SN002_sleepscoring.txt'  # stage-annotation text file parsed by readStage
edf_file = "SN002.edf"  # EDF recording read by edf_to_dataframe
df_edf = edf_to_dataframe(edf_file)  # one column per signal label
df_stage = readStage(filename)  # annotation rows with Duration >= 30
df_trans = transformEdf(df_edf)  # one column per 7680-sample chunk of 'ECG'
df_done = labeling(df_trans, df_stage)  # chunks as rows plus a "stage" column
df_done.head()  # last expression in the cell, so the notebook displays the first rows

100%|██████████| 857/857 [00:14<00:00, 60.23it/s] 

           0          1          2          3          4          5  \
0 -11.389334  -8.972305  -5.749599  -8.166629  -2.600137   4.138247   
1  12.927443  16.955825  17.907988  26.257725  22.449073  13.879606   
2  45.154498  42.298009  35.339895  45.740444  41.785306  39.880980   
3  13.366903  14.465553  17.102312  12.634470  16.003662  27.063401   
4 -10.070954 -20.544747  -3.772030   0.988785  -8.752575  -6.994736   

           6          7          8          9  ...       7672       7673  \
0   0.183108   4.797436  10.217441  13.073930  ...  14.905013  13.073930   
1   9.485008  16.003662  17.615015  13.073930  ...  33.362325  29.626917   
2  30.139620  31.750973  27.649348  21.863127  ...   4.650950   8.532845   
3  16.662852  23.035019  15.271229  20.691234  ... -16.223392 -18.347448   
4   0.988785  -3.186084  -1.867704  -5.090410  ... -63.245594 -57.312886   

        7674       7675       7676       7677       7678       7679  \
0  22.668803  18.640421  24.206912  25.159075


