In [1]:
import os

In [2]:
import wfdb 
import pandas as pd
import numpy as np
import glob


# Collect every CHF signal file. glob returns matches in arbitrary order, so the
# list is sorted to make the processing order (and the row order of the metadata
# table) reproducible between runs.
dat_files = sorted(glob.glob('files/*.dat'))

meta = []  # one dict of header fields per record
for i, dat_file in enumerate(dat_files):
    # wfdb takes the record path without its extension, e.g. 'files/chf01'.
    record_path = os.path.splitext(dat_file)[0]
    # The bare record name (e.g. 'chf01') is what identifies the subject.
    recordname = os.path.basename(record_path)
    print(recordname)

    # rdsamp() returns the signal as a numpy array plus a dict of header fields
    sig, fields = wfdb.rdsamp(record_path)
    record = np.asarray(sig)

    # record_path already contains the folder, so the CSV lands next to its
    # source record; prefixing the folder a second time would produce a
    # 'files/files/...' path that does not exist.
    path = record_path + ".csv"
    np.savetxt(path, record, delimiter=",")  # Writing the CSV for each record
    print("Files done: %s/%s" % (i + 1, len(dat_files)))

    # record_id first, followed by every header field reported by wfdb
    dct = {"record_id": recordname}
    dct.update(fields)
    meta.append(dct)

df = pd.DataFrame(meta)

# viewing the dataframe
df

chf01
Files done: 1/15
chf02
Files done: 2/15
chf03
Files done: 3/15
chf04
Files done: 4/15
chf05
Files done: 5/15
chf06
Files done: 6/15
chf07
Files done: 7/15
chf08
Files done: 8/15
chf09
Files done: 9/15
chf10
Files done: 10/15
chf11
Files done: 11/15
chf12
Files done: 12/15
chf13
Files done: 13/15
chf14
Files done: 14/15
chf15
Files done: 15/15


Unnamed: 0,record_id,fs,sig_len,n_sig,base_date,base_time,units,sig_name,comments
0,chf01,250,17994491,2,,10:00:00,"[mV, mV]","[ECG1, ECG2]",[Age: 71 Sex: M NYHA class: III-IV]
1,chf02,250,17793024,2,,13:22:00,"[mV, mV]","[ECG1, ECG2]",[Age: 61 Sex: F NYHA class: III-IV]
2,chf03,250,17998848,2,,08:35:00,"[mV, mV]","[ECG1, ECG2]",[Age: 63 Sex: M NYHA class: III-IV]
3,chf04,250,17998848,2,,14:30:00,"[mV, mV]","[ECG1, ECG2]",[Age: 54 Sex: M NYHA class: III-IV]
4,chf05,250,17802240,2,,08:26:00,"[mV, mV]","[ECG1, ECG2]",[Age: 59 Sex: F NYHA class: III-IV]
5,chf06,250,17789952,2,,14:35:00,"[mV, mV]","[ECG1, ECG2]",[Age: ? Sex: M NYHA class: III-IV]
6,chf07,250,17998848,2,,12:12:00,"[mV, mV]","[ECG1, ECG2]",[Age: 48 Sex: M NYHA class: III-IV]
7,chf08,250,17998848,2,,08:19:00,"[mV, mV]","[ECG1, ECG2]",[Age: 51 Sex: M NYHA class: III-IV]
8,chf09,250,17796577,2,,08:05:00,"[mV, mV]","[ECG1, ECG2]",[Age: 63 Sex: F NYHA class: III-IV]
9,chf10,250,17995711,2,,08:17:00,"[mV, mV]","[ECG1, ECG2]",[Age: 22 Sex: M NYHA class: III-IV]


In [3]:
# We now have a dataframe of all the records and their corresponding fields.
# The comments column needs to be split into separate categories.
def convert_to_list(row):
    """Turn a record's comment strings into (age, sex, NYHA class) tuples.

    Each string in ``row`` is split on whitespace and the tokens at positions
    1, 3 and 6 are kept, which for a comment such as
    ``"Age: 71 Sex: M NYHA class: III-IV"`` are ``"71"``, ``"M"`` and
    ``"III-IV"``.

    Parameters
    ----------
    row : iterable of str
        The comment strings attached to one record.

    Returns
    -------
    list of tuple of str
        One 3-tuple per comment string.
    """
    # First pass: tokenize every comment.
    token_lists = []
    for comment in row:
        token_lists.append(comment.split())

    # Second pass: pick the value tokens that follow each label.
    parsed = []
    for tokens in token_lists:
        age, sex, nyha = tokens[1], tokens[3], tokens[6]
        parsed.append((age.strip(), sex.strip(), nyha.strip()))
    return parsed

# Parse every record's comment strings into (age, sex, NYHA class) tuples.
df["comments"] = df["comments"].apply(convert_to_list)

#creating three new columns for age,sex and NYHA class groups
# The lists are built by walking the parsed comments column itself, so they
# always contain exactly one entry per dataframe row and do not depend on any
# variable defined outside this cell.
a = [[entry[0] for entry in comments] for comments in df['comments']]
b = [[entry[1] for entry in comments] for comments in df['comments']]
c = [[entry[2] for entry in comments] for comments in df['comments']]

df['age'] = a
df['sex'] = b
df['NYHA class'] = c

df = df.drop(columns='comments')      #dropping the comments column finally

#viewing the dataframe
df

Unnamed: 0,record_id,fs,sig_len,n_sig,base_date,base_time,units,sig_name,age,sex,NYHA class
0,chf01,250,17994491,2,,10:00:00,"[mV, mV]","[ECG1, ECG2]",[71],[M],[III-IV]
1,chf02,250,17793024,2,,13:22:00,"[mV, mV]","[ECG1, ECG2]",[61],[F],[III-IV]
2,chf03,250,17998848,2,,08:35:00,"[mV, mV]","[ECG1, ECG2]",[63],[M],[III-IV]
3,chf04,250,17998848,2,,14:30:00,"[mV, mV]","[ECG1, ECG2]",[54],[M],[III-IV]
4,chf05,250,17802240,2,,08:26:00,"[mV, mV]","[ECG1, ECG2]",[59],[F],[III-IV]
5,chf06,250,17789952,2,,14:35:00,"[mV, mV]","[ECG1, ECG2]",[?],[M],[III-IV]
6,chf07,250,17998848,2,,12:12:00,"[mV, mV]","[ECG1, ECG2]",[48],[M],[III-IV]
7,chf08,250,17998848,2,,08:19:00,"[mV, mV]","[ECG1, ECG2]",[51],[M],[III-IV]
8,chf09,250,17796577,2,,08:05:00,"[mV, mV]","[ECG1, ECG2]",[63],[F],[III-IV]
9,chf10,250,17995711,2,,08:17:00,"[mV, mV]","[ECG1, ECG2]",[22],[M],[III-IV]


Now we have all the fields corresponding to each record, except for a missing value ('?') in the age column for record_id 'chf06'.

In [4]:
# Saving the dataframe of CHF record metadata into a CSV file.
# NOTE(review): index=False is not passed, so the row index is presumably
# written as an extra leading column - confirm this is intended.
df.to_csv("files/Records_Signals.csv")

In [5]:
# Collect every CHF annotation file; sorted so the processing order is
# reproducible (glob returns matches in arbitrary order).
ant_files = sorted(glob.glob('files/*.ecg'))

ann = []                      #empty list for storing fields of annotation files
for i, ant_file in enumerate(ant_files):
    # Split 'files/chf01.ecg' into the wfdb record path and its extension.
    record_path, ext = os.path.splitext(ant_file)
    recordname = os.path.basename(record_path)   # bare record name, e.g. 'chf01'
    print(recordname)

    # rdann() returns the annotations of a file; its attributes are read as a dict.
    # ext[1:] drops the leading dot, since rdann is given the bare extension.
    annts = wfdb.rdann(record_path, extension=ext[1:]).__dict__

    #per-annotation fields, written out as one CSV row per annotation
    sample = np.asarray(annts['sample'])
    symbol = np.asarray(annts['symbol'])
    subtype = np.asarray(annts['subtype'])
    chan = np.asarray(annts['chan'])
    num = np.asarray(annts['num'])
    aux_note = np.asarray(annts['aux_note'])

    # record_path already contains the folder, so the CSV is written next to
    # the annotation file; adding the folder again would yield 'files/files/...'.
    path = record_path + "_annotations.csv"
    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(path, 'w') as f:
        for x in zip(sample, symbol, subtype, chan, num, aux_note):
            f.write("{},{},{},{},{},{}\n".format(x[0], x[1], x[2], x[3], x[4], x[5]))

    #creating a dictionary to store the record-level fields
    dct = {"record_id": recordname}
    for key in ("extension", "fs", "label_store", "description",
                "custom_labels", "contained_labels", "ann_len"):
        dct[key] = annts[key]
    ann.append(dct)

    print("Files done: %s/%s" % (i + 1, len(ant_files)))

df_1 = pd.DataFrame(ann)

#saving the dataframe to a csv file
df_1.to_csv("files/Records_Annotations.csv")

#viewing the dataframe
df_1

chf01
Files done: 1/15
chf02
Files done: 2/15
chf03
Files done: 3/15
chf04
Files done: 4/15
chf05
Files done: 5/15
chf06
Files done: 6/15
chf07
Files done: 7/15
chf08
Files done: 8/15
chf09
Files done: 9/15
chf10
Files done: 10/15
chf11
Files done: 11/15
chf12
Files done: 12/15
chf13
Files done: 13/15
chf14
Files done: 14/15
chf15
Files done: 15/15


Unnamed: 0,record_id,extension,fs,label_store,description,custom_labels,contained_labels,ann_len
0,chf01,ecg,250,,,,,75548
1,chf02,ecg,250,,,,,114548
2,chf03,ecg,250,,,,,81301
3,chf04,ecg,250,,,,,112366
4,chf05,ecg,250,,,,,119153
5,chf06,ecg,250,,,,,118634
6,chf07,ecg,250,,,,,92584
7,chf08,ecg,250,,,,,90759
8,chf09,ecg,250,,,,,115052
9,chf10,ecg,250,,,,,147305


In [6]:
#We can repeat the same processes as above for the normal sinus rhythm database present in the folder 'normal'.

# Collect every normal-sinus-rhythm signal file. glob returns matches in
# arbitrary order, so the list is sorted to keep the processing order (and the
# row order of the metadata table) reproducible between runs.
dat_files = sorted(glob.glob('normal/*.dat'))

meta = []  # one dict of header fields per record
for i, dat_file in enumerate(dat_files):
    # wfdb takes the record path without its extension, e.g. 'normal/16265'.
    record_path = os.path.splitext(dat_file)[0]
    # The bare record name (e.g. '16265') is what identifies the subject.
    recordname = os.path.basename(record_path)
    print(recordname)

    # rdsamp() returns the signal as a numpy array plus a dict of header fields
    sig, fields = wfdb.rdsamp(record_path)
    record = np.asarray(sig)

    # record_path already contains the folder, so the CSV lands next to its
    # source record; prefixing the folder a second time would produce a
    # 'normal/normal/...' path that does not exist.
    path = record_path + ".csv"
    np.savetxt(path, record, delimiter=",")  # Writing the CSV for each record
    print("Files done: %s/%s" % (i + 1, len(dat_files)))

    # record_id first, followed by every header field reported by wfdb
    dct = {"record_id": recordname}
    dct.update(fields)
    meta.append(dct)

df = pd.DataFrame(meta)

# viewing the dataframe
df

16265
Files done: 1/18
16272
Files done: 2/18
16273
Files done: 3/18
16420
Files done: 4/18
16483
Files done: 5/18
16539
Files done: 6/18
16773
Files done: 7/18
16786
Files done: 8/18
16795
Files done: 9/18
17052
Files done: 10/18
17453
Files done: 11/18
18177
Files done: 12/18
18184
Files done: 13/18
19088
Files done: 14/18
19090
Files done: 15/18
19093
Files done: 16/18
19140
Files done: 17/18
19830
Files done: 18/18


Unnamed: 0,record_id,fs,sig_len,n_sig,base_date,base_time,units,sig_name,comments
0,16265,128,11730944,2,,08:04:00,"[mV, mV]","[ECG1, ECG2]",[32 M]
1,16272,128,11520000,2,,10:45:00,"[mV, mV]","[ECG1, ECG2]",[20 F]
2,16273,128,11354112,2,,08:00:00,"[mV, mV]","[ECG1, ECG2]",[28 F]
3,16420,128,11051008,2,,09:55:00,"[mV, mV]","[ECG1, ECG2]",[38 F]
4,16483,128,11960320,2,,09:54:00,"[mV, mV]","[ECG1, ECG2]",[42 M]
5,16539,128,11327488,2,,08:40:00,"[mV, mV]","[ECG1, ECG2]",[35 F]
6,16773,128,11046912,2,,09:50:00,"[mV, mV]","[ECG1, ECG2]",[26 M]
7,16786,128,11284480,2,,11:48:00,"[mV, mV]","[ECG1, ECG2]",[32 F]
8,16795,128,10866688,2,,11:15:00,"[mV, mV]","[ECG1, ECG2]",[20 F]
9,17052,128,10659840,2,,11:06:00,"[mV, mV]","[ECG1, ECG2]",[45 F]


In [8]:
# We now have a dataframe of all the records and their corresponding fields.
# The comments column needs to be split into separate categories.
def convert_to_list(row):
    """Turn a record's comment strings into (age, sex) tuples.

    Each string in ``row`` is split on whitespace and its first two tokens are
    kept, so a comment such as ``"32 M"`` becomes ``("32", "M")``.

    Parameters
    ----------
    row : iterable of str
        The comment strings attached to one record.

    Returns
    -------
    list of tuple of str
        One 2-tuple per comment string.
    """
    # Tokenize all comments up front, then keep the leading two tokens of each.
    token_lists = [comment.split() for comment in row]

    pairs = []
    for tokens in token_lists:
        pairs.append((tokens[0].strip(), tokens[1].strip()))
    return pairs

# Parse every record's comment strings into (age, sex) tuples.
df["comments"] = df["comments"].apply(convert_to_list)

#creating two new columns for age and sex.
# The lists are built by walking the parsed comments column itself, so they
# always contain exactly one entry per dataframe row and do not depend on any
# variable defined outside this cell.
a = [[entry[0] for entry in comments] for comments in df['comments']]
b = [[entry[1] for entry in comments] for comments in df['comments']]

df['age'] = a
df['sex'] = b

df = df.drop(columns='comments')      #dropping the comments column finally

#viewing the dataframe
df

Unnamed: 0,record_id,fs,sig_len,n_sig,base_date,base_time,units,sig_name,age,sex
0,16265,128,11730944,2,,08:04:00,"[mV, mV]","[ECG1, ECG2]",[32],[M]
1,16272,128,11520000,2,,10:45:00,"[mV, mV]","[ECG1, ECG2]",[20],[F]
2,16273,128,11354112,2,,08:00:00,"[mV, mV]","[ECG1, ECG2]",[28],[F]
3,16420,128,11051008,2,,09:55:00,"[mV, mV]","[ECG1, ECG2]",[38],[F]
4,16483,128,11960320,2,,09:54:00,"[mV, mV]","[ECG1, ECG2]",[42],[M]
5,16539,128,11327488,2,,08:40:00,"[mV, mV]","[ECG1, ECG2]",[35],[F]
6,16773,128,11046912,2,,09:50:00,"[mV, mV]","[ECG1, ECG2]",[26],[M]
7,16786,128,11284480,2,,11:48:00,"[mV, mV]","[ECG1, ECG2]",[32],[F]
8,16795,128,10866688,2,,11:15:00,"[mV, mV]","[ECG1, ECG2]",[20],[F]
9,17052,128,10659840,2,,11:06:00,"[mV, mV]","[ECG1, ECG2]",[45],[F]


In [11]:
# Saving the dataframe of normal-sinus-rhythm record metadata into a CSV file.
# NOTE(review): index=False is not passed, so the row index is presumably
# written as an extra leading column - confirm this is intended.
df.to_csv("normal/Records_Signals.csv")

In [10]:
# Collect every normal-sinus-rhythm annotation file; sorted so the processing
# order is reproducible (glob returns matches in arbitrary order).
ant_files = sorted(glob.glob('normal/*.atr'))

ann = []                      #empty list for storing fields of annotation files
for i, ant_file in enumerate(ant_files):
    # Split 'normal/16265.atr' into the wfdb record path and its extension.
    record_path, ext = os.path.splitext(ant_file)
    recordname = os.path.basename(record_path)   # bare record name, e.g. '16265'
    print(recordname)

    # rdann() returns the annotations of a file; its attributes are read as a dict.
    # ext[1:] drops the leading dot, since rdann is given the bare extension.
    annts = wfdb.rdann(record_path, extension=ext[1:]).__dict__

    #per-annotation fields, written out as one CSV row per annotation
    sample = np.asarray(annts['sample'])
    symbol = np.asarray(annts['symbol'])
    subtype = np.asarray(annts['subtype'])
    chan = np.asarray(annts['chan'])
    num = np.asarray(annts['num'])
    aux_note = np.asarray(annts['aux_note'])

    # record_path already contains the folder, so the CSV is written next to
    # the annotation file; adding the folder again would yield 'normal/normal/...'.
    path = record_path + "_annotations.csv"
    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(path, 'w') as f:
        for x in zip(sample, symbol, subtype, chan, num, aux_note):
            f.write("{},{},{},{},{},{}\n".format(x[0], x[1], x[2], x[3], x[4], x[5]))

    #creating a dictionary to store the record-level fields
    dct = {"record_id": recordname}
    for key in ("extension", "fs", "label_store", "description",
                "custom_labels", "contained_labels", "ann_len"):
        dct[key] = annts[key]
    ann.append(dct)

    print("Files done: %s/%s" % (i + 1, len(ant_files)))

df_1 = pd.DataFrame(ann)

#saving the dataframe to a csv file
df_1.to_csv("normal/Records_Annotations.csv")

#viewing the dataframe
df_1

16265
Files done: 1/18
16272
Files done: 2/18
16273
Files done: 3/18
16420
Files done: 4/18
16483
Files done: 5/18
16539
Files done: 6/18
16773
Files done: 7/18
16786
Files done: 8/18
16795
Files done: 9/18
17052
Files done: 10/18
17453
Files done: 11/18
18177
Files done: 12/18
18184
Files done: 13/18
19088
Files done: 14/18
19090
Files done: 15/18
19093
Files done: 16/18
19140
Files done: 17/18
19830
Files done: 18/18


Unnamed: 0,record_id,extension,fs,label_store,description,custom_labels,contained_labels,ann_len
0,16265,atr,128,,,,,100955
1,16272,atr,128,,,,,97146
2,16273,atr,128,,,,,90097
3,16420,atr,128,,,,,102436
4,16483,atr,128,,,,,104561
5,16539,atr,128,,,,,108674
6,16773,atr,128,,,,,112897
7,16786,atr,128,,,,,101739
8,16795,atr,128,,,,,87678
9,17052,atr,128,,,,,88002


We have created the corresponding CSV files for both signals and annotations for the heart failure and normal sinus rhythm databases.