In [1]:
import os 
import pandas as pd 

import warnings
# Silence every warning raised while this notebook runs.
# NOTE(review): this blanket filter also hides pandas warnings such as
# SettingWithCopyWarning and FutureWarning — consider narrowing it to the
# specific categories that are known to be noise.
warnings.filterwarnings("ignore")

In [2]:
# Directory holding the CSV exports this notebook reads.
folder = '../sepsis3_queries/data'
sepsis3_csv = f'{folder}/sepsis-df-3.csv'
text_csv = f'{folder}/text-df-3.csv'

# sepsis3: one table of ICU-stay level columns; text: one row per clinical note.
sepsis3 = pd.read_csv(sepsis3_csv)
text = pd.read_csv(text_csv)

In [3]:
# Preview the first rows of the table loaded from sepsis-df-3.csv.
sepsis3.head()

Unnamed: 0,icustay_id,hadm_id,excluded,intime,outtime,dbsource,suspected_infection_time_poe,suspected_infection_time_poe_days,specimen_poe,positiveculture_poe,...,qsofa,qsofa_sysbp_score,qsofa_gcs_score,qsofa_resprate_score,composite_outcome,blood culture,suspicion_poe,abx_poe,sepsis-3,sofa>=2
0,200061,121149,0,2134-01-23 16:38:46,2134-01-25 16:59:14,metavision,2134-01-24 00:30:00,-0.327245,BLOOD CULTURE,0.0,...,1,0.0,0.0,1.0,0,True,True,True,1,1
1,200075,132255,0,2159-09-23 00:13:20,2159-09-25 01:55:17,metavision,2159-09-23 03:33:00,-0.138657,MRSA SCREEN,0.0,...,2,1.0,0.0,1.0,0,True,True,True,1,1
2,200087,124231,0,2196-08-30 11:19:49,2196-09-03 17:40:09,metavision,2196-08-30 12:08:00,-0.033461,MRSA SCREEN,0.0,...,2,1.0,0.0,1.0,1,True,True,True,1,1
3,200116,164386,0,2198-03-19 20:16:11,2198-03-20 18:28:07,metavision,2198-03-19 16:30:00,0.157072,BLOOD CULTURE,1.0,...,2,1.0,0.0,1.0,0,True,True,True,1,1
4,200131,187834,0,2176-10-30 12:05:18,2176-11-07 17:34:37,metavision,2176-10-30 20:51:00,-0.365069,URINE,1.0,...,2,0.0,1.0,1.0,1,True,True,True,1,1


In [4]:
# Preview the first rows of the notes table loaded from text-df-3.csv.
text.head()

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,storetime,category,description,cgid,iserror,text
0,1072532,54610,100003.0,2150-04-17,2150-04-17 15:32:00,,Radiology,LIVER OR GALLBLADDER US (SINGLE ORGAN),,,[**2150-4-17**] 3:32 PM\n LIVER OR GALLBLADDER...
1,567608,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 06:30:16,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."
2,567670,54610,100003.0,2150-04-18,2150-04-18 11:53:00,2150-04-18 16:25:04,Nursing,Nursing Transfer Note,17693.0,,"59 year old male with HCV-related cirrhosis, g..."
3,567697,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 18:40:56,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."
4,567591,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 05:30:44,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."


CHARTDATE records the date at which the note was charted. CHARTDATE will always have a time value of 00:00:00.

CHARTTIME records the date and time at which the note was charted. If both CHARTDATE and CHARTTIME exist, then the date portions will be identical. All records have a CHARTDATE. A subset are missing CHARTTIME. More specifically, notes with a CATEGORY value of ‘Discharge Summary’, ‘ECG’, and ‘Echo’ never have a CHARTTIME, only CHARTDATE. Other categories almost always have both CHARTTIME and CHARTDATE, but there is a small amount of missing data for CHARTTIME (usually less than 0.5% of the total number of notes for that category).

STORETIME records the date and time at which a note was saved into the system. Notes with a CATEGORY value of ‘Discharge Summary’, ‘ECG’, ‘Radiology’, and ‘Echo’ never have a STORETIME. All other notes have a STORETIME.

A ‘1’ in the ISERROR column indicates that a physician has identified this note as an error.



## Exclude errors

In [5]:
# Count the values of the ISERROR flag; value_counts() skips missing values
# by default, so only explicitly flagged notes show up here.
text.iserror.value_counts()

1.0    294
Name: iserror, dtype: int64

In [6]:
# Exclude notes flagged as errors (iserror == 1); notes with a missing flag are kept.
# .copy() makes `text` an independent frame, so the column assignments done in
# later cells do not write into a slice of the unfiltered data (the source of
# pandas' SettingWithCopyWarning).
text = text[text.iserror != 1].copy()

## 24 hours in the ICU

In [7]:
# Preview the notes table again after the error-flagged rows were removed.
text.head()

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,storetime,category,description,cgid,iserror,text
0,1072532,54610,100003.0,2150-04-17,2150-04-17 15:32:00,,Radiology,LIVER OR GALLBLADDER US (SINGLE ORGAN),,,[**2150-4-17**] 3:32 PM\n LIVER OR GALLBLADDER...
1,567608,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 06:30:16,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."
2,567670,54610,100003.0,2150-04-18,2150-04-18 11:53:00,2150-04-18 16:25:04,Nursing,Nursing Transfer Note,17693.0,,"59 year old male with HCV-related cirrhosis, g..."
3,567697,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 18:40:56,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."
4,567591,54610,100003.0,2150-04-18,2150-04-18 05:30:00,2150-04-18 05:30:44,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g..."


In [8]:
# Bucket every note into whole hours since ICU admission (hours_in),
# using the note's charttime.

KEYS = ['hadm_id']

# Parse the timestamp columns into datetimes so they can be subtracted.
text[['charttime', 'chartdate']] = text[['charttime', 'chartdate']].apply(pd.to_datetime)
sepsis3[['intime', 'outtime']] = sepsis3[['intime', 'outtime']].apply(pd.to_datetime)

print("Hourly buckets")

# Attach the ICU stay window (intime/outtime) and icustay_id to every note by
# joining on the admission id. sepsis3 stays indexed by KEYS afterwards; the
# later join of the outcome columns relies on that index.
sepsis3 = sepsis3.set_index(KEYS)
text = text.set_index(KEYS).join(sepsis3[['intime', 'outtime', 'icustay_id']])

# Whole hours elapsed between ICU admission and the note, computed column-wise.
elapsed = text['charttime'] - text['intime']
hours_in = elapsed.dt.days * 24 + elapsed.dt.seconds // 3600

# Notes without a charttime produce a missing value here. They are recorded as
# hour 0 explicitly (the same value the notebook has always stored for them),
# and notes charted before ICU admission are clamped to hour 0 as well.
text['hours_in'] = hours_in.fillna(0).clip(lower=0).astype('int64')

Hourly buckets


In [9]:
# Preview the notes table with the joined ICU times and the new hours_in column.
text.head()

Unnamed: 0_level_0,row_id,subject_id,chartdate,charttime,storetime,category,description,cgid,iserror,text,intime,outtime,icustay_id,hours_in
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
100003.0,1072532,54610,2150-04-17,2150-04-17 15:32:00,,Radiology,LIVER OR GALLBLADDER US (SINGLE ORGAN),,,[**2150-4-17**] 3:32 PM\n LIVER OR GALLBLADDER...,2150-04-17 15:35:42,2150-04-19 14:12:52,209281,0
100003.0,567608,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 06:30:16,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,209281,13
100003.0,567670,54610,2150-04-18,2150-04-18 11:53:00,2150-04-18 16:25:04,Nursing,Nursing Transfer Note,17693.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,209281,20
100003.0,567697,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 18:40:56,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,209281,13
100003.0,567591,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 05:30:44,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,209281,13


In [10]:
# Largest hours_in value observed for each patient.
hours_by_subject = text.groupby('subject_id')['hours_in']
hours_by_subject.max()

subject_id
165        0
266        0
671      205
5771     291
7310      12
        ... 
99955     61
99957    244
99966     33
99973     61
99982    217
Name: hours_in, Length: 5658, dtype: int64

In [11]:
# Inspect every note of a single patient (subject_id 671) to sanity-check hours_in.
text[text.subject_id == 671]

Unnamed: 0_level_0,row_id,subject_id,chartdate,charttime,storetime,category,description,cgid,iserror,text,intime,outtime,icustay_id,hours_in
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
126769.0,365239,671,2196-01-01,2196-01-01 15:20:00,2196-01-01 15:20:57,Nutrition,Clinical Nutrition Note,19713.0,,Pt screened per ICU protocol. Patient has been...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,34
126769.0,365137,671,2196-01-01,2196-01-01 06:12:00,2196-01-01 06:12:24,Physician,Intensivist Note,15327.0,,TSICU\n HPI:\n HPI: 47 yo man with h/o B c...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,25
126769.0,365114,671,2196-01-01,2196-01-01 04:10:00,2196-01-01 04:11:39,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,23
126769.0,365111,671,2196-01-01,2196-01-01 03:25:00,2196-01-01 03:25:57,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,22
126769.0,365101,671,2195-12-31,2195-12-31 22:41:00,2195-12-31 22:41:22,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126769.0,3575,671,2196-01-11,NaT,,Discharge summary,Report,,,Admission Date: [**2195-12-31**] ...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0
126769.0,63271,671,2196-01-11,NaT,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Endocar...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0
126769.0,127919,671,2195-12-31,NaT,,ECG,Report,,,Normal sinus rhythm. Axis is 0 degrees. J po...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0
126769.0,365834,671,2196-01-05,2196-01-05 05:17:00,2196-01-05 05:17:27,Nursing,Nursing Progress Note,18887.0,,"TITLE:\n Diabetes Mellitus (DM), Type I\n ...",2195-12-31 04:45:35,2196-01-07 17:35:50,246119,120


We can see that `hours_in` holds the correct number of hours whenever the note has a `charttime`.
If the `charttime` is NaT, `hours_in` is recorded as 0, no matter how many days or hours the patient had already spent in the ICU.
So I will also bucket the notes by `chartdate` and create an additional day-level feature called `days_in`.
Hopefully, this will be good for filtering them.

In [12]:
import pandas as pd


# days_in: number of calendar days between the ICU admission date and the
# note's chartdate. Both timestamps are truncated to midnight first, so only
# the dates are compared; notes dated before the admission date are clamped
# to day 0.
# Computed column-wise rather than with a row-by-row DataFrame.apply. A missing
# chartdate or intime would give a missing days_in instead of raising.
calendar_gap = text['chartdate'].dt.normalize() - text['intime'].dt.normalize()
text['days_in'] = calendar_gap.dt.days.clip(lower=0)



In [13]:
# Re-inspect the same patient (subject_id 671) to check the new days_in column,
# in particular for the notes whose charttime is NaT.
text[text.subject_id == 671]

Unnamed: 0_level_0,row_id,subject_id,chartdate,charttime,storetime,category,description,cgid,iserror,text,intime,outtime,icustay_id,hours_in,days_in
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
126769.0,365239,671,2196-01-01,2196-01-01 15:20:00,2196-01-01 15:20:57,Nutrition,Clinical Nutrition Note,19713.0,,Pt screened per ICU protocol. Patient has been...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,34,1
126769.0,365137,671,2196-01-01,2196-01-01 06:12:00,2196-01-01 06:12:24,Physician,Intensivist Note,15327.0,,TSICU\n HPI:\n HPI: 47 yo man with h/o B c...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,25,1
126769.0,365114,671,2196-01-01,2196-01-01 04:10:00,2196-01-01 04:11:39,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,23,1
126769.0,365111,671,2196-01-01,2196-01-01 03:25:00,2196-01-01 03:25:57,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,22,1
126769.0,365101,671,2195-12-31,2195-12-31 22:41:00,2195-12-31 22:41:22,Nursing,Nursing Progress Note,18887.0,,TITLE:\n HPI: 47 yo man with h/o B cell Lymp...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,17,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126769.0,3575,671,2196-01-11,NaT,,Discharge summary,Report,,,Admission Date: [**2195-12-31**] ...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0,11
126769.0,63271,671,2196-01-11,NaT,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Endocar...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0,11
126769.0,127919,671,2195-12-31,NaT,,ECG,Report,,,Normal sinus rhythm. Axis is 0 degrees. J po...,2195-12-31 04:45:35,2196-01-07 17:35:50,246119,0,0
126769.0,365834,671,2196-01-05,2196-01-05 05:17:00,2196-01-05 05:17:27,Nursing,Nursing Progress Note,18887.0,,"TITLE:\n Diabetes Mellitus (DM), Type I\n ...",2195-12-31 04:45:35,2196-01-07 17:35:50,246119,120,5


## Save

In [14]:
# List the columns available in sepsis3 before picking which ones to join onto the notes.
sepsis3.columns

Index(['icustay_id', 'excluded', 'intime', 'outtime', 'dbsource',
       'suspected_infection_time_poe', 'suspected_infection_time_poe_days',
       'specimen_poe', 'positiveculture_poe', 'antibiotic_time_poe',
       'blood_culture_time', 'blood_culture_positive', 'age', 'gender',
       'is_male', 'ethnicity', 'race_white', 'race_black', 'race_hispanic',
       'race_other', 'metastatic_cancer', 'diabetes', 'height', 'weight',
       'bmi', 'first_service', 'hospital_expire_flag', 'thirtyday_expire_flag',
       'icu_los', 'hosp_los', 'mort_icu', 'mort_hosp', 'diagnosis',
       'sepsis_angus', 'sepsis_martin', 'sepsis_explicit',
       'septic_shock_explicit', 'severe_sepsis_explicit', 'sepsis_nqf',
       'sepsis_cdc', 'sepsis_cdc_simple', 'elixhauser_hospital', 'vent',
       'sofa', 'lods', 'sirs', 'qsofa', 'qsofa_sysbp_score', 'qsofa_gcs_score',
       'qsofa_resprate_score', 'composite_outcome', 'blood culture',
       'suspicion_poe', 'abx_poe', 'sepsis-3', 'sofa>=2'],
      d

In [15]:
# Columns copied from sepsis3 onto every note row.
extra_cols = [
    'hospital_expire_flag', 'thirtyday_expire_flag',
    'icu_los', 'hosp_los',
    'mort_icu', 'mort_hosp',
    'sepsis_angus',
]
# Index-on-index join: both frames are indexed by hadm_id at this point.
text = text.join(sepsis3[extra_cols])




In [16]:
# Preview the notes table after adding the columns joined from sepsis3.
text.head()

Unnamed: 0_level_0,row_id,subject_id,chartdate,charttime,storetime,category,description,cgid,iserror,text,...,icustay_id,hours_in,days_in,hospital_expire_flag,thirtyday_expire_flag,icu_los,hosp_los,mort_icu,mort_hosp,sepsis_angus
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100003.0,1072532,54610,2150-04-17,2150-04-17 15:32:00,,Radiology,LIVER OR GALLBLADDER US (SINGLE ORGAN),,,[**2150-4-17**] 3:32 PM\n LIVER OR GALLBLADDER...,...,209281,0,0,0,0,1.9425,4.080556,0,0,0
100003.0,567608,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 06:30:16,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",...,209281,13,1,0,0,1.9425,4.080556,0,0,0
100003.0,567670,54610,2150-04-18,2150-04-18 11:53:00,2150-04-18 16:25:04,Nursing,Nursing Transfer Note,17693.0,,"59 year old male with HCV-related cirrhosis, g...",...,209281,20,1,0,0,1.9425,4.080556,0,0,0
100003.0,567697,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 18:40:56,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",...,209281,13,1,0,0,1.9425,4.080556,0,0,0
100003.0,567591,54610,2150-04-18,2150-04-18 05:30:00,2150-04-18 05:30:44,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",...,209281,13,1,0,0,1.9425,4.080556,0,0,0


In [17]:
# Remove the row_id column from the notes table.
text = text.drop(columns=['row_id'])
# Number of note rows for each sepsis_angus value.
text['sepsis_angus'].value_counts()

1    156761
0     77452
Name: sepsis_angus, dtype: int64

In [18]:
KEYS_ALL = ['subject_id', 'icustay_id', 'hadm_id', 'days_in']

# Move hadm_id from the index back into a regular column and store it as an
# integer (it is a float while it serves as the join index).
text = text.reset_index().astype({'hadm_id': int})

# Display-only view keyed by KEYS_ALL: the result is deliberately not assigned,
# so `text` keeps all key columns as ordinary columns for the cells below.
text.set_index(KEYS_ALL)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,chartdate,charttime,storetime,category,description,cgid,iserror,text,intime,outtime,hours_in,hospital_expire_flag,thirtyday_expire_flag,icu_los,hosp_los,mort_icu,mort_hosp,sepsis_angus
subject_id,icustay_id,hadm_id,days_in,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
54610,209281,100003,0,2150-04-17,2150-04-17 15:32:00,,Radiology,LIVER OR GALLBLADDER US (SINGLE ORGAN),,,[**2150-4-17**] 3:32 PM\n LIVER OR GALLBLADDER...,2150-04-17 15:35:42,2150-04-19 14:12:52,0,0,0,1.9425,4.080556,0,0,0
54610,209281,100003,1,2150-04-18,2150-04-18 05:30:00,2150-04-18 06:30:16,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,13,0,0,1.9425,4.080556,0,0,0
54610,209281,100003,1,2150-04-18,2150-04-18 11:53:00,2150-04-18 16:25:04,Nursing,Nursing Transfer Note,17693.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,20,0,0,1.9425,4.080556,0,0,0
54610,209281,100003,1,2150-04-18,2150-04-18 05:30:00,2150-04-18 18:40:56,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,13,0,0,1.9425,4.080556,0,0,0
54610,209281,100003,1,2150-04-18,2150-04-18 05:30:00,2150-04-18 05:30:44,Nursing,Nursing Progress Note,16797.0,,"59 year old male with HCV-related cirrhosis, g...",2150-04-17 15:35:42,2150-04-19 14:12:52,13,0,0,1.9425,4.080556,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55617,240913,199984,2,2191-06-10,NaT,,ECG,Report,,,Normal sinus rhythm. T wave inversion in leads...,2191-06-08 17:19:35,2191-06-10 18:29:57,0,0,0,2.0489,2.861806,0,0,0
55617,240913,199984,0,2191-06-08,NaT,,ECG,Report,,,Sinus rhythm. Anterolateral lead ST-T wave abn...,2191-06-08 17:19:35,2191-06-10 18:29:57,0,0,0,2.0489,2.861806,0,0,0
94911,294585,199992,3,2155-02-14,NaT,,Discharge summary,Report,,,Admission Date: [**2155-2-11**] ...,2155-02-11 17:13:23,2155-02-12 15:29:40,0,0,0,0.9280,2.891667,0,0,1
94911,294585,199992,2,2155-02-13,NaT,,ECG,Report,,,Artifact is present. Sinus rhythm. Late tracin...,2155-02-11 17:13:23,2155-02-12 15:29:40,0,0,0,0.9280,2.891667,0,0,1


In [19]:
# Spot-check the rows of a single ICU stay (icustay_id 240913); all of its
# notes shown here have no charttime, so only days_in distinguishes them.
text[text.icustay_id == 240913]

Unnamed: 0,hadm_id,subject_id,chartdate,charttime,storetime,category,description,cgid,iserror,text,...,icustay_id,hours_in,days_in,hospital_expire_flag,thirtyday_expire_flag,icu_los,hosp_los,mort_icu,mort_hosp,sepsis_angus
234206,199984,55617,2191-06-11,NaT,,Discharge summary,Report,,,Admission Date: [**2191-6-8**] D...,...,240913,0,3,0,0,2.0489,2.861806,0,0,0
234207,199984,55617,2191-06-09,NaT,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Left ve...,...,240913,0,1,0,0,2.0489,2.861806,0,0,0
234208,199984,55617,2191-06-10,NaT,,ECG,Report,,,Normal sinus rhythm. T wave inversion in leads...,...,240913,0,2,0,0,2.0489,2.861806,0,0,0
234209,199984,55617,2191-06-08,NaT,,ECG,Report,,,Sinus rhythm. Anterolateral lead ST-T wave abn...,...,240913,0,0,0,0,2.0489,2.861806,0,0,0


In [21]:
# Write the final notes table to the stage-1 output directory.
folder = 'data_stage_1'
# Create the directory first: to_csv does not create missing parent folders
# and would fail on a fresh checkout.
os.makedirs(folder, exist_ok=True)
# index=False: the positional index carries no information, all keys are columns.
text.to_csv(os.path.join(folder, 'text.csv'), sep=',', index=False)
