In [1]:
#import standard python packages for processing texts
import os, sys, re, glob
import pandas as pd

#import custom scripts for processing LVEF from texts from 
#Wagholikar KB, Fischer CM, Goodson A, Herrick CD, Rees M, 
#Toscano E, MacRae CA, Scirica BM, Desai AS, Murphy SN. 
#Extraction of Ejection Fraction from Echocardiography Notes for 
#Constructing a Cohort of Patients having Heart Failure 
#with reduced Ejection Fraction (HFrEF). 
#J Med Syst. 2018 Sep 25;42(11):209. 
#doi: 10.1007/s10916-018-1066-7. PMID: 30255347; PMCID: PMC6153777.

#https://github.com/waghsk/lvef-paper

from lvef_paper_master.echoNonTabularEF import echoNonTabularEF 
from lvef_paper_master.getSectionHeaders import isLineAHeader, getParas, getEFParaHeads
from lvef_paper_master.isReducedEF import efDataType, isReducedEF

In [2]:
#get txts from directory
txts=glob.glob(os.getcwd()+"/HF_case_disch/*.txt")
txts

['/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/51157_119216_4252_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/21280_118318_5327_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/22289_117597_2421_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/24588_176009_3150_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/16278_116342_2246_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/11043_171810_1714_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LVEF_ECHO_implementation_with_python/HF_case_disch/30543_105372_369_Discharge summary_HF_.txt',
 '/Users/godefroychery/Desktop/LVEF_NLP/LV

In [3]:
#get headers from csv
headerArr=pd.read_csv(os.getcwd()+"/lvef_paper_master/headers.csv", header=0)
headerArr

Unnamed: 0,Result Report
0,Patient Name:
1,Ordering Prov:
2,Primary Care Physician:
3,Reason For Exam:
4,Result Status:
...,...
63,Left Atrium - Post
64,Pulmonic Valve - Post
65,Interatrial Septum - Post
66,Pericardium - Post


In [5]:
#extract the EFs from texts

with open(os.getcwd()+"/HF_case_disch_EF_extractions_NLP.txt", "w") as file_out:
    
    file_out.write(f"File name\tLVEF string\tLVEF_value\tPat note\n")
    
    for txt_file in txts:
        with open(txt_file, "r") as txt:

            path, name = os.path.split(txt_file)

            txt=txt.read()
            LVEF_str, LVEF_value, pat_type = echoNonTabularEF(txt)
            txt=txt.replace("\n", " ")
            txt=txt.replace("\t", " ")
            
            file_out.write(f"{name}\t{LVEF_str}\t{LVEF_value}\t{txt}\n")