In [None]:
#################################################################################################################
## Prototype for reading hl7, parsing required fields, analyzing unstructured reports to create a 
## summary document for coders to utilize and assist with claims processing
#
## Basic components are: HL7 input and parsing, data prep, analysis, and summary report generation.
## Assumptions: Using HL7 v2.3 ORU sample from internet site as a basis format for fictitious patient encounters
##              Sample input file: RADORUSample.hl7
##              Also using RADIOLOGY CPT codes for various radiology modalities from an internet site.
#################################################################################################################

In [13]:
#### Dependencies
## Libraries
import csv

import pandas as pd

## Proprietary Dictionaries and Functions
from sam_2018_cpt_rad import * # 2018 RAD CPT codes and search functions

In [14]:
### INPUT HL7 & parse required fields into dataframes
## Read all patient ORU messages. Assumes the file holds one segment per line and that every patient encounter
## message carries exactly one MSH, PID, PV1 and OBR segment: the per-segment frames built below are lined up
## purely by row position. The repeating OBX segments are handled in the report-generation cell.
column_names = list(range(50))  # column 0 = segment id, columns 1..49 = the pipe-delimited fields
# HL7 gives the double quote no CSV meaning ("" is its explicit-null marker and free report text may contain
# quotes), so CSV quoting is switched off: every '|' stays a field separator and every line stays one segment.
orm_df = pd.read_csv('RADORUSample.hl7', delimiter='|', header=None, names=column_names, dtype='str',
                     quoting=csv.QUOTE_NONE)

## Create a dataframe for each segment type (rows keep their original line numbers as index labels)
msh_df = orm_df.loc[orm_df[0]=='MSH']
pid_df = orm_df.loc[orm_df[0]=='PID']
pv1_df = orm_df.loc[orm_df[0]=='PV1']
obr_df = orm_df.loc[orm_df[0]=='OBR']
#
# 1. Pick only the required fields of each segment and store them in a new per-segment dataframe
# 2. Transpose (.T) so each selected field becomes a named column and each message a row
# 3. reset_index(drop=True) so row i of every claim_*_df refers to the i-th message in the file
#
# In an MSH row the first '|' is itself field MSH-1, so column n holds MSH-(n+1); in all other segments
# column n holds field n.
# NOTE(review): several labels below do not describe the field they hold; in the OBR frame they are shifted by
# one (with the sample file the placer value 'Placer1234' lands under FILLERORDNUM). The names are kept as-is
# because later cells address these frames by position and other notebooks may rely on the names.
claim_msh_df = pd.DataFrame(data=[msh_df[5], msh_df[6], msh_df[8], msh_df[9], msh_df[10], msh_df[11]],
                            index=['RECFAC','MSGDATETIME','TYPE','CNTRLID','PROCID','VERID']).T.reset_index(drop=True)
claim_pid_df = pd.DataFrame(data=[pid_df[3], pid_df[5]],
                            index=['ALTPATID','PATNAME']).T.reset_index(drop=True)
claim_pv1_df = pd.DataFrame(data=[pv1_df[7]],
                            index=['REFDOCTOR']).T.reset_index(drop=True)
claim_obr_df = pd.DataFrame(data=[obr_df[1], obr_df[2],obr_df[3], obr_df[4],obr_df[7],obr_df[14], obr_df[16], obr_df[22], obr_df[23]],
                            index=['PLACERORDNUM','FILLERORDNUM','USID','PRIORITYOBR','OBSRVENDDATETIME', 'SPECSOURCE',
                                   'ORDCALLPHONE','CHRGTOPRACTICE','PARENTRESULT']).T.reset_index(drop=True)
print(claim_msh_df)
print(claim_pid_df)
print(claim_pv1_df)
print(claim_obr_df)

        RECFAC     MSGDATETIME     TYPE           CNTRLID PROCID VERID
0  accountID01  20110126163020  ORU^R01  MessageControlID      P   2.3
1  accountID02  20090725163020  ORU^R01  MessageControlID      P   2.3
2  accountID03  20110616163020  ORU^R01  MessageControlID      P   2.3
     ALTPATID           PATNAME
0  1234567890      SMITH^JOHN^M
1  2234567890   BEAVERS^ETHEL^A
2  3234567890  SWANSON^RONALD^U
                                           REFDOCTOR
0  AttendingPhysicianID^AttendingPhysicianLastNam...
1  AttendingPhysicianID^AttendingPhysicianLastNam...
2  AttendingPhysicianID^AttendingPhysicianLastNam...
  PLACERORDNUM FILLERORDNUM          USID  \
0            1   Placer1234  Filler156789   
1            1   Placer2234  Filler256789   
2            1   Placer3234  Filler356789   

                                         PRIORITYOBR      OBSRVENDDATETIME  \
0     12345^MRI ABDOMEN WITH CONTRAST^Imaging Center        20110101152535   
1  222222^MRI BRAIN WITH AND WITHOUT CO

In [15]:
### Data scrub and prep for analytics
## Data scrub requirements: TBD

## Build one wide claim dataframe by placing the per-segment frames side by side.
## Every claim_<segment>_df has a fresh 0..n-1 index, so row i of each frame is glued into row i of the result.
## (An earlier draft chained DataFrame.join calls with suffixes; a single column-wise concat is simpler.)
new_orm_claim_df = pd.concat(
    (claim_msh_df, claim_pid_df, claim_pv1_df, claim_obr_df),
    axis="columns",
    sort=False,
)
new_orm_claim_df.head()

Unnamed: 0,RECFAC,MSGDATETIME,TYPE,CNTRLID,PROCID,VERID,ALTPATID,PATNAME,REFDOCTOR,PLACERORDNUM,FILLERORDNUM,USID,PRIORITYOBR,OBSRVENDDATETIME,SPECSOURCE,ORDCALLPHONE,CHRGTOPRACTICE,PARENTRESULT
0,accountID01,20110126163020,ORU^R01,MessageControlID,P,2.3,1234567890,SMITH^JOHN^M,AttendingPhysicianID^AttendingPhysicianLastNam...,1,Placer1234,Filler156789,12345^MRI ABDOMEN WITH CONTRAST^Imaging Center,20110101152535,SpecimenReceivedDate/Time,OrderingProviderID^OrderingProviderLastName^Or...,ResultReportedDate/Time,
1,accountID02,20090725163020,ORU^R01,MessageControlID,P,2.3,2234567890,BEAVERS^ETHEL^A,AttendingPhysicianID^AttendingPhysicianLastNam...,1,Placer2234,Filler256789,222222^MRI BRAIN WITH AND WITHOUT CONTRAST^Ima...,ObservationDate/Time,SpecimenReceivedDate/Time,OrderingProviderID^OrderingProviderLastName^Or...,ResultReportedDate/Time,
2,accountID03,20110616163020,ORU^R01,MessageControlID,P,2.3,3234567890,SWANSON^RONALD^U,AttendingPhysicianID^AttendingPhysicianLastNam...,1,Placer3234,Filler356789,333333^LEFT FOOT XRAY^IMAGING CENTER,20110602163020,20110602163020,OrderingProviderID^OrderingProviderLastName^Or...,ResultReportedDate/Time,


In [16]:
### CREATE RADIOLOGY REPORT:
### For each patient encounter message, gather the OBX segments that make up the patient's unstructured
### radiology report and write them to one text file per message in the current directory, named
###     "PAT<first claim_pid_df column>_<message date/time from claim_msh_df>_RADRPT.txt"
#
# Parse out all OBX segments for all patients
obx_df = orm_df.loc[orm_df[0]=='OBX']

# Keep only the needed OBX fields, one row per OBX segment in file order
claim_obx_df = pd.DataFrame(data=[obx_df[1], obx_df[2], obx_df[3], obx_df[5], obx_df[11]],
                            index=['VALTYPE','OBSRVID','SUBID', 'OBSRVVAL','LASTOBSRV']).T.reset_index(drop=True)

## NOTE: The OBX segments carry the unstructured text report, one OBX segment per report line.
##  The number of OBX segments per message varies (0 to many).
#
# Patient totals from PID df and total OBX segments from OBX df
total_patients = len(claim_pid_df)
total_obx_segments = len(claim_obx_df)

# Number every line of the file with the message it belongs to: each MSH segment opens a new message, so the
# running count of MSH rows is 1 for the first message, 2 for the second, and so on. Looking the OBX rows up
# in that numbering ties each report line to its own message, so a message without OBX segments, or OBX set
# ids that do not restart at '1', can no longer shift text into another patient's report.
message_number = (orm_df[0] == 'MSH').cumsum()
obx_message_number = message_number.loc[obx_df.index].reset_index(drop=True)  # aligned with claim_obx_df rows


def _report_line(value):
    """Return one report line for a field value; a missing (NaN) field becomes an empty line, not 'nan'."""
    return ('' if pd.isna(value) else str(value)) + '\n'


# Create a RAD report file for each patient
for patient_index in range(total_patients):
    report_filename = ("PAT" + claim_pid_df.iloc[patient_index, 0] + "_"
                       + claim_msh_df.iloc[patient_index, 1] + "_RADRPT.txt")
    patient_obx_text = claim_obx_df.loc[obx_message_number == patient_index + 1, 'OBSRVVAL']

    # "w" rather than "x": re-running the notebook regenerates the report instead of stopping with
    # FileExistsError. The with-block closes the file even if a write fails.
    with open(report_filename, "w") as f:
        print("Created report filename for this patient ==>", report_filename)

        # First line: column 4 of claim_obr_df (labelled OBSRVENDDATETIME)
        f.write(_report_line(claim_obr_df.iloc[patient_index, 4]))

        # Remaining lines: the text of every OBX segment of this message, in file order
        f.writelines(_report_line(text) for text in patient_obx_text)

    print("closed this patient report file")

print ("PROCESSED ALL PATIENTS RAD REPORTS")

Created report filename for this patient ==> PAT1234567890_20110126163020_RADRPT.txt
closed this patient report file
Created report filename for this patient ==> PAT2234567890_20090725163020_RADRPT.txt
closed this patient report file
Created report filename for this patient ==> PAT3234567890_20110616163020_RADRPT.txt
closed this patient report file
PROCESSED ALL PATIENTS RAD REPORTS


In [None]:
### Analyze unstructured radiology reports using NLP techniques
### See NLP_Play_CPT_MRI.ipynb

In [None]:
### Create a summary document for coders
## This will display metrics about the unstructured radiology reports and make coding recommendations based on the analysis


In [5]:
#########################################################
## Smoke test: CT dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_CT_dict)
ct_description = RAD_CPT_CT_dict.get('72193')
print("72193 is ====> ", ct_description, " <====")

# Description -> code lookup with an exact description string
ct_exact_key = find_first_key_exact(RAD_CPT_CT_dict, 'CT LUMBAR SPINE w/o & with contrast')
print("Key for CT LUMBAR SPINE w/o & with contrast is ===>", ct_exact_key)

# Substring search; the mixed-case search term is deliberate
ct_partial_keys = find_all_keys_containing(RAD_CPT_CT_dict, 'abdOmen')
print("Possible CT CPT codes with values containing, abdOmen ====>", ct_partial_keys)

{'70450': 'CT BRAIN without contrast', '70460': 'CT BRAIN with contrast', '70470': 'CT BRAIN w/o & with contrast', '70480': 'CT ORBIT/SKULL without contrast', '70481': 'CT ORBITS/SKULL with contrast', '70482': 'CT ORBIT/SKULL w/o & with contrast', '70486': 'CT MAXILLOFACIAL without contrast', '70488': 'CT FACE w/o & with contrast', '70490': 'CT NECK SOFT TISSUE without contrast', '70491': 'CT NECT SOFT TISSUE with contrast', '70492': 'CT NECK SOFT TISSUE w/o & with contrast', '70496': 'CT ANGIO HEAD with and/or w/o contrast', '70498': 'CT ANGIO/NECK with and/or w/o contrast', '71250': 'CT CHEST without contrast', '71260': 'CT CHEST with contrast', '71270': 'CT CHEST w/o & with contrast', '71275': 'CT ANGIOGRAPHY/CHEST with and/or w/o contrast', '72125': 'CT CERVICAL SPINE without contrast', '72126': 'CT CERVICAL SPINE with contrast', '72127': 'CT CERVICAL SPINE w/o & with contrast', '72128': 'CT THORACIC SPINE without contrast', '72129': 'CT THORACIC SPINE with contrast', '72130': 'CT 

In [6]:
#########################################################
## Smoke test: FLUORO dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_FLUORO_dict)
fluoro_description = RAD_CPT_FLUORO_dict.get('74280')
print("74280 is ====> ", fluoro_description, " <====")

# Description -> code lookup with an exact description string
fluoro_exact_key = find_first_key_exact(RAD_CPT_FLUORO_dict, 'FLUORO BARIUM ENEMA W/AIR CONT')
print("Key for FLUORO BARIUM ENEMA W/AIR CONT is ===>", fluoro_exact_key)

# Substring search with a lower-case search term
fluoro_partial_keys = find_all_keys_containing(RAD_CPT_FLUORO_dict, 'gi')
print("Possible FLUORO CPT codes with values containing, gi ====>", fluoro_partial_keys)

{'74210': 'FLUORO Upper Esophagram', '74220': 'FLUORO ESOPHAGUS EXAM', '74241': 'FLUORO UPPER GI SERIES', '74247': 'FLUORO UPPER GI TRACT WITH AIR', '74249': 'FLUORO UGI & SB WITH AIR', '74250': 'FLUORO SMALL BOWEL SERIES', '74270': 'FLUORO BARIUM ENEMA', '74280': 'FLUORO BARIUM ENEMA W/AIR CONT', '74400': 'FLUORO IVP'}
74280 is ====>  FLUORO BARIUM ENEMA W/AIR CONT  <====
Key for FLUORO BARIUM ENEMA W/AIR CONT is ===> 74280
Possible FLUORO CPT codes with values containing, gi ====> set()


In [7]:
#########################################################
## Test MAMO dictionary and its functions (sam_2018_cpt_rad)
#########################################################
# Full dictionary dump, a code -> description lookup, then a description -> code lookup
print(RAD_CPT_MAMO_dict)
print("77066 is ====> ", RAD_CPT_MAMO_dict.get('77066'), " <====")
print("Key for MAMMOGRAPHY 3D DIGITAL SCREENING is ===>", find_first_key_exact(RAD_CPT_MAMO_dict, 'MAMMOGRAPHY 3D DIGITAL SCREENING'))
# The label names the MAMO dictionary, which is the one actually searched here
print("Possible MAMO CPT codes with values containing, lateral ====>", find_all_keys_containing(RAD_CPT_MAMO_dict, 'lateral'))

{'77063': 'MAMMOGRAPHY 3D DIGITAL SCREENING', '77065': 'MAMMOGRAPHY-Diagnostic UNILATERAL', '77066': 'MAMMOGRAPHY-Diagnostic- BILATERAL', '77067': 'MAMMOGRAPHY SCREENING'}
77066 is ====>  MAMMOGRAPHY-Diagnostic- BILATERAL  <====
Key for MAMMOGRAPHY 3D DIGITAL SCREENING is ===> 77063
Possible XRAY CPT codes with values containing, lateral ====> set()


In [8]:
#########################################################
## Smoke test: MRI dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_MRI_dict)
mri_description = RAD_CPT_MRI_dict.get('72147')
print("72147 is ====> ", mri_description, " <====")

# Description -> code lookup with an exact description string
mri_exact_key = find_first_key_exact(RAD_CPT_MRI_dict, 'MRI LUMBAR SPINE without contrast / MRI LUMBAR PLEXUS')
print("Key for MRI LUMBAR SPINE without contrast / MRI LUMBAR PLEXUS is ===>", mri_exact_key)

# Substring search over the descriptions
mri_partial_keys = find_all_keys_containing(RAD_CPT_MRI_dict, 'without contrast')
print("Possible MRI CPT codes with values containing, without contrast ====>", mri_partial_keys)

{'70336': 'MRI TEMPOROMANDIBULAR JOI', '70540': 'MRI SOFT TISSUE NECK,ORBIT without contrast', '70542': 'MRI SOFT TISSUE NECK,ORBIT with contrast', '70543': 'MRI NECK,ORBIT w/o & with contrast', '70544': 'MRA HEAD without contrast', '70545': 'MRA HEAD with contrast', '70546': 'MRA HEAD w/o & with contrast', '70547': 'MRA NECK without contrast', '70548': 'MRA NECK/CAROTID with contrast', '70549': 'MRA/NECK w/o & with contrast', '70551': 'MRI BRAIN without contrast', '70552': 'MRI BRAIN with contrast', '70553': 'MRI BRAIN w/o & with contrast', '71550': 'MRI CHEST without contrast', '71551': 'MRI CHEST with contrast', '71552': 'MRI CHEST w/o & with contrast', '71555': 'MRA CHEST', '72141': 'MRI CERV.SPINE without contrast', '72142': 'MRI CERV SPINE with contrast', '72146': 'MRI THORACIC without contrast', '72147': 'MRI THORACIC with contrast', '72148': 'MRI LUMBAR SPINE without contrast / MRI LUMBAR PLEXUS', '72149': 'MRI LUMBAR SPINE with contrast', '72156': 'MRI/CERV SPINE w/o & with co

In [9]:
#########################################################
## Test NUCLEAR dictionary and its functions (sam_2018_cpt_rad)
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_NUCLEAR_dict)
print("78452 is ====> ", RAD_CPT_NUCLEAR_dict.get('78452'), " <====")
# The labels quote the exact description being looked up and name the NUCLEAR dictionary being searched
print("Key for NUCLEAR HEPATOBILIARY SCAN WITH PHARM is ===>",
      find_first_key_exact(RAD_CPT_NUCLEAR_dict, 'NUCLEAR HEPATOBILIARY SCAN WITH PHARM'))
print("Possible NUCLEAR CPT codes with values containing, bone ====>", find_all_keys_containing(RAD_CPT_NUCLEAR_dict, 'bone'))

{'78012': 'NUCLEAR EXAM THYROID (UPTAKE only)', '78014': 'NUCLEAR EXAM THYROID (WITH UPTAKE)', '78013': 'NUCLEAR EXAM THYROID (WITH IMAGING ONLY)', '78070': 'NUCLEAR EXAM PARATHYROID SCAN', '78071': 'NUCLEAR EXAM PARATHYROID SCAN with SPECT', '78206': 'NUCLEAR LIVER SCAN SPECT(HEMANGIOMA)', '78215': 'NUCLEAR LIVER/SPLEEN SCAN static', '78226': 'NUCLEAR HEPATOBILIARY SCAN', '78227': 'NUCLEAR HEPATOBILIARY SCAN WITH PHARM', '78290': 'NUCLEAR MECKELS SCAN', '78306': 'NUCLEAR BONE SCAN WHOLE BODY', '78315': 'NUCLEAR BONE SCAN 3 PHASE', '78320': 'NUCLEAR BONE SCAN SPECT', '78452': 'MyoCardial Perfusion Imaging/Pharmacologic with 93015', '78472': 'NUCLEAR MUGA SCAN CARDIA IMAGING', '78607': 'NUCLEAR Brain Spect (DaTscan)', '78707': 'NUCLEAR RENAL SCAN WITH MAG 3', '78708': 'NUCLEAR RENAL SCAN WITH LASIX OR CAPTOPRIL', '78800': 'NUCLEAR GALLIUM SCAN with 78803', '78804': 'NUCLEAR OCTREO SCAN with 78803', '78807': 'NUCLEAR SCAN FOR INFLAMATION with spect'}
78452 is ====>  MyoCardial Perfusion 

In [10]:
#########################################################
## Smoke test: PET dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_PET_dict)
pet_description = RAD_CPT_PET_dict.get('78815')
print("78815 is ====> ", pet_description, " <====")

# Description -> code lookup with an exact description string
pet_exact_key = find_first_key_exact(RAD_CPT_PET_dict, 'PET SCAN WITH CT Limited')
print("Key for PET SCAN WITH CT Limited is ===>", pet_exact_key)

# Substring search with a lower-case search term
pet_partial_keys = find_all_keys_containing(RAD_CPT_PET_dict, 'body')
print("Possible PET CPT codes with values containing, body ====>", pet_partial_keys)

{'78608': 'PET BRAIN IMAG', '78814': 'PET SCAN WITH CT Limited', '78815': 'PET SCANN with CT Skull to mid thigh', '78816': 'PET SCAN with CT full body'}
78815 is ====>  PET SCANN with CT Skull to mid thigh  <====
Key for PET SCAN WITH CT Limited is ===> 78814
Possible PET CPT codes with values containing, body ====> set()


In [11]:
#########################################################
## Smoke test: US dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_US_dict)
us_description = RAD_CPT_US_dict.get('76705')
print("76705 is ====> ", us_description, " <====")

# Description -> code lookup with an exact description string
us_exact_key = find_first_key_exact(RAD_CPT_US_dict, 'U/S OB MATERN/EVAL&DET EX/EACH ADD')
print("Key for U/S OB MATERN/EVAL&DET EX/EACH ADD is ===>", us_exact_key)

# Substring search with a lower-case search term
us_partial_keys = find_all_keys_containing(RAD_CPT_US_dict, 'transvag')
print("Possible US CPT codes with values containing, transvag ====>", us_partial_keys)

{'76506': 'U/S EXAM OF HEAD', '76536': 'U/S Soft Tissue Neck(thyroid,parathyroid,etc.)', '76604': 'U/S CHEST', '76641': 'U/S BREAST(uni or bi)', '76700': 'U/S ABDOMEN COMPLETE', '76705': 'U/S ABDOMEN(limited)', '76770': 'U/S RETRO(renal,aorta,nodes)', '76775': 'U/S RETRO-limited', '76776': 'U/S TRANSPLANTED KIDNEY', '76800': 'U/S ECHO EXAM SPINAL CANAL', '76801': 'U/S OB FIRST TRIMESTER', '76802': 'U/S OB/FIRST TRI/EACH ADD', '76805': 'U/S OB-GYN > 14 WEEKS', '76810': 'U/S OB > 14 WEEKS/EACH ADD', '76811': 'U/S OB MATERN/EVAL&DET EX', '76812': 'U/S OB MATERN/EVAL&DET EX/EACH ADD', '76813': 'U/S NUCHAL TRANSLUCENCY', '76814': 'U/S Nuchal Translucency/EACH ADD', '76815': 'U/S OB LIMITED', '76816': 'U/S OB LIMITED FOLLOW UP', '76817': 'U/S OB TRANSVAG', '76830': 'U/S TRANSVAGINAL', '76831': 'U/S HYSTEROGRAM WITH DOPPLER', '76856': 'U/S PELVIS-NON OB', '76857': 'U/S PELVIS-NON OB LIMITED', '76870': 'U/S SCROTUM with Doppler 93975', '76881': 'U/S EXTREMITY NON VASCULAR', '76882': 'U/S EXTRE

In [41]:
#########################################################
## Smoke test: XRAY dictionary and the sam_2018_cpt_rad lookup helpers
#########################################################
# Full dictionary dump, then a code -> description lookup
print(RAD_CPT_XRAY_dict)
xray_description = RAD_CPT_XRAY_dict.get('72220')
print("72220 is ====> ", xray_description, " <====")

# Description -> code lookup with an exact description string
xray_exact_key = find_first_key_exact(RAD_CPT_XRAY_dict, 'X-RAY EXAM OF ABDOMEN WITH OBLIQUE')
print("Key for X-RAY EXAM OF ABDOMEN WITH OBLIQUE is ===>", xray_exact_key)

# Substring search with an upper-case search term
xray_partial_keys = find_all_keys_containing(RAD_CPT_XRAY_dict, 'ABDOMEN')
print("Possible XRAY CPT codes with values containing, ABDOMEN ====>", xray_partial_keys)

{'70030': 'X-RAY EYE FOR FOREIGN BOD', '70100': 'XRAY MANDIBLE < 4 VIEWS', '70110': 'X-RAY EXAM OF MANDIBLE 4 VIEWS', '70140': 'XRAY FACIAL BONES < 3VIEWS', '70150': 'X-RAY FACIAL BONES 3VIEWS', '70160': 'XRAY NASAL BONES 3 VIEWS+', '70210': 'XRAY SINUSES < 3 VIEWS', '70220': 'XRAY SINUSES 3 OR MORE VIEWS', '70250': 'XRAY SKULL < 4 VIEWS', '70260': 'XRAY SKULL 4VIEWS', '70360': 'XRAY NECK SOFT TISSUE', '71010': 'XRAY CHEST, ONE VIEW', '71020': 'XRAY CHEST, TWO VIEWS', '71021': 'XRAY CHEST W/APICAL LORDO', '71022': 'XRAY CHEST W/OBLIQUE PROJEC', '71023': 'XRAY CHEST W/FLUORO', '71030': 'XRAY CHEST 4 VIEWS', '71034': 'XRAY CHEST 4 VIEWS W/FLUOROS.', '71035': 'XRAY CHEST LATERAL DECUBITUS', '71100': 'XRAY RIBS,UNILAT.,TWO VIEWS', '71101': 'XRAY RIBS INCL CHEST 3 VI', '71110': 'XRAY RIB BILAT 3 VIEWS', '71111': 'XRAY RIBS BILATERAL INCL CHEST 4 VI', '71120': 'XRAY STERNUM 2 VIEWS', '71130': 'XRAY STERNO CLAVI JOINT 3 VIEWS', '72010': 'XRAY SPINE, ENTIRE AP & LAT', '72020': 'XRAY SPINE ONE 

In [57]:
#########################################################
## General Test dictionary and its functions (sam_2018_cpt_rad)
#########################################################
# Ad-hoc check against the MAMO dictionary. The code and the search text are held in variables and echoed
# in the printed labels, so the labels always describe the lookup that was actually performed.
searchval = '77067'
search_text = 'mammography screening'
print(searchval, " is ====> ", RAD_CPT_MAMO_dict.get(searchval), " <====")
print("Possible MAMO CPT codes with values containing,", search_text, "====>",
      find_all_keys_containing(RAD_CPT_MAMO_dict, search_text))

77067  is ====>  MAMMOGRAPHY SCREENING  <====
Possible MRI CPT codes with values containing, without contrast ====> {'77067'}


In [43]:
# Spot check: show the description stored for CPT code 70553 in the MRI dictionary.
RAD_CPT_MRI_dict.get('70553')

'MRI BRAIN w/o & with contrast'

In [44]:
# Experiment: edit distance between the two spellings the CPT descriptions use for "without".
# NOTE(review): the comparison appears to be case-sensitive, so the upper-case 'WITHOUT' shares no characters
# with 'w/o' and the distance comes out at the full length of the longer string. Consider normalising case
# before comparing; confirm intent.
import nltk
nltk.edit_distance('WITHOUT', 'w/o')

7