## Open File

In [1]:
def openme(file):
    with open (file, "r") as f:
        for cnt, line in enumerate(f):
            print("Line {}: {}".format(cnt, line))

In [2]:
openme("data/hl7oru")

Line 0: MSH|^~\&|LCS|LCA|LIS|TEST9999|199807311532||ORU^R01|3629|P|2.2

Line 1: PID|2|2161348462|20809880170|1614614|20809880170^TESTPAT||19760924|M|||^^^^00000-0000|||||||86427531^^^03|SSN# HERE

Line 2: ORC|NW|8642753100012^LIS|20809880170^LCS||||||19980727000000|||HAVILAND

Line 3: OBR|1|8642753100012^LIS|20809880170^LCS|008342^UPPER RESPIRATORY^CULTURE^L|||19980727175800||||||SS#634748641 CH14885 SRC:THROASRC:PENI|19980727000000||||||20809880170||19980730041800||BN|F

Line 4: OBX|1|ST|008342^UPPER RESPIRATORY CULTURE^L||FINALREPORT|||||N|F||| 19980729160500|BN

Line 5: ORC|NW|8642753100012^LIS|20809880170^LCS||||||19980727000000|||HAVILAND

Line 6: OBR|2|8642753100012^LIS|20809880170^LCS|997602^.^L|||19980727175800||||G|||19980727000000||||||20809880170||19980730041800|||F|997602|||008342

Line 7: OBX|2|CE|997231^RESULT 1^L||M415|||||N|F|||19980729160500|BN

Line 8: NTE|1|L|MORAXELLA (BRANHAMELLA) CATARRHALIS

Line 9: NTE|2|L| HEAVY GROWTH

Line 10: NTE|3|L| BETA LACTAMASE POSITIVE

## Segment Header

In [3]:
def splitFieldsSegName(file):
    with open (file, "r") as f:
        for cnt, line in enumerate(f):
            flds = line.split("|")
            for cnt, fld in enumerate(flds):
                print("%s: Field %s: %s" % (flds[0], cnt, fld))

In [4]:
splitFieldsSegName("data/hl7oru")

MSH: Field 0: MSH
MSH: Field 1: ^~\&
MSH: Field 2: LCS
MSH: Field 3: LCA
MSH: Field 4: LIS
MSH: Field 5: TEST9999
MSH: Field 6: 199807311532
MSH: Field 7: 
MSH: Field 8: ORU^R01
MSH: Field 9: 3629
MSH: Field 10: P
MSH: Field 11: 2.2

PID: Field 0: PID
PID: Field 1: 2
PID: Field 2: 2161348462
PID: Field 3: 20809880170
PID: Field 4: 1614614
PID: Field 5: 20809880170^TESTPAT
PID: Field 6: 
PID: Field 7: 19760924
PID: Field 8: M
PID: Field 9: 
PID: Field 10: 
PID: Field 11: ^^^^00000-0000
PID: Field 12: 
PID: Field 13: 
PID: Field 14: 
PID: Field 15: 
PID: Field 16: 
PID: Field 17: 
PID: Field 18: 86427531^^^03
PID: Field 19: SSN# HERE

ORC: Field 0: ORC
ORC: Field 1: NW
ORC: Field 2: 8642753100012^LIS
ORC: Field 3: 20809880170^LCS
ORC: Field 4: 
ORC: Field 5: 
ORC: Field 6: 
ORC: Field 7: 
ORC: Field 8: 
ORC: Field 9: 19980727000000
ORC: Field 10: 
ORC: Field 11: 
ORC: Field 12: HAVILAND

OBR: Field 0: OBR
OBR: Field 1: 1
OBR: Field 2: 8642753100012^LIS
OBR: Field 3: 20809880170^LCS
OBR: 

## Implementing ZIP

In [5]:
def splitFieldsSegNameZIP(file):
    with open (file, "r") as f:
        for cnt, line in enumerate(f):
            flds = line.split("|")
            fldH = []
            msgH = []
            for cnt, fld in enumerate(flds):
                msgH.append(flds[0])
                fldH.append('Field %s' % cnt)
            seg = zip(msgH, fldH, flds)
            for value in (seg):
                print(value)

In [6]:
splitFieldsSegNameZIP("data/hl7oru")

('MSH', 'Field 0', 'MSH')
('MSH', 'Field 1', '^~\\&')
('MSH', 'Field 2', 'LCS')
('MSH', 'Field 3', 'LCA')
('MSH', 'Field 4', 'LIS')
('MSH', 'Field 5', 'TEST9999')
('MSH', 'Field 6', '199807311532')
('MSH', 'Field 7', '')
('MSH', 'Field 8', 'ORU^R01')
('MSH', 'Field 9', '3629')
('MSH', 'Field 10', 'P')
('MSH', 'Field 11', '2.2\n')
('PID', 'Field 0', 'PID')
('PID', 'Field 1', '2')
('PID', 'Field 2', '2161348462')
('PID', 'Field 3', '20809880170')
('PID', 'Field 4', '1614614')
('PID', 'Field 5', '20809880170^TESTPAT')
('PID', 'Field 6', '')
('PID', 'Field 7', '19760924')
('PID', 'Field 8', 'M')
('PID', 'Field 9', '')
('PID', 'Field 10', '')
('PID', 'Field 11', '^^^^00000-0000')
('PID', 'Field 12', '')
('PID', 'Field 13', '')
('PID', 'Field 14', '')
('PID', 'Field 15', '')
('PID', 'Field 16', '')
('PID', 'Field 17', '')
('PID', 'Field 18', '86427531^^^03')
('PID', 'Field 19', 'SSN# HERE\n')
('ORC', 'Field 0', 'ORC')
('ORC', 'Field 1', 'NW')
('ORC', 'Field 2', '8642753100012^LIS')
('ORC', '

# Convert HL7 to JSON

In [7]:
# Converts HL7 text file into nested dictionary format for JSON conversion
# Null values have been removed without losing positioning

def convertHL7_JSON(file):
    with open (file, 'r') as f:
        file = f.read()
        msgsL = file.split('\r\n')
        msgs = {}
        for cnt0, msg in enumerate(msgsL, 1):
            segsL = msg.split('\n')
            segs = {}
            msgsK = 'MSG %s' % cnt0
            for cnt1, seg in enumerate(segsL, start=1):
                fldsL = seg.split('|')
                flds = {}
                if (seg == ''):
                    continue
                else:
                    for cnt2, fld in enumerate(fldsL, start=1):
                        fldsK = 'FLD %s' % cnt2
                        if (fld == ''):
                            continue
                        elif ((fld == '^~\&') or (fld == '^~`&') or (len(fld.split('^'))==1)):
                            flds[fldsK] = fld
                        else:
                            compsL = fld.split('^')
                            comps = {} 
                            for cnt3, cmp in enumerate(compsL, start=1):
                                subCompsL = cmp.split('~')
                                subComps = {}
                                compsK = 'CMP %s' % cnt3
                                if (cmp == ''):
                                    continue
                                elif (len(subCompsL)==1):
                                    comps[compsK] = cmp
                                else:
                                    for cnt4, sc in enumerate(subCompsL, start=1):
                                        subCompsK = 'SCMP %s' % cnt4
                                        if (sc == ''):
                                            continue
                                        else:
                                            subComps[subCompsK] = sc
                                    comps[compsK] = subComps
                            flds[fldsK] = comps
                segsK = 'SEG %s' % cnt1
                segs[segsK] = flds
            msgs[msgsK] = segs
        print(msgs)  

In [8]:
convertHL7_JSON("data/hl7oru")

{'MSG 1': {'SEG 1': {'FLD 1': 'MSH', 'FLD 2': '^~\\&', 'FLD 3': 'LCS', 'FLD 4': 'LCA', 'FLD 5': 'LIS', 'FLD 6': 'TEST9999', 'FLD 7': '199807311532', 'FLD 9': {'CMP 1': 'ORU', 'CMP 2': 'R01'}, 'FLD 10': '3629', 'FLD 11': 'P', 'FLD 12': '2.2'}, 'SEG 2': {'FLD 1': 'PID', 'FLD 2': '2', 'FLD 3': '2161348462', 'FLD 4': '20809880170', 'FLD 5': '1614614', 'FLD 6': {'CMP 1': '20809880170', 'CMP 2': 'TESTPAT'}, 'FLD 8': '19760924', 'FLD 9': 'M', 'FLD 12': {'CMP 5': '00000-0000'}, 'FLD 19': {'CMP 1': '86427531', 'CMP 4': '03'}, 'FLD 20': 'SSN# HERE'}, 'SEG 3': {'FLD 1': 'ORC', 'FLD 2': 'NW', 'FLD 3': {'CMP 1': '8642753100012', 'CMP 2': 'LIS'}, 'FLD 4': {'CMP 1': '20809880170', 'CMP 2': 'LCS'}, 'FLD 10': '19980727000000', 'FLD 13': 'HAVILAND'}, 'SEG 4': {'FLD 1': 'OBR', 'FLD 2': '1', 'FLD 3': {'CMP 1': '8642753100012', 'CMP 2': 'LIS'}, 'FLD 4': {'CMP 1': '20809880170', 'CMP 2': 'LCS'}, 'FLD 5': {'CMP 1': '008342', 'CMP 2': 'UPPER RESPIRATORY', 'CMP 3': 'CULTURE', 'CMP 4': 'L'}, 'FLD 8': '199807271