In [1]:
# Fonction pour récupérer toutes les messages hl7 contenus dans des fichiers .txt
# se trouvant dans un répertoire dont le chemin est passé en paramètre.
# cette version récupérère toutes les informations brutes dans une liste de dictionnaire,
# Où chaque dictionnaire repésente un message hl7 complet.


import os
from hl7apy import parser
from hl7apy.core import Group, Segment
from hl7apy.exceptions import UnsupportedVersion

def parse_hl7_files(directory_path):
    results = []
    
    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, "r") as file:
                hl7 = file.read()
                
            try:
                msg = parser.parse_message(hl7.replace('\n', '\r'), find_groups=True, validation_level=2)
            except UnsupportedVersion:
                print(f'Unsupported version in file: {file_path}')
                msg = parser.parse_message(hl7.replace('\n', '\r'), find_groups=True, validation_level=2)
            
            results.append(parse_message_to_dict(msg))
    
    return results

def parse_message_to_dict(msg):
    message_dict = {"message": []}
    
    for segment in msg.children:
        if isinstance(segment, Segment):
            segment_dict = {"segment": segment.name, "fields": []}
            for field in segment.children:
                segment_dict["fields"].append({
                    "field_name": field.name,
                    "field_value": field.value
                })
            message_dict["message"].append(segment_dict)
        elif isinstance(segment, Group):
            group_dict = {"group": segment.name, "segments": []}
            for group_segment in segment.children:
                if isinstance(group_segment, Segment):
                    segment_dict = {"segment": group_segment.name, "fields": []}
                    for field in group_segment.children:
                        segment_dict["fields"].append({
                            "field_name": field.name,
                            "field_value": field.value
                        })
                    group_dict["segments"].append(segment_dict)
                elif isinstance(group_segment, Group):
                    group_dict["segments"].append(parse_group_to_dict(group_segment))
            message_dict["message"].append(group_dict)
    
    return message_dict

def parse_group_to_dict(group):
    group_dict = {"group": group.name, "segments": []}
    
    for segment in group.children:
        if isinstance(segment, Segment):
            segment_dict = {"segment": segment.name, "fields": []}
            for field in segment.children:
                segment_dict["fields"].append({
                    "field_name": field.name,
                    "field_value": field.value
                })
            group_dict["segments"].append(segment_dict)
        elif isinstance(segment, Group):
            group_dict["segments"].append(parse_group_to_dict(segment))
    
    return group_dict

# Example usage
directory_path = r"D:\stage\data\DataMai2024\DataMai2024\archive(6)\EXEMPLE1_GLIMS_HL7_RESULTATS\EXEMPLE1_GLIMS_HL7_RESULTATS"
parsed_results = parse_hl7_files(directory_path)



In [10]:
print(parsed_results)

[{'message': [{'segment': 'MSH', 'fields': [{'field_name': 'MSH_1', 'field_value': '|'}, {'field_name': 'MSH_2', 'field_value': '^~\\&'}, {'field_name': 'MSH_3', 'field_value': 'GLIMS'}, {'field_name': 'MSH_4', 'field_value': 'carevue_hl7'}, {'field_name': 'MSH_5', 'field_value': '4041'}, {'field_name': 'MSH_6', 'field_value': 'carevue_hl7'}, {'field_name': 'MSH_7', 'field_value': '20240416145007'}, {'field_name': 'MSH_9', 'field_value': 'ORU^R01'}, {'field_name': 'MSH_10', 'field_value': '47508'}, {'field_name': 'MSH_11', 'field_value': 'P'}, {'field_name': 'MSH_12', 'field_value': '2.1'}, {'field_name': None, 'field_value': 'AL'}, {'field_name': None, 'field_value': 'NE'}]}, {'group': 'ORU_R01_PATIENT_RESULT', 'segments': [{'group': 'ORU_R01_PATIENT', 'segments': [{'segment': 'PID', 'fields': [{'field_name': 'PID_1', 'field_value': '1'}, {'field_name': 'PID_3', 'field_value': '308495690^^^I_STDENIS'}, {'field_name': 'PID_5', 'field_value': 'XXXXXX^XXXXXX^XXXXXX^^^M.'}, {'field_name':

In [28]:
import os
from hl7apy import parser
from hl7apy.core import Group, Segment
from hl7apy.exceptions import UnsupportedVersion

def parse_hl7_files(directory_path):
    results = []
    
    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, "r") as file:
                hl7 = file.read()
                
            try:
                msg = parser.parse_message(hl7.replace('\n', '\r'), find_groups=True, validation_level=2)
            except UnsupportedVersion:
                print(f'Unsupported version in file: {file_path}')
                msg = parser.parse_message(hl7.replace('\n', '\r'), find_groups=True, validation_level=2)
            
            results.append(parse_message_to_dict(msg))
    
    return results

def parse_message_to_dict(msg):
    message_dict = []
    
    for segment in msg.children:
        if isinstance(segment, Segment):
            segment_dict = {"segment": segment.name, "fields": []}
            for field in segment.children:
                segment_dict["fields"].append({
                    "field_name": field.name,
                    "field_value": field.value
                })
            message_dict.append(segment_dict)
        elif isinstance(segment, Group):
            group_dict = {"group": segment.name, "segments": []}
            for group_segment in segment.children:
                if isinstance(group_segment, Segment):
                    segment_dict = {"segment": group_segment.name, "fields": []}
                    for field in group_segment.children:
                        segment_dict["fields"].append({
                            "field_name": field.name,
                            "field_value": field.value
                        })
                    group_dict["segments"].append(segment_dict)
                elif isinstance(group_segment, Group):
                    group_dict["segments"].append(parse_group_to_dict(group_segment))
            message_dict.append(group_dict)
    
    return message_dict

def parse_group_to_dict(group):
    group_dict = {"group": group.name, "segments": []}
    
    for segment in group.children:
        if isinstance(segment, Segment):
            segment_dict = {"segment": segment.name, "fields": []}
            for field in segment.children:
                segment_dict["fields"].append({
                    "field_name": field.name,
                    "field_value": field.value
                })
            group_dict["segments"].append(segment_dict)
        elif isinstance(segment, Group):
            group_dict["segments"].append(parse_group_to_dict(segment))
    
    return group_dict

# Example usage
directory_path = r"D:\stage\data\DataMai2024\DataMai2024\archive(6)\EXEMPLE1_GLIMS_HL7_RESULTATS\EXEMPLE1_GLIMS_HL7_RESULTATS"
parsed_results = parse_hl7_files(directory_path)
print(parsed_results)


Defaulting to user installation because normal site-packages is not writeable


ERROR: Could not find a version that satisfies the requirement python-hl7 (from versions: none)
ERROR: No matching distribution found for python-hl7


ModuleNotFoundError: No module named 'hl7'

In [36]:
import os
import pandas as pd
from hl7apy.parser import parse_message
from hl7apy.exceptions import UnsupportedVersion

def extract_fields(element):
    """
    Extracts fields from a segment or group recursively.
    
    Parameters:
        element (hl7apy.core.Element): HL7 element object.
        
    Returns:
        dict: A dictionary containing field names and values.
    """
    fields = {}
    
    for child in element.children:
        if child.classname == 'Segment':
            for i, field in enumerate(child.children, start=1):
                field_name = f"{child.name}_{i}"
                field_value = field.to_er7() if field.to_er7() else ''
                fields[field_name] = field_value
        elif child.classname == 'Group':
            # Recursively extract fields from the group without adding group name in the field name
            group_fields = extract_fields(child)
            fields.update(group_fields)
    
    return fields

def parse_message_to_dict(msg):
    """
    Parses an HL7 message into a dictionary of segments and groups.
    
    Parameters:
        msg (hl7apy.core.Message): HL7 message object.
        
    Returns:
        dict: A dictionary containing segment names as keys and their field values.
    """
    message_dict = extract_fields(msg)
    return message_dict

def parse_hl7_files_to_dataframe(directory_path):
    """
    Parses HL7 files in the given directory and returns a DataFrame.
    
    Parameters:
        directory_path (str): Path to the directory containing HL7 files.
        
    Returns:
        pandas.DataFrame: DataFrame containing parsed HL7 data.
    """
    records = []
    
    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'r') as file:
                hl7_message = file.read()
                try:
                    msg = parse_message(hl7_message.replace('\n', '\r'), find_groups=True, validation_level=2)
                except UnsupportedVersion:
                    print(f'Unsupported version in file: {file_path}')
                    msg = parse_message(hl7_message.replace('\n', '\r'), find_groups=True, validation_level=2)
                record = parse_message_to_dict(msg)
                record["message_id"] = filename
                records.append(record)
    
    df = pd.DataFrame(records)
    return df

# Exemple d'utilisation
directory_path = r"D:\stage\data\DataMai2024\DataMai2024\archive(6)\EXEMPLE1_GLIMS_HL7_RESULTATS\EXEMPLE1_GLIMS_HL7_RESULTATS"
df_segments = parse_hl7_files_to_dataframe(directory_path)
print(df_segments)


  MSH_1 MSH_2  MSH_3        MSH_4 MSH_5        MSH_6           MSH_7    MSH_8  \
0     |  ^~\&  GLIMS  carevue_hl7  4041  carevue_hl7  20240416145007  ORU^R01   
1     |  ^~\&  GLIMS  carevue_hl7  4041  carevue_hl7  20240416145008  ORU^R01   
2     |  ^~\&  GLIMS  carevue_hl7  4041  carevue_hl7  20240416151010  ORU^R01   
3     |  ^~\&  GLIMS  carevue_hl7  4041  carevue_hl7  20240416151505  ORU^R01   

   MSH_9 MSH_10  ...            OBX_5  OBX_6           OBX_7     OBX_8  \
0  47508      P  ...                N      X  20240416134000  BACTERIO   
1  47509      P  ...        %^%^GLIMS  35-51               L         F   
2  47524      P  ...        %^%^GLIMS  35-51               L         F   
3  47525      P  ...  UI/l^UI/L^GLIMS    <55               H         F   

                  OBX_9           message_id OBX_10  \
0  CNG^NGCUNGAMA^Calvin  2404161450-0001.txt    NaN   
1        20240416144600  2404161450-0002.txt  BIOCH   
2        20240415073800  2404161510-0001.txt  BIOCH   
3  

In [None]:
from hl7apy.core import Group, Segment


In [25]:
directory_path = r"D:\stage\data\DataMai2024\DataMai2024\archive(6)\EXEMPLE1_GLIMS_HL7_RESULTATS\EXEMPLE1_GLIMS_HL7_RESULTATS"
data = parse_hl7_files_to_dataframe(directory_path)
data.head()

Unnamed: 0,message_id,MSH_1,MSH_10,MSH_11,MSH_12,MSH_2,MSH_3,MSH_4,MSH_5,MSH_6,...,PID_3,PID_5,PID_7,PID_8,PV1_1,PV1_19,PV1_2,PV1_3,PV1_44,message_id.1
0,2404161450-0001.txt,|,47508,P,2.1,^~\&,GLIMS,carevue_hl7,4041,carevue_hl7,...,308495690^^^I_STDENIS,XXXXXX^XXXXXX^XXXXXX^^^M.,19901106,M,1,861935256,I,4041^1001,20240416063400,2404161450-0001.txt
1,2404161450-0002.txt,|,47509,P,2.1,^~\&,GLIMS,carevue_hl7,4041,carevue_hl7,...,308495690^^^I_STDENIS,XXXXXX^XXXXXX^XXXXXX^^^M.,19901106,M,1,861935256,I,4041^1001,20240416063400,2404161450-0002.txt
2,2404161510-0001.txt,|,47524,P,2.1,^~\&,GLIMS,carevue_hl7,4041,carevue_hl7,...,164037503100315^^^ASIP-SANTE-INS-NIR,XXXXXX^XXXXXX^XXXXXX^^^M.,19640304,M,1,861898375,I,4041^1006,20240410171100,2404161510-0001.txt
3,2404161515-0001.txt,|,47525,P,2.1,^~\&,GLIMS,carevue_hl7,4041,carevue_hl7,...,308481695^^^I_STDENIS,INCONNU DU TRENTE ET UN^MARS DEUX MILLE VNGT Q...,19640331,M,1,861818740,I,4041^1008,20240331223100,2404161515-0001.txt
