In [47]:
import os
from lxml import etree
import json

In [48]:
def pre_process(file_path):
    """Load an XML model file and return it as text with diagram layout removed.

    Every element below the root that belongs to the BPMN Diagram Interchange
    namespace (``http://www.omg.org/spec/BPMN/20100524/DI``) is detached from
    the tree together with its subtree; the remaining document is serialised.

    Args:
        file_path: Path of the XML file to read.

    Returns:
        The pretty-printed XML as a ``str``, or ``None`` when reading, parsing
        or serialising fails (the error is printed instead of being raised).
    """
    bpmndi_ns = {'bpmndi': 'http://www.omg.org/spec/BPMN/20100524/DI'}
    try:
        # Hand lxml the raw bytes so it can apply the encoding declared in
        # the XML prolog itself.
        with open(file_path, 'rb') as file:
            xml_content = file.read()
        root = etree.fromstring(xml_content)
        # xpath() returns a list, so detaching nodes while looping over it
        # does not disturb the iteration.
        for bpmndi_tag in root.xpath(".//bpmndi:*", namespaces=bpmndi_ns):
            parent = bpmndi_tag.getparent()
            if parent is not None:
                parent.remove(bpmndi_tag)
        # encoding='unicode' makes tostring() return str, so no bytes check
        # or decode step is needed afterwards.
        return etree.tostring(root, pretty_print=True, encoding='unicode')
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with None so a batch caller can keep going.
        print(f"Failed to process {file_path}: {e}")
        return None

In [49]:
def process_files_in_directory(directory_path, jsonl_filename='All_Conflcit_Patterns.jsonl'):
    """Pre-process every ``.bpmnq`` file in a directory into one JSON Lines file.

    Each matching file is passed to ``pre_process``. A successful result is
    written as one JSON object per line with the keys ``"filename"`` and
    ``"XML_Contents"``. Files for which ``pre_process`` returns a falsy value
    are collected and their names printed once the whole directory is done.

    Args:
        directory_path: Directory to scan (not recursive).
        jsonl_filename: Path of the JSON Lines file to create or overwrite.
            The default keeps the historical file name (including its
            spelling) so existing consumers keep finding the file.

    Returns:
        None. Results go to ``jsonl_filename``; failures are printed.
    """
    failed_files = []  # Names of files that could not be pre-processed

    # json.dumps() escapes non-ASCII characters by default; the explicit
    # encoding just makes the output independent of the platform default.
    with open(jsonl_filename, 'w', encoding='utf-8') as jsonl_file:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # line order of the output reproducible between runs and machines.
        for filename in sorted(os.listdir(directory_path)):
            if not filename.endswith('.bpmnq'):
                continue

            modified_xml = pre_process(os.path.join(directory_path, filename))
            if modified_xml:
                # Fresh dict per record instead of mutating a shared one.
                record = {"filename": filename, "XML_Contents": modified_xml}
                jsonl_file.write(json.dumps(record) + '\n')
            else:
                failed_files.append(filename)

    if failed_files:
        print("Failed to process the following files:")
        print("\n".join(failed_files))

In [50]:
# Directory containing the conflict-pattern models. The default is the
# original machine-specific workspace location; set the SECBPMN_PATTERNS_DIR
# environment variable to run this cell on another machine without editing it.
pattern_base = os.environ.get(
    'SECBPMN_PATTERNS_DIR',
    r'C:\STS-Tool_SecBpmn_DM_v.1.0.0.201803081550\workspace\telemedicine\Models\SecBPMN2\conflicts_Patterns',
)
process_files_in_directory(pattern_base)