# In [7]:
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, RDFS, OWL, XSD
import re
from textwrap import dedent

# RDF namespaces used when building the PASS model graph.

# Terms taken from the abstract PASS ontology.
ABSTRACT = Namespace("http://www.imi.kit.edu/abstract-pass-ont#")

# Terms taken from the standard PASS ontology.
STANDARD = Namespace("http://www.i2pm.net/standard-pass-ont#")

# Namespace under which the individuals of the generated model are minted.
BASE = Namespace(
    "http://subjective-me.jimdo.com/s-bpm/processmodels/2025-03-25/Page-1#"
)

# Line shapes recognised in the input text; compiled once for reuse.
_NUMBERED_ITEM = re.compile(r"\d+\.")
_SID_MESSAGE = re.compile(r"\d+\.\s*(.+?)\s*->\s*(.+?):\s*(.+)")
_SBD_STATE = re.compile(r"\d+\.\s*(\w+State):\s*(.*)")


def _parse_sections(
    text: str,
) -> tuple[list[str], list[str], list[tuple[int, list[str]]]]:
    """Split the input text into subjects, SID lines and SBD state blocks.

    Returns:
        ``(subjects, sid_lines, sbd_blocks)`` where ``sbd_blocks`` is a list of
        ``(group, lines)`` pairs. ``lines[0]`` is the numbered state line and
        the rest are its indented detail lines (all stripped). ``group``
        changes at every ``### SBD`` header and at every ``#``-prefixed
        sub-heading inside the SBD section, so that states belonging to
        different behaviours can be told apart.
    """
    subjects: list[str] = []
    sid_lines: list[str] = []
    sbd_blocks: list[tuple[int, list[str]]] = []

    section = None          # one of None / "subjects" / "sid" / "sbd"
    group = 0               # index of the behaviour currently being read
    current: list[str] = []  # SBD state block being collected

    for raw in text.splitlines():
        line = raw.rstrip()
        stripped = line.lstrip()

        if stripped.startswith("### Subjects"):
            new_section = "subjects"
        elif stripped.startswith("### SID"):
            new_section = "sid"
        elif stripped.startswith("### SBD"):
            new_section = "sbd"
        else:
            new_section = None

        # Inside the SBD section any other heading (e.g. "#### Customer:")
        # opens the behaviour of another subject.
        starts_group = new_section == "sbd" or (
            new_section is None and section == "sbd" and stripped.startswith("#")
        )

        if new_section or starts_group:
            if current:
                sbd_blocks.append((group, current))
                current = []
            if starts_group:
                group += 1
            if new_section:
                # Switching section here is what keeps SBD state lines from
                # being collected as SID message lines.
                section = new_section
            continue

        if section == "subjects":
            if line.startswith("-"):
                subjects.append(line.lstrip("- ").strip())
        elif section == "sid":
            if _NUMBERED_ITEM.match(line):
                sid_lines.append(line)
        elif section == "sbd":
            if _NUMBERED_ITEM.match(stripped):
                if current:
                    sbd_blocks.append((group, current))
                current = [stripped]
            elif current and raw.startswith((" ", "\t")):
                # Indented lines are details of the most recent state.
                current.append(stripped)

    if current:
        sbd_blocks.append((group, current))

    return subjects, sid_lines, sbd_blocks


def _add_component(g: "Graph", uri, rdf_type, component_id: str, label: str) -> None:
    """Add the rdf:type / component-ID / English label triples for *uri*."""
    g.add((uri, RDF.type, rdf_type))
    g.add((uri, STANDARD.hasModelComponentID, Literal(component_id, datatype=XSD.string)))
    g.add((uri, STANDARD.hasModelComponentLabel, Literal(label, lang="en")))


def _add_subjects(g: "Graph", subjects: list[str], parents: tuple) -> dict:
    """Add one FullySpecifiedSubject per label and return ``{label: uri}``."""
    subject_uris = {}
    # Numbering starts at 2, as in the original implementation.
    for idx, subj_label in enumerate(subjects, start=2):
        sid = f"SID_1_FullySpecifiedSubject_{idx}"
        subj_uri = BASE[sid]
        subject_uris[subj_label] = subj_uri

        _add_component(g, subj_uri, STANDARD.FullySpecifiedSubject, sid, subj_label)
        g.add((subj_uri, STANDARD.hasMaximumSubjectInstanceRestriction, Literal(1, datatype=XSD.integer)))
        g.add((subj_uri, ABSTRACT.hasExecutionCostPerHour, Literal(0.0, datatype=XSD.double)))

        for parent in parents:
            g.add((parent, STANDARD.contains, subj_uri))
    return subject_uris


def _add_messages(g: "Graph", sid_lines: list[str], subject_uris: dict, parents: tuple) -> None:
    """Add message specification, exchange list and connector per SID line.

    Raises:
        KeyError: if a line names a sender or receiver that is not a known
            subject.
    """
    matches = [m for m in map(_SID_MESSAGE.match, sid_lines) if m]
    for number, m in enumerate(matches, start=1):
        sender, receiver, msg = m.groups()
        try:
            sender_uri = subject_uris[sender.strip()]
            receiver_uri = subject_uris[receiver.strip()]
        except KeyError as err:
            raise KeyError(
                f"SID line {m.group(0)!r} references subject {err.args[0]!r}, "
                "which is not listed under '### Subjects'"
            ) from None

        msg_spec_id = f"SID_1_MessageSpecification_{number}"
        msg_spec_uri = BASE[msg_spec_id]
        _add_component(g, msg_spec_uri, STANDARD.MessageSpecification, msg_spec_id, msg)

        payload_uri = BASE[f"PayloadDefinition_of_{msg_spec_id}"]
        g.add((payload_uri, RDF.type, OWL.Class))
        g.add((msg_spec_uri, STANDARD.containsPayloadDescription, payload_uri))

        conn_id = f"SID_1_StandardMessageConnector_{number}"
        conn_uri = BASE[conn_id]
        mel_id = f"MessageExchangeList_on_{conn_id}"
        mel_uri = BASE[mel_id]

        # The exchange list is labelled with the connector ID.
        _add_component(g, mel_uri, STANDARD.MessageExchangeList, mel_id, conn_id)
        g.add((mel_uri, STANDARD.contains, msg_spec_uri))
        g.add((mel_uri, STANDARD.contains, conn_uri))

        g.add((conn_uri, RDF.type, STANDARD.StandardMessageConnector))
        g.add((conn_uri, STANDARD.hasSender, sender_uri))
        g.add((conn_uri, STANDARD.hasReceiver, receiver_uri))
        g.add((conn_uri, STANDARD.hasMessageType, msg_spec_uri))

        for parent in parents:
            g.add((parent, STANDARD.contains, mel_uri))
            g.add((parent, STANDARD.contains, msg_spec_uri))
            g.add((parent, STANDARD.contains, conn_uri))


def _add_behavior(g: "Graph", sbd_blocks: list, parents: tuple) -> None:
    """Add SBD states and the transitions linking consecutive states."""
    sbd_prefix = "SBD_1_"
    states = []  # (group, state_type, state_uri, detail_fields)

    # idx counts every block, including unparseable ones, so state IDs keep
    # the same numbering as the blocks in the text.
    for idx, (group, block) in enumerate(sbd_blocks, start=1):
        m = _SBD_STATE.match(block[0])
        if not m:
            continue
        state_type, label = m.groups()

        # Detail lines have the form "Key: value"; a repeated key keeps the
        # last value.
        fields = {}
        for detail in block[1:]:
            key, sep, value = detail.partition(":")
            if sep:
                fields[key.strip()] = value.strip()

        state_id = f"{sbd_prefix}{state_type}_{idx}"
        state_uri = BASE[state_id]
        _add_component(g, state_uri, STANDARD[state_type], state_id, label)
        for parent in parents:
            g.add((parent, STANDARD.contains, state_uri))
        states.append((group, state_type, state_uri, fields))

    transition_counter = 0
    for source, target in zip(states, states[1:]):
        src_group, src_type, src_uri, fields = source
        dst_group, _, dst_uri, _ = target
        if src_group != dst_group:
            # Never link the last state of one behaviour to the first state
            # of the next one.
            continue
        transition_counter += 1
        trans_id = f"{sbd_prefix}{src_type}Transition_{transition_counter}"
        trans_uri = BASE[trans_id]

        # "or" (not a .get default) so missing AND empty values fall back.
        msg_val = fields.get("Msg") or "Unknown Message"
        if src_type == "SendState":
            trans_type = STANDARD.SendTransition
            to_val = fields.get("To") or "Unknown Receiver"
            trans_label = f"To: {to_val}\nMsg: {msg_val}"
        elif src_type == "ReceiveState":
            trans_type = STANDARD.ReceiveTransition
            from_val = fields.get("From") or "Unknown Sender"
            trans_label = f"From: {from_val}\nMsg: {msg_val}"
        else:
            trans_type = STANDARD.DoTransition
            # Only Start/Do states contribute their description as label.
            description = (
                fields.get("Description")
                if src_type in ("StartState", "DoState")
                else None
            )
            trans_label = description or "Continue Process"

        _add_component(g, trans_uri, trans_type, trans_id, trans_label)
        g.add((trans_uri, STANDARD.hasSourceState, src_uri))
        g.add((trans_uri, STANDARD.hasTargetState, dst_uri))
        for parent in parents:
            g.add((parent, STANDARD.contains, trans_uri))


def _serialize_with_doctype(g: "Graph") -> str:
    """Serialize *g* as RDF/XML with a DOCTYPE declaring the entities."""
    xml_body = g.serialize(format="application/rdf+xml")
    if isinstance(xml_body, bytes):
        # rdflib < 6 returns bytes from serialize().
        xml_body = xml_body.decode("utf-8")

    entities = dedent("""\
        <!DOCTYPE rdf:RDF [
            <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
            <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
            <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
            <!ENTITY abstract-pass-ont "http://www.imi.kit.edu/abstract-pass-ont#" >
            <!ENTITY standard-pass-ont "http://www.i2pm.net/standard-pass-ont#" >
            <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
        ]>
    """)

    body_lines = xml_body.splitlines()
    # Replace the serializer's own XML declaration, but only if there is one.
    if body_lines and body_lines[0].lstrip().startswith("<?xml"):
        body_lines = body_lines[1:]
    xml_body_nohead = "\n".join(body_lines)
    return f'<?xml version="1.0"?>\n{entities}\n{xml_body_nohead}'


def sid_to_pass_owl(mistral_text: str,
                    model_label: str = "PASS_Model",
                    out_file: str | None = None) -> str:
    """Convert a textual SID/SBD description into a PASS OWL (RDF/XML) document.

    The text is expected to contain up to three sections:

    * ``### Subjects`` followed by ``- <subject>`` bullet lines,
    * ``### SID`` followed by ``<n>. <sender> -> <receiver>: <message>`` lines,
    * ``### SBD`` followed by ``<n>. <Kind>State: <label>`` lines, each
      optionally followed by indented ``To:`` / ``From:`` / ``Msg:`` /
      ``Description:`` detail lines. Further ``#`` sub-headings inside this
      section separate the behaviours of different subjects.

    Args:
        mistral_text: The process description to convert.
        model_label: Label of the PASSProcessModel individual; also used as
            its local name.
        out_file: If given (and non-empty), the XML is also written to this
            path as UTF-8.

    Returns:
        The RDF/XML document as a string.

    Raises:
        KeyError: if a SID line references a subject that is not listed in
            the ``### Subjects`` section.
    """
    subjects, sid_lines, sbd_blocks = _parse_sections(mistral_text)

    # Build graph
    g = Graph(base=BASE)
    g.bind("abstract-pass-ont", ABSTRACT)
    g.bind("standard-pass-ont", STANDARD)
    g.bind("owl", OWL)
    g.bind("rdfs", RDFS)
    g.bind("xsd", XSD)

    # PASSProcessModel individual
    model_uri = BASE[model_label]
    _add_component(g, model_uri, STANDARD.PASSProcessModel, f"{model_uri}#Model", model_label)

    # ModelLayer SID_1
    sid_layer = BASE["SID_1"]
    _add_component(g, sid_layer, ABSTRACT.ModelLayer, "SID_1", "SID_1")
    g.add((sid_layer, STANDARD.hasPriorityNumber, Literal(1, datatype=XSD.positiveInteger)))
    g.add((model_uri, STANDARD.contains, sid_layer))

    # Every component is registered with both the layer and the model.
    parents = (sid_layer, model_uri)

    subject_uris = _add_subjects(g, subjects, parents)
    _add_messages(g, sid_lines, subject_uris, parents)
    # Runs exactly once, independent of how many SID lines there are.
    _add_behavior(g, sbd_blocks, parents)

    # Dummy example class
    dm_class = BASE["VisioShapesInternalDataMappingFunction"]
    g.add((dm_class, RDF.type, OWL.Class))
    g.add((dm_class, RDFS.subClassOf, STANDARD.DataMappingFunction))

    final_xml = _serialize_with_doctype(g)

    if out_file:
        with open(out_file, "w", encoding="utf-8") as f:
            f.write(final_xml)

    return final_xml


# In [8]:
# Sample process description in the text format that sid_to_pass_owl parses:
# a "### Subjects" bullet list, a numbered "### SID" list of
# "Sender -> Receiver: Message" lines, and a "### SBD:" section of numbered
# "<Kind>State: label" lines with indented detail lines.
mistral_block = """
### Subjects
- Customer
- E-commerce Website
- Inventory System
- Payment Gateway
- Email System

### SID:
1. Customer -> E-commerce Website: Order
2. E-commerce Website -> Inventory System: Inventory Check Request
3. Inventory System -> E-commerce Website: Inventory Check Response
4. E-commerce Website -> Customer: Payment Request
5. Customer -> Payment Gateway: Payment
6. Payment Gateway -> E-commerce Website: Payment Confirmation
7. E-commerce Website -> Email System: Order Confirmation Email
8. Email System -> Customer: Order Confirmation Email

### SBD:
#### Customer:
1. StartState: Decide to place order
   Description: initiate order
2. SendState: Send Order to E-commerce Website
   To:  E-commerce Website
   Msg: Order
3. ReceiveState: Receive Payment Request from E-commerce Website
   From: E-commerce Website
   Msg: Payment Request
4. SendState: Send Payment to Payment Gateway
   To:  Payment Gateway
   Msg: Payment
5. ReceiveState: Receive Order Confirmation Email from Email System
   From: Email System
   Msg: Order Confirmation Email
6. EndState: Order confirmed
"""

# Convert the sample; the returned XML string is kept in `xml` and the same
# document is written to out_file.
# NOTE(review): out_file is an absolute, user-specific path — adjust it before
# running this cell on another machine.
xml = sid_to_pass_owl(mistral_block, out_file="/home/s/smanan/ecommerce_pass.owl")
#print(xml[:1000])   # truncated preview