In [None]:
from rdflib import Graph, Namespace, Literal  
from rdflib.namespace import RDF, RDFS, OWL, XSD
import re
from textwrap import dedent
from typing import Optional
import unicodedata

# Namespaces
# ABSTRACT: vocabulary used below for ModelLayer and hasExecutionCostPerHour.
ABSTRACT = Namespace("http://www.imi.kit.edu/abstract-pass-ont#")
# STANDARD: vocabulary used below for the PASS classes and properties
# (PASSProcessModel, FullySpecifiedSubject, contains, hasModelComponentID, ...).
STANDARD = Namespace("http://www.i2pm.net/standard-pass-ont#")
# BASE: namespace in which every generated individual (model, layer, subjects,
# messages, states, transitions) is minted; also passed to Graph(base=...).
BASE     = Namespace("http://subjective-me.jimdo.com/s-bpm/processmodels/2025-03-25/Page-1#")

def normalize_name(name: str) -> str:
    """Return a canonical lookup key for a subject name.

    The key is NFKC-normalized, lower-cased, stripped of surrounding
    whitespace and trailing colons, has every whitespace run collapsed to a
    single blank, and has en/em dashes replaced by a plain hyphen.

    Args:
        name: Subject name as written in the source text.

    Returns:
        The normalized key; two spellings of the same subject that differ
        only in the aspects listed above yield the same key.
    """
    name = unicodedata.normalize("NFKC", name)
    # Strip again after removing the colon(s): for "Customer :" the blank in
    # front of the colon would otherwise survive and break the lookup.
    name = name.strip().rstrip(":").strip().lower()
    name = re.sub(r"\s+", " ", name)
    return name.replace("–", "-").replace("—", "-")

# Patterns shared by the SID/SBD text parsers below.
_SID_HEADER_RE = re.compile(r"###\s*(SID|Subject Interaction Diagram)", re.IGNORECASE)
_SID_MESSAGE_RE = re.compile(r"\d+\.\s*(.+?)\s*->\s*(.+?):\s*(.*)")
_STATE_HEADER_RE = re.compile(r"(\d+)\.\s*(\w+State|GotoStep):\s*(.*)")
_FIELD_RE = re.compile(r"(Description|To|From|Msg|Next|GotoStep|Step):\s*(.*)")


def _leading_int(text: str) -> Optional[int]:
    """Return the integer *text* starts with, or None if it has no leading digits."""
    m = re.match(r"\d+", text)
    return int(m.group()) if m else None


def _extract_sid_lines(text: str) -> list:
    """Return the numbered lines of the '### SID' section, without indentation."""
    sid_lines = []
    in_sid = False
    for raw in text.splitlines():
        line = raw.strip()
        if _SID_HEADER_RE.search(line):
            in_sid = True
            continue
        if not in_sid:
            continue
        # The next '### ' heading ends the section.  The check runs on the
        # stripped line so that indented input terminates correctly as well.
        if (re.match(r"###\s+\w+", line)
                and "SID" not in line
                and "Subject Interaction Diagram" not in line):
            break
        if re.match(r"\d+\.", line):
            sid_lines.append(line)
    return sid_lines


def _split_sbd_sections(text: str) -> dict:
    """Map each '#### <subject>' heading of the SBD section to its stripped body lines."""
    sections = {}
    in_sbd = False
    subject, body = None, []
    for raw in text.splitlines():
        line = raw.strip()
        if line.startswith(("### SBD", "### Subject Behavior Diagram")):
            in_sbd = True
            continue
        if not in_sbd:
            continue
        if line.startswith("### "):
            break
        if line.startswith("#### "):
            if subject and body:
                sections[subject] = body
            subject = line.replace("####", "").replace(":", "").strip()
            body = []
        elif subject:
            body.append(line)
    if subject and body:
        sections[subject] = body
    return sections


def _group_numbered_blocks(lines: list) -> list:
    """Group lines into blocks, each starting at a line of the form '<n>. ...'."""
    blocks = []
    for line in lines:
        if re.match(r"\d+\.", line):
            blocks.append([line])
        elif blocks:
            blocks[-1].append(line)
    return blocks


def _parse_state_block(block: list):
    """Parse one numbered state block into (step_num, info, connections), or None.

    ``info`` holds the keys type/label/desc/to/from/msg.  ``connections`` is a
    list of ``(source, target, choice_from, choice_msg, branch_desc)`` tuples in
    the order: plain ``Next``, ``Branches`` entries, ``Choices`` entries, goto.
    None is returned when the first line is not a recognised state header.
    """
    header = _STATE_HEADER_RE.match(block[0])
    if not header:
        return None
    step_num = int(header.group(1))
    state_type = header.group(2)
    label = header.group(3)

    info = {"type": state_type, "label": label,
            "desc": None, "to": None, "from": None, "msg": None}
    next_steps, branches, choices = [], [], []
    goto_target = None
    section = None   # None, "choices" or "branches"
    entry = None     # the '- ...' list entry currently being filled

    for line in block[1:]:
        if line.startswith("Choices:"):
            section, entry = "choices", None
            continue
        if line.startswith("Branches:"):
            section, entry = "branches", None
            continue

        starts_entry = line.startswith("-")
        if starts_entry:
            # A dash opens a new list entry; its follow-up lines carry no dash.
            if section == "choices":
                entry = {"from": None, "msg": None, "next": None}
                choices.append(entry)
            elif section == "branches":
                entry = {"step": None, "desc": None}
                branches.append(entry)
            field = _FIELD_RE.match(line.lstrip("-").lstrip())
        else:
            field = _FIELD_RE.match(line)
        if not field:
            continue
        key, value = field.group(1), field.group(2).strip()

        # Fields that belong to the open list entry must not leak into the
        # state itself (otherwise e.g. a branch description replaces the
        # description of the state).
        if entry is not None and section == "choices" and key in ("From", "Msg", "Next"):
            if key == "Next":
                entry["next"] = _leading_int(value)
            else:
                entry[key.lower()] = value
            continue
        if entry is not None and section == "branches" and key in ("Step", "Description"):
            if key == "Step":
                entry["step"] = _leading_int(value)
            else:
                entry["desc"] = value
            continue
        if starts_entry:
            # Dash-prefixed lines never describe the state itself.
            continue

        if key == "Description":
            info["desc"] = value
        elif key == "To":
            info["to"] = value
        elif key == "From":
            info["from"] = value
        elif key == "Msg":
            info["msg"] = value
        elif key == "Next":
            target = _leading_int(value)
            if target is not None:
                next_steps.append(target)
        elif key == "GotoStep":
            target = _leading_int(value)
            if target is not None:
                goto_target = target

    # '<n>. GotoStep: <m> ...' carries its target on the header line itself.
    if state_type == "GotoStep" and goto_target is None:
        goto_target = _leading_int(label)

    connections = [(step_num, n, None, None, None) for n in next_steps]
    connections += [(step_num, b["step"], None, None, b["desc"])
                    for b in branches if b["step"] is not None]
    connections += [(step_num, c["next"], c["from"], c["msg"], None)
                    for c in choices if c["next"] is not None]
    if goto_target is not None:
        connections.append((step_num, goto_target, None, None, None))
    return step_num, info, connections


def sid_to_pass_owl(llama_text: str,
                    model_label: str = "PASS_Model",
                    out_file: Optional[str] = None) -> str:
    """Convert a textual SID/SBD description into a PASS model in RDF/XML.

    The text is expected to contain a ``### SID`` / ``### Subject Interaction
    Diagram`` section with numbered ``Sender -> Receiver: Message`` lines and,
    optionally, a ``### SBD`` / ``### Subject Behavior Diagram`` section with
    one ``#### <subject>`` block per subject.  Each block lists numbered
    states (``<n>. <Kind>State: label`` or ``<n>. GotoStep: <m>``) followed by
    ``Description:``, ``To:``, ``From:``, ``Msg:``, ``Next:`` lines and
    optional ``Choices:`` / ``Branches:`` lists.

    Subjects are numbered in order of first appearance in the SID section, so
    the generated identifiers are stable across runs.  SBD blocks whose
    heading matches no SID subject, and transitions that point to a step that
    is not defined in the same block, are skipped with a printed warning.

    Args:
        llama_text: The SID/SBD description to convert.
        model_label: Label and local name of the generated model individual.
        out_file: Optional path; when given, the XML is also written there
            (UTF-8).

    Returns:
        The RDF/XML document as a string, prefixed with an XML declaration and
        a DOCTYPE that declares the namespace entities.

    Raises:
        OSError: If ``out_file`` cannot be opened for writing.
    """
    # STEP 1: Parse SID lines and collect the subjects in order of appearance.
    sid_lines = _extract_sid_lines(llama_text)
    subjects = {}  # normalized name -> label as first written
    for line in sid_lines:
        m = _SID_MESSAGE_RE.match(line)
        if m:
            for raw_name in m.group(1, 2):
                subj_label = raw_name.strip()
                subjects.setdefault(normalize_name(subj_label), subj_label)

    print("SID lines:", sid_lines)
    print("Extracted subjects:", list(subjects.values()))

    # STEP 2: Build RDF graph
    g = Graph(base=BASE)
    g.bind("abstract-pass-ont", ABSTRACT)
    g.bind("standard-pass-ont", STANDARD)
    g.bind("owl", OWL)
    g.bind("rdfs", RDFS)
    g.bind("xsd", XSD)

    model_uri = BASE[model_label]
    g.add((model_uri, RDF.type, STANDARD.PASSProcessModel))
    g.add((model_uri, STANDARD.hasModelComponentID, Literal(f"{model_uri}#Model", datatype=XSD.string)))
    g.add((model_uri, STANDARD.hasModelComponentLabel, Literal(model_label, lang="en")))

    sid_layer = BASE["SID_1"]
    g.add((sid_layer, RDF.type, ABSTRACT.ModelLayer))
    g.add((sid_layer, STANDARD.hasModelComponentID, Literal("SID_1", datatype=XSD.string)))
    g.add((sid_layer, STANDARD.hasModelComponentLabel, Literal("SID_1", lang="en")))
    g.add((sid_layer, STANDARD.hasPriorityNumber, Literal(1, datatype=XSD.positiveInteger)))
    g.add((model_uri, STANDARD.contains, sid_layer))

    # STEP 3: Subjects (numbering starts at 2, as in the identifiers used so far)
    subj_id_map = {}
    for idx, (subj_key, subj_label) in enumerate(subjects.items(), start=2):
        sid = f"SID_1_FullySpecifiedSubject_{idx}"
        subj_uri = BASE[sid]
        subj_id_map[subj_key] = subj_uri

        g.add((subj_uri, RDF.type, STANDARD.FullySpecifiedSubject))
        g.add((subj_uri, STANDARD.hasModelComponentID, Literal(sid, datatype=XSD.string)))
        g.add((subj_uri, STANDARD.hasModelComponentLabel, Literal(subj_label, lang="en")))
        g.add((subj_uri, STANDARD.hasMaximumSubjectInstanceRestriction, Literal(1, datatype=XSD.integer)))
        g.add((subj_uri, ABSTRACT.hasExecutionCostPerHour, Literal(0.0, datatype=XSD.double)))

        g.add((sid_layer, STANDARD.contains, subj_uri))
        g.add((model_uri, STANDARD.contains, subj_uri))

    # STEP 4: Process SID messages
    msg_counter = 1
    for line in sid_lines:
        m = _SID_MESSAGE_RE.match(line)
        if not m or not m.group(3):
            # Not a message line, or a line without message text.
            continue
        sender, receiver, msg = m.groups()
        sender_uri = subj_id_map[normalize_name(sender)]
        receiver_uri = subj_id_map[normalize_name(receiver)]

        msg_spec_id = f"SID_1_MessageSpecification_{msg_counter}"
        msg_spec_uri = BASE[msg_spec_id]
        g.add((msg_spec_uri, RDF.type, STANDARD.MessageSpecification))
        g.add((msg_spec_uri, STANDARD.hasModelComponentID, Literal(msg_spec_id, datatype=XSD.string)))
        g.add((msg_spec_uri, STANDARD.hasModelComponentLabel, Literal(msg, lang="en")))

        payload_id = f"PayloadDefinition_of_{msg_spec_id}"
        payload_uri = BASE[payload_id]
        g.add((payload_uri, RDF.type, OWL.Class))
        g.add((msg_spec_uri, STANDARD.containsPayloadDescription, payload_uri))

        # Connector and exchange list share the message's running number.
        mel_id = f"MessageExchangeList_on_SID_1_StandardMessageConnector_{msg_counter}"
        mel_uri = BASE[mel_id]
        conn_id = f"SID_1_StandardMessageConnector_{msg_counter}"
        conn_uri = BASE[conn_id]

        g.add((mel_uri, RDF.type, STANDARD.MessageExchangeList))
        g.add((mel_uri, STANDARD.hasModelComponentID, Literal(mel_id, datatype=XSD.string)))
        g.add((mel_uri, STANDARD.hasModelComponentLabel, Literal(conn_id, lang="en")))
        g.add((mel_uri, STANDARD.contains, msg_spec_uri))

        g.add((conn_uri, RDF.type, STANDARD.StandardMessageConnector))
        g.add((conn_uri, STANDARD.hasSender, sender_uri))
        g.add((conn_uri, STANDARD.hasReceiver, receiver_uri))
        g.add((conn_uri, STANDARD.hasMessageType, msg_spec_uri))
        g.add((mel_uri, STANDARD.contains, conn_uri))

        for parent in (sid_layer, model_uri):
            g.add((parent, STANDARD.contains, mel_uri))
            g.add((parent, STANDARD.contains, msg_spec_uri))
            g.add((parent, STANDARD.contains, conn_uri))

        msg_counter += 1

    # STEP 5: Parse SBD section
    sbd_sections = _split_sbd_sections(llama_text)

    # STEP 6: LOOP through EACH SBD section
    sbd_index = 1
    for sbd_name, sbd_lines in sbd_sections.items():
        subj_uri = subj_id_map.get(normalize_name(sbd_name))
        if not subj_uri:
            print(f"Warning: No matching FullySpecifiedSubject for {sbd_name}, skipping SBD")
            continue

        subj_sid_idx = subj_uri.split("_")[-1]
        sbd_id = f"SBD_{sbd_index}_SID_1_FullySpecifiedSubject_{subj_sid_idx}"
        sbd_uri = BASE[sbd_id]
        g.add((sbd_uri, RDF.type, STANDARD.SubjectBehavior))
        g.add((sbd_uri, STANDARD.hasModelComponentID, Literal(sbd_id, datatype=XSD.string)))
        g.add((sbd_uri, STANDARD.hasModelComponentLabel, Literal(f"SBD: {sbd_name}", lang="en")))
        g.add((sbd_uri, STANDARD.hasPriorityNumber, Literal(sbd_index, datatype=XSD.positiveInteger)))
        g.add((subj_uri, STANDARD.hasBehavior, sbd_uri))

        # --- Parse the numbered states and create the state individuals ---
        state_lookup = {}
        connections = []
        for block in _group_numbered_blocks(sbd_lines):
            parsed = _parse_state_block(block)
            if parsed is None:
                continue
            step_num, info, outgoing = parsed
            state_lookup[step_num] = info
            connections.extend(outgoing)

            state_type = info["type"]
            state_id = f"SBD_{sbd_index}_{state_type}_{step_num}"
            state_uri = BASE[state_id]
            # NOTE(review): the class is taken verbatim from the text, so a
            # 'GotoStep' block is typed STANDARD.GotoStep -- confirm that this
            # is intended by the consumers of the file.
            g.add((state_uri, RDF.type, STANDARD[state_type]))
            g.add((state_uri, STANDARD.hasModelComponentID, Literal(state_id, datatype=XSD.string)))
            g.add((state_uri, STANDARD.hasModelComponentLabel, Literal(info["label"], lang="en")))
            g.add((sbd_uri, STANDARD.contains, state_uri))

        # --- Create transitions (after all states, so forward references work) ---
        transition_counter = 1
        for source_step, target_step, choice_from, choice_msg, branch_desc in connections:
            src_info = state_lookup[source_step]
            tgt_info = state_lookup.get(target_step)
            if tgt_info is None:
                print(f"Warning: step {source_step} of SBD {sbd_name} points to "
                      f"undefined step {target_step}, skipping transition")
                continue

            src_type = src_info["type"]
            trans_id = f"SBD_{sbd_index}_{src_type}Transition_{transition_counter}"
            trans_uri = BASE[trans_id]

            if src_type == "SendState":
                trans_type = STANDARD.SendTransition
                trans_label = f"To: {src_info['to']}\nMsg: {src_info['msg']}"
            elif src_type == "ReceiveState":
                trans_type = STANDARD.ReceiveTransition
                if choice_from or choice_msg:
                    trans_label = f"From: {choice_from}\nMsg: {choice_msg}"
                else:
                    trans_label = f"From: {src_info['from']}\nMsg: {src_info['msg']}"
            elif src_type == "GotoStep":
                trans_type = STANDARD.DoTransition
                trans_label = f"Go to step {target_step}"
            else:
                trans_type = STANDARD.DoTransition
                # A branch is labelled with its own description when it has one.
                trans_label = branch_desc or src_info["desc"] or "Continue Process"

            g.add((trans_uri, RDF.type, trans_type))
            g.add((trans_uri, STANDARD.hasModelComponentID, Literal(trans_id, datatype=XSD.string)))
            g.add((trans_uri, STANDARD.hasModelComponentLabel, Literal(trans_label, lang="en")))
            g.add((trans_uri, STANDARD.hasSourceState, BASE[f"SBD_{sbd_index}_{src_type}_{source_step}"]))
            g.add((trans_uri, STANDARD.hasTargetState, BASE[f"SBD_{sbd_index}_{tgt_info['type']}_{target_step}"]))
            g.add((sbd_uri, STANDARD.contains, trans_uri))
            transition_counter += 1

        g.add((sid_layer, STANDARD.contains, sbd_uri))
        g.add((model_uri, STANDARD.contains, sbd_uri))
        sbd_index += 1

    # example class
    dm_class = BASE["VisioShapesInternalDataMappingFunction"]
    g.add((dm_class, RDF.type, OWL.Class))
    g.add((dm_class, RDFS.subClassOf, STANDARD.DataMappingFunction))

    # STEP 7: Serialize RDF/XML
    xml_body = g.serialize(format="application/rdf+xml")

    entities = dedent("""\
        <!DOCTYPE rdf:RDF [
            <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
            <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
            <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
            <!ENTITY abstract-pass-ont "http://www.imi.kit.edu/abstract-pass-ont#" >
            <!ENTITY standard-pass-ont "http://www.i2pm.net/standard-pass-ont#" >
            <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
        ]>
    """)
    # Drop the serializer's own first line (the XML declaration) so that the
    # DOCTYPE can be placed between the declaration and the document body.
    xml_body_nohead = "\n".join(xml_body.splitlines()[1:])
    final_xml = f'<?xml version="1.0"?>\n{entities}\n{xml_body_nohead}'

    if out_file:
        with open(out_file, "w", encoding="utf-8") as f:
            f.write(final_xml)

    return final_xml

In [None]:
# Sample input in the text format parsed by sid_to_pass_owl: a
# "### ... (SID)" section with numbered "Sender -> Receiver: Message" lines,
# followed by a "### ... (SBD)" section holding one "#### <subject>:" block
# of numbered states per subject.
llama_block = """
        ### Subject Interaction Diagram (SID):

        1. Customer -> E-commerce Website: Order
        2. E-commerce Website -> Inventory System: Inventory Check Request
        3. Inventory System -> E-commerce Website: Inventory Check Response
        4. E-commerce Website -> Customer: Order Confirmation Request
        5. Customer -> Payment Gateway: Payment
        6. Payment Gateway -> E-commerce Website: Payment Confirmation
        7. E-commerce Website -> Customer: Order Confirmation Email
        8. E-commerce Website -> Customer: Order Declination Message
        9. Customer -> E-commerce Website: Order Cancellation Request
        10. Customer -> Another E-commerce Website: Order

        ### Subject Behavior Diagram (SBD):

        #### Customer:
        1. StartState: Decide to place an order
           Description: Customer initiates order process
           Next: 2
        2. SendState: Send Order to E-commerce Website
           To: E-commerce Website
           Msg: Order
           Next: 3
        3. ReceiveState: Receive Response from E-commerce Website
           Choices:
             - From: E-commerce Website
               Msg: Order Confirmation Request
               Next: 4
             - From: E-commerce Website
               Msg: Order Declination Message
               Next: 5
        4. SendState: Send Payment to Payment Gateway
           To: Payment Gateway
           Msg: Payment
           Next: 6
        5. DoState: Decide upon further action
           Description: Customer decides what to do next
           Branches:
             - Step: 7
               Description: Cancel order
             - Step: 10
               Description: Choose another e-commerce supplier
        6. ReceiveState: Receive Payment Confirmation from Payment Gateway
           From: Payment Gateway
           Msg: Payment Confirmation
           Next: 8
        7. SendState: Send Order Cancellation Request to E-commerce Website
           To: E-commerce Website
           Msg: Order Cancellation Request
           Next: 9
        8. ReceiveState: Receive Order Confirmation Email from E-commerce Website
           From: E-commerce Website
           Msg: Order Confirmation Email
           Next: 11
        9. EndState: Order Cancelled
        10. GotoStep: 2  # Retry order loop
        11. EndState: Order Confirmed and Paid

        #### E-commerce Website:
        1. StartState: Receive Order from Customer
           Description: E-commerce website initiates order processing
           Next: 2
        2. SendState: Send Inventory Check Request to Inventory System
           To: Inventory System
           Msg: Inventory Check Request
           Next: 3
        3. ReceiveState: Receive Inventory Check Response from Inventory System
           From: Inventory System
           Msg: Inventory Check Response
           Choices:
             - From: Inventory System
               Msg: Item in Stock
               Next: 4
             - From: Inventory System
               Msg: Item Not in Stock
               Next: 5
        4. SendState: Send Order Confirmation Request to Customer
           To: Customer
           Msg: Order Confirmation Request
           Next: 6
        5. SendState: Send Order Declination Message to Customer
           To: Customer
           Msg: Order Declination Message
           Next: 7
        6. ReceiveState: Receive Payment Confirmation from Payment Gateway
           From: Payment Gateway
           Msg: Payment Confirmation
           Next: 8
        7. EndState: Order Declined
        8. SendState: Send Order Confirmation Email to Customer
           To: Customer
           Msg: Order Confirmation Email
           Next: 9
        9. EndState: Order Confirmed and Paid

        #### Inventory System:
        1. StartState: Receive Inventory Check Request from E-commerce Website
           Description: Inventory system initiates inventory check
           Next: 2
        2. DoState: Check Inventory
           Description: Inventory system checks item availability
           Branches:
             - Step: 3
               Description: Item in stock
             - Step: 4
               Description: Item not in stock
        3. SendState: Send Item in Stock Response to E-commerce Website
           To: E-commerce Website
           Msg: Item in Stock
           Next: 5
        4. SendState: Send Item Not in Stock Response to E-commerce Website
           To: E-commerce Website
           Msg: Item Not in Stock
           Next: 5
        5. EndState: Inventory Check Complete

        #### Payment Gateway:
        1. StartState: Receive Payment from Customer
           Description: Payment gateway initiates payment processing
           Next: 2
        2. DoState: Process Payment
           Description: Payment gateway processes payment
           Branches:
             - Step: 3
               Description: Payment successful
             - Step: 4
               Description: Payment failed
        3. SendState: Send Payment Confirmation to E-commerce Website
           To: E-commerce Website
           Msg: Payment Confirmation
           Next: 5
        4. SendState: Send Payment Declination to E-commerce Website
           To: E-commerce Website
           Msg: Payment Declination
           Next: 5
        5. EndState: Payment Processing Complete

        #### Another E-commerce Website:
        1. StartState: Receive Order from Customer
           Description: Another e-commerce website initiates order processing
           Next: 2
        2. SendState: Send Order Confirmation Request to Customer
           To: Customer
           Msg: Order Confirmation Request
           Next: 3
        3. ReceiveState: Receive Payment from Customer
           From: Customer
           Msg: Payment
           Next: 4
        4. SendState: Send Order Confirmation Email to Customer
           To: Customer
           Msg: Order Confirmation Email
           Next: 5
        5. EndState: Order Confirmed and Paid
        """

# Convert the sample and write the result to disk.  The path is a raw string:
# in a normal literal "\T", "\P", "\O" and "\L" are invalid escape sequences,
# which Python only tolerates with a warning.
xml = sid_to_pass_owl(llama_block, out_file=r"E:\Thesis\PASS Diagrams\OWL_File\Llama-ecommerce.owl")