In [5]:
import json  
import os  
from typing import TypedDict, Annotated, Sequence, List, Optional  
import operator 

from langchain_openai import ChatOpenAI  
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage  
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate  
from langchain_core.output_parsers import PydanticOutputParser, JsonOutputParser  

from pydantic import BaseModel, Field

from langgraph.graph import StateGraph, END 

from dotenv import load_dotenv, find_dotenv
from datasets import load_dataset


In [20]:
# Load variables from a local .env file (if one exists) before reading them.
# Without this, a fresh kernel only sees HUGGINGFACE_REPO_ID when it has been
# exported in the shell. load_dotenv() leaves already-set variables untouched.
load_dotenv(find_dotenv())

REPO_ID = os.getenv("HUGGINGFACE_REPO_ID")  # dataset repository passed to load_dataset
FILENAME = "data.json"  # name of the data file to load from that repository

In [21]:
# Fail early with a readable message instead of passing an empty repository
# id on to load_dataset.
if not REPO_ID:
    raise ValueError(
        "HUGGINGFACE_REPO_ID is not set; export it or define it in a .env file."
    )

# Use the FILENAME constant so the file name is configured in one place only.
dataset = load_dataset(REPO_ID, data_files=FILENAME)

Generating train split: 15 examples [00:00, 1744.33 examples/s]


# ⚙️ CONFIGURATIONS

Set the following variables to select which process description from the dataset is used:

- `PROCESS_DESCRIPTION_ID`  
  The index of the process description to use (e.g., `0` for the first entry).

- `PROCESS_DESCRIPTION_TYPE`  
  Choose between `"normal_description"` or `"enhanced_description"` depending on the desired level of detail.

### 🧪 Example:
```python
PROCESS_DESCRIPTION_ID = 0
PROCESS_DESCRIPTION_TYPE = "normal_description"  # or "enhanced_description"
```

In [23]:

# Index of the dataset entry to model (position within the "train" split).
PROCESS_DESCRIPTION_ID = 0
# Column of that entry to use: "normal_description" or "enhanced_description".
PROCESS_DESCRIPTION_TYPE = "normal_description"

# Look up the entry first, then pick the requested description variant.
selected_example = dataset["train"][PROCESS_DESCRIPTION_ID]
PROCESS_DESCRIPTION = selected_example[PROCESS_DESCRIPTION_TYPE]

In [None]:
# LocalAI setup
# Both values come from the process environment and are None when unset.
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# Pull variables from a .env file (if present) into the process environment.
load_dotenv(find_dotenv())

# os.environ only accepts strings: assigning None raises TypeError, so the key
# captured in the LocalAI cell is exported only when it actually has a value.
if OPENAI_API_KEY is not None:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Tracing stays enabled unless the environment explicitly says otherwise.
os.environ["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2", "true")

# The remaining LangChain settings are used exactly as found in the
# environment. Missing ones are reported instead of being stored as the
# literal string "None" (API key) or crashing with TypeError (endpoint/project).
_missing_langchain_vars = [
    name
    for name in ("LANGCHAIN_API_KEY", "LANGCHAIN_ENDPOINT", "LANGCHAIN_PROJECT")
    if os.getenv(name) is None
]
if _missing_langchain_vars:
    print(
        "Warning: the following environment variables are not set: "
        + ", ".join(_missing_langchain_vars)
    )

In [None]:
# Open source LLM
# temperature=0 requests the least random sampling; max_tokens=None sets no
# explicit output limit on the client side.
llm = ChatOpenAI(
    model_name="dev-llama-3-large",
    temperature=0,
    max_tokens=None,
)

In [5]:
# Gpt-4o
# Rebinds `llm`; whichever LLM cell ran last is the model the graph nodes use.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
    max_tokens=None,
)

In [5]:
class AgentState(TypedDict):
    """State shared by the nodes of the graph.

    Every field is annotated with ``operator.add`` so that values returned by
    a node are concatenated to the existing sequence.
    """

    # Not written by any node visible in this notebook.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Input texts; the notebook passes the raw description, not message
    # objects, so the element type is str.
    process_description: Annotated[Sequence[str], operator.add]
    # Textual Petri net drafts produced by the "Modeller" node (the `.content`
    # of the LLM reply).
    process_modell: Annotated[Sequence[str], operator.add]

In [7]:
def modell(state):
    """Ask the LLM to describe the latest process description as a Petri net.

    The most recent entry of ``state["process_description"]`` is sent as the
    human message, together with a fixed system prompt containing the
    modelling rules and examples.

    Args:
        state: Graph state with a non-empty ``process_description`` sequence.

    Returns:
        A state update ``{"process_modell": [text]}`` where ``text`` is the
        content of the LLM reply.
    """
    system_prompt = """
    Objective: My task is to extract the essential elements of a Petri net from the provided text. I will not make the mistakes mentioned in the Examples which are not valid and not allowed to generate: section.

    Background: I understand that a Petri net graphically represents a system's structure and behavior, consisting of nodes termed Places and Transitions, connected via directed edges. It's crucial for every Petri net to be strongly connected, meaning any pair of nodes can be linked by a sequence of directed edges. In Petri nets, concurrency represents simultaneous transitions, indicating parallel processes. I understand that a Petri net always starts with a place and ends with a place. A cycle in a Petri net is a closed sequence of nodes and directed edges where the start and end node is the same, and all nodes and edges in the sequence are distinct (except for the first and last node). This typically involves a sequence of Places and Transitions that repeat, allowing for the representation of recurring processes or feedback loops within the system.

    Process:

    Concurrency Detection:

    I will quickly spot signs of concurrency.
    I need to recognize transitions that depict parallel activities.
    I will apply concurrency as necessary.
    I will ensure that branching occurs at a transition and merging concludes at a subsequent transition.
    If concurrency is detected, intermediate steps are allowed and can be added.
    Cycle Detection:

    If a cycle is detected, I must ensure that no deadlock occurs.
    I will carefully analyze the process structure to identify feedback loops or repeated paths.
    Upon detecting a cycle, I will check if appropriate control mechanisms are in place to prevent infinite repetition or blocking.
    I will ensure that there are clear conditions for entering and exiting the cycle to maintain process integrity.
    Title Determination:

    I will deduce an appropriate title for the Petri net based on its process representation.
    Place Identification:

    I will detail all places and label them in past participle format with succinct descriptions.
    Places with multiple outgoing edges will hint at process alternatives for me.
    If a place (Object Store) has multiple outgoing edges, it signifies alternatives representing a XOR-Split for me. I must ensure these paths reconverge at a subsequent place, representing an XOR-Join.
    Transition Identification:

    I will chronicle all transitions, naming them and providing concise details.
    I need to highlight transitions with multiple outgoing edges, as these indicate concurrency.
    If a transition (Activity) has multiple outgoing edges, it suggests concurrency representing a AND-Split to me. I must confirm that these concurrent paths merge at a subsequent transition, denoting an AND-Join.
    I will ensure that a transition is not the end of the process, as this would result in a deadlock.
    Edge Analysis:

    I will delve into all edges, expounding on their direction and the nodes they interlink.
    It's vital for me to ensure the Petri net remains strongly connected, integrating concurrency from Step 1.
    The same XOR-Join and AND-Join considerations as earlier apply here.
    Additional Constraints:

    I will adhere to the rules that branching transitions due to concurrency should merge at another transition (AND-Join), and branching paths from a place should converge at another place (XOR-Join).
    Output Format:

    Concurrency Detected: I will list the names of transitions linked with concurrency and associated places.
    Cycle Detected: I will list the names of transitions and places involved in the cycle.
    Places: Format - Place Name: Description.
    Transitions: Format - Transition Name: Description.
    Edges: Format - Transition --> Place OR Place --> Transition.
    Example Valid Output:

    Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order.

    Edges: P1 --> T1 (Valid because starts at Place), T1 --> P2 (Valid because ends at Place)

    Examples which are not valid and not allowed to generate: Example Output for Net with Errors. Reason Place to Place **Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order.

    Edges: P1 --> P1, (Error: Place to Place) T1 --> P2

    Example Output for Net with Errors. Reason Transition to Transition Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order. T2: Process Order - The bookstore staff receives the order. Edges: P1 --> T1, T1 --> T2 (Error: Transition to Transition)

    Example Output for Net with Errors. Reason Net does not start with Place Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order. T2: Process Order - The bookstore staff receives the order. Edges: T1 --> P1, (Error: Net starts with Transition) P1 --> T2,

    Example Output for Net with Errors. Reason Net Ending at a Transition: Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order. T2: Process Order - The bookstore staff receives the order. Edges: P1 --> T1, T1 --> P2, P2 --> T2 (Error: Net ends with Transition)

    Example Output for Net with Errors. Split at Place not Joined at Place: Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order. P3: Order Processed - The order is being processed by the bookstore staff. P4: Order Verified - The bookstore staff verifies the order details. P5: Book Located - The book is located in the bookstore's inventory.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order. T2: Receive Order - The bookstore staff receives the order and begins processing. T3: Verify Order - The bookstore staff verifies the order details. T4: Locate Book - The bookstore staff locates the book in their inventory.

    Edges: P1 --> T1, T1 --> P2, P2 --> T2, P2 --> T3, T2 --> P3, T3 --> P4, P3 --> T4, P4 --> T4 (Split at P4 not joined at Place), T4 --> P5

    Example Output for Net with Errors. Split at Transition not Joined at Transition: Places: P1: Order Placed - A customer places an order for a book online. P2: Order Received - The bookstore staff receives the order. P3: Order Processed - The order is being processed by the bookstore staff. P4: Order Verified - The bookstore staff verifies the order details. P5: Book Located - The book is located in the bookstore's inventory.

    Transitions: T1: Place Order - The customer visits the online bookstore and places an order. T2: Receive Order - The bookstore staff receives the order and begins processing. T3: Verify Order - The bookstore staff verifies the order details. T4: Locate Book - The bookstore staff locates the book in their inventory.

    Edges: P1 --> T1, T1 --> P2, T1 --> P3, P2 --> T2, P3 --> T3, T2 --> P4 (Split at T1 not joined at Transition), T3 --> P4 (Split at T1 not joined at Transition), P4 --> T4, T4 --> P5

    Note:

    I will depend strictly on the text’s content. I am aware that various places might tie to a single transition and vice versa.
    Every place and transition should have a minimum of one connecting edge.
    """

    # Only the most recently added description is modelled.
    latest_description = state["process_description"][-1]

    conversation = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=latest_description),
    ]
    reply = llm.invoke(conversation)

    # Wrapped in a list so the state's operator.add reducer can append it.
    return {"process_modell": [reply.content]}
    
    

In [None]:
class Name(BaseModel):
    # Bilingual label of a Petri net element: one German and one English string.
    de: str = Field(
        description="The name in German",
    )
    en: str = Field(
        description="The name in English",
    )

class Element(BaseModel):
    # One node of the Petri net: a transition ("Activity") or a place
    # ("Object Store"), according to the parser prompt in this notebook.
    name: Name = Field(
        description="The name of the element in different languages",
    )
    # The field is literally called "self"; it holds the element's numeric id.
    self: int = Field(
        description="The unique identifier of the element, start with 1",
    )
    # `enum` is passed as an extra Field keyword, presumably to advertise the
    # allowed values in the generated schema; the `str` annotation itself does
    # not restrict the value.
    type: str = Field(
        description="The type of the element",
        enum=["Activity", "Object Store"],
    )

class Edge(BaseModel):
    # Directed connection between two elements, referenced by their ids.
    # `from` is a Python keyword, so the attribute is named `from_` and exposed
    # under the alias "from".
    from_: int = Field(
        alias="from",
        description="The ID of the source element",
    )
    to: int = Field(
        description="The ID of the target element",
    )

class PetriNetTransition(BaseModel):
    # Top-level schema handed to the JSON output parser: all elements plus the
    # edges between them. Documented with comments rather than a docstring,
    # since pydantic would copy a class docstring into the schema description
    # that ends up in the prompt.
    elements: List[Element] = Field(
        description="Elements in the Petri net",
    )
    edges: List[Edge] = Field(
        description="Edges connecting elements in the Petri net",
    )

def call_pydantic_output_parser(state):
    """Convert the latest textual Petri net into JSON and save it to disk.

    Takes the last entry of ``state["process_modell"]``, asks the LLM to
    restructure it following the ``PetriNetTransition`` schema, parses the
    reply with ``JsonOutputParser`` and writes the parsed object to
    ``process_description.json`` in the current working directory.

    Args:
        state: Graph state with a non-empty ``process_modell`` sequence.

    Returns:
        The parsed JSON object produced by the parser.
    """
    # Only the most recent model text is converted. The local name differs
    # from the `modell` node function to avoid shadowing it.
    latest_model_text = state["process_modell"][-1]

    prompt = ChatPromptTemplate.from_messages([
        ("system", """
        I am a world class Petri net designer and extract the data for a Petri net in structured formats. I use the information provided named Transitions, Places and Edges to create a Petri net. 
        Transitions and Places will be labeled as 'Activity' and 'Object Store' respectively, for straightforward identification. If the first element is a transition (or Activity), I have to add a place at the beginning and connect it to the transition correctly.
        
        {format_instructions}

        Additional Information
        Tip:
        
        use descriptive names
        Tip:

        Make sure to answer in the correct format
        """),
        ("human", "{input}")
    ])

    parser = JsonOutputParser(pydantic_object=PetriNetTransition)

    chain = prompt | llm | parser

    result = chain.invoke({
        "input": latest_model_text,
        "format_instructions": parser.get_format_instructions()
    })

    # ensure_ascii=False writes non-ASCII characters (e.g. German umlauts in
    # the names) verbatim, so the file encoding must be fixed explicitly
    # instead of depending on the platform default.
    with open('process_description.json', 'w', encoding='utf-8') as json_file:
        json.dump(result, json_file, indent=4, ensure_ascii=False)

    print("The JSON file was successfully created and saved.")

    # NOTE(review): the keys of `result` come from the LLM's JSON and are not
    # fields of AgentState - confirm how the graph is expected to treat them.
    return result

In [9]:
from langgraph.graph import StateGraph, END

# Two-step pipeline: "Modeller" drafts the Petri net as text, "OutputParser"
# turns that text into JSON.
workflow = StateGraph(AgentState)

# Nodes
workflow.add_node("Modeller", modell)
workflow.add_node("OutputParser", call_pydantic_output_parser)

# Wiring: entry -> Modeller -> OutputParser -> finish
workflow.set_entry_point("Modeller")
workflow.add_edge("Modeller", "OutputParser")
workflow.set_finish_point("OutputParser")

graph = workflow.compile()

In [None]:
# Seed the state with the selected description and print what each node
# emits as the graph runs.
user_input = {"process_description": [PROCESS_DESCRIPTION]}

for step in graph.stream(user_input):
    for node_name, node_update in step.items():
        print(f"Output from node '{node_name}':", "---", sep="\n")
        print(node_update)
    print("\n---\n")