In [1]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain_core.example_selectors import LengthBasedExampleSelector
from langchain_core.output_parsers import StrOutputParser
import asyncio
import os
import inspect
import logging
from lightrag import LightRAG, QueryParam
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
from lightrag.utils import EmbeddingFunc
import json
import re
WORKING_DIR = "/home/tttung/Khiem/thesis/1740930358"

In [2]:
import nest_asyncio
nest_asyncio.apply()

llm = ChatOllama(model = 'phi4', temperature=0)
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,
    llm_model_name='phi4',
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embed(
            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
        ),
    ),

)
with open("/home/tttung/Khiem/thesis/1740930358/cooking_procedure.txt", "r", encoding="utf-8") as f:
    rag.insert(f.read())


  from .autonotebook import tqdm as notebook_tqdm
INFO:nano-vectordb:Load (0, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '/home/tttung/Khiem/thesis/1740930358/vdb_entities.json'} 0 data
INFO:nano-vectordb:Load (1, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '/home/tttung/Khiem/thesis/1740930358/vdb_relationships.json'} 1 data
INFO:nano-vectordb:Load (1, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '/home/tttung/Khiem/thesis/1740930358/vdb_chunks.json'} 1 data
INFO:lightrag:Loaded document status storage with 1 records
INFO:lightrag:No new unique documents were found.
INFO:lightrag:All documents have been processed or are duplicates


INFO:lightrag:Storage Initialization completed!


In [3]:
num_of_steps = rag.query("How many steps in the document, what are they? just tell the number of steps and list name of steps with its number, no details", 
                             param=QueryParam(mode="global"))
num_of_steps

INFO:lightrag:Non-embedding cached missed(mode:global type:query)
INFO:lightrag:Non-embedding cached missed(mode:global type:keywords)
INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query edges: Document structure, Steps identification, top_k: 60, cosine: 0.2
INFO:lightrag:Global query uses 2 entites, 1 relations, 1 chunks
INFO:lightrag:Inserting 1 to llm_response_cache


'The document outlines 8 steps for cooking pasta:\n\n1. Boil the Water  \n2. Add Salt  \n3. Drop in the Pasta  \n4. Cook Until Al Dente  \n5. Save Some Pasta Water  \n6. Drain the Pasta  \n7. Mix with Sauce  \n8. Serve and Enjoy  '

In [4]:
few_shot_examples = [
    {
        "input": "Step 1: Prepare Ingredients\nStep 2: Cook Meat\nStep 3: Serve Dish",
        "output": """
            subgraph cluster_1 {{ label="Prepare Ingredients" }}
            subgraph cluster_2 {{ label="Cook Meat" }}
            subgraph cluster_3 {{ label="Serve Dish" }}
        """
    },
    {
        "input": "Preheat Oven\nMix Batter\nBake Cake",
        "output": """
            subgraph cluster_1 {{ label="Preheat Oven" }}
            subgraph cluster_2 {{ label="Mix Batter" }}
            subgraph cluster_3 {{ label="Bake Cake" }}
        """
    }
]

example_template = """
Input steps:
{input}

Output DOT code:
{output}
"""

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template=example_template
)

In [5]:
cot_instructions = """
You are tasked with converting a list of procedural steps into DOT code for a flowchart. 
Each step should become a subgraph with a descriptive label. Follow these steps:

1. Read the list of steps provided.
2. For each step, create a subgraph in DOT syntax (e.g., 'subgraph cluster_X {{ label="Step Name" }}').
3. Number the subgraphs sequentially (cluster_1, cluster_2, etc.).
4. Ensure the output is valid DOT code that can be rendered as a flowchart.

Now, process the following steps and output the DOT code:
{steps}
"""

In [6]:
few_shot_prompt = FewShotPromptTemplate(
    examples=few_shot_examples,
    example_prompt=example_prompt,
    prefix="Here are some examples of converting steps to DOT code:\n",
    suffix=cot_instructions,
    input_variables=["steps"],
    example_separator="\n---\n"
)

In [7]:
# Use num_of_steps directly as the input
final_prompt = few_shot_prompt.format(steps=num_of_steps)
dot_code_output = llm.invoke(final_prompt)
dot_code_output_step1 = dot_code_output.content
# Print the result
print("Generated DOT code:\n", dot_code_output.content)


Generated DOT code:
 To convert the given list of procedural steps into DOT code for a flowchart, we will follow these instructions:

1. **Read the List of Steps**: We have 8 steps provided in the task.
2. **Create Subgraphs**: For each step, create a subgraph using the DOT syntax `subgraph cluster_X { label="Step Name" }`.
3. **Number the Subgraphs Sequentially**: Start numbering from `cluster_1` and increment for each subsequent step.

Here is the conversion of the steps into DOT code:

```dot
subgraph cluster_1 { label="Boil the Water" }
subgraph cluster_2 { label="Add Salt" }
subgraph cluster_3 { label="Drop in the Pasta" }
subgraph cluster_4 { label="Cook Until Al Dente" }
subgraph cluster_5 { label="Save Some Pasta Water" }
subgraph cluster_6 { label="Drain the Pasta" }
subgraph cluster_7 { label="Mix with Sauce" }
subgraph cluster_8 { label="Serve and Enjoy" }
```

This DOT code represents each step as a subgraph, which can be rendered into a flowchart. Each subgraph is labeled 

## Step 2

In [8]:
step_lines = num_of_steps.strip().split("\n")[1:]  # Skip intro line
step_names = [line.strip() for line in step_lines if line.strip()]
step_names

['1. Boil the Water',
 '2. Add Salt',
 '3. Drop in the Pasta',
 '4. Cook Until Al Dente',
 '5. Save Some Pasta Water',
 '6. Drain the Pasta',
 '7. Mix with Sauce',
 '8. Serve and Enjoy']

In [9]:
step_details = {}

for step in step_names:
    query_step2 = (
        f"For the step '{step}' in the document, identify: "
        "1. The actor (who or what performs the action, if specified), "
        "2. The main action, procedure, or event, etc "
        "3. The entities (nouns/objects involved, excluding the actor), "
        "4. Relevant info (conditions, details, or constraints). "
        "Return the response in this format:\n"
        "Actor: <actor>\nAction: <action>\nEntities: <entity1>, <entity2>, ...\nRelevant Info: <info>"
    )
    details = rag.query(query_step2, param=QueryParam(mode="local"))
    step_details[step] = details
    print(f"Step 2 - Details for '{step}':\n", details)

INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)
INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Actor, Main action, Entities, Relevant information, Boil water, Step, Document, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '1. Boil the Water':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Step '2', Add Salt, Actor, Main action, Entities, Relevant information, Instruction document, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '2. Add Salt':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Actor, Action, Entities, Relevant info, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '3. Drop in the Pasta':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Al Dente, Cooking Time/Method, Pasta, Heat Source, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '4. Cook Until Al Dente':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Save water, Pasta cooking, Starch, Sauce thickening, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '5. Save Some Pasta Water':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Step 6, Actor, Action, Entities, Relevant Info, Pasta, Water, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '6. Drain the Pasta':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Actor, Mix with Sauce, Ingredients, Sauce, Bowl, Utensils, Temperature, Duration, Order of Mixing, Types of Sauce, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 2 - Details for '7. Mix with Sauce':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Actor, Main Action/Procedure, Entities, Relevant Information, top_k: 60, cosine: 0.2


Step 2 - Details for '8. Serve and Enjoy':
 Sorry, I'm not able to provide an answer to that question.[no-context]


In [10]:
step_details

{'1. Boil the Water': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '2. Add Salt': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '3. Drop in the Pasta': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '4. Cook Until Al Dente': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '5. Save Some Pasta Water': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '6. Drain the Pasta': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '7. Mix with Sauce': "Sorry, I'm not able to provide an answer to that question.[no-context]",
 '8. Serve and Enjoy': "Sorry, I'm not able to provide an answer to that question.[no-context]"}

In [11]:
few_shot_examples = [
    {
        "input": (
            "Step 1: Brian cooks meat\n"
            "Actor: Brian\n"
            "Action: Cooks\n"
            "Entities: Meat\n"
            "Relevant Info: On medium heat"
        ),
        "output": """
            'subgraph cluster_1 {{ label="Brian cooks meat";\n'
            '  actor_1 [label="Brian"];\n'
            '  action_1 [label="Cooks"];\n'
            '  entity_1 [label="Meat"];\n'
            '  info_1 [label="On medium heat"];\n'
            '}}'
        """
    },
    {
        "input": (
            "Step 2: Boil the water\n"
            "Actor: None\n"
            "Action: Boil\n"
            "Entities: Water\n"
            "Relevant Info: High heat"
        ),
        "output": """
            'subgraph cluster_2 {{ label="Boil the water";\n'
            '  action_2 [label="Boil"];\n'
            '  entity_2 [label="Water"];\n'
            '  info_2 [label="High heat"];\n'
            '}}'
        """
    }
]

In [12]:
example_template = """
Input step and details:
{input}

Output DOT code:
{output}
"""

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template=example_template
)

# Chain-of-thought instructions with actor
cot_instructions = """
You are converting a procedural step and its details into DOT code for a flowchart. 
Each step is a subgraph containing nodes for actor, action, entities, and relevant info. Follow these rules:
1. Read the step name and its details (Actor, Action, Entities, Relevant Info).
2. Create a subgraph with the step name as the label: 'subgraph cluster_X {{ label="Step Name"; }}'.
3. Inside the subgraph, add nodes:
   - If an actor is specified (not 'None'): 'actor_X [label="Actor"];', if not defined, skp
   - For the action: 'action_X [label="Action"];'
   - For each entity: 'entity_Y [label="Entity"];'
   - For relevant info (if not empty): 'info_Z [label="Info"];'
4. Use sequential cluster numbering (cluster_1, cluster_2, etc.) based on step order.
5. Use unique node IDs within each subgraph (e.g., actor_1, action_1, entity_1, info_1 for cluster_1).
6. Ensure double quotes around all label text and semicolons after each node.
7. Indent nodes for readability, but keep the subgraph on multiple lines.
8. Only output the dotcode, no other details

Process this step and its details and output the DOT code:
{step_details}
"""

# Combine into few-shot prompt
few_shot_prompt = FewShotPromptTemplate(
    examples=few_shot_examples,
    example_prompt=example_prompt,
    prefix="Here are examples of converting step details to DOT code with nodes:\n",
    suffix=cot_instructions,
    input_variables=["step_details"],
    example_separator="\n---\n"
)


In [13]:
dot_code_outputs = []
for i, step in enumerate(step_names, 1):
    step_input = f"{step}\n{step_details[step]}"
    final_prompt = few_shot_prompt.format(step_details=step_input)
    dot_code_output = llm.invoke(final_prompt)
    dot_code_output = dot_code_output.content
    dot_code_outputs.append(dot_code_output)
    print(f"Step 2 - Generated DOT code for '{step}':\n", dot_code_output)

# Combine all subgraphs
#combined_dot_code = "\n".join(str(dot_code_outputs))
dot_code_outputs

Step 2 - Generated DOT code for '1. Boil the Water':
 ```dot
subgraph cluster_3 { label="Boil the Water";
  action_3 [label="Boil"];
  entity_3 [label="Water"];
}
```

Explanation:

1. **Subgraph Label**: The step name "Boil the Water" is used as the subgraph label: `label="Boil the Water";`.
2. **Cluster Numbering**: This is the third step, so it uses `cluster_3`.
3. **Actor Node**: Since the actor is not specified (or is 'None'), no actor node is added.
4. **Action Node**: The action "Boil" is included as `action_3 [label="Boil"];`.
5. **Entity Node**: The entity "Water" is included as `entity_3 [label="Water"];`.
6. **Relevant Info**: There's no relevant info provided, so no info node is added.
7. **Node IDs**: Sequential numbering within the subgraph (e.g., action_3, entity_3) ensures unique identifiers for each node.

This DOT code represents the step "Boil the Water" with its specified details in a structured format suitable for inclusion in a flowchart.
Step 2 - Generated DOT co

['```dot\nsubgraph cluster_3 { label="Boil the Water";\n  action_3 [label="Boil"];\n  entity_3 [label="Water"];\n}\n```\n\nExplanation:\n\n1. **Subgraph Label**: The step name "Boil the Water" is used as the subgraph label: `label="Boil the Water";`.\n2. **Cluster Numbering**: This is the third step, so it uses `cluster_3`.\n3. **Actor Node**: Since the actor is not specified (or is \'None\'), no actor node is added.\n4. **Action Node**: The action "Boil" is included as `action_3 [label="Boil"];`.\n5. **Entity Node**: The entity "Water" is included as `entity_3 [label="Water"];`.\n6. **Relevant Info**: There\'s no relevant info provided, so no info node is added.\n7. **Node IDs**: Sequential numbering within the subgraph (e.g., action_3, entity_3) ensures unique identifiers for each node.\n\nThis DOT code represents the step "Boil the Water" with its specified details in a structured format suitable for inclusion in a flowchart.',
 '```dot\nsubgraph cluster_3 { label="Add Salt";\n  act

In [14]:
# remove \n in the string and replace with a space
dot_code_outputs = [dot_code_output.replace('\n', '  ') for dot_code_output in dot_code_outputs]
# remove "dot"
dot_code_outputs = [dot_code_output.replace("dot", "") for dot_code_output in dot_code_outputs]
dot_code_outputs


['```  subgraph cluster_3 { label="Boil the Water";    action_3 [label="Boil"];    entity_3 [label="Water"];  }  ```    Explanation:    1. **Subgraph Label**: The step name "Boil the Water" is used as the subgraph label: `label="Boil the Water";`.  2. **Cluster Numbering**: This is the third step, so it uses `cluster_3`.  3. **Actor Node**: Since the actor is not specified (or is \'None\'), no actor node is added.  4. **Action Node**: The action "Boil" is included as `action_3 [label="Boil"];`.  5. **Entity Node**: The entity "Water" is included as `entity_3 [label="Water"];`.  6. **Relevant Info**: There\'s no relevant info provided, so no info node is added.  7. **Node IDs**: Sequential numbering within the subgraph (e.g., action_3, entity_3) ensures unique identifiers for each node.    This DOT code represents the step "Boil the Water" with its specified details in a structured format suitable for inclusion in a flowchart.',
 '```  subgraph cluster_3 { label="Add Salt";    action_3 

## Step 3

In [15]:
step_relations = {}
for step in step_names:
    query_step3 = (
        f"For the step '{step}' with details:\n{step_details[step]}\n"
        "Identify the direct relationships between the actor and entities, where the action is the relationship label. "
        "Return in this format:\n"
        "From: <actor> To: <entity> Label: <action>\n"
        "If no actor, use 'We' as the default actor depends on the context. Include one line per entity."
    )
    relations = rag.query(query_step3, param=QueryParam(mode="local"))
    step_relations[step] = relations
    print(f"Step 3 - Relationships for '{step}':\n", relations)

INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)
INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Stovetop, Kettle, Pot, Heat source, Steam, Evaporation, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '1. Boil the Water':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Salt, Cooking Instructions, Proportions/Quantities, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '2. Add Salt':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Pasta, Cooking Step, Actor, Entities, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '3. Drop in the Pasta':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Al Dente, Cooking Time, Boiling Water, Stirring, Pasta Types, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '4. Cook Until Al Dente':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Pasta water, Cooking pasta, Saving liquid, Al dente texture, Starch content, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '5. Save Some Pasta Water':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Pasta, Saucepan or Pot, Colander, Hot Water, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '6. Drain the Pasta':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Mix, Sauce, Ingredient Combination, top_k: 60, cosine: 0.2
INFO:lightrag:Non-embedding cached missed(mode:local type:query)
INFO:lightrag:Non-embedding cached missed(mode:local type:keywords)


Step 3 - Relationships for '7. Mix with Sauce':
 Sorry, I'm not able to provide an answer to that question.[no-context]


INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query nodes: Step 8, Serve, Enjoy, Dish presentation, Food service, top_k: 60, cosine: 0.2


Step 3 - Relationships for '8. Serve and Enjoy':
 Sorry, I'm not able to provide an answer to that question.[no-context]


In [16]:
few_shot_examples = [
    {
        "input": (
            "Step 1: Brian cooks meat\n"
            "Actor: Brian\nAction: Cooks\nEntities: Meat\nRelevant Info: On medium heat\n"
            "Relationships:\n"
            "From: Brian To: Meat Label: Cooks"
        ),
        "output": """
            'subgraph cluster_1 {{ label="Brian cooks meat";\n'
            '  actor_1 [label="Brian"];\n'
            '  entity_1 [label="Meat"];\n'
            '  info_1 [label="On medium heat"];\n'
            '  actor_1 -> entity_1 [label="Cooks"];\n'
            '}}'
        """
    },
    {
        "input": (
            "Step 2: Boil the water\n"
            "Actor: None\nAction: Boil\nEntities: Water\nRelevant Info: High heat\n"
            "Relationships:\n"
            "From: Process To: Water Label: Boil"
        ),
        "output":"""
            'subgraph cluster_2 {{ label="Boil the water";\n'
            '  actor_2 [label="Process"];\n'
            '  entity_2 [label="Water"];\n'
            '  info_2 [label="High heat"];\n'
            '  actor_2 -> entity_2 [label="Boil"];\n'
            '}}'
        """
    },
    {
        "input": (
            "Step 3: Add salt and pepper\n"
            "Actor: None\nAction: Add\nEntities: Salt, Pepper\nRelevant Info: To taste\n"
            "Relationships:\n"
            "From: Process To: Salt Label: Add\n"
            "From: Process To: Pepper Label: Add"
        ),
        "output": """
            'subgraph cluster_3 {{ label="Add salt and pepper";\n'
            '  actor_3 [label="Process"];\n'
            '  entity_3_1 [label="Salt"];\n'
            '  entity_3_2 [label="Pepper"];\n'
            '  info_3 [label="To taste"];\n'
            '  actor_3 -> entity_3_1 [label="Add"];\n'
            '  actor_3 -> entity_3_2 [label="Add"];\n'
            '}}'
        """
    }
]

# Example template
example_template = """
Input step, details, and relationships:
{input}

Output DOT code with nodes and edges:
{output}
"""

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template=example_template
)

# Chain-of-thought instructions
cot_instructions = """
You are converting a procedural step, its details, and relationships into DOT code for a flowchart. 
Each step is a subgraph with nodes and directed edges. Follow these rules:
1. Read the step name, details (Actor, Action, Entities, Relevant Info), and relationships.
2. Create a subgraph: 'subgraph cluster_X {{ label="Step Name"; }}'.
3. Add nodes:
   - For Actor (use 'Process' if 'None'): 'actor_X [label="Actor"];' 
   - For each Entity: 'entity_X_Y [label="Entity"];'
   - For Relevant Info (if not empty): 'info_X [label="Info"];'
4. Add edges from relationships in the format 'From: <actor> To: <entity> Label: <action>' from Relevant Info:
   - '<actor_X> -> <entity_X_Y> [label="Action"];'
5. Use sequential numbering for clusters (cluster_1, cluster_2, etc.).
6. Use unique node IDs within each subgraph (e.g., actor_1, entity_1_1, info_1).
7. Ensure double quotes around labels and semicolons after each statement.
8. Omit Action node as its already represented as relation 
9. Only output the dotcode(remove the info in final output), no other details

Process this step, its details, and relationships, and output the DOT code:
{step_data}
"""

In [17]:
few_shot_prompt = FewShotPromptTemplate(
    examples=few_shot_examples,
    example_prompt=example_prompt,
    prefix="Here are examples of converting step details and relationships to DOT code:\n",
    suffix=cot_instructions,
    input_variables=["step_data"],
    example_separator="\n---\n"
)

In [18]:
dot_code_outputs = []
for i, step in enumerate(step_names, 1):
    step_input = f"{step}\n{step_details[step]}\nRelationships:\n{step_relations[step]}"
    final_prompt = few_shot_prompt.format(step_data=step_input)
    dot_code_output = llm.invoke(final_prompt)
    dot_code_output = dot_code_output.content
    dot_code_outputs.append(dot_code_output)
    print(f"Step 3 - Generated DOT code for '{step}':\n", dot_code_output)

Step 3 - Generated DOT code for '1. Boil the Water':
 ```dot
subgraph cluster_1 { label="Boil the water";
  actor_1 [label="Process"];
  entity_1 [label="Water"];
  info_1 [label="High heat"];
  actor_1 -> entity_1 [label="Boil"];
}
```

This DOT code represents the procedural step "Boil the Water" as a subgraph. It includes nodes for the process (actor), the entity involved (water), and relevant information about how to boil it (high heat). The relationship between the process and water is represented by an edge labeled "Boil".
Step 3 - Generated DOT code for '2. Add Salt':
 ```dot
subgraph cluster_1 { label="Add Salt";
  actor_1 [label="Process"];
  entity_1_1 [label="Salt"];
  actor_1 -> entity_1_1 [label="Add"];
}
```

This DOT code represents the procedural step "Add Salt" as a subgraph. It includes nodes for the process (actor) and the entity involved, with an edge labeled "Add" connecting them.
Step 3 - Generated DOT code for '3. Drop in the Pasta':
 ```dot
subgraph cluster_3 { 

In [19]:
dot_code_outputs = [dot_code_output.replace('\n', '  ') for dot_code_output in dot_code_outputs]
# remove "dot"
dot_code_outputs = [dot_code_output.replace("dot", "") for dot_code_output in dot_code_outputs]
dot_code_outputs

['```  subgraph cluster_1 { label="Boil the water";    actor_1 [label="Process"];    entity_1 [label="Water"];    info_1 [label="High heat"];    actor_1 -> entity_1 [label="Boil"];  }  ```    This DOT code represents the procedural step "Boil the Water" as a subgraph. It includes nodes for the process (actor), the entity involved (water), and relevant information about how to boil it (high heat). The relationship between the process and water is represented by an edge labeled "Boil".',
 '```  subgraph cluster_1 { label="Add Salt";    actor_1 [label="Process"];    entity_1_1 [label="Salt"];    actor_1 -> entity_1_1 [label="Add"];  }  ```    This DOT code represents the procedural step "Add Salt" as a subgraph. It includes nodes for the process (actor) and the entity involved, with an edge labeled "Add" connecting them.',
 '```  subgraph cluster_3 { label="Drop in the Pasta";    actor_3 [label="Process"];    entity_3_1 [label="Pasta"];  }    ```    Explanation:    - The step name is "Dro

## Step 4

In [20]:
query_step4 = (
    f"Given the steps:\n{num_of_steps}\n"
    "Identify the relationships between these main steps, including sequential, conditional, looping, branching, merging, or dependency relationships. "
    "Examples include 'followed by', 'if', 'loop until', 'depends on', 'branches to', 'merges from'. and more others, you name it "
    "Return in this format, one per line:\n"
    "From: <step_number> To: <step_number> Label: <relationship>\n"
    "Examples:\n"
    "'From: 1 To: 2 Label: followed by' if step 1 is followed by step 2.\n"
    "'From: 2 To: 3 Label: if' if step 3 occurs only if step 2 is true.\n"
    "'From: 3 To: 1 Label: loop until' if step 3 loops back to step 1 until a condition.\n"
    "'From: 1 To: 2 Label: depends on' if step 2 depends on step 1.\n"
    "'From: 2 To: 3 Label: branches to' if step 2 branches to step 3.\n"
    "'From: 3 To: 4 Label: merges from' if step 4 merges from step 3."
)
inter_step_relations = rag.query(query_step4, param=QueryParam(mode="global"))
print("Step 4 - Raw inter-step relationships:\n", inter_step_relations)

INFO:lightrag:Non-embedding cached missed(mode:global type:query)
INFO:lightrag:Non-embedding cached missed(mode:global type:keywords)
INFO:lightrag:Inserting 1 to llm_response_cache
INFO:lightrag:Query edges: Sequential Steps, Process Relationships, Cooking Procedure, Culinary Techniques, top_k: 60, cosine: 0.2
INFO:lightrag:Global query uses 2 entites, 1 relations, 1 chunks
INFO:lightrag:Inserting 1 to llm_response_cache


Step 4 - Raw inter-step relationships:
 ### Relationships Between Pasta Cooking Steps

Based on the outlined steps for cooking pasta, here are the relationships identified between them:

- **From: 1 To: 2 Label: followed by**  
  Step 2 (Add Salt) is directly followed by Step 1 (Boil the Water).

- **From: 2 To: 3 Label: followed by**  
  Step 3 (Drop in the Pasta) follows Step 2 (Add Salt) once the water has reached a rolling boil and salt has been added.

- **From: 3 To: 4 Label: followed by**  
  Step 4 (Cook Until Al Dente) is performed after Step 3 (Drop in the Pasta), where you begin checking for doneness according to package instructions.

- **From: 4 To: 5 Label: follows if**  
  Step 5 (Save Some Pasta Water) occurs after Step 4, but only if testing indicates that pasta water should be saved. This is conditional upon reaching an appropriate stage in cooking.

- **From: 5 To: 6 Label: followed by**  
  Step 6 (Drain the Pasta) follows Step 5 (Save Some Pasta Water) once the des

In [21]:
few_shot_examples = [
    {
        "input": (
            "Steps:\n"
            "1. Check water level\n"
            "2. Boil the water\n"
            "Relationships:\n"
            "From: 1 To: 2 Label: followed by"
        ),
        "output": (
            'cluster_1 -> cluster_2 [label="followed by in sequence"];'
        )
    },
    {
        "input": (
            "Steps:\n"
            "1. Boil the water\n"
            "2. Check if boiling\n"
            "3. Add pasta\n"
            "Relationships:\n"
            "From: 1 To: 2 Label: followed by\n"
            "From: 2 To: 3 Label: if\n"
            "From: 2 To: 1 Label: loop until"
        ),
        "output": (
            'cluster_1 -> cluster_2 [label="followed by to check"];\n'
            'cluster_2 -> cluster_3 [label="if water is boiling"];\n'
            'cluster_2 -> cluster_1 [label="loop until boiling"];'
        )
    },
    {
        "input": (
            "Steps:\n"
            "1. Prepare ingredients\n"
            "2. Cook meat\n"
            "3. Boil water\n"
            "Relationships:\n"
            "From: 1 To: 2 Label: depends on\n"
            "From: 1 To: 3 Label: branches to"
        ),
        "output": (
            'cluster_1 -> cluster_2 [label="depends on preparation"];\n'
            'cluster_1 -> cluster_3 [label="branches to boiling"];'
        )
    },
    {
        "input": (
            "Steps:\n"
            "1. Cook meat\n"
            "2. Boil water\n"
            "3. Combine ingredients\n"
            "Relationships:\n"
            "From: 1 To: 3 Label: merges from\n"
            "From: 2 To: 3 Label: merges from"
        ),
        "output": (
            'cluster_1 -> cluster_3 [label="merges meat into"];\n'
            'cluster_2 -> cluster_3 [label="merges water into"];'
        )
    }
]

# Example template
example_template = """
Input steps and relationships:
{input}

Output DOT edges between subgraphs:
{output}
"""

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template=example_template
)

# Chain-of-thought instructions
cot_instructions = """
You are generating DOT code edges to connect subgraphs representing procedural steps, using the Step 1 DOT code for context. 
Follow these rules:
1. Read the list of steps, Step 1 DOT code (subgraph titles), and relationships in the format 'From: <step_number> To: <step_number> Label: <relationship>'.
2. For each relationship, create an edge between subgraphs:
   - 'cluster_X -> cluster_Y [label="Enhanced Relationship"];'
3. Use the Step 1 DOT code to ensure cluster labels match the steps (e.g., cluster_1 for step 1).
4. Enhance the relationship label based on context and type, using the subgraph titles for specificity(relationship lables could be used  with synonyms, act accordingly):
   - 'followed by' -> e.g., 'followed by to <cluster_Y label>'
   - 'if' -> e.g., 'if <cluster_X condition>'
   - 'loop until' -> e.g., 'loop until <cluster_Y state>'
   - 'depends on' -> e.g., 'depends on <cluster_X completion>'
   - 'branches to' -> e.g., 'branches to <cluster_Y action>'
   - 'merges from' -> e.g., 'merges from <cluster_X result>'
   - 'parallel' -> e.g., 'parallel with <cluster_Y label>'
   - 'synchronize' -> e.g., 'synchronize with <cluster_X label>'
   - 'except' -> e.g., 'except if <cluster_X condition>'
   - 'trigger' -> e.g., 'triggers <cluster_Y label>'
5. Use the step numbers to match cluster IDs (e.g., step 1 is cluster_1).
6. Ensure double quotes around labels and semicolons after each edge.
7. Output one edge per line.
8. Make sure that the output is only the DOT code, no other details.

Process these steps, Step 1 DOT code, and relationships, and output the DOT edges:
{relations}
"""

In [22]:
few_shot_prompt = FewShotPromptTemplate(
    examples=few_shot_examples,
    example_prompt=example_prompt,
    prefix="Here are examples of converting inter-step relationships to DOT edges with Step 1 DOT code:\n",
    suffix=cot_instructions,
    input_variables=["relations"],
    example_separator="\n---\n"
)
step4_input = f"Steps:\n{num_of_steps}\nStep 1 DOT code:\n{dot_code_output_step1}\nRelationships:\n{inter_step_relations}"
final_prompt = few_shot_prompt.format(relations=step4_input)
step4_dot_output = llm.invoke(final_prompt)
step4_dot_output =  step4_dot_output.content
print("Step 4 - Generated inter-subgraph edges:\n", step4_dot_output)

Step 4 - Generated inter-subgraph edges:
 ```dot
cluster_1 -> cluster_2 [label="followed by to Add Salt"];
cluster_2 -> cluster_3 [label="followed by to Drop in Pasta"];
cluster_3 -> cluster_4 [label="followed by to Cook Until Al Dente"];
cluster_4 -> cluster_5 [label="follows if pasta is cooked"];
cluster_5 -> cluster_6 [label="followed by to Drain the Pasta"];
cluster_6 -> cluster_7 [label="followed by to Mix with Sauce"];
cluster_7 -> cluster_8 [label="followed by to Serve and Enjoy"];
```

This DOT code represents the relationships between the steps in cooking pasta, using enhanced labels for clarity. Each edge connects two subgraphs (clusters) corresponding to sequential or conditional steps in the process. The labels are crafted based on the context of each relationship type, ensuring that they convey the intended procedural flow.
