In [1]:
import os
import re
from typing import List, Tuple

from llama_index.core.workflow import (
    Workflow,
    Context,
    step,
    Event,
    StartEvent,
    StopEvent,
    InputRequiredEvent,
    HumanResponseEvent,
)

from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
from llama_index.core import SimpleDirectoryReader, Document
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.utils.workflow import draw_all_possible_flows
from langchain.embeddings import HuggingFaceInstructEmbeddings
import InstructorEmbedding
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_groq import ChatGroq
from dotenv import load_dotenv

ModuleNotFoundError: No module named 'llama_index'

In [None]:
import openai
OPENAI_API_KEY = ""

import os

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
API_KEY = ""

import getpass
import os

if "GROQ_API_KEY" not in os.environ:
    os.environ["GROQ_API_KEY"] = API_KEY

In [4]:
from dotenv import load_dotenv
load_dotenv()

False

In [5]:
# import InstructorEmbedding
# # import huggingface_hub
# # from sentence_transformers import SentenceTransformer
# from langchain.embeddings import HuggingFaceInstructEmbeddings

# model_name = 'hkunlp/instructor-base'

# embedding_model = HuggingFaceInstructEmbeddings(
#     model_name = model_name,
#     # model_kwargs = {"device" : device}
# )

In [6]:
# from langchain_groq import ChatGroq
# Groq_llm = ChatGroq(
#     model="llama-3.1-8b-instant",
#     temperature=0,
#     max_tokens=None,
#     timeout=None,
#     max_retries=2,
# )

# Groq_llm.predict("Just reposonse Okay")


In [7]:
# Groq_llm.predict("Just reposonse Okay")

In [8]:
class GenerateQuestionsEvent(Event):
    """Triggered after setup, contains the original user query and a list of follow‑up questions."""

    UserQuery: str
    GeneratedQuestions: List[str]
    Feedback: str


class QueryEvent(Event):
    """Triggered after question generation – contains the composite textual query to the RAG engine."""

    SQLQuery: str


class GraphEvent(Event):
    """Triggered after retrieval – contains table/column names extracted from relevant SQL snippets."""

    Tables: str


class ResponseEvent(Event):
    """Contains the natural‑language answer generated for the user."""

    Response: str


class FeedbackEvent(Event):
    """Contains free‑form user feedback about the previous response, used to decide whether to loop."""
    Feedback: str

In [9]:
# docs = SimpleDirectoryReader("docs", recursive=True).load_data()

In [10]:
# print(docs[1].text)

In [24]:
# Define the string to save
text = "Hello, this text will be saved in a new file!"

# Create and write to the file
with open('ourspace/my_file.txt', 'w') as file:  # 'w' = write mode (creates or overwrites)
    file.write(text)

print("File 'my_file.txt' has been created and saved!")

File 'my_file.txt' has been created and saved!


In [65]:
class SoftwareDocBot(Workflow):
    """A Retrieval‑Augmented Generation (RAG) workflow with human‑in‑the‑loop feedback.

    Steps
    -----
    1. set_up – Build / load the vector store, then trigger GenerateQuestionsEvent.
    2. generate_questions – Use LLM to decompose the user query into follow‑up questions, trigger QueryEvent.
    3. rag_query – Query the vector store, collect relevant SQL and tables, trigger ResponseEvent & GraphEvent.
    4. graph_query – (Optional) Enrich with graph metadata (stub for now), trigger ResponseEvent.
    5. document_response – Draft a user‑facing answer, then pause via InputRequiredEvent.
    6. get_feedback – Decide whether to stop, or loop back to rag_query with extra feedback.
    """
    def __init__(self, storage_dir: str, data_dir: str, model: str = "groq", *args, **kwargs):
        super().__init__(*args, **kwargs)
        load_dotenv()
        self.llm_groq = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )
        self.llm_openai = OpenAI(model="gpt-4o-mini")
        self.embedding_model = HuggingFaceInstructEmbeddings(
            model_name = 'hkunlp/instructor-base'
        )
        self.model = model
        # -------- constants ------------------------------------------------------
        self.storage_dir: str = storage_dir
        self.data_dir: str = data_dir  # directory containing *.sql files to embed

    def _generate(self, prompt: str) -> str:
        if self.model == "groq":
            return self.llm_groq.predict(prompt)
        elif self.model == "4o-mini":
            return self.llm_openai.complete(prompt).text
        else:
            raise ValueError(f"Unknown model: {self.model}")

    def document_sql(self) -> str:
        # Translate SQL to English
        doc_prompt = (
            "You are an expert SQL analyst who knows business processes."
            "You will be given a SQL procedure and need to analyze it."
            "Provide a detailed explanation of the SQL procedure with the following format:"
            "1. Overview: What the SQL procedure does?"
            "2. Input: What are the input parameters?"
            "3. Output: What is the expected output?"
            "4. Tables: What tables are used?"
            "5. Logic: What is the logic of the SQL procedure?"
            "6. Operations: What operations are used(read, write, update, delete)?"
            "7. UseCase: What is the use cases of the SQL procedure?"

            "Output: Markdown format."

            "SQL Procedure: {sql}"
        )

        # load docs
        docs = SimpleDirectoryReader("docs", recursive=True).load_data()
        
        for i, doc in enumerate(docs[1:], start=1):
            if doc.text:
                answer = self._generate(doc_prompt.format(sql=doc.text))
                # create a file and write answer to text file
                with open(f"ourspace/{i}.txt", "w") as f:
                    f.write(answer)
                

        return f"{i} procedures are documented"



    # -------- set‑up ---------------------------------------------------------
    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> GenerateQuestionsEvent:
        """Initialise the LLM, embedding model, vector store & query engine."""
        # LLM
        # self.llm = Groq_llm
        
        # Build / load the vector index
        if os.path.exists(self.storage_dir):
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context, embed_model=self.embedding_model)
        else:
            # Read SQL files as Documents
            docs: List[Document] = []
            if os.path.exists(self.data_dir):
                docs = SimpleDirectoryReader(self.data_dir, recursive=True).load_data()
            else:
                print("[Warning] data_dir not found – creating empty index.")

            index = VectorStoreIndex.from_documents(
                docs,
                embed_model=self.embedding_model,
            )
            index.storage_context.persist(persist_dir=self.storage_dir)

        # Store the query engine for later steps
        self.retriever = index.as_retriever(similarity_top_k=5)

        # Save the user query in the context for later use
        await ctx.set("user_query", getattr(ev, "UserQuery", ""))       

        return GenerateQuestionsEvent(UserQuery=getattr(ev, "UserQuery", ""), GeneratedQuestions=[], Feedback="")

    # -------- question generation -------------------------------------------
    @step
    async def generate_questions(
        self, ctx: Context, ev: GenerateQuestionsEvent
    ) -> QueryEvent:
        """Generate follow‑up questions using the LLM (or reuse after feedback)."""
        user_query: str = await ctx.get("user_query")
        rag_prompt = (
            f"User Query: {user_query}\n. "
            "Based on the user query, retrieve the most relevant store procedures.\n"
        )
        retrieved_procedures = self.retriever.retrieve(rag_prompt)
        # extra_feedback: str = getattr(ev, "Feedback", "")
        user_feedback: str = getattr(ev, "Feedback", "")
        latest_response: str = await ctx.get("latest_response", default="")
        prompt = (
            "You are an expert SQL analyst who knows business processes. "
            "Break the following user request into 3 underlying questions "
            "that will help to answer the user request."
            "The questions will be used to retrieve the most relevant information."
            "The questions must be based on the available context given below."
            "Return each question on a new line.\n\n"
            f"User request: {user_query}\n"
            f"Available context: {retrieved_procedures}\n"
        )
        # if extra_feedback:
        #     prompt += f"\nPrevious feedback: {extra_feedback}\n"
        if user_feedback:
            prompt += f"\nUser feedback on the last answer: {user_feedback}\n"
        if latest_response:
            prompt += f"\n\n The last answer was: {latest_response}"

        completion = self._generate(prompt)
        print(completion)
        questions: List[str] = [q.strip("- \n") for q in completion.splitlines() if q.strip()]

        await ctx.set("generated_questions", questions)
        composite_query = "\n".join(questions) if questions else user_query
        return QueryEvent(SQLQuery=composite_query)

    # -------- RAG query ------------------------------------------------------
    @step
    async def rag_query(self, ctx: Context, ev: QueryEvent) -> GraphEvent:
        """Retrieve relevant SQL procedures and extract involved tables."""
        query_text = ev.SQLQuery
        print(f"[RAG] Query: {query_text}")

        rag_prompt = (
            f"Query: {query_text}\n. "
            "Based on the query, retrieve the most relevant store SQL procedures and codes.\n"
            "Return only the SQL codes."
        )
        # Retrieve SQL codes
        results = self.retriever.retrieve(rag_prompt)
        sql_snippets: List[str] = [r.text for r in results] if results else []
        rag_prompt = (
            f"Query: {query_text}\n. "
            "Based on the query, retrieve the most relevant explanation of the SQL procedures.\n"
            "Return only the explanation written in natural language."
        )
        # Retrieve explanation
        results = self.retriever.retrieve(rag_prompt)
        explanation: str = results
        print(f"checking snippets {sql_snippets}")
        print(f"checking explanation {explanation}")

        # Extract table information
        # tables = _extract_tables(sql_snippets)
        # table_list = ", ".join(tables) if tables else "<none found>"

        # Store for later steps
        await ctx.set("retrieved_sql", sql_snippets)
        await ctx.set("explanation", explanation)
        # await ctx.set("tables", table_list)

        return GraphEvent(Tables="")

    # -------- Graph query ----------------------------------------------------
    @step
    async def graph_query(self, ctx: Context, ev: GraphEvent) -> ResponseEvent:
        """Placeholder for graph lookup – currently just passes tables along."""
        # In a full implementation, we would query a KG here.
        # await ctx.set("graph_info", ev.Tables)
        return ResponseEvent(Response="")

    # -------- Draft response -------------------------------------------------
    @step
    async def document_response(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
        """Compose the final answer using LLM and pause for human feedback."""
        user_query = await ctx.get("user_query")
        sql_snippets: List[str] = await ctx.get("retrieved_sql")
        explanation: str = await ctx.get("explanation")
        # tables: str = await ctx.get("tables")
        # graph_info: str = await ctx.get("graph_info", default="")
        tables = ""
        graph_info = ""

        context_blob = "\n\n".join(sql_snippets) if sql_snippets else "[No SQL snippets retrieved]"
        
        latest_response: str = await ctx.get("latest_response", default="")
        latest_feedback: str = await ctx.get("latest_feedback", default="")
        
        prompt = (
            "You are an expert business analyst who knows SQL and business processes."
            "Using the following SQL procedures, tables, explanation of the SQL procedures, "
            "and any graph info provided, answer the user's question in detail.\n\n"
            "Standard Format:"
            "Please provide a structured response with the following sections: \n"
            "1. rules: List of business rules identified \n"
            "2. constraints: List of business constraints \n"
            "3. calculations: List of any calculations or formulas \n"
            "4. workflows: List of process workflows \n"
            "5. tables: List of related tables \n"
            "Note:"
            "1. If you don't have enough information, just say 'Not enough information' for each section.\n\n"
            "2. If the specified section is provided in the context, use the provided information and ignore the standard format.\n\n"
            "Answer: The answer to the user's question. Don't include any other information beyond the standard format."
            "\n\n"
            "Output format: Markdown."
            "\n\n"
            "Context: The following information is provided for context: \n\n"
            "\n\n"
        )

        content = (
            f"User question:\n{user_query}\n\n"
            f"Generated questions:\n{await ctx.get('generated_questions')}\n\n"
            f"SQL procedures:\n{context_blob}\n\n"
            f"Tables found: {tables}\n\n"
            # f"Explanation of the SQL procedures:\n{explanation}\n\n"
            "\n\n"
        )
        if graph_info:
            content += f"Graph info:\n{graph_info}\n\n"
        if latest_response:
            content += f"Last answser:\n{latest_response}\n\n"
        if latest_feedback:
            content += f"Last feedback:\n{latest_feedback}\n\n"
        
        # print(content)
        await ctx.set("latest_content", content)

        response_text = self._generate(prompt + content).strip()
        await ctx.set("latest_response", response_text)

        # Ask the human to review
        return InputRequiredEvent(prefix="How does this look? Give me any feedback you have on any of the answers.",
            result=response_text)

    # -------- Feedback loop --------------------------------------------------
    @step
    async def get_feedback(
        self, ctx: Context, ev: HumanResponseEvent
    ) -> GenerateQuestionsEvent | StopEvent:
        """Process human feedback – decide to loop or stop."""
        feedback_text = ev.response.strip()

        await ctx.set("latest_feedback", feedback_text)

        if feedback_text.lower().startswith("okay"):
            # We're done!
            print("[Workflow] Human accepted the answer. Stopping.")
            return StopEvent(result=await ctx.get("latest_response"))

        # Otherwise loop – feed the feedback into question generation again
        print("[Workflow] Received feedback – refining answer.")
        return GenerateQuestionsEvent(UserQuery=getattr(ev, "UserQuery", ""), GeneratedQuestions=[], Feedback=feedback_text)

# 1.Test

In [35]:
# w.document_sql()

In [None]:
WORKFLOW_FILE = "workflows/SoftwareDocBot_workflow.html"
draw_all_possible_flows(w, filename=WORKFLOW_FILE)

workflows/SoftwareDocBot_workflow.html


In [None]:
UserQuery = "Explain the logic of space recommendation query"

In [None]:
UserQuery = "Explain the booking process"

In [None]:
handler = w.run(
    UserQuery=UserQuery,
)

async for event in handler.stream_events():
    if isinstance(event, InputRequiredEvent):
        print("We've filled in your form! Here are the results:\n")
        print(event.result)
        # now ask for input from the keyboard
        response = input(event.prefix)
        handler.ctx.send_event(
            HumanResponseEvent(
                response=response
            )
        )

response = await handler
print("Agent complete! Here's your final result:")
print(str(response))

Running step set_up
Step set_up produced event GenerateQuestionsEvent
Running step generate_questions
Here are 3 concise follow-up questions to help retrieve the most relevant SQL procedures:

1. What are the key steps involved in the booking process, and which tables in the database are likely to be affected (e.g. customers, bookings, rooms, etc.)?

2. Are there any specific business rules or constraints that apply to the booking process, such as minimum stay requirements, payment methods, or cancellation policies?

3. Are there any metrics or KPIs related to the booking process that we need to consider, such as booking conversion rates, average booking value, or customer retention rates?
Step generate_questions produced event QueryEvent
Running step rag_query
[RAG] Query: Here are 3 concise follow-up questions to help retrieve the most relevant SQL procedures:
1. What are the key steps involved in the booking process, and which tables in the database are likely to be affected (e.g. c

In [None]:
handler = w.run(
    UserQuery=UserQuery,
)

async for event in handler.stream_events():
    if isinstance(event, InputRequiredEvent):
        print("We've filled in your form! Here are the results:\n")
        print(event.result)
        # now ask for input from the keyboard
        # response = input(event.prefix)
        handler.ctx.send_event(
            HumanResponseEvent(
                response="OKAY"
            )
        )

response = await handler
print("Agent complete! Here's your final result:")
print(str(response))

Running step set_up
Step set_up produced event GenerateQuestionsEvent
Running step generate_questions
Step generate_questions produced event QueryEvent
Running step rag_query
[RAG] Query: Here are 3 concise follow-up questions to help retrieve the most relevant SQL procedures:
1. What are the different stages involved in the booking process (e.g. inquiry, reservation, confirmation, payment)?
2. Are there any specific tables or databases that store information related to bookings, such as customer information, room availability, or payment details?
3. Are there any business rules or constraints that govern the booking process, such as minimum stay requirements, cancellation policies, or loyalty program benefits?
checking snippets ["USE OurSpace;\n\nDROP PROCEDURE IF EXISTS `sp_BookSpace`;\n\nDELIMITER // \n\n/*\nCALL sp_BookSpace(1,1,'2025-03-14 11:00:00','2025-03-14 15:00:00',now(),'test');\n*/\n\nCREATE PROCEDURE `sp_BookSpace` (\n\tIN cid \t\t\tINT,\n    IN sid \t\t\tINT,\n    IN sta

# 2. Test

In [68]:
w = SoftwareDocBot(
    storage_dir="rag_index", 
    data_dir="docs", 
    model="groq", 
    timeout=600,
    verbose=True)

load INSTRUCTOR_Transformer
max_seq_length  512


In [69]:
async def answer_question(workflow, question):
    handler = workflow.run(
        UserQuery=question
    )
    print(question)
    async for event in handler.stream_events():
        if isinstance(event, InputRequiredEvent):
            print("We've filled in your form! Here are the results:\n")
            print(event.result)
            handler.ctx.send_event(
                HumanResponseEvent(
                    response="OKAY"
                )
            )
    response = await handler
    gen_questions = await handler.ctx.get('generated_questions')
    gen_questions = "\n".join(gen_questions)
    content = await handler.ctx.get('latest_content')
    return str(response), gen_questions,content
    

In [52]:
import json

# load json file
with open('sample_questions.json', 'r') as f:
    questions = json.load(f)
    

In [42]:
answer_format = questions['answer_format']

format_answer = f"""
{answer_format['instructions']} 
1. rules: {answer_format['sections'][0]['rules']} 
2. constraints: {answer_format['sections'][1]['constraints']} 
3. calculations: {answer_format['sections'][2]['calculations']}
4. workflows: {answer_format['sections'][3]['workflows']} 
5. tables: {answer_format['sections'][4]['tables']} 
"""

print(format_answer)




Please provide a structured response in Markdown format with the following: 
1. rules: List of business rules identified 
2. constraints: List of business constraints 
3. calculations: List of any calculations or formulas
4. workflows: List of process workflows 
5. tables: List of related tables 



In [70]:
w = SoftwareDocBot(
    storage_dir="ourspace_index", 
    data_dir="ourspace", 
    model="groq", 
    timeout=600,
    verbose=True)

Questions, Followup_Questions, Content, Answers = [], [], [], []
for question in questions['questions']:
    # question += f"\n{format_answer}"
    response, followup_questions, content = await answer_question(w, question)
    print(response)
    Questions.append(question)
    Followup_Questions.append(followup_questions)
    Content.append(content)
    Answers.append(response)
    break

# store typical questions and answers as json
typical_qa = {
    'questions': Questions,
    'followup_questions': Followup_Questions,
    'content': Content,
    'answers': Answers
}

# save typical questions and answers to json
with open('standard_qa.json', 'w') as f:
    json.dump(typical_qa, f)

    

load INSTRUCTOR_Transformer
max_seq_length  512
Explain the booking process implemented.
Running step set_up
Step set_up produced event GenerateQuestionsEvent
Running step generate_questions
Based on the provided context, the following three underlying questions can help to answer the user request to explain the booking process implemented:

1. What are the different stages or steps involved in the booking process, and how do they interact with each other?

This question will help to identify the various procedures and queries involved in the booking process, such as creating a booking, editing a booking, and canceling a booking.

2. How does the system ensure data consistency and integrity throughout the booking process, particularly when it comes to updating or canceling bookings?

This question will help to understand the mechanisms in place to prevent data inconsistencies, such as transactions, locking, and validation checks.

3. What are the different types of bookings that can be

In [77]:
import knowledge_graph_pipeline as pipeline

In [73]:
sql_file = "./ourspace/DatabaseSchema.txt"
parsed_schema = pipeline.parse_schema(sql_file)
parsed_schema

{'OurSpace': {'columns': [], 'primary_keys': [], 'foreign_keys': []},
 'user': {'columns': [{'name': 'id',
    'type': 'INT',
    'constraints': ['AUTO_INCREMENT', 'PRIMARY KEY']},
   {'name': 'name', 'type': 'VARCHAR(255)', 'constraints': ['NOT NULL']},
   {'name': 'email', 'type': 'VARCHAR(255)', 'constraints': ['NOT NULL']},
   {'name': 'phone', 'type': 'VARCHAR(15)', 'constraints': ['NOT NULL']},
   {'name': 'address', 'type': 'VARCHAR(500)', 'constraints': []},
   {'name': 'preferedRange', 'type': 'FLOAT', 'constraints': []},
   {'name': 'isCustomer', 'type': 'BOOLEAN', 'constraints': ['DEFAULT FALSE']},
   {'name': 'isOwner', 'type': 'BOOLEAN', 'constraints': ['DEFAULT FALSE']},
   {'name': 'rating', 'type': 'FLOAT', 'constraints': []},
   {'name': 'createdAt',
    'type': 'DATETIME',
    'constraints': ['DEFAULT CURRENT_TIMESTAMP()']},
   {'name': 'updatedAt',
    'type': 'DATETIME',
    'constraints': ['DEFAULT CURRENT_TIMESTAMP()',
     'ON UPDATE CURRENT_TIMESTAMP()']}],
  'p

In [74]:
knowledge_graph = pipeline.build_knowledge_graph(parsed_schema)

In [80]:
pipeline.save_kg_graphml(knowledge_graph, 'sql_knowledge_graph')

In [78]:
pipeline.visualize_kg_interactive(knowledge_graph, filename='kg.html')

kg.html


In [92]:
def extract_query_labels(sql_query):
    loaded_knowledge_graph = pipeline.load_kg_graphml('sql_knowledge_graph')
    parsed_query = pipeline.parse_sql(sql_query)
    _, query_id = pipeline.update_kg_with_query(loaded_knowledge_graph, parsed_query)
    return pipeline.extract_query_labels(loaded_knowledge_graph, query_id)

In [97]:
for relation in extract_query_labels(sql_query)['relations']:
    print(relation)

query_2856755213282575382
{'source': 'column_space.id', 'target': 'table_space', 'label': 'BELONGS_TO'}
{'source': 'table_booking', 'target': 'table_space', 'label': 'JOINED_IN_QUERY'}
{'source': 'column_booking.id', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.sid', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.sid', 'target': 'column_space.id', 'label': 'JOINED_WITH'}
{'source': 'column_booking.bookingDate', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.startTime', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.endTime', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.spacePrice', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.sysFee', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.total', 'target': 'table_booking', 'label': 'BELONGS_TO'}
{'source': 'column_booking.status', 

In [81]:
loaded_knowledge_graph = pipeline.load_kg_graphml('sql_knowledge_graph')

In [82]:
sql_query = """
    SELECT 
        b.id AS booking_id, 
        b.bookingDate, 
        b.startTime, 
        b.endTime, 
        b.spacePrice, 
        b.sysFee, 
        b.total, 
        b.status, 
        b.remark 
    FROM booking b 
    JOIN space s ON b.sid = s.id;
"""

In [83]:
parsed_query = pipeline.parse_sql(sql_query)
parsed_query

QueryComponents(raw_query='\n    SELECT \n        b.id AS booking_id, \n        b.bookingDate, \n        b.startTime, \n        b.endTime, \n        b.spacePrice, \n        b.sysFee, \n        b.total, \n        b.status, \n        b.remark \n    FROM booking b \n    JOIN space s ON b.sid = s.id;\n', query_type='SELECT', tables={'booking', 'space'}, columns={'booking.endTime', 'booking.id', 'booking.status', 'booking.remark', 'booking.startTime', 'booking.bookingDate', 'booking.sysFee', 'space.id', 'booking.spacePrice', 'booking.total', 'booking.sid'}, joins=[{'source': 'booking', 'target': 'space', 'type': 'JOIN', 'conditions': [{'left': 'sid', 'right': 'id', 'operator': '='}]}], conditions=[], aggregations=set(), subqueries=set(), group_by=[], order_by=[], limit=None, having=[], ctes={})

In [84]:
knowledge_graph_update, query_id = pipeline.update_kg_with_query(loaded_knowledge_graph, parsed_query)

query_2856755213282575382


In [89]:
pipeline.extract_query_labels(loaded_knowledge_graph, query_id)['relations']

[{'source': 'column_space.id', 'target': 'table_space', 'label': 'BELONGS_TO'},
 {'source': 'table_booking',
  'target': 'table_space',
  'label': 'JOINED_IN_QUERY'},
 {'source': 'column_booking.id',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.sid',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.sid',
  'target': 'column_space.id',
  'label': 'JOINED_WITH'},
 {'source': 'column_booking.bookingDate',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.startTime',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.endTime',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.spacePrice',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.sysFee',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
 {'source': 'column_booking.total',
  'target': 'table_booking',
  'label': 'BELONGS_TO'},
