# BMKG - Assignment 3

This notebook intends to automatically generate a SCHEMA for any given KG.

In [9]:
from operator import itemgetter
import getpass
import os

from typing import Any

from rdflib import Graph

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate, format_document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import get_buffer_string
from langchain_openai import OpenAI

In [2]:
# Prompt for the key only when the environment does not already provide one,
# so the secret never has to be written into the notebook itself.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Provide your OpenAI API Key")

1) Get the RDF graph

In [72]:
# Ontology file from the Assignment 2 folder; serialised into the prompt later on.
schema = Graph().parse("../Assignment 2/climate_kg_ontology.ttl")

# Data graph from the Assignment 1 folder.
graph = Graph().parse("../Assignment 1/src/graph_from_data_sources.ttl")

# len() of an rdflib Graph is its triple count.
print("Number of triples: ", len(graph))

Number of triples:  178548


In [110]:
# LLM used by both the question-reformulation step and the SPARQL-generation step.
# temperature=0 asks for the least random sampling so repeated runs stay comparable.
llm = OpenAI(temperature=0)

In [111]:
from langchain_core.prompts import ChatPromptTemplate

# Conversation memory: stores every question/answer pair so that follow-up
# questions can be rewritten against the earlier turns.
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)
# Adds a "chat_history" key to the chain input, filled from the memory's "history" entry.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Prompt 1: turn a follow-up question into a standalone question using the chat history.
reform_template = """Given the following chat history and a follow up question,
rephrase the follow up question to be a standalone straightforward question, in its original language.
Do not answer the question! Just rephrase reusing informations from the chat history.
Make it short and straight to the point.

Chat History:
{chat_history}
Follow up input:
{question}

Standalone question:
"""
REFORM_QUESTION_PROMPT = PromptTemplate.from_template(reform_template)

# Prompt 2: generate a SPARQL query for the standalone question, restricted to the schema.
# The question reaching this prompt has already been made standalone and the template has
# no chat-history placeholder, so the instructions mention only the schema.
answer_template = """Given the following schema
of an RDF graph, generate a SPARQL (including prefixes) query to
answer the question. Make sure to include only what is part of the schema,
so it is fine if you do not answer entirely the question, only the part that
is actually part of the schema should be included:
{schema}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(answer_template)

In [112]:
# Reformulate the question using chat history
reformulated_question = {
    "reformulated_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
        "schema": lambda x: x["schema"],
    }
    | REFORM_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}
# Construct the inputs for the final prompt using retrieved documents
final_inputs = {
    "schema": lambda x: x["reformulated_question"],
    "question": lambda x: print("💭 Reformulated question:", x["reformulated_question"]) or x["reformulated_question"],
}
# Generate the answer using the retrieved documents and answer prompt
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
}
# Put the chain together
final_chain = loaded_memory | reformulated_question | answer

def stream_chain(final_chain, memory: ConversationBufferMemory, inputs: dict[str, str]) -> dict[str, Any]:
    """Run the chain in streaming mode, echo the answer as it arrives, and remember the exchange.

    Args:
        final_chain: Object whose ``stream(inputs)`` yields dict chunks; only chunks
            that contain an ``"answer"`` key contribute to the result.
        memory: Conversation memory; ``save_context`` is called once after the stream ends.
        inputs: Chain inputs, passed unchanged to both ``stream`` and ``save_context``.

    Returns:
        A dict with a single ``"answer"`` key holding the concatenated streamed text.
    """
    collected = ""
    for chunk in final_chain.stream(inputs):
        if "answer" not in chunk:
            continue
        piece = chunk["answer"]
        collected += piece
        # Echo immediately so the answer appears progressively in the cell output.
        print(piece, end="", flush=True)
    # Persist the finished exchange so later questions can refer back to it.
    memory.save_context(inputs, {"answer": collected})
    return {"answer": collected}

In [113]:
# Ask a first question; the ontology is serialised to Turtle text and passed as "schema".
output = stream_chain(
    final_chain,
    memory,
    {
        "schema": str(schema.serialize(format="ttl")),
        "question": "What is the label of the country Belgium?",
    },
)

💭 Reformulated question: What is the label of Belgium?
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?label
WHERE {
  ?country rdf:type <http://example.org/Country> .
  ?country rdfs:label ?label .
  FILTER (?country = <http://example.org/Belgium>)
}