In [4]:
import sys
!{sys.executable} -m pip install -qU langchain-huggingface

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model that is handed to the vector store in the next cell.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [6]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector store that uses `embeddings` to index documents and queries.
vector_store = InMemoryVectorStore(embeddings)

In [7]:
from rdflib import Graph, RDF, OWL, BNode
from typing import Set
# Load the ontology file into an rdflib graph.
# NOTE(review): the path is relative, so this presumably expects the notebook's
# working directory to contain Ontology_Assignment.rdf — confirm.
g = Graph()
g.parse("./Ontology_Assignment.rdf")

def extract_class_bundle(g: Graph, class_uri) -> Graph:
    """
    Collect all triples that describe or reference a given OWL class.

    Starting from ``class_uri``, every triple in which the current node is the
    subject or the object is copied into a fresh graph. Blank nodes reached
    that way (Restrictions, unionOf lists, etc.) are expanded in the same
    manner; non-blank neighbours are copied but not expanded.

    The walk uses an explicit stack instead of recursion, so a long chain of
    blank nodes (for example a large RDF list) cannot exceed Python's
    recursion limit.

    Parameters:
        g: Source graph to read triples from; it is only queried, never changed.
        class_uri: Node to start the walk from.

    Returns:
        Graph: A new graph containing the collected triples.
    """
    subg = Graph()
    visited: Set = set()
    pending = [class_uri]

    while pending:
        node = pending.pop()
        if node in visited:
            continue
        visited.add(node)

        # Triples where node is the subject
        for s, p, o in g.triples((node, None, None)):
            subg.add((s, p, o))
            if isinstance(o, BNode) and o not in visited:
                pending.append(o)

        # Triples where node is the object
        for s, p, o in g.triples((None, None, node)):
            subg.add((s, p, o))
            if isinstance(s, BNode) and s not in visited:
                pending.append(s)

    return subg


# --- Create bundles for each class ------------------------------------------
# For every subject typed owl:Class, extract its bundle of triples and keep the
# Turtle serialization as one text block.
# NOTE(review): the printed output includes identifiers such as "N7314…", which
# look like blank-node ids — anonymous class expressions appear to get their own
# bundle too; confirm that this is intended.
print("Extracting class bundles...")

class_bundles = []
for c in g.subjects(RDF.type, OWL.Class):
    subgraph = extract_class_bundle(g, c)
    rdf_text = subgraph.serialize(format="turtle")  # or "xml" if you prefer
    class_bundles.append(rdf_text)
    print(f"Extracted bundle for class: {c}")

print(f"Total class bundles: {len(class_bundles)}")

# Alias used by the indexing cell that follows.
ontology_blocks = class_bundles

Extracting class bundles...
Extracted bundle for class: N731404a0dc404dcb9e975a982fed6f7f
Extracted bundle for class: Na338259eacec4f7db2caf962a50491e6
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/Ontology_Assignment#BakingIngredients
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/Ontology_Assignment#Ingredients
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/Ontology_Assignment#PizzaBianca
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Adoption
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Adult
Extracted bundle for class: Nf56625720a014ec887349572df1f5b6e
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#AirborneAllergies
Extracted bundle for class: http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Allergies
Extracte

In [8]:
from langchain_core.documents import Document

# Wrap each ontology block (a Turtle string) in a Document; no metadata is attached.
docs = [Document(page_content=text) for text in ontology_blocks]

# Index the documents in the vector store and keep the returned value
# (presumably the ids assigned to the documents — TODO confirm).
document_ids = vector_store.add_documents(docs)

In [9]:
# --- Local LLM for generation (Ollama)
from langchain_ollama import ChatOllama

# Chat model used for both scenario extraction and consistency answers.
# Change the model name if needed.
# NOTE(review): presumably requires a running Ollama server with "phi3:mini"
# already pulled — confirm.
llm = ChatOllama(
    model="phi3:mini",
)

In [10]:
from langchain import hub

# Pull the "rlm/rag-prompt" template from the LangChain hub and print it for inspection.
# NOTE(review): the later cells visible here use `custom_prompt`, not `prompt`.
prompt = hub.pull("rlm/rag-prompt")
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [11]:
from langchain.prompts import PromptTemplate

# NOTE(review): base_prompt is pulled from the hub but is not referenced again in
# the visible cells.
base_prompt = hub.pull("rlm/rag-prompt")

# Prompt asking the model to judge a scenario against the retrieved ontology
# context and to end with "Consistent." or "Inconsistent.".
# The template is written as an f-string, so the doubled braces {{context}} and
# {{question}} are escapes that come out as the {context} / {question}
# placeholders that PromptTemplate fills in (see the printed template).
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=f"""You will be given an ontology context, and a scenario. Find out based on the ontology context if the scenario is consistent or not with the ontology, you may use reasoning, you can think out loud. The last word should be "Consistent." or "Inconsistent.".
    For example: 
    Context ontology: (contains information on pizza containing tomato, tomato containing vitamin C)
    Scenario: Lily eats pizza for her vitamin C deficiency.
    Answer: Based on the subclasses of Pizza's containing Tomato, and Tomato have vitamin C, eating a pizza for a vitamin C deficiency is consistent. Consistent.

Context ontology:
{{context}}

Scenario:
{{question}}
"""
)
print(custom_prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} template='You will be given an ontology context, and a scenario. Find out based on the ontology context if the scenario is consistent or not with the ontology, you may use reasoning, you can think out loud. The last word should be "Consistent." or "Inconsistent.".\n    For example: \n    Context ontology: (contains information on pizza containing tomato, tomato containing vitamin C)\n    Scenario: Lily eats pizza for her vitamin C deficiency.\n    Answer: Based on the subclasses of Pizza\'s containing Tomato, and Tomato have vitamin C, eating a pizza for a vitamin C deficiency is consistent. Consistent.\n\nContext ontology:\n{context}\n\nScenario:\n{question}\n'


In [65]:
# Single end-to-end query: retrieve bundles, build the context, render the prompt, ask the LLM.
question = "Alex is 15, and is married to a girl called Lily."
k = 5  # number of ontology bundles to retrieve
retrieved_docs = vector_store.similarity_search(question, k=k)
# Retrieved bundles are concatenated with a blank line between them.
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
rendered_prompt = custom_prompt.invoke({"question": question, "context": docs_content})
answer = llm.invoke(rendered_prompt)

In [67]:
retrieved_docs

[Document(id='f83d4c64-d8a9-4cd0-9eb8-4c1c0eef5593', metadata={}, page_content='@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Birth> a owl:Class ;\n    rdfs:subClassOf <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#LifeEvent> .\n\n[] a owl:AllDisjointClasses ;\n    owl:members ( <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Birth> <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Death> <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Marriage> ) .\n\n'),
 Document(id='a5f98919-e0f4-4049-910f-85836c8e273d', metadata={}, page_content='@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n

In [61]:
docs_content

'@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Birth> a owl:Class ;\n    rdfs:subClassOf <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#LifeEvent> .\n\n[] a owl:AllDisjointClasses ;\n    owl:members ( <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Birth> <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Death> <http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Marriage> ) .\n\n\n\n@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Death> a owl:Class ;\n    rdfs:subClassOf <http://www.semanticweb.org/izabo/ontologie

In [None]:
import re

def scenario_splitter(text, group_size=2):
    """
    Split a text into chunks of consecutive sentences.

    A sentence is a run of characters that ends in '.', '!' or '?'. Trailing
    text without closing punctuation is kept as a final sentence instead of
    being silently dropped. The split is purely punctuation-based, so
    abbreviations and decimal numbers are also treated as sentence ends.

    Parameters:
        text (str): The story or passage to split.
        group_size (int): Number of sentences per chunk (default 2).

    Returns:
        list[str]: Chunks of up to `group_size` sentences joined by a single
        space; the last chunk may be shorter. Empty for empty or
        whitespace-only input.

    Raises:
        ValueError: If `group_size` is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be at least 1")

    # `(?:[.!?]|$)` lets the final sentence match even without end punctuation.
    raw_sentences = re.findall(r'[^.!?]+(?:[.!?]|$)', text)
    # Strip surrounding whitespace and discard fragments that were only whitespace.
    sentences = [s.strip() for s in raw_sentences if s.strip()]
    # Group consecutive sentences into chunks of `group_size`.
    return [
        ' '.join(sentences[i:i + group_size])
        for i in range(0, len(sentences), group_size)
    ]



In [None]:
def scenario_splitter_llm(text, llm):
    """
    Ask an LLM to break a story into individually checkable scenarios.

    Parameters:
        text (str): The story to split.
        llm: Object with an `invoke(prompt)` method. Its reply may be an object
            with a `.content` attribute, a dict with a "content" key, or a
            plain string.

    Returns:
        list[str]: One entry per scenario. Lines that start with a list number
        ("1." or "1)") are used when present; otherwise every non-empty line
        of the reply is used, with leading/trailing bullet characters removed.

    Raises:
        TypeError: If the reply is none of the supported shapes.
    """
    import re  # local import so the function does not depend on another cell

    system_prompt = (  # TODO: this whole prompt needs improvement
            "You are an assistant that extracts logical or factual 'scenarios' "
            "from a given story. A scenario is a self-contained statement or short passage "
            "that can be individually checked for correctness or consistency.\n\n"
            "Guidelines:\n"
            "- Each scenario should capture a single checkable fact, event, or claim.\n"
            "- Avoid redundancy — each scenario should be distinct.\n"
            "- Keep scenarios concise (one or two sentences max).\n"
            "- Preserve important details like names, places, or relationships.\n\n"
            "Return the scenarios as a numbered list."
        )

    # Combine system and user input
    prompt = f"{system_prompt}\n\nStory:\n{text}\n\nScenarios:\n"

    # Query the LLM and normalize the reply to plain text
    response = llm.invoke(prompt)
    if hasattr(response, "content"):  # LangChain AIMessage
        output_text = response.content
    elif isinstance(response, dict) and "content" in response:
        output_text = response["content"]
    elif isinstance(response, str):
        output_text = response
    else:
        raise TypeError(f"Unexpected LLM response type: {type(response)}")

    # Extract numbered items. The pattern is anchored to the start of a line so
    # that a number inside a sentence ("John is 15. He ...") is not mistaken
    # for a list marker, which would truncate the scenario.
    numbered = re.findall(r'^[ \t]*\d+[.)][ \t]*(.+)', output_text, flags=re.MULTILINE)
    scenarios = [item.strip() for item in numbered if item.strip()]

    # If the LLM does not number them, fall back to line splitting and drop
    # lines that are empty once the bullet characters are removed.
    if not scenarios:
        cleaned = (line.strip("-• \t") for line in output_text.splitlines())
        scenarios = [line for line in cleaned if line]

    return scenarios


In [None]:

# Sample story that the scenario splitters are run on in the cells below.
# NOTE(review): it appears to contain deliberate inconsistencies (e.g. a
# 15-year-old with a wife) to exercise the consistency check — confirm.
story = """John is 15 years old and is on vacation with his wife Amira in Italy. Their daughter Anna can’t wait to visit the Eiffel Tower, but first they will go out to eat. John suggests they eat pizzas since Italy is famous for them, they will eat at restaurant Riccolo located in Florence.
Since Anna has a vitamin C deficiency she will order a pizza that contains lots of vitamin C, so she will get the pizza Bianca. John will get the classic Margherita pizza and Amira orders a pepperoni pizza.
They sit by the window of the small restaurant; the air filled with the smell of garlic and baking dough. Anna swings her legs impatiently under the table, still talking about the Eiffel Tower, while Amira flips through a guidebook about Florence.
15 minutes later the farmer named Leo from the restaurant brings their pizzas and they eat, and Anna says to John how cool it is that the waiter is from France."""


In [16]:
def batch_query(scenarios, vector_store, custom_prompt, llm, k=5):
    """
    Answer every scenario with retrieval-augmented generation.

    For each scenario, the `k` most similar documents are fetched, their texts
    are joined into one context string, the prompt is rendered with that
    context, and the model is asked for an answer.

    Parameters:
        scenarios (list[str]): Scenario strings to evaluate, in order.
        vector_store: Object exposing `similarity_search(query, k=...)`.
        custom_prompt: Prompt template exposing `invoke(mapping)`.
        llm: Language model exposing `invoke(prompt)`.
        k (int): How many documents to retrieve per scenario.

    Returns:
        list[dict]: One dict per scenario with the keys 'question', 'context'
        and 'answer'; 'answer' is whatever `llm.invoke` returned.
    """

    def _run_single(scenario_text):
        # Retrieve, then merge the hits into one blank-line-separated context.
        hits = vector_store.similarity_search(scenario_text, k=k)
        context_text = "\n\n".join(hit.page_content for hit in hits)

        # Fill the template and query the model.
        filled_prompt = custom_prompt.invoke({
            "question": scenario_text,
            "context": context_text
        })
        model_reply = llm.invoke(filled_prompt)

        return {
            "question": scenario_text,
            "context": context_text,
            "answer": model_reply
        }

    return [_run_single(item) for item in scenarios]



In [17]:
# Rule-based split of the story, followed by one retrieval + LLM call per chunk.
scenarios = scenario_splitter(story)
results = batch_query(scenarios, vector_store, custom_prompt, llm, k=5)

In [29]:
# LLM-based split of the same story, for comparison with the rule-based `scenarios`.
scenarios2 = scenario_splitter_llm(story, llm)

In [30]:
scenarios2

['John is married to Amira.',
 'Amira has a vitamin C deficiency.',
 'Anna will order the Bianca pizza with lots of vitamin C for her deficiency at Riccolo restaurant in Florence, Italy on vacation with John and Amira.',
 'Their daughter Anna cannot wait to visit the Eiffel Tower during their trip to Italy but first wants to eat a Bianca pizza due to her vitamin C needs.',
 'John suggests they have Margherita or pepperoni at Riccolo restaurant, while Amira gets one with lots of Vitamin C for health reasons and Anna chooses based on the amount needed to address her deficiency but also considers wanting a traditional Italian pizza she likes best among them in their window seat.',
 "John is 15 years old; he's vacationing with his wife Amira, who cannot hear due to wearing headphones while reading about Florence landmarks and history - unaware of Anna’s impatience as they sit by the restaurant window waiting for their pizzas from a local French farmer waiter named Leo.",
 "After 15 minutes

In [None]:
## TODO: build a better ontology block maker.
## TODO: create better "scenarios" (so that each fact is presented to the LLM more clearly).
## TODO: reconstruct the story.
## TODO: write one-shot examples.
## TODO: try it with a better LLM.

