
# Minimal Consistent-Narrative Agent (PoC)
A clean, from-scratch notebook that runs locally with your own LLaMA via **Ollama** and performs:
1) **Chunking** (sentence-based)  
2) **Concept & relation extraction** (`subject | relation | object` lines)  
3) **Ontology RAG retrieval** from an RDF file parsed with RDFLib and indexed in **Chroma** using **semantic embeddings**.

> Keep it simple: no LangGraph in this minimal version. You can drop these functions into your Module‑1 nodes later.


In [27]:

import sys, platform
print("Python:", sys.executable)
print("Version:", sys.version)
print("OS:", platform.platform())

# TIP: If any import below fails, uncomment and run the installers.
# import subprocess
# pip_install(["rdflib", "langchain-ollama", "langchain-chroma", "chromadb", "sentence-transformers", "langchain-community"])


Python: c:\Users\Owen\Documents\ia\part2\local-langchain-academy-main\local-langchain-academy-main\.venv\Scripts\python.exe
Version: 3.11.5 (tags/v3.11.5:cce6ba9, Aug 24 2023, 14:38:34) [MSC v.1936 64 bit (AMD64)]
OS: Windows-10-10.0.26100-SP0


In [28]:

# --- Core imports
from rdflib import Graph, RDF, OWL
from typing import List, Tuple
import re

# --- Local LLM for generation (Ollama)
from langchain_ollama import ChatOllama

# --- Embeddings + Vector store for RAG
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Initialize your local LLM (change model name if needed)
llm = ChatOllama(
    model="phi3:mini",
)

g = Graph()
g.parse("./Ontology_Assignment.rdf")
ontology_blocks = []
for s in g.subjects(RDF.type, OWL.Class):
    # Get all triples related to that class
    related_triples = g.triples((s, None, None))
    subgraph = Graph()
    for t in related_triples:
        subgraph.add(t)
    # Serialize the subgraph as RDF/XML string
    rdf_xml = subgraph.serialize(format="xml")
    ontology_blocks.append(rdf_xml)
    print(rdf_xml)


# Semantic embeddings (CPU, fast). If your environment blocks remote code execution,
# switch to 'sentence-transformers/all-MiniLM-L6-v2' without model_kwargs.
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={"trust_remote_code": True}
)

vectorstore = Chroma.from_texts(
    texts=ontology_blocks,
    embedding=embeddings,
    collection_name="ontology",
    persist_directory="./chroma_ontology_store"
)

retriever = vectorstore.as_retriever(search_kwargs={"k": 5})


<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:owl="http://www.w3.org/2002/07/owl#"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
  <rdf:Description rdf:nodeID="N62bd6465b7d049f1a5ab1683a18b9539">
    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#Class"/>
    <owl:unionOf rdf:nodeID="Ndb7beaaf98224de4808c22eee86b3af0"/>
  </rdf:Description>
</rdf:RDF>

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:owl="http://www.w3.org/2002/07/owl#"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
  <rdf:Description rdf:nodeID="N63674010cfcb419885a0c16a3636d224">
    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#Class"/>
    <owl:unionOf rdf:nodeID="Ne2274d5499c2448792a320188942afe7"/>
  </rdf:Description>
</rdf:RDF>

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
  <rdf:Description rdf:about="http://www.semanticweb.org/izab

<All keys matched successfully>


In [30]:

def split_into_sentences(text: str) -> List[str]:
    # Simple sentence splitter; good enough for PoC. Adjust as needed.
    sents = re.split(r"(?<=[.!?])\s+", text.strip())
    return [s for s in sents if s]

def chunk_story_by_sentences(story: str, group_n: int = 1) -> List[str]:
    """Return chunks with exactly `group_n` sentences per chunk (last may be shorter)."""
    sents = split_into_sentences(story)
    if group_n <= 1:
        return sents
    chunks = []
    for i in range(0, len(sents), group_n):
        chunks.append(" ".join(sents[i:i+group_n]))
    return chunks

# Quick demo
_demo = """Alex is 15 years old and is on vacation with his wife Amira in Italy.
Their daughter Anna can't wait to visit the Eiffel Tower, but first they will eat pizza in Florence.
Since Anna has a vitamin C deficiency, she orders orange juice with her meal.
Meanwhile, Alex and Amira discuss their plans to visit the Colosseum next week.
They are excited about seeing more of Italy and learning about its history."""

print("Sentence-per-chunk demo:")
for i, ch in enumerate(chunk_story_by_sentences(_demo, group_n=1), 1):
    print(f" Chunk {i} \n{ch}\n")


Sentence-per-chunk demo:
 Chunk 1 
Alex is 15 years old and is on vacation with his wife Amira in Italy.

 Chunk 2 
Their daughter Anna can't wait to visit the Eiffel Tower, but first they will eat pizza in Florence.

 Chunk 3 
Since Anna has a vitamin C deficiency, she orders orange juice with her meal.

 Chunk 4 
Meanwhile, Alex and Amira discuss their plans to visit the Colosseum next week.

 Chunk 5 
They are excited about seeing more of Italy and learning about its history.



In [None]:
from typing import List
import re

def extract_facts_from_chunks(chunks: List[str]) -> List[str]:
    """
    Ask the local LLM to list factual statements from each chunk.
    Returns a flat list of fact strings (one per fact).
    """
    all_facts = []

    for i, ch in enumerate(chunks, 1):
        print(f"Extracting facts from chunk {i}/{len(chunks)}...")
        prompt = f"""
Read the following text and list all clear factual statements it contains.
Write each fact as a short, standalone sentence.
Do not speculate. Only write facts you are sure of you can derive from this text.
You may derive facts from other things, such as locations in locations: example, if we are in Florence and visiting the Eiffel today, the Eiffel Tower is in Florence etc.

Text:
{ch}

Facts:
"""
        resp = llm.invoke(prompt)
        text = getattr(resp, "content", str(resp)).strip()

        # Split into individual facts (handles -, *, newlines)
        facts = [ln.strip(" -*\n\r\t") for ln in text.splitlines() if ln.strip()]
        facts = [f for f in facts if len(f.split()) > 2]  # skip garbage
        all_facts.extend(facts)
    print(f"Extracted {len(all_facts)} total facts.")
    return all_facts


In [36]:
chunks = chunk_story_by_sentences(_demo, group_n=1)
facts_list = extract_facts_from_chunks(chunks)

for i, facts in enumerate(facts_list, 1):
    print(f"\n--- Fact {i} ---\n{facts}")


Extracting facts from chunk 1/5...
Extracting facts from chunk 2/5...
Extracting facts from chunk 3/5...
Extracting facts from chunk 4/5...
Extracting facts from chunk 5/5...
Extracted 15 total facts.

--- Fact 1 ---
Alex is 15 years old.

--- Fact 2 ---
Alex's wife is named Amira.

--- Fact 3 ---
Alex and Amira are currently in Italy together for a vacation.

--- Fact 4 ---
The speaker and their family are planning a trip where eating pizza in Florence comes before a visit to the Eiffel Tower.

--- Fact 5 ---
Anna is excited about potentially seeing the Eiffel Tower during this trip.

--- Fact 6 ---
Anna has a vitamin C deficiency.

--- Fact 7 ---
Oranges are rich in vitamin C.

--- Fact 8 ---
Orange juice is made from oranges.

--- Fact 9 ---
Anna includes orange juice as part of her meal routine to manage her condition.

--- Fact 10 ---
Alex and Amira are planning a trip.

--- Fact 11 ---
They plan to go to Rome.

--- Fact 12 ---
The planned date for their trip is next week.

--- Fa

In [37]:
facts_list

['Alex is 15 years old.',
 "Alex's wife is named Amira.",
 'Alex and Amira are currently in Italy together for a vacation.',
 'The speaker and their family are planning a trip where eating pizza in Florence comes before a visit to the Eiffel Tower.',
 'Anna is excited about potentially seeing the Eiffel Tower during this trip.',
 'Anna has a vitamin C deficiency.',
 'Oranges are rich in vitamin C.',
 'Orange juice is made from oranges.',
 'Anna includes orange juice as part of her meal routine to manage her condition.',
 'Alex and Amira are planning a trip.',
 'They plan to go to Rome.',
 'The planned date for their trip is next week.',
 'Their destination of choice within Italy is the Colosseum.',
 'They want to see more of Italy.',
 'They are interested in Italian history.']

In [38]:
def retrieve_constraints_for_facts(facts: List[str], retriever, top_k: int = 5):
    """
    For each fact (text string), retrieve the top-k relevant ontology triples.
    Returns a dict: {fact: [ontology lines...]}.
    """
    results = {}
    for i, fact in enumerate(facts, 1):
        print(f"Retrieving ontology constraints for fact {i}/{len(facts)}...")
        retrieved_docs = retriever.invoke(fact)
        lines = [doc.page_content for doc in retrieved_docs]
        results[fact] = lines
    return results

In [39]:
constraints = retrieve_constraints_for_facts(facts_list, retriever)

# Show results
for fact, lines in constraints.items():
    print(f"\nFact: {fact}")
    for l in lines:
        print("   •", l)


Retrieving ontology constraints for fact 1/15...
Retrieving ontology constraints for fact 2/15...
Retrieving ontology constraints for fact 3/15...
Retrieving ontology constraints for fact 4/15...
Retrieving ontology constraints for fact 5/15...
Retrieving ontology constraints for fact 6/15...
Retrieving ontology constraints for fact 7/15...
Retrieving ontology constraints for fact 8/15...
Retrieving ontology constraints for fact 9/15...
Retrieving ontology constraints for fact 10/15...
Retrieving ontology constraints for fact 11/15...
Retrieving ontology constraints for fact 12/15...
Retrieving ontology constraints for fact 13/15...
Retrieving ontology constraints for fact 14/15...
Retrieving ontology constraints for fact 15/15...

Fact: Alex is 15 years old.
   • N05e808d68be74259807631aa7c92f6a5 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Adult
   • Nb5539025d74f4d4ba69c8144aa509e93 http://www.w3.org/2001/XML

In [40]:
from typing import List, Dict

def evaluate_fact_consistency(facts: List[str], constraints: Dict[str, List[str]], llm, verbose=True):
    """
    Evaluate the consistency of each fact against its retrieved ontology constraints.
    Calls the LLM once per fact.

    Args:
        facts: List of factual statements (strings).
        constraints: Dict mapping each fact -> list of ontology constraint lines.
        llm: LLM instance (e.g. ChatOllama(model='phi3:mini')).
        verbose: Print intermediate results (default: True).

    Returns:
        A list of dicts:
        [
            {
                "fact": str,
                "constraints": [str, ...],
                "assessment": "consistent" | "inconsistent" | "uncertain",
                "explanation": str
            },
            ...
        ]
    """
    results = []

    for i, fact in enumerate(facts, 1):
        cons = constraints.get(fact, [])
        if not cons:
            if verbose:
                print(f"⚪ Skipping fact {i}: no constraints retrieved.")
            continue

        constraint_text = "\n".join(f"- {c}" for c in cons)

        prompt = f"""
You are a logical reasoning assistant.

FACT:
{fact}

ONTOLOGY CONSTRAINTS:
{constraint_text}

TASK:
Determine whether the fact above is consistent or inconsistent with the ontology constraints.
Answer with one of: "consistent", "inconsistent", or "uncertain".
Then explain briefly why.

Format your answer as:
Result: <consistent/inconsistent/uncertain>
Reason: <one sentence reason>
"""

        if verbose:
            print(f"\n Evaluating fact {i}/{len(facts)}:\n{fact}")

        try:
            resp = llm.invoke(prompt)
            text = getattr(resp, "content", str(resp)).strip()

            # Parse the output robustly
            result = {
                "fact": fact,
                "constraints": cons,
                "assessment": "uncertain",
                "explanation": text
            }

            # Try to extract a structured label
            lowered = text.lower()
            if "consistent" in lowered and "inconsistent" not in lowered:
                result["assessment"] = "consistent"
            elif "inconsistent" in lowered:
                result["assessment"] = "inconsistent"

            results.append(result)

        except Exception as e:
            print(f"LLM failed for fact {i}: {e}")

    return results


In [41]:
constraints

{'Alex is 15 years old.': ['N05e808d68be74259807631aa7c92f6a5 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Adult',
  'Nb5539025d74f4d4ba69c8144aa509e93 http://www.w3.org/2001/XMLSchema#minInclusive 18',
  'Nb19f2501c13d493fa2e497609c37a6d1 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Person',
  'N6b821ed43a854fb3b72635bc543d38eb http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Adult',
  'Nfd148ef8c6884c6d9b9430ea297052ca http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#Birth'],
 "Alex's wife is named Amira.": ['http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#John http://www.semanticweb.org/izabo/ontologies/2025/8/untitled-ontology-8#isMarriedTo http://www.semanticweb.org/izabo

In [None]:
result = evaluate_fact_consistency(facts=facts_list,constraints=constraints,llm=llm)


 Evaluating fact 1/15:
Alex is 15 years old.

 Evaluating fact 2/15:
Alex's wife is named Amira.

 Evaluating fact 3/15:
Alex and Amira are currently in Italy together for a vacation.

 Evaluating fact 4/15:
The speaker and their family are planning a trip where eating pizza in Florence comes before a visit to the Eiffel Tower.

 Evaluating fact 5/15:
Anna is excited about potentially seeing the Eiffel Tower during this trip.

 Evaluating fact 6/15:
Anna has a vitamin C deficiency.
