In [None]:
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, URIRef

rdflib imported successfully ✅


# Stage 1: Create Ontology

In [None]:
# Schema for the project knowledge graph (classes, object properties, datatype
# properties), written in Turtle.
#
# The `ex:` prefix MUST be the same IRI that the instance data and the SPARQL
# queries use (https://rahaf.org/projects#). With any other IRI the classes and
# properties declared here would be different resources from the ex:Task,
# ex:assignedTo, ... terms that the data actually uses, so the labels, domains
# and ranges below would not apply to them.
ONTOLOGY_TTL = """
@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl:  <http://www.w3.org/2002/07/owl#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex:   <https://rahaf.org/projects#> .

##################################################
# Classes
##################################################

ex:Project a owl:Class ;
    rdfs:label "Project" .

ex:Task a owl:Class ;
    rdfs:label "Task" .

ex:Agent a owl:Class ;
    rdfs:subClassOf foaf:Person ;
    rdfs:label "Agent" .

ex:Decision a owl:Class ;
    rdfs:label "Decision" .

##################################################
# Object properties
##################################################

ex:assignedTo a owl:ObjectProperty ;
    rdfs:domain ex:Task ;
    rdfs:range  ex:Agent ;
    rdfs:label "assigned to" .

ex:dependsOn a owl:ObjectProperty ;
    rdfs:domain ex:Task ;
    rdfs:range  ex:Task ;
    rdfs:label "depends on" .

ex:relatedToProject a owl:ObjectProperty ;
    rdfs:domain ex:Task ;
    rdfs:range  ex:Project ;
    rdfs:label "related to project" .

ex:wasDecidedBy a owl:ObjectProperty ;
    rdfs:domain ex:Decision ;
    rdfs:range  ex:Agent ;
    rdfs:label "was decided by" .

ex:concernsTask a owl:ObjectProperty ;
    rdfs:domain ex:Decision ;
    rdfs:range  ex:Task ;
    rdfs:label "concerns task" .

##################################################
# Datatype properties
##################################################

ex:hasStatus a owl:DatatypeProperty ;
    rdfs:domain ex:Task ;
    rdfs:range  xsd:string ;
    rdfs:label "has status" .
"""

# Parse the Turtle schema into a graph of its own, then report its size
g_onto = Graph()
g_onto.parse(format="turtle", data=ONTOLOGY_TTL)

print(f"Ontology loaded with {len(g_onto)} triples ✅")


Ontology loaded with 33 triples ✅


## 1.1 Populate the ontology

In [None]:
# Instance data in Turtle: one project, three agents, five tasks (each with a
# status, an assignee and a project; three of them also depend on another
# task) and two decisions. Every `ex:` term lives under
# https://rahaf.org/projects#, the same IRI the SPARQL queries declare.
DATA_TTL = """
@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl:  <http://www.w3.org/2002/07/owl#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex:   <https://rahaf.org/projects#> .

##################################################
# Project
##################################################

ex:Project_AttendanceSystem a ex:Project ;
    rdfs:label "Attendance System" .

##################################################
# Agents (people)
##################################################

ex:Agent_Lora a ex:Agent ;
    foaf:name "Lora" .

ex:Agent_Rahaf a ex:Agent ;
    foaf:name "Rahaf" .

ex:Agent_Sam a ex:Agent ;
    foaf:name "Sam" .

##################################################
# Tasks
##################################################

ex:Task_APIBackend a ex:Task ;
    rdfs:label "Implement API backend" ;
    ex:hasStatus "In Progress" ;
    ex:assignedTo ex:Agent_Lora ;
    ex:relatedToProject ex:Project_AttendanceSystem .

ex:Task_FrontendUI a ex:Task ;
    rdfs:label "Design frontend UI" ;
    ex:hasStatus "Not Started" ;
    ex:assignedTo ex:Agent_Rahaf ;
    ex:relatedToProject ex:Project_AttendanceSystem .

ex:Task_DataPipeline a ex:Task ;
    rdfs:label "Build data pipeline" ;
    ex:hasStatus "In Progress" ;
    ex:assignedTo ex:Agent_Lora ;
    ex:relatedToProject ex:Project_AttendanceSystem ;
    ex:dependsOn ex:Task_APIBackend .

ex:Task_Notifications a ex:Task ;
    rdfs:label "Implement notifications" ;
    ex:hasStatus "Blocked" ;
    ex:assignedTo ex:Agent_Rahaf ;
    ex:relatedToProject ex:Project_AttendanceSystem ;
    ex:dependsOn ex:Task_APIBackend .

ex:Task_Dashboard a ex:Task ;
    rdfs:label "Create analytics dashboard" ;
    ex:hasStatus "Not Started" ;
    ex:assignedTo ex:Agent_Sam ;
    ex:relatedToProject ex:Project_AttendanceSystem ;
    ex:dependsOn ex:Task_DataPipeline .

##################################################
# Decisions
##################################################

ex:Decision_RateLimit a ex:Decision ;
    rdfs:comment "Set API rate limit to 200 requests per minute." ;
    ex:wasDecidedBy ex:Agent_Sam ;
    ex:concernsTask ex:Task_APIBackend .

ex:Decision_Priority a ex:Decision ;
    rdfs:comment "Prioritize data pipeline before dashboard." ;
    ex:wasDecidedBy ex:Agent_Rahaf ;
    ex:concernsTask ex:Task_DataPipeline .
"""

# Keep the instance data in its own graph, apart from the schema graph
g_data = Graph()
g_data.parse(format="turtle", data=DATA_TTL)

print(f"Data graph loaded with {len(g_data)} triples ✅")


Data graph loaded with 44 triples ✅


In [24]:
# Combine the schema graph and the data graph into the single graph `g`
# that the SPARQL queries below are run against.
g = g_onto + g_data
print(f"Combined graph has {len(g)} triples ✅")


Combined graph has 77 triples ✅


## 1.2 Querying the graph

In [None]:
# Namespace objects for the two vocabularies that appear in the data
EX = Namespace("https://rahaf.org/projects#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# One result row per (task, assignee) pair, ordered by the task label
query_tasks_agents = """
PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ex:   <https://rahaf.org/projects#>

SELECT ?task ?taskLabel ?agentName
WHERE {
  ?task a ex:Task ;
        rdfs:label ?taskLabel ;
        ex:assignedTo ?agent .
  ?agent foaf:name ?agentName .
}
ORDER BY ?taskLabel
"""

print("Running SPARQL query...\n")

# Result rows unpack in SELECT order: ?task, ?taskLabel, ?agentName
for task_iri, label_term, name_term in g.query(query_tasks_agents):
    task_label = str(label_term)
    agent_name = str(name_term)
    print(f"- Task: {task_label:30s} | Assigned to: {agent_name}")


Running SPARQL query...

- Task: Build data pipeline            | Assigned to: Lora
- Task: Create analytics dashboard     | Assigned to: Sam
- Task: Design frontend UI             | Assigned to: Rahaf
- Task: Implement API backend          | Assigned to: Lora
- Task: Implement notifications        | Assigned to: Rahaf


In [None]:
task_label_input = "Build data pipeline"

# The label is handed to the query through `initBindings` (pre-binding
# ?taskLabel) instead of being pasted into the SPARQL text. A label containing
# a double quote, a backslash or a newline would otherwise produce a malformed
# or altered query.
query_task_context = """
PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ex:   <https://rahaf.org/projects#>

SELECT ?task ?status ?agentName ?projectLabel ?decisionText
WHERE {
  ?task a ex:Task ;
        rdfs:label ?taskLabel ;
        ex:hasStatus ?status ;
        ex:assignedTo ?agent ;
        ex:relatedToProject ?project .

  ?agent foaf:name ?agentName .
  ?project rdfs:label ?projectLabel .

  OPTIONAL {
    ?decision a ex:Decision ;
              ex:concernsTask ?task ;
              rdfs:comment ?decisionText .
  }
}
"""

print("Running task context query for label:", task_label_input, "\n")

# Literal(...) builds a plain literal (no language tag, no datatype), which is
# how the labels are written in the Turtle data.
task_context_rows = g.query(
    query_task_context,
    initBindings={"taskLabel": Literal(task_label_input)},
)

for row in task_context_rows:
    print("Task IRI         :", row.task)
    print("Status           :", row.status)
    print("Assigned to      :", row.agentName)
    print("Project          :", row.projectLabel)
    # decisionText comes from the OPTIONAL block, so it is None when the task has no decision
    print("Decision (if any):", row.decisionText)
    print("-" * 60)


Running task context query for label: Build data pipeline 

Task IRI         : https://rahaf.org/projects#Task_DataPipeline
Status           : In Progress
Assigned to      : Lora
Project          : Attendance System
Decision (if any): Prioritize data pipeline before dashboard.
------------------------------------------------------------


In [27]:
from typing import Tuple, List, Optional

def get_task_context(task_label: str, graph: Graph) -> Tuple[Optional[str], List[dict]]:
    """
    Look up one task by its exact ``rdfs:label`` and describe it.

    The label is passed to the query through ``initBindings`` rather than being
    interpolated into the SPARQL text, so a label containing quotes,
    backslashes or newlines is matched literally instead of breaking (or
    rewriting) the query.

    Args:
        task_label: Exact label of the task, e.g. 'Build data pipeline'.
            It is matched as a plain literal (no language tag or datatype).
        graph: Graph to run the query against.

    Returns:
        ``(context_text, rows)``:
          - ``context_text``: multi-line summary (label, IRI, status, assignee,
            project, decisions) meant to be used as LLM input.
          - ``rows``: one dict per SPARQL result row with the keys ``task``,
            ``status``, ``agentName``, ``projectLabel`` and ``decisionText``
            (``None`` when that row carries no decision).
        If no task matches, a message is printed and ``(None, [])`` is returned.
    """
    query = """
    PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX ex:   <https://rahaf.org/projects#>

    SELECT ?task ?status ?agentName ?projectLabel ?decisionText
    WHERE {
      ?task a ex:Task ;
            rdfs:label ?taskLabel ;
            ex:hasStatus ?status ;
            ex:assignedTo ?agent ;
            ex:relatedToProject ?project .

      ?agent foaf:name ?agentName .
      ?project rdfs:label ?projectLabel .

      OPTIONAL {
        ?decision a ex:Decision ;
                  ex:concernsTask ?task ;
                  rdfs:comment ?decisionText .
      }
    }
    """

    # Pre-bind ?taskLabel to the requested label; the query text itself is constant.
    rows = list(graph.query(query, initBindings={"taskLabel": Literal(task_label)}))

    if not rows:
        print(f" No task found with label: {task_label}")
        return None, []

    # The task-level fields are read from the first row only; additional rows
    # are expected to differ just in the OPTIONAL decision text.
    first = rows[0]
    task_iri = str(first.task)
    status = str(first.status)
    agent_name = str(first.agentName)
    project_label = str(first.projectLabel)

    # Distinct decision texts in first-seen order (dict keys keep insertion order).
    decisions = list(
        dict.fromkeys(str(r.decisionText) for r in rows if r.decisionText)
    )

    context_lines = [
        f"Task label: {task_label}",
        f"Task IRI: {task_iri}",
        f"Status: {status}",
        f"Assigned to: {agent_name}",
        f"Project: {project_label}",
    ]

    if decisions:
        context_lines.append("Decisions:")
        context_lines.extend(f"- {d}" for d in decisions)
    else:
        context_lines.append("Decisions: None recorded in the knowledge graph.")

    context_text = "\n".join(context_lines)

    # Plain-dict copy of every result row, convenient for debugging or serialising.
    structured_rows = [
        {
            "task": str(r.task),
            "status": str(r.status),
            "agentName": str(r.agentName),
            "projectLabel": str(r.projectLabel),
            "decisionText": str(r.decisionText) if r.decisionText else None,
        }
        for r in rows
    ]

    return context_text, structured_rows


In [28]:
# Try the helper on a task that has no decision attached to it
ctx, rows = get_task_context("Implement notifications", g)

print("===== CONTEXT TEXT =====", ctx, sep="\n")

print("\n===== RAW ROWS (DEBUG) =====")
for raw_row in rows:
    print(raw_row)


===== CONTEXT TEXT =====
Task label: Implement notifications
Task IRI: https://rahaf.org/projects#Task_Notifications
Status: Blocked
Assigned to: Rahaf
Project: Attendance System
Decisions: None recorded in the knowledge graph.

===== RAW ROWS (DEBUG) =====
{'task': 'https://rahaf.org/projects#Task_Notifications', 'status': 'Blocked', 'agentName': 'Rahaf', 'projectLabel': 'Attendance System', 'decisionText': None}


# Stage 2: Answer questions with an LLM using graph context

In [64]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.llms import HuggingFacePipeline

model_name = "google/flan-t5-base"

# Fetch the tokenizer and the seq2seq model for the checkpoint named above
tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Bundle both into a text2text pipeline; each generation is capped at 256 new tokens
gen_pipeline = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=hf_model,
    max_new_tokens=256,
)

# LangChain wrapper so the pipeline can be driven through `llm.invoke(...)`
llm = HuggingFacePipeline(pipeline=gen_pipeline)

print("LLM is ready ✅")


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0


LLM is ready ✅


In [65]:
# Smoke test: send one prompt straight to the LLM, with no graph context involved
response = llm.invoke("Explain in one sentence what a Task in a project is.")
print(response)

Task is a task in a project.


In [88]:
from rdflib import Namespace
import re
import textwrap

# Namespace object for the project IRIs; same IRI as the `PREFIX ex:` line of the
# SPARQL queries. (The helper functions in this cell do not reference it.)
EX = Namespace("https://rahaf.org/projects#")

def list_task_labels(graph: Graph):
    """Return the rdfs:label of every ex:Task in `graph` as plain strings, in the query's ORDER BY order."""
    query = """
    PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex:   <https://rahaf.org/projects#>

    SELECT ?taskLabel
    WHERE {
      ?task a ex:Task ;
            rdfs:label ?taskLabel .
    }
    ORDER BY ?taskLabel
    """
    found = []
    for result in graph.query(query):
        # Convert the RDF term to a plain Python string
        found.append(str(result.taskLabel))
    return found

def _normalize_tokens(text: str):
    # Lowercase and keep only alphanumeric "words"
    return re.findall(r"\w+", text.lower())

def guess_task_label_from_question(question: str, graph: Graph) -> Optional[str]:
    """
    Pick the task label that best matches a free-text question.

    Heuristic:
    - get all task labels from the KG
    - tokenize both label and question
    - compute overlap score = |label_tokens ∩ question_tokens| / |label_tokens|
    - pick the label with the best score (on a tie, the first one in the
      order returned by `list_task_labels` wins)

    Args:
        question: The user's question.
        graph: Knowledge graph the task labels are read from.

    Returns:
        The best-matching label, or ``None`` when no label shares a single
        token with the question (a diagnostic message is printed in that case).

    Raises:
        ValueError: If the graph contains no task labels at all.
    """
    labels = list_task_labels(graph)

    # Fail fast before doing any tokenisation work
    if not labels:
        raise ValueError("No tasks found in the knowledge graph.")

    q_tokens = set(_normalize_tokens(question))

    best_label = None
    best_score = 0.0

    for label in labels:
        label_tokens = set(_normalize_tokens(label))
        if not label_tokens:
            # Label made only of punctuation/whitespace: nothing to compare
            continue

        score = len(label_tokens & q_tokens) / len(label_tokens)

        # Strict '>' means a zero score never wins and ties keep the earlier label
        if score > best_score:
            best_score = score
            best_label = label

    if best_label is None:
        print(" No token overlap between question and labels.")
        print("   Available task labels:", labels)
        return None

    print(f"overlap score={best_score:.2f}")
    return best_label

def answer_question_with_graph(question: str, graph: Graph, llm) -> str:
    """
    Full mini GraphRAG pipeline:
    1) choose a task label based on the question
    2) get graph context for that task (SPARQL)
    3) ask the LLM to answer using ONLY that context

    Args:
        question: The user's question.
        graph: Knowledge graph to look the task up in.
        llm: Object exposing ``invoke(prompt)``; its result is returned as is.

    Returns:
        The LLM's answer, or a fixed "Could not find ..." message when no task
        label matches the question or the matched label yields no context.
    """
    task_label = guess_task_label_from_question(question, graph)
    if task_label is None:
        return "Could not find any task label"

    context_text, _ = get_task_context(task_label, graph)
    if context_text is None:
        return f"Could not find any task for question: {question}"

    # Dedent the template BEFORE filling it in. The context is multi-line and
    # its continuation lines start at column 0, so dedenting the already
    # formatted prompt finds no common indentation and strips nothing, which
    # leaves every template line indented in the text sent to the model.
    prompt_template = textwrap.dedent(
        """
        You are given some structured context about a project task.

        Context:
        {context_text}

        Question:
        {question}

        Answer the question using only the context above.
        If the answer is not in the context, say:
        "The knowledge graph does not contain this information."

        Answer:
        """
    ).strip()

    # str.format only parses the template, so braces inside the context or the
    # question are inserted verbatim.
    prompt = prompt_template.format(context_text=context_text, question=question)

    return llm.invoke(prompt)


In [94]:
# Another question to try: "Which tasks share the same assigned agent?"
questions = {
    'q1': "Why is the notifications task blocked?",
    'q2': "Who is responsible for building the data pipeline",
    'q3': "What is the status of the task building the data pipeline ?",
    'q4': "Who is working on the frontend UI?",
    'q5': "What are the tasks that Rahaf is in charge of?",
}

# Run every question through the pipeline and show the question/answer pairs
for question in questions.values():
    print("===== QUESTION =====")
    print(question)
    answer = answer_question_with_graph(question, g, llm)
    print("\n===== LLM ANSWER =====")
    print(answer)
    print("=================================================================\n")


===== QUESTION =====
Why is the notifications task blocked?
overlap score=0.50

===== LLM ANSWER =====
The knowledge graph does not contain this information

===== QUESTION =====
Who is responsible for building the data pipeline
overlap score=0.67

===== LLM ANSWER =====
Lora

===== QUESTION =====
What is the status of the task building the data pipeline ?
overlap score=0.67

===== LLM ANSWER =====
In Progress

===== QUESTION =====
Who is working on the frontend UI?
overlap score=0.67

===== LLM ANSWER =====
Rahaf

===== QUESTION =====
What are the tasks that Rahaf is in charge of?
 No token overlap between question and labels.
   Available task labels: ['Build data pipeline', 'Create analytics dashboard', 'Design frontend UI', 'Implement API backend', 'Implement notifications']

===== LLM ANSWER =====
Could not find any task label

