In [None]:
from openai import OpenAI
import json

# Shared client used for every chat-completion request in this notebook.
# It talks to the Ollama server on localhost through its OpenAI-compatible API.
# NOTE(review): "ollama" looks like a placeholder key rather than a real
# secret -- confirm before pointing this client at a hosted endpoint.
client = OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")

def chat(messages, model, temperature=0, **config):
    """Send one chat-completion request and return the first choice's message.

    Args:
        messages: Conversation, passed through unchanged as ``messages``.
        model: Model name, passed through unchanged as ``model``.
        temperature: Sampling temperature; defaults to 0.
        **config: Extra keyword arguments forwarded verbatim to
            ``client.chat.completions.create``.

    Returns:
        The ``message`` object of the first entry in ``response.choices``
        (the whole object, not only its text content).
    """
    request = dict(model=model, temperature=temperature, messages=messages)
    completion = client.chat.completions.create(**request, **config)
    first_choice = completion.choices[0]
    return first_choice.message


In [None]:
# JSON schema for the arguments of the routing tool. Only "graph" is required;
# "confidence" and "reason" are optional extras the model may supply.
_select_graph_parameters = {
    "type": "object",
    "properties": {
        "graph": {
            "type": "string",
            "description": "The selected graph database.",
            "enum": ["meta", "google", "nvidia"],
        },
        "confidence": {
            "type": "number",
            "description": "Confidence between 0 and 1 that this is the right graph.",
        },
        "reason": {
            "type": "string",
            "description": "Short explanation of why this graph was selected.",
        },
    },
    "required": ["graph"],
}

# The single function the router model can call.
_select_graph_function = {
    "name": "select_graph",
    "description": "Select which company graph DB (meta, google, or nvidia) is most relevant to answer the question.",
    "parameters": _select_graph_parameters,
}

# Tool list handed to the chat-completions call as ``tools``.
router_tools = [{"type": "function", "function": _select_graph_function}]


In [None]:
# Graph names the router may return: the same three options that the system
# prompt below and the ``select_graph`` enum in ``router_tools`` list.
_ROUTER_GRAPHS = ("meta", "google", "nvidia")
_ROUTER_DEFAULT_GRAPH = "meta"
_ROUTER_DEFAULT_CONFIDENCE = 0.5


def _router_fallback(reason: str) -> dict:
    """Build the default routing decision used when the model's reply is unusable."""
    return {
        "graph": _ROUTER_DEFAULT_GRAPH,
        "confidence": _ROUTER_DEFAULT_CONFIDENCE,
        "reason": reason,
    }


def _coerce_confidence(value) -> float:
    """Turn a model-supplied confidence into a float in [0, 1], or the default."""
    try:
        confidence = float(value)
    except (TypeError, ValueError):
        # Missing, null, or non-numeric value.
        return _ROUTER_DEFAULT_CONFIDENCE
    if confidence != confidence:  # NaN is the only value not equal to itself
        return _ROUTER_DEFAULT_CONFIDENCE
    return min(1.0, max(0.0, confidence))


def route_to_graph(question: str, model: str = "mistral:latest") -> dict:
    """
    Uses function calling to choose which graph db to query.

    The model is offered the ``select_graph`` tool from ``router_tools`` via the
    notebook-level ``client``. Its reply is model-generated text, so it is
    validated before use instead of being trusted.

    Args:
        question: The user's question, embedded in the routing prompt.
        model: Name of the chat model that makes the routing decision.

    Returns:
        dict like: {"graph": "meta", "confidence": 0.93, "reason": "..."}
        - "graph" is always one of "meta", "google", "nvidia".
        - "confidence" is always a float between 0 and 1.
        - "reason" is always a string (possibly empty).
        The default decision (graph "meta", confidence 0.5) is returned when the
        model makes no tool call, sends arguments that are not a JSON object, or
        names a graph outside the allowed set.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a router. Your job is to choose which company's knowledge graph "
                "should be queried to answer the user's question. "
                "Options: meta, google, nvidia."
            )
        },
        {
            "role": "user",
            "content": f"Decide which graph to use for this question: '{question}'"
        },
    ]

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=router_tools,
        tool_choice="auto",
        temperature=0,
    )

    message = response.choices[0].message

    if not message.tool_calls:
        # tool_choice="auto" lets the model answer in plain text instead.
        return _router_fallback("No tool call, defaulting to meta.")

    # Only the first tool call is considered.
    tool_call = message.tool_calls[0]
    try:
        args = json.loads(tool_call.function.arguments)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers arguments=None.
        return _router_fallback("Tool call arguments were not valid JSON, defaulting to meta.")
    if not isinstance(args, dict):
        return _router_fallback("Tool call arguments were not a JSON object, defaulting to meta.")

    # Tolerate case/whitespace differences ("Meta", " nvidia ") but reject
    # anything that is not one of the known graphs, so downstream dispatch
    # never sees an unknown name.
    raw_graph = args.get("graph", _ROUTER_DEFAULT_GRAPH)
    graph = raw_graph.strip().lower() if isinstance(raw_graph, str) else None
    if graph not in _ROUTER_GRAPHS:
        return _router_fallback(
            f"Router returned unsupported graph {raw_graph!r}, defaulting to meta."
        )

    reason = args.get("reason")
    return {
        "graph": graph,
        "confidence": _coerce_confidence(args.get("confidence")),
        "reason": "" if reason is None else str(reason),
    }


In [None]:
import os

from neo4j import GraphDatabase
import numpy as np
from sentence_transformers import SentenceTransformer

# Neo4j connection used by retrieve_from_meta. Each setting can be overridden
# through an environment variable; the defaults are this notebook's local
# development values.
# NOTE(review): set NEO4J_PASSWORD in the environment for anything that is not
# a throwaway local database.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "graphgraph")
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Embedding model used to turn a question into the vector sent to the index.
# NOTE(review): this must be the same model that produced the vectors stored in
# the 'pdf' index -- confirm against the ingestion code.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

def retrieve_from_meta(question: str, k: int = 4) -> list[dict]:
    """
    Look up the k chunks closest to the question in the Meta Neo4j graph.

    The question is embedded with ``embed_model`` and the resulting vector is
    passed to the vector index named 'pdf' via ``db.index.vector.queryNodes``.

    Returns one plain dict per hit, with keys: text, score, index.
    """
    # encode() receives a one-item batch; take that item and make it a list.
    encoded_batch = embed_model.encode([question])
    query_vector = encoded_batch[0].tolist()

    cypher = """
    CALL db.index.vector.queryNodes('pdf', $k, $question_embedding) 
    YIELD node AS hits, score
    RETURN hits.text AS text, score, hits.index AS index
    """

    # Only the first element of the returned 3-tuple (the records) is needed.
    rows, _, _ = driver.execute_query(
        cypher,
        question_embedding=query_vector,
        k=k,
        database_="neo4j",  # change this if the data lives in another database
    )

    # Copy the three returned columns out of each record into a plain dict.
    hits = []
    for row in rows:
        hits.append({"text": row["text"], "score": row["score"], "index": row["index"]})
    return hits


def retrieve_from_google(question: str, k: int = 4) -> list[dict]:
    """Stub for the Google graph retriever; always raises NotImplementedError.

    Accepts ``question`` and ``k`` but does not use either of them yet.
    """
    not_ready = NotImplementedError("Google graph retriever not implemented yet.")
    raise not_ready


def retrieve_from_nvidia(question: str, k: int = 4) -> list[dict]:
    """Stub for the NVIDIA graph retriever; always raises NotImplementedError.

    Accepts ``question`` and ``k`` but does not use either of them yet.
    """
    not_ready = NotImplementedError("NVIDIA graph retriever not implemented yet.")
    raise not_ready


In [None]:
def retrieve_docs(question: str, graph_choice: str, k: int = 4) -> list[dict]:
    """
    Run the retriever that belongs to ``graph_choice`` and return its result.

    Recognised choices are "meta", "google" and "nvidia" (exact match); any
    other value raises ValueError.
    """
    # Pick the retriever first, then make a single call at the end.
    if graph_choice == "meta":
        retriever = retrieve_from_meta
    elif graph_choice == "google":
        retriever = retrieve_from_google
    elif graph_choice == "nvidia":
        retriever = retrieve_from_nvidia
    else:
        raise ValueError(f"Unknown graph choice: {graph_choice}")

    return retriever(question, k=k)


In [None]:
# System prompt for the answering call. It restricts the model to the supplied
# documents and fixes the exact sentence to use when they do not contain the
# answer. The text starts and ends with a newline.
ANSWER_SYSTEM_PROMPT = (
    "\n"
    "You're a company ESG expert. \n"
    "You can only use the provided documents to answer the user's question.\n"
    "If the answer is not present or not fully specified in the documents, say: "
    "\"I don't know based on these documents.\"\n"
    "You may see HTML structures (tables); interpret them as data.\n"
)

def _format_docs_for_prompt(docs: list[dict]) -> str:
    """Render retrieved chunks as numbered plain-text sections for the prompt."""
    # Skip chunks whose text is missing or empty so they do not show up in the
    # prompt as "None" or as blank sections.
    texts = [text for text in (doc.get("text") for doc in docs) if text]
    sections = [
        f"[Document {number}]\n{text}"
        for number, text in enumerate(texts, start=1)
    ]
    return "\n\n".join(sections)


def answer_with_docs(question: str, docs: list[dict], model: str = "mistral:latest") -> str:
    """
    Ask the chat model to answer ``question`` using only the retrieved documents.

    Args:
        question: The user's question.
        docs: Retrieved chunks; only each dict's "text" entry is used.
        model: Name of the chat model that writes the answer.

    Returns:
        The ``content`` of the message returned by ``chat``.
    """
    # Insert the document text itself. Interpolating the Python list directly
    # would put its repr in the prompt: brackets, quote characters and escaped
    # "\n" sequences instead of real line breaks.
    docs_text = _format_docs_for_prompt(docs)

    user_message = f"""
Use the following documents to answer the question.
Only use information from these documents; if you don't know, say you don't know.

Documents:
{docs_text}

---

Question: {question}
"""

    msg = chat(
        [
            {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
        model=model,
        temperature=0,
    )
    return msg.content


In [None]:
def main(question: str, model: str = "mistral:latest", k: int = 4) -> str:
    """
    Answer ``question`` end to end: route, retrieve, then answer.

    Args:
        question: The user's question.
        model: Chat model used for both the routing and the answering call.
        k: Number of chunks requested from the retriever.

    Returns:
        The model's answer, or a short explanatory message when the chosen
        graph has no retriever yet or the retriever returned nothing.
    """
    # 1) Route to the right graph
    routing = route_to_graph(question, model=model)
    graph_choice = routing["graph"]

    print(f"[ROUTER] Chosen graph: {graph_choice} (conf={routing['confidence']:.2f})")
    if routing["reason"]:
        print(f"[ROUTER REASON] {routing['reason']}")

    # 2) Retrieve docs. The router can pick a graph whose retriever is still a
    #    placeholder; report that as a message instead of a traceback.
    try:
        docs = retrieve_docs(question, graph_choice, k=k)
    except NotImplementedError as exc:
        return f"The '{graph_choice}' graph is not available yet: {exc}"

    if not docs:
        return "I couldn't find any relevant documents in the selected graph."

    # 3) Answer based only on the retrieved docs
    return answer_with_docs(question, docs, model=model)


In [None]:
# Example run: send one sample question through the whole pipeline and print
# the returned answer.
demo_question = "meta scope 1 emissions"
print(main(demo_question))