In [1]:
!pip install dotenv neo4j

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Collecting python-dotenv (from dotenv)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, neo4j, dotenv
Successfully installed dotenv-0.9.9 neo4j-5.28.1 python-dotenv-1.1.0


In [2]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv(dotenv_path=r"/kaggle/input/secrets/.env")

# Retrieve the Hugging Face token
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
else:
    print("HF_TOKEN loaded successfully")

# Neo4j Credentials
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")

# Check if all are loaded
if not all([NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, NEO4J_DATABASE]):
    raise ValueError("One or more Neo4j credentials are missing in environment variables")

print("Neo4j credentials loaded successfully")

HF_TOKEN loaded successfully
Neo4j credentials loaded successfully


In [3]:
# user_query = "What are eBay's listing requirements?"
# user_query = "What laws govern the eBay User Agreement for U.S. users?"
user_query = "What ebay may terminate?"
# user_query = "Tell about Ebay servies?"

entities_in_query = user_query.split()

print("Entities in user query:", entities_in_query)

Entities in user query: ['What', 'ebay', 'may', 'terminate?']


In [4]:
from neo4j import GraphDatabase

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

def retrieve_relevant_triplets(entities):
    # query = """
    # MATCH (a)-[r]->(b)
    # WHERE a.name IN $entities OR b.name IN $entities
    # RETURN a.name AS subject, type(r) AS relation, b.name AS object
    # LIMIT 10
    # """
    # query = """
    # MATCH (a)-[r]->(b)
    # WHERE ANY(e IN $entities WHERE a.name CONTAINS e OR b.name CONTAINS e)
    # RETURN a.name AS subject, type(r) AS relation, b.name AS object
    # LIMIT 30
    # """
    query = """
    MATCH (a)-[r]->(b)
        WHERE ANY(e IN $entities WHERE toLower(a.name) CONTAINS toLower(e) OR toLower(b.name) CONTAINS toLower(e))
        RETURN a.name AS subject, type(r) AS relation, b.name AS object
        LIMIT 30
    """
    with driver.session() as session:
        result = session.run(query, entities=entities)
        return [f"{row['subject']} {row['relation']} {row['object']}" for row in result]

In [5]:
# import torch
# from transformers import pipeline

# # Initialize the LLaMA 3.2B Instruct model for chat-style input
# model_id = "meta-llama/Llama-3.2-3B-Instruct"
# llm = pipeline(
#     "text-generation",
#     model=model_id,
#     torch_dtype=torch.bfloat16,
#     device_map="cuda",
# )

# # Step 1: Get relevant graph facts
# triplets = retrieve_relevant_triplets(entities_in_query)

# # Step 2: Build a context-aware question prompt
# def build_prompt(context_triplets, question):
#     # if not context_triplets:
#     #     context = "No relevant facts were found in the knowledge graph."
#     # else:
#     context = "\n".join(f"- {triplet}" for triplet in context_triplets)
#     print(context)
    
#     return f"""Answer the user's question strictly using the knowledge graph facts provided.
# If the answer is not directly supported by the facts, respond with "I don't have enough information to answer that."
# And tell what you can tell
# Facts:
# {context}

# Question: {question}
# Answer:"""

# prompt = build_prompt(triplets, user_query)

# # Step 3: Wrap the prompt into chat-style input
# messages = [
#     {
#         "role": "system",
#         "content": (
#             "You are a precise assistant specialized in answering questions using knowledge graph data. "
#             "You only use the facts provided and avoid speculation."
#         ),
#     },
#     {"role": "user", "content": prompt},
# ]

# # Step 4: Run the model
# outputs = llm(messages, max_new_tokens=200, do_sample=False)

# # Step 5: Extract and print response
# response_text = outputs[0]["generated_text"]
# cleaned_response = "\n\nAnswer: " + response_text[2]['content'].split("Answer:")[-1].strip()
# print(cleaned_response)

In [6]:
driver.close()

In [8]:
import os
import json
import requests
from neo4j import GraphDatabase

# -- Neo4j Setup --
NEO4J_URI = os.environ["NEO4J_URI"]
NEO4J_USERNAME = os.environ["NEO4J_USERNAME"]
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

def retrieve_relevant_triplets(entities):
    query = """
    MATCH (a)-[r]->(b)
    WHERE ANY(e IN $entities WHERE toLower(a.name) CONTAINS toLower(e) OR toLower(b.name) CONTAINS toLower(e))
    RETURN a.name AS subject, type(r) AS relation, b.name AS object
    LIMIT 30
    """
    with driver.session() as session:
        result = session.run(query, entities=entities)
        return [f"{row['subject']} {row['relation']} {row['object']}" for row in result]

# -- Prompt Construction --
def build_prompt(context_triplets, question):
    context = "\n".join(f"- {triplet}" for triplet in context_triplets) if context_triplets else "No relevant facts were found in the knowledge graph."
    return f"""Answer the user's question strictly using the knowledge graph facts provided.
If the answer is not directly supported by the facts, respond with "I don't have enough information to answer that."
And tell what you can tell.
Facts:
{context}

Question: {question}
Answer:"""

# -- Hugging Face Streaming Chat Completion --
API_URL = "https://router.huggingface.co/sambanova/v1/chat/completions"
headers = {
    "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
}

def query_llm(messages):
    payload = {
        "model": "Meta-Llama-3.2-3B-Instruct",
        "messages": messages,
        "stream": True,
    }
    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    for line in response.iter_lines():
        if not line.startswith(b"data:"):
            continue
        if line.strip() == b"data: [DONE]":
            return
        yield json.loads(line.decode("utf-8").lstrip("data:").rstrip("/n"))

# -- Main Workflow --
if __name__ == "__main__":
    user_query = "What ebay may terminate?"
    entities_in_query = user_query.split()  # Simple token-based entity extraction

    print("Entities in user query:", entities_in_query)

    # Step 1: Get KG triplets
    triplets = retrieve_relevant_triplets(entities_in_query)

    # Step 2: Build prompt
    prompt = build_prompt(triplets, user_query)

    # Step 3: Prepare chat-style input
    messages = [
        {
            "role": "system",
            "content": (
                "You are a precise assistant specialized in answering questions using knowledge graph data. "
                "You only use the facts provided and avoid speculation."
            ),
        },
        {"role": "user", "content": prompt},
    ]

    # Step 4: Query LLM and stream response
    print("\nAnswer:", end=" ", flush=True)
    for chunk in query_llm(messages):
        content = chunk["choices"][0]["delta"].get("content", "")
        print(content, end="", flush=True)

Entities in user query: ['What', 'ebay', 'may', 'terminate?']

Answer: According to the facts, eBay may terminate:

1. Our Services
2. Anyone
3. This Agreement
4. At any time
5. With notice
6. By giving notice
7. The Services