In [1]:
import os
from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph

_ = load_dotenv()
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

graph = Neo4jGraph(refresh_schema=False)

In [2]:
from neo4j import GraphDatabase

# Connect to the Neo4j database
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Define a function to run the APOC procedure
def export_all_to_json(tx):
    result = tx.run("CALL apoc.export.json.all(null, {stream: true})")
    return result.single()['data']

# Use the driver to run the function
with driver.session() as session:
    json_data = session.execute_read(export_all_to_json)

# Close the driver
driver.close()

# Now json_data contains the JSON representation of your entire graph
print(json_data)


{"type":"node","id":"16","labels":["Person"],"properties":{"id":"Marie Curie"}}
{"type":"node","id":"17","labels":["Person"],"properties":{"id":"Pierre Curie"}}
{"type":"node","id":"18","labels":["Organization"],"properties":{"id":"University Of Paris"}}
{"type":"node","id":"19","labels":["Award"],"properties":{"id":"Nobel Prize"}}
{"type":"relationship","id":"1152937997281263632","label":"WINNER","start":{"id":"16","labels":["Person"],"properties":{"id":"Marie Curie"}},"end":{"id":"19","labels":["Award"],"properties":{"id":"Nobel Prize"}}}
{"type":"relationship","id":"1152939096792891408","label":"SPOUSE","start":{"id":"16","labels":["Person"],"properties":{"id":"Marie Curie"}},"end":{"id":"17","labels":["Person"],"properties":{"id":"Pierre Curie"}}}
{"type":"relationship","id":"1152940196304519184","label":"PROFESSOR","start":{"id":"16","labels":["Person"],"properties":{"id":"Marie Curie"}},"end":{"id":"18","labels":["Organization"],"properties":{"id":"University Of Paris"}}}
{"type"

In [3]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_ollama import OllamaLLM
from langchain_groq import ChatGroq
from langchain_google_genai import GoogleGenerativeAI

In [4]:



# llm = ChatOpenAI(model="gpt-4o-mini")
# llm = OllamaLLM(model="deepseek-r1:1.5b", temperature=0, format="json")
# llm = OllamaLLM(model="gemma3:4b", temperature=0, format="json")
# llm = OllamaLLM(model="mistral", temperature=0, format="json")

# llm = GoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     api_key=os.getenv("GOOGLE_API_KEY"),
#     temperature=0,
#     max_tokens=None,
#     timeout=None,
#     max_retries=2,
#     # other params...    
# )
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model=  "meta-llama/llama-4-scout-17b-16e-instruct", # "llama-3.3-70b-versatile", # "deepseek-r1-distill-llama-70b", # 
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)
llm_transformer = LLMGraphTransformer(llm=llm)

In [5]:
llm.invoke("Hi how are you?! what is going on?")

AIMessage(content="I'm doing well, thanks for asking! I'm a large language model, so I don't have feelings or emotions like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss.\n\nAs for what's going on, I'm here and ready to assist you 24/7! I don't have real-time access to current events, but I can certainly try to help with any questions you have about the world or provide information on a wide range of topics.\n\nSo, how about you? How's your day going so far? Is there something specific on your mind that you'd like to talk about or learn more about? I'm all ears (or rather, all text)!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 141, 'prompt_tokens': 20, 'total_tokens': 161, 'completion_time': 0.289478002, 'prompt_time': 0.002903674, 'queue_time': 1.3428619920000002, 'total_time': 0.292381676}, 'model_name': 'meta-llama/llama-4-scout-17b-16e-instruct', 'system_fingerprint': 'fp_5e21a8eaff',

In [6]:
from langchain_core.documents import Document

text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""
documents = [Document(page_content=text)]
graph_documents = llm_transformer.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")
def clean_graph():
    query = """
    MATCH (n)
    DETACH DELETE n
    """
    graph.query(query)

clean_graph()
graph.add_graph_documents(graph_documents)

Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Nobel Prize', type='Award', properties={})]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WINNER', properties={}), Relationship(source=Node(id='Pierre Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WINNER', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={})]


In [None]:
##TODO NEO4J Knowledge Graph Evaluation ##

from neo4j import GraphDatabase

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# 2. Ground Truth (Represent it in a way that's easy to compare)
ground_truth = {
    ("Marie_Curie", "BORN_IN", "1867"),
    ("Marie_Curie", "conducted", "research"),  # More general relation
    ("Marie_Curie", "conducted", "radioactivity"), # More specific relation
    ("Marie_Curie", "won", "Nobel_Prize"),
    ("Marie_Curie", "won", "Nobel_Prize"),  # Twice
    ("Marie_Curie", "professor_at", "University_of_Paris"),
    ("Pierre_Curie", "won", "Nobel_Prize"),
    ("Marie_Curie", "married_to", "Pierre_Curie"),  # Or a more specific relation
    ("Marie_Curie", "member_of", "Curie_family"),
    ("Pierre_Curie", "member_of", "Curie_family")
}

# 3. Evaluation (against ground truth)
def evaluate_knowledge_graph():
    with driver.session() as session:
        # a. Completeness (more precise now)
        completeness_count = 0
        for subject, relation, object_ in ground_truth:
            query = """
            MATCH (s:Entity {name: $subject})-[r]->(o:Entity {name: $object})
            WHERE type(r) = $relation  // Important: Match relation type
            RETURN count(*) AS count
            """
            result = session.run(query, subject=subject, object=object_, relation=relation).single()
            if result and result.value() > 0:
                completeness_count += 1

        completeness = completeness_count / len(ground_truth)
        print(f"Completeness: {completeness}")

        # b. Correctness/Accuracy (using ground truth)
        correct_count = 0
        for subject, relation, object_ in ground_truth:
            query = """
            MATCH (s:Entity {name: $subject})-[r]->(o:Entity {name: $object})
            WHERE type(r) = $relation
            RETURN count(*) AS count
            """
            result = session.run(query, subject=subject, object=object_, relation=relation).single()
            if result and result.value() > 0:
                correct_count += 1

        accuracy = correct_count / get_total_relations(session) if get_total_relations(session) > 0 else 0 # divide by total relations in KG
        print(f"Accuracy: {accuracy}")

        # c. Redundancy (check for duplicate relations as well)
        redundancy_query = """
        MATCH (n:Entity {name: $name})
        RETURN count(n) as node_count
        """
        name = "Marie_Curie"
        node_count = session.run(redundancy_query, name=name).single().value()
        print(f"Redundancy for Marie Curie node: {node_count}") # should be 1 if no redundancy

        redundancy_relation_query = """
        MATCH (s:Entity {name: $subject})-[r]->(o:Entity {name: $object})
        WHERE type(r) = $relation
        RETURN count(*) AS count
        """

        redundancy_relation_count = 0
        for subject, relation, object_ in ground_truth:
            result = session.run(redundancy_relation_query, subject=subject, object=object_, relation=relation).single()
            if result and result.value() > 1: # if more than one relation exists
                redundancy_relation_count += 1
        print(f"Redundancy for relations: {redundancy_relation_count}")


        # ... (Other metrics can be added as needed)

def get_total_relations(session):
    relation_query = """
    MATCH ()-[r]->()
    RETURN count(r) as relation_count
    """
    result = session.run(relation_query).single()
    return result.value() if result else 0

evaluate_knowledge_graph()
driver.close()



Completeness: 0.0




Accuracy: 0.0
Redundancy for Marie Curie node: 0




Redundancy for relations: 0
