In [15]:
from dotenv import load_dotenv
import os

# Load variables from the project's .env file
load_dotenv(dotenv_path=r"C:\Users\Asus\Downloads\GraphRAG Project\Secrets\.env")

# Hugging Face token -- stop immediately when it is unset or empty
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
print("HF_TOKEN loaded successfully")

# Neo4j connection settings (the database name falls back to "neo4j")
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.getenv(key) for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")

# Every setting must be present and non-empty
if not (NEO4J_URI and NEO4J_USERNAME and NEO4J_PASSWORD and NEO4J_DATABASE):
    raise ValueError("One or more Neo4j credentials are missing in environment variables")

print("Neo4j credentials loaded successfully")

HF_TOKEN loaded successfully
Neo4j credentials loaded successfully


In [16]:
# Bundle the Neo4j username and password into a single (user, password) tuple
AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

In [17]:
import json
# Load the extracted triplets from disk. JSON text is UTF-8 by specification, so the
# encoding is given explicitly instead of being left to the platform default.
with open(r"C:\Users\Asus\Downloads\GraphRAG Project\Triples\graphrag_triplets.json", "r", encoding="utf-8") as f:
    triplets = json.load(f)

In [18]:
# Number of triplet records read from the JSON file
len(triplets)

810

In [19]:
from neo4j import GraphDatabase

# Create the Neo4j driver, reusing the AUTH credential pair defined earlier
driver = GraphDatabase.driver(NEO4J_URI, auth=AUTH)

In [20]:
import re

def sanitize_relation(rel, fallback="related_to"):
    """Turn free-text relation wording into a name usable as a relationship type.

    The text is trimmed, lower-cased, spaces become underscores, and every
    character other than ASCII letters, digits and underscores is dropped. A
    result that starts with a digit is prefixed with ``rel_``.

    Args:
        rel: Raw relation text.
        fallback: Name returned when nothing survives sanitization (for example
            punctuation-only, blank or non-ASCII text), so callers never receive
            an empty relationship type.

    Returns:
        A non-empty relationship name (assuming ``fallback`` is non-empty).
    """
    rel = rel.strip().lower().replace(" ", "_")
    rel = re.sub(r"[^a-zA-Z0-9_]", "", rel)  # Keep only alphanumerics and underscores
    if not rel:
        # An empty type would be interpolated as `-[r:]->`, which is not valid Cypher.
        return fallback
    if re.match(r"^\d", rel):  # If it starts with a digit, prefix it
        rel = "rel_" + rel
    return rel

def insert_triplet(tx, subj, rel, obj):
    """Merge one (subject)-[relation]->(object) triplet into the graph.

    Both endpoints are merged as ``Entity`` nodes keyed by ``name``. Node names
    travel as query parameters; the relationship type is interpolated into the
    query text after being cleaned by ``sanitize_relation``.

    Args:
        tx: Object exposing ``run(query, **params)``.
        subj: Name of the source entity.
        rel: Raw relation text.
        obj: Name of the target entity.
    """
    rel_type = sanitize_relation(rel)
    cypher = f"""
    MERGE (a:Entity {{name: $subj}})
    MERGE (b:Entity {{name: $obj}})
    MERGE (a)-[r:{rel_type}]->(b)
    """
    params = {"subj": subj, "obj": obj}
    tx.run(cypher, **params)


def load_triplets_to_neo4j(filepath, database=None):
    """Read triplets from a JSON file and merge each one into Neo4j.

    Args:
        filepath: Path to a JSON file holding a list of objects with
            ``subject``, ``relation`` and ``object`` keys.
        database: Optional database name passed to ``driver.session``; ``None``
            keeps the driver's default database.

    Raises:
        KeyError: If a triplet lacks one of the three expected keys.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        triplets = json.load(f)

    with driver.session(database=database) as session:
        for t in triplets:
            # Each triplet is written through its own execute_write call.
            session.execute_write(insert_triplet, t["subject"], t["relation"], t["object"])

# Usage: merge the project's triplet file into the graph
load_triplets_to_neo4j(r'C:\Users\Asus\Downloads\GraphRAG Project\Triples\graphrag_triplets.json')

In [21]:
def save_kg_summary(output_path=r"C:\Users\Asus\Downloads\GraphRAG Project\KG\summary.json", database=None):
    """Write node/relationship counts and the relationship types to a JSON file.

    Args:
        output_path: Destination of the JSON summary; defaults to the project's
            ``KG\\summary.json`` file.
        database: Optional database name passed to ``driver.session``; ``None``
            keeps the driver's default database.
    """
    with driver.session(database=database) as session:
        node_count = session.run("MATCH (n) RETURN count(n)").single()[0]
        rel_count = session.run("MATCH ()-[r]->() RETURN count(r)").single()[0]
        rel_types = session.run("MATCH ()-[r]->() RETURN DISTINCT type(r)").value()

    summary = {
        "nodes": node_count,
        "relations": rel_count,
        # Sorted so the file content does not depend on the order rows come back in.
        "relation_types": sorted(rel_types),
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=4)

# Write the graph summary JSON file
save_kg_summary()

In [22]:
# Close the driver used for loading the triplets and writing the summary
driver.close()

In [23]:
import matplotlib.pyplot as plt
import networkx as nx

# Create a new driver (the previous one was closed above), reusing the AUTH pair
driver = GraphDatabase.driver(NEO4J_URI, auth=AUTH)

def get_triples():
    """Fetch every relationship in the graph as ``(source, target, relation)`` tuples.

    ``source`` and ``target`` are the ``name`` properties of the start and end
    nodes; ``relation`` is the relationship type.
    """
    edges = []
    with driver.session() as session:
        records = session.run("""
            MATCH (a)-[r]->(b)
            RETURN a.name AS source, type(r) AS relation, b.name AS target
        """)
        for record in records:
            edges.append((record["source"], record["target"], record["relation"]))
    return edges

# Fetch the edges, then close the driver even if the query fails; nothing below needs it.
try:
    triples = get_triples()
finally:
    driver.close()

# Build graph. A DiGraph keeps a single edge per (source, target) pair, so all relation
# types between the same two nodes are gathered and shown together in one label
# instead of the last one silently overwriting the others.
labels_by_edge = {}
for source, target, rel in triples:
    labels_by_edge.setdefault((source, target), []).append(rel)

G = nx.DiGraph()
for (source, target), rels in labels_by_edge.items():
    G.add_edge(source, target, label=", ".join(rels))

# Draw graph
plt.figure(figsize=(16, 12))
pos = nx.spring_layout(G, k=0.5, seed=42)  # fixed seed: same layout on every run
nx.draw(G, pos, with_labels=True, node_color='skyblue', edge_color='gray', node_size=2000, font_size=10)
edge_labels = nx.get_edge_attributes(G, 'label')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

# Save as image
plt.title("Knowledge Graph from Neo4j")
plt.savefig(r"C:\Users\Asus\Downloads\GraphRAG Project\KG\knowledge_graph_image.png", format="PNG", dpi=400)
plt.close()

print("✅ Graph saved as 'knowledge_graph_image.png'")


✅ Graph saved as 'knowledge_graph_image.png'
