In [4]:
import sys
sys.path.append("..")  # add project root, not src

from src.graph_processor import GraphProcessor
import pickle

In [5]:
# Load the document list from ../processed_documents.pkl (presumably written by
# 02_document_processing.ipynb, as the messages below suggest). On any handled
# failure `documents` becomes an empty list, so the `if documents:` guards in
# the following cells skip their work instead of raising NameError.
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load a pickle file that this project produced itself.
try:
    with open('../processed_documents.pkl', 'rb') as f:
        documents = pickle.load(f)
    print(f"✅ Loaded {len(documents)} processed documents")
except FileNotFoundError:
    print("❌ No processed documents found. Run 02_document_processing.ipynb first")
    documents = []
except (pickle.UnpicklingError, EOFError) as exc:
    # A truncated or corrupt file (e.g. an interrupted save) gets the same
    # "regenerate it" guidance as a missing file rather than a raw traceback.
    print(f"❌ Could not read processed documents ({exc!r}). Re-run 02_document_processing.ipynb")
    documents = []

✅ Loaded 2 processed documents


In [6]:
if documents:
    # Instantiate the project's GraphProcessor; the following cells reuse
    # this `processor` object.
    processor = GraphProcessor()

    # Report what the graph already contains before anything is added.
    stats = processor.get_graph_stats()
    print(f"Current graph stats: {stats}")

    if stats['nodes'] > 0:
        # Only prompt when there is existing data to lose.
        response = input("Graph already exists. Clear it? (y/n): ")
        # Tolerate stray whitespace and a spelled-out "yes"; previously any
        # answer other than exactly "y"/"Y" silently kept the old graph.
        if response.strip().lower() in ('y', 'yes'):
            processor.clear_graph()

Current graph stats: {'nodes': 0, 'relationships': 0}


In [7]:
if documents:
    print("🔄 Creating knowledge graph...")
    success = processor.create_knowledge_graph(documents)

    if not success:
        # A falsy result from create_knowledge_graph is reported as a failure.
        print("❌ Failed to create knowledge graph")
    else:
        # Re-read the counts so the summary reflects the graph as stored.
        final_stats = processor.get_graph_stats()
        print("\n📊 Final Graph Statistics:")
        print(f"Nodes: {final_stats['nodes']}")
        print(f"Relationships: {final_stats['relationships']}")

        # Peek at up to ten nodes; the query has no ORDER BY, so which ten
        # come back is up to the database.
        print("\n📝 Sample nodes:")
        sample_nodes = processor.neo4j_graph.query("""
            MATCH (n) 
            RETURN labels(n) as type, n.id as id 
            LIMIT 10
        """)

        # Each result row is indexed by the aliases used in the RETURN clause.
        for record in sample_nodes:
            print(f"  {record['type']}: {record['id']}")

🔄 Creating knowledge graph...
Converting documents to graph format...
Created 2 graph documents
Storing graph in Neo4j...
Graph created successfully: 23 nodes, 17 relationships

📊 Final Graph Statistics:
Nodes: 23
Relationships: 17

📝 Sample nodes:
  ['Technology']: Graphrag System
  ['Technology']: Langchain
  ['Technology']: Gemini
  ['Technology']: Neo4J
  ['Concept']: Document Processing
  ['Concept']: Knowledge Graph Creation
  ['Concept']: Vector Storage
  ['Concept']: Hybrid Retrieval
  ['Technology']: Python
  ['Concept']: Semantic Chunking


In [8]:
if documents:
    print("\n🔍 Graph Exploration:")

    # --- Node counts grouped by label list, largest group first ---
    node_types = processor.neo4j_graph.query("""
        MATCH (n) 
        RETURN DISTINCT labels(n) as node_type, count(n) as count
        ORDER BY count DESC
    """)

    print("Node types:")
    for row in node_types:
        print(f"  {row['node_type']}: {row['count']}")

    # --- Relationship counts grouped by type, largest group first ---
    rel_types = processor.neo4j_graph.query("""
        MATCH ()-[r]->() 
        RETURN type(r) as rel_type, count(r) as count
        ORDER BY count DESC
    """)

    print("\nRelationship types:")
    for row in rel_types:
        print(f"  {row['rel_type']}: {row['count']}")

    # --- Top ten nodes by number of attached relationships ---
    # The pattern is undirected, so both incoming and outgoing edges count.
    connected_nodes = processor.neo4j_graph.query("""
        MATCH (n)-[r]-()
        RETURN n.id as node, labels(n) as type, count(r) as connections
        ORDER BY connections DESC
        LIMIT 10
    """)

    print("\nMost connected nodes:")
    for row in connected_nodes:
        print(f"  {row['node']} ({row['type']}): {row['connections']} connections")


🔍 Graph Exploration:
Node types:
  ['Concept']: 18
  ['Technology']: 5

Relationship types:
  RELATED_TO: 12
  IMPLEMENTS: 5

Most connected nodes:
  Gemini (['Technology']): 3 connections
  Graphrag System (['Technology']): 3 connections
  Neo4J (['Technology']): 3 connections
  Relationships (['Concept']): 2 connections
  Hybrid Retrieval (['Concept']): 2 connections
  Vector Storage (['Concept']): 2 connections
  Langchain (['Technology']): 2 connections
  Graph Traversal (['Concept']): 2 connections
  Knowledge Graph Creation (['Concept']): 1 connections
  Semantic Chunking (['Concept']): 1 connections
