In [1]:
import sys
sys.path.append('..')

from src.document_loader import DocumentProcessor
from src.graph_processor import GraphProcessor
from src.retrieval_system import RetrievalSystem
from src.config import config
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def run_complete_pipeline():
    """Run the complete GraphRAG pipeline"""
    print("üöÄ Starting Complete GraphRAG Pipeline")
    print("="*50)
    
    start_time = time.time()
    
    # Step 1: Load Documents
    print("\nüìö Step 1: Loading Documents...")
    processor = DocumentProcessor()
    documents = processor.load_documents()
    
    if not documents:
        print("‚ùå No documents found. Please add .docx files to the documents folder.")
        return None
    
    # Step 2: Create Knowledge Graph
    print("\nüï∏Ô∏è  Step 2: Creating Knowledge Graph...")
    graph_processor = GraphProcessor()
    
    # Clear existing graph
    graph_processor.clear_graph()
    
    # Create new graph
    success = graph_processor.create_knowledge_graph(documents)
    
    if not success:
        print("‚ùå Failed to create knowledge graph")
        return None
    
    # Step 3: Initialize Retrieval System
    print("\nüîç Step 3: Setting up Retrieval System...")
    retrieval_system = RetrievalSystem(documents)
    
    end_time = time.time()
    print(f"\n‚úÖ Pipeline completed in {end_time - start_time:.2f} seconds")
    
    # Step 4: Show Statistics
    stats = graph_processor.get_graph_stats()
    print("\nüìä System Statistics:")
    print(f"  Documents processed: {len(documents)}")
    print(f"  Graph nodes: {stats['nodes']}")
    print(f"  Graph relationships: {stats['relationships']}")
    print(f"  Vector store size: {len(documents)}")
    
    return retrieval_system

In [3]:
retrieval_system = run_complete_pipeline()

üöÄ Starting Complete GraphRAG Pipeline

üìö Step 1: Loading Documents...
Found 1 documents to process...
Processing: sample_graphrag.docx
  Created 2 chunks
Total: 2 document chunks loaded

üï∏Ô∏è  Step 2: Creating Knowledge Graph...
Graph cleared successfully
Converting documents to graph format...
Created 2 graph documents
Storing graph in Neo4j...
Graph created successfully: 20 nodes, 19 relationships

üîç Step 3: Setting up Retrieval System...
Creating vector store...
Vector store created with 2 documents
‚ö†Ô∏è langchain-graph-retriever not available; falling back to vector-only retriever.

‚úÖ Pipeline completed in 30.03 seconds

üìä System Statistics:
  Documents processed: 2
  Graph nodes: 20
  Graph relationships: 19
  Vector store size: 2


In [4]:
if retrieval_system:
    print("\nüß™ Comprehensive System Testing")
    print("="*40)
    
    # Test categories
    test_cases = {
        "Factual Questions": [
            "What is GraphRAG?",
            "What technologies are used?",
            "What are the main components?"
        ],
        "Analytical Questions": [
            "How do the components work together?",
            "What are the advantages of this approach?",
            "What makes this system different from traditional RAG?"
        ],
        "Technical Questions": [
            "How does the graph traversal work?",
            "What is the role of embeddings?",
            "How are entities and relationships extracted?"
        ]
    }
    
    for category, questions in test_cases.items():
        print(f"\nüìã {category}:")
        for i, question in enumerate(questions, 1):
            print(f"\n  Q{i}: {question}")
            try:
                start_time = time.time()
                response = retrieval_system.query(question)
                end_time = time.time()
                
                print(f"  A{i}: {response[:300]}...")
                print(f"  ‚è±Ô∏è  Response time: {end_time - start_time:.2f}s")
                
            except Exception as e:
                print(f"  ‚ùå Error: {str(e)}")


üß™ Comprehensive System Testing

üìã Factual Questions:

  Q1: What is GraphRAG?
  A1: Based on the provided context from `sample_graphrag.docx`, a GraphRAG system is an implementation that leverages a combination of technologies and processes to enhance context understanding and retrieval accuracy.

Key aspects of a GraphRAG system include:
*   **Components**:
    *   **Document Proc...
  ‚è±Ô∏è  Response time: 5.24s

  Q2: What technologies are used?
  A2: The technologies used are:
*   LangChain
*   Gemini
*   Neo4j
*   Python

Source: sample_graphrag.docx...
  ‚è±Ô∏è  Response time: 1.83s

  Q3: What are the main components?
  A3: The main components of the GraphRAG system implementation are:

1.  **Document Processing**: Uses semantic chunking to process documents.
2.  **Knowledge Graph Creation**: Extracts entities and relationships using an LLM.
3.  **Vector Storage**: Stores embeddings for similarity search.
4.  **Hybrid ...
  ‚è±Ô∏è  Response time: 2.15s

üìã Analytical Q

In [5]:
if retrieval_system:
    print("\nüìä System Monitoring Dashboard")
    print("="*35)
    
    # Get graph processor for stats
    graph_processor = GraphProcessor()
    
    while True:
        # Current stats
        stats = graph_processor.get_graph_stats()
        
        print("\nüîÑ Current Status (Press Ctrl+C to stop):")
        print(f"  Graph Nodes: {stats['nodes']}")
        print(f"  Graph Relations: {stats['relationships']}")
        print("  System Status: ‚úÖ Active")
        
        # Test query
        test_query = "System status check"
        try:
            start_time = time.time()
            _ = retrieval_system.query(test_query)
            response_time = time.time() - start_time
            print(f"  Response Time: {response_time:.2f}s")
            print("  Health: ‚úÖ Healthy")
        except Exception as e:
            print(f"  Health: ‚ùå Error - {str(e)}")
        
        try:
            time.sleep(30)  # Update every 30 seconds
        except KeyboardInterrupt:
            print("\nüëã Monitoring stopped")
            break


üìä System Monitoring Dashboard

üîÑ Current Status (Press Ctrl+C to stop):
  Graph Nodes: 20
  Graph Relations: 19
  System Status: ‚úÖ Active
  Response Time: 2.21s
  Health: ‚úÖ Healthy

üîÑ Current Status (Press Ctrl+C to stop):
  Graph Nodes: 20
  Graph Relations: 19
  System Status: ‚úÖ Active
  Response Time: 5.03s
  Health: ‚úÖ Healthy

üîÑ Current Status (Press Ctrl+C to stop):
  Graph Nodes: 20
  Graph Relations: 19
  System Status: ‚úÖ Active
  Response Time: 2.30s
  Health: ‚úÖ Healthy

üîÑ Current Status (Press Ctrl+C to stop):
  Graph Nodes: 20
  Graph Relations: 19
  System Status: ‚úÖ Active


KeyboardInterrupt: 

In [6]:
if retrieval_system:
    import pickle
    
    print("\nüíæ Saving System State...")
    
    # Save the retrieval system (without the heavy components)
    system_state = {
        'config': config,
        'stats': graph_processor.get_graph_stats(),
        'timestamp': time.time()
    }
    
    with open('../system_state.pkl', 'wb') as f:
        pickle.dump(system_state, f)
    
    print("‚úÖ System state saved successfully")
    print("\nüéâ GraphRAG System is ready for production use!")
    print("\nNext steps:")
    print("- Access Neo4j browser at http://localhost:7474")
    print("- Use the retrieval_system.query() method for questions")
    print("- Monitor system performance using the dashboard above")


üíæ Saving System State...
‚úÖ System state saved successfully

üéâ GraphRAG System is ready for production use!

Next steps:
- Access Neo4j browser at http://localhost:7474
- Use the retrieval_system.query() method for questions
- Monitor system performance using the dashboard above
