# Document Intelligence RAG - Analytics Notebook

This notebook provides interactive visualizations and analytics for your RAG system.
You can run this independently to analyze your document collection and query patterns.

In [None]:
# Import required libraries
import sys
sys.path.append('..')

from rag_system import DocumentRAG
from dashboard_generator import DashboardGenerator
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt

In [None]:
# Create the shared DocumentRAG instance; the metrics and dashboard cells
# below all go through this `rag` object.
rag = DocumentRAG()
print("RAG System initialized successfully!")

## 📊 System Metrics

In [None]:
# Fetch collection-level statistics and the list of uploaded documents.
# `docs` is reused by the chart and table cells further down.
stats = rag.get_collection_stats()
docs = rag.get_uploaded_documents()

total_documents = stats['total_documents']
total_chunks = stats['total_chunks']
# True division keeps the fractional part of the average (floor division
# would report 1 for 5 chunks across 3 documents); max(..., 1) avoids a
# ZeroDivisionError when the collection is empty.
average_chunks = total_chunks / max(total_documents, 1)

print(f"📚 Total Documents: {total_documents}")
print(f"🔍 Total Chunks: {total_chunks}")
print(f"📄 Average Chunks per Document: {average_chunks:.1f}")

## 📈 Document Distribution

In [None]:
# Pie chart of how the stored chunks are split across uploaded documents.
# Assumes each entry in `docs` provides 'filename' and 'chunks' fields.
if not docs:
    # Nothing to plot until at least one document has been uploaded.
    print("No documents uploaded yet. Upload some PDFs to see visualizations!")
else:
    df = pd.DataFrame(docs)

    # One slice per filename, sized by that document's chunk count,
    # coloured from the Plasma sequential palette.
    fig = px.pie(
        data_frame=df,
        values='chunks',
        names='filename',
        color_discrete_sequence=px.colors.sequential.Plasma,
        title='Document Chunk Distribution',
    )
    # Draw the percentage and label text inside each slice.
    fig.update_traces(textinfo='percent+label', textposition='inside')
    fig.show()

## 🔍 Upload a Document

In [None]:
# Upload a document (replace with your PDF path)
# result = rag.upload_document("path/to/your/document.pdf")
# print(result)

## üí¨ Query the System

In [None]:
# Ask a question
# question = "What is the main topic of the document?"
# result = rag.query(question)
# 
# if result['status'] == 'success':
#     print(f"Question: {result['question']}")
#     print(f"\nAnswer: {result['answer']}")
#     print(f"\nSources:")
#     for source in result['sources']:
#         print(f"  - {source['source']} (Page {source['page']})")

## 📊 Generate Standalone Dashboard

In [None]:
# Generate a standalone HTML dashboard for the current RAG instance.
# The import lives in this cell so the cell can be run on its own once
# `rag` exists.
from dashboard_generator import create_dashboard

# Create the dashboard and keep the returned value as its path.
# NOTE(review): the message below says the dashboard is also opened
# (presumably in the default browser) — confirm in create_dashboard.
dashboard_path = create_dashboard(rag)
print(f"Dashboard created and opened: {dashboard_path}")

## 📋 Document List

In [None]:
# Tabular view of the uploaded documents: filename and chunk count only.
if not docs:
    print("No documents uploaded yet.")
else:
    df = pd.DataFrame(docs)
    # `display` is the notebook's rich-output helper; it renders the
    # two-column selection as a table.
    display(df[['filename', 'chunks']])

## 🎨 Custom Visualizations

Add your own custom visualizations below!

In [None]:
# Your custom visualization code here