In [60]:
# Standard library imports
import os
import getpass
from typing import TypedDict, Annotated, List, Dict, Union

# Third-party imports
import gradio as gr
from langgraph.graph import StateGraph, START, END
from langchain_core.messages import BaseMessage, HumanMessage
import boto3

# --- AWS Bedrock Configuration (Important) ---
# Ensure your AWS credentials are configured. You can use environment variables or
# the AWS CLI `aws configure` command.
# For this skeleton, we will initialize the boto3 client.
# Replace 'us-east-1' with your preferred AWS region.

try:
    bedrock = boto3.client(
        service_name='bedrock',
        region_name='us-east-1'
    )
    print("Bedrock client initialized successfully.")
except Exception as e:
    print(f"Error initializing Bedrock client: {e}")
    print("Please ensure your AWS credentials are set up correctly.")

# A placeholder for our in-memory "vector database"
vector_store = {}

Bedrock client initialized successfully.


In [61]:
class AgentState(TypedDict):
    """
    The state for our LangGraph agent.
    """
    documents: List[str] # List of document identifiers
    chat_history: List[Dict[str, str]]
    query: str
    response: str
    is_inter_document_query: bool

In [81]:
import os
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv

load_dotenv(override=True)

GEMINI_BASE_URL = os.getenv("GEMINI_BASE_URL")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
llm = init_chat_model("google_genai:gemini-2.0-flash")

In [68]:
# --- Node 1: Document Ingestion ---
def document_ingestion(state: AgentState):
    """
    Simulates document processing using Bedrock Data Automation.
    In a real scenario, this is where you'd call the Bedrock API,
    chunk the text, and store embeddings in a vector database.
    """
    print("---NODE 1: DOCUMENT INGESTION---")
    documents = state.get("documents", [])
    new_doc_name = state.get("query")

    if new_doc_name and new_doc_name not in documents:
        # Placeholder for Bedrock Data Automation and vector store
        print(f"Processing document '{new_doc_name}' with Bedrock Data Automation...")
        # For our skeleton, we just add the document name to a list and "vector store"
        documents.append(new_doc_name)
        vector_store[new_doc_name] = f"Content of {new_doc_name} is about..."
    
    return {"documents": documents, "response": f"Document '{new_doc_name}' processed successfully."}


# --- Node 2: Query Router ---
def query_router(state: AgentState):
    """
    Analyzes the user query to decide the next step.
    This is a simple heuristic based on keywords. A real agent might
    use a classification LLM call here.
    """
    print("---NODE 2: QUERY ROUTER---")
    query = state["query"].lower()
    
    # Simple logic to determine if it's a cross-document query
    if "compare" in query or "common" in query or "all documents" in query:
        print("Routing to inter-document analysis.")
        return "inter_document_analysis"
    else:
        print("Routing to single document retrieval.")
        return "document_retrieval"


# --- Node 3: Document Retrieval ---
def document_retrieval(state: AgentState):
    """
    Simulates searching a vector database for relevant chunks
    for a single document query.
    """
    print("---NODE 3: DOCUMENT RETRIEVAL---")
    query = state["query"]
    documents = state["documents"]
    
    # Placeholder for actual RAG search
    retrieved_context = "Retrieved context for your query."
    
    print(f"Simulating retrieval for query: '{query}'")
    
    # Pass context to the LLM generation node
    return {"response": f"Generating response based on single document context: '{retrieved_context}'"}


# --- Node 4: Inter-document Analysis ---
def inter_document_analysis(state: AgentState):
    """
    Simulates a more complex search and synthesis for
    cross-document queries.
    """
    print("---NODE 4: INTER-DOCUMENT ANALYSIS---")
    query = state["query"]
    documents = state["documents"]
    
    # Placeholder for a more complex search across all documents
    retrieved_context = "Combined context from all documents to find commonalities."
    
    print(f"Simulating inter-document analysis for query: '{query}'")
    
    return {"response": f"Generating response based on cross-document context: '{retrieved_context}'"}


# --- Node 5: LLM Generation ---
def llm_generation(state: AgentState):
    """
    Simulates a call to the Bedrock LLM to generate a final response.
    """
    print("---NODE 5: LLM GENERATION---")
    # response_prompt = state["response"]
    
    # In a real app, you would make a call to the Bedrock LLM here.
    # Example:
    # prompt = f"Based on the following context: '{response_prompt}', answer the user's query: '{state['query']}'"
    prompt = state["query"]
    # llm_response = bedrock_llm.invoke(prompt)
    print(prompt)
    final_response = llm.invoke(prompt)
    print(final_response)
    
    # Update chat history and return the final response
    chat_history = state.get("chat_history", [])
    chat_history.append({"user": state["query"], "assistant": final_response})
    
    return {"response": final_response, "chat_history": chat_history}

In [69]:
from IPython.display import Image, display
# Create the graph
workflow = StateGraph(AgentState)

# Add the nodes
# workflow.add_node("ingestion", document_ingestion)
# workflow.add_node("router", query_router)
# workflow.add_node("retrieval", document_retrieval)
# workflow.add_node("analysis", inter_document_analysis)
workflow.add_node("generation", llm_generation)

# Set up the conditional edges for the router
# workflow.add_conditional_edges(
#     "ingestion",
#     query_router,
#     {
#         "document_retrieval": "retrieval",
#         "inter_document_analysis": "analysis"
#     }
# )

# Set up the normal edges
workflow.add_edge(START, "generation")
# workflow.add_edge("ingestion", "router")
# # workflow.add_edge("retrieval", "generation")
# # workflow.add_edge("analysis", "generation")
workflow.add_edge("generation", END)

# Compile the graph
app = workflow.compile()
# display(Image(app.get_graph().draw_mermaid_png()))

In [65]:
from IPython.display import Image, display

display(Image(app.get_graph().draw_mermaid_png(max_retries=3, retry_delay=5)))
# try:
# except Exception:
#     # This requires some extra dependencies and is optional
#     pass

KeyboardInterrupt: 

In [80]:
# --- Gradio UI Functions ---

def process_document(file):
    """Function to handle document upload and ingestion."""
    if file is None:
        return "Please upload a document first."
    
    # We will pass the document name as the "query" to our ingestion node
    doc_name = os.path.basename(file.name)
    state = app.invoke({"query": doc_name})
    
    return state["response"]

def chat_interface(query, history):
    """Function to handle user chat messages."""
    
    # Prepare the state for the chat flow
    final_state = app.invoke({"query": query})
    final_state = final_state["content"]
    # The final output of the graph is the response
    return final_state
    # initial_state = State(messages={"role": "user", "content": query})
    # response = ""
    # for message_chunk in app.stream({"query": query, "chat_history": history}, stream_mode="messages"):
    #     if message_chunk.content:
    #         response += message_chunk.content
    #         yield response

# Create the Gradio interface using gr.Blocks for flexibility
with gr.Blocks(title="LangGraph Document Agent") as demo:
    gr.Markdown("# LangGraph Document Agent")
    gr.Markdown("Upload documents to ingest them, then chat with the agent.")
    
    with gr.Row():
        file_upload = gr.File(label="Upload Document")
        process_button = gr.Button("Process Document")
    
    ingestion_output = gr.Textbox(label="Document Processing Status")

    process_button.click(
        fn=process_document,
        inputs=file_upload,
        outputs=ingestion_output
    )

    chatbot = gr.ChatInterface(
        fn=chat_interface,
        examples=["What is the content of document1.txt?", "Compare the names across all documents.", "How is document2.pdf different from document3.docx?"]
    )

# Launch the Gradio app
demo.launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7883
* To create a public link, set `share=True` in `launch()`.




---NODE 5: LLM GENERATION---
hey
content='Hey there! How can I help you today?' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []} id='run--f532ba6d-1686-4a34-8f56-0e7cdb79523b-0' usage_metadata={'input_tokens': 1, 'output_tokens': 11, 'total_tokens': 12, 'input_token_details': {'cache_read': 0}}


Traceback (most recent call last):
  File "c:\Users\PratikTalaviya\Crest-Projects\Document-Processing-Agent\.venv\Lib\site-packages\gradio\queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
    )
    ^
  File "c:\Users\PratikTalaviya\Crest-Projects\Document-Processing-Agent\.venv\Lib\site-packages\gradio\route_utils.py", line 350, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<11 lines>...
    )
    ^
  File "c:\Users\PratikTalaviya\Crest-Projects\Document-Processing-Agent\.venv\Lib\site-packages\gradio\blocks.py", line 2250, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<8 lines>...
    )
    ^
  File "c:\Users\PratikTalaviya\Crest-Projects\Document-Processing-Agent\.venv\Lib\site-packages\gradio\blocks.py", line 1755, in call_function
    pre