#### ⭐ 1. Setup

In [None]:
#!pip install google-api-python-client
#!pip install google-api-python-client google-auth

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Any
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials

import os

# Prefer the key from the environment so a real secret never has to be saved
# inside the notebook; 'Yours' stays as the placeholder fallback so the cell
# behaves exactly as before when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", 'Yours')

# Chat model used by the agent and embedding client used to build the FAISS
# index; both authenticate with the same key.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini")
emb = OpenAIEmbeddings(api_key=OPENAI_API_KEY)


#### ⭐ 2. UNIVERSAL LOCAL DOCUMENT LOADER

In [None]:
from langchain_community.document_loaders import (
    PyPDFLoader, Docx2txtLoader, TextLoader,
    UnstructuredMarkdownLoader, BSHTMLLoader
)

def load_local_document(path: str):
    """Load a local file with the loader that matches its extension.

    Supported extensions (matched case-insensitively): ``.pdf``, ``.docx``,
    ``.txt``, ``.md`` and ``.html``.

    Args:
        path: Location of the file on disk. It is passed to the loader
            unchanged, so mixed-case names work on case-sensitive
            filesystems.

    Returns:
        Whatever the selected loader's ``load()`` returns.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    # Lower-case a *copy* purely for extension matching; the loaders must
    # receive the original path or "Report.PDF" would be looked up as
    # "report.pdf" and not be found.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        return PyPDFLoader(path).load()

    if lowered.endswith(".docx"):
        return Docx2txtLoader(path).load()

    if lowered.endswith(".txt"):
        return TextLoader(path).load()

    if lowered.endswith(".md"):
        return UnstructuredMarkdownLoader(path).load()

    if lowered.endswith(".html"):
        return BSHTMLLoader(path).load()

    raise ValueError(f"Unsupported file type: {path}")


#### ⭐ 3. Build Local Document Retriever

In [None]:
def build_local_retriever(path):
    """Turn the document at *path* into a FAISS-backed retriever.

    The file is loaded with ``load_local_document``, split into chunks of
    500 characters with a 50-character overlap, embedded with the
    module-level ``emb`` client and indexed in a FAISS vector store.

    Returns:
        The vector store's default retriever (``as_retriever()``).
    """
    loaded = load_local_document(path)

    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(loaded)

    index = FAISS.from_documents(pieces, emb)
    return index.as_retriever()


#### ⭐ 4. GOOGLE DOC TOOL (Reads Google Doc Content)

In [None]:
def extract_doc_id(url: str) -> str:
    """Return the document ID embedded in a Google Docs URL.

    The ID is the text that follows the first ``/d/`` up to (but not
    including) the next ``/``, ``?`` or ``#``. Stopping at ``?`` and ``#``
    matters for share links such as ``.../d/<id>?usp=sharing``, where no
    ``/edit`` segment separates the ID from the query string.

    Args:
        url: A URL containing a ``/d/<id>`` segment.

    Returns:
        The document ID.

    Raises:
        IndexError: If the URL has no ``/d/`` segment (same exception type
            the plain split-and-index implementation produced).
    """
    _, marker, tail = url.partition("/d/")
    if not marker:
        raise IndexError(f"No '/d/' segment found in URL: {url!r}")

    # Cut at whichever delimiter comes first after the ID.
    for separator in "/?#":
        tail = tail.split(separator, 1)[0]
    return tail


In [None]:
# Module-level ID of the sample Google Doc, parsed out of its edit URL.
doc_id = extract_doc_id("https://docs.google.com/document/d/194t3QV5RzD1XuzIagNB0F-GPvXZspc4GCz9wkRQpfYg/edit")
@tool
def google_doc_reader(doc_id: str) -> str:
    """
    Read a Google Doc by ID and return all its text.
    """
    # NOTE: the docstring above is kept verbatim because @tool uses it as the
    # tool description; further documentation lives in comments.
    #
    # Authenticates with the service-account key file, fetches the document
    # through the Docs v1 API and concatenates the text runs of every
    # paragraph in the body. Elements without a "paragraph" entry are skipped.
    print(">>>>>>>>>>>>>>>>>>>>>>>",doc_id)

    # Scopes must be OAuth scope URIs, not the URL of a document; read-only
    # access to Docs is all this function needs.
    SCOPES = ["https://www.googleapis.com/auth/documents.readonly"]
    creds = Credentials.from_service_account_file(
        "wired-glyph-481113-m7-4e9b8f1c6579.json", scopes=SCOPES
    )
    service = build("docs", "v1", credentials=creds)
    doc = service.documents().get(documentId=doc_id).execute()

    # Tolerate a response without a body/content instead of failing on None.
    content = (doc.get("body") or {}).get("content") or []

    pieces = []
    for element in content:
        paragraph = element.get("paragraph")
        if not paragraph:
            continue
        for part in paragraph.get("elements", []):
            text = part.get("textRun", {}).get("content")
            if text:
                pieces.append(text)

    return "".join(pieces)


#### ⭐ 5. LOCAL DOCUMENT SEARCH TOOL

In [None]:
# @tool
# def local_docs_search(query: str, retriever=None):
#     """
#     Search across local docs using RAG retriever.
#     """
#     if retriever is None:
#         return "Retriever missing — cannot search local documents."

#     docs = retriever.invoke(query)
#     return "\n\n".join([d.page_content for d in docs])

from langchain_core.tools import tool

# Placeholder tool: it exists so the LLM can request a local-document search.
# It does not touch a retriever itself; it only echoes the query behind a
# "TOOL_REQUEST:" prefix. In this file the actual retrieval for a
# "local_docs_search" tool call is done inside tool_agent, which reads the
# retriever from the graph state.
@tool
def local_docs_search(query: str):
    """Search local documents using the active retriever."""
    return f"TOOL_REQUEST:{query}"




#### ⭐ 6. Bind Tools to an LLM

In [None]:
# Chat model with both tools bound, so its responses may carry tool calls
# for either local_docs_search or google_doc_reader.
llm_with_tools = llm.bind_tools([local_docs_search, google_doc_reader])

#### ⭐ 7. LANGGRAPH AGENT STATE

In [None]:
class AgentState(TypedDict, total=False):
    """State passed through the LangGraph workflow; every key is optional."""

    # User question; read by tool_agent via state["question"].
    question: str
    # Final output; tool_agent returns it as {"answer": ...}.
    answer: str
    # Retriever object supplied by the caller; read with state.get("retriever").
    retriever: Any



#### ⭐ 8. TOOL AGENT (LLM decides which tool to call)

In [None]:
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

def tool_agent(state):
    """Ask the tool-enabled LLM what to do and carry out its first tool call.

    Args:
        state: Mapping with a required ``"question"`` key and an optional
            ``"retriever"`` key (object exposing ``invoke(query)``).

    Returns:
        ``{"answer": <str>}`` where the answer is one of:
        * the joined page contents returned by the retriever, when the LLM
          calls ``local_docs_search``;
        * the document text, when the LLM calls ``google_doc_reader``;
        * the LLM's own message content otherwise.
    """
    question = state["question"]
    retriever = state.get("retriever")

    # Step 1 — ask LLM what to do
    response = llm_with_tools.invoke([
        SystemMessage(content="You may use tools. you may use this DOC_id 194t3QV5RzD1XuzIagNB0F-GPvXZspc4GCz9wkRQpfYg"),
        HumanMessage(content=question),
    ])

    # Step 2 — Did the LLM call a tool? Only the first call is handled.
    if response.tool_calls:
        tool_call = response.tool_calls[0]
        tool_name = tool_call["name"]
        tool_args = tool_call["args"]

        if tool_name == "local_docs_search":
            # Without a retriever there is nothing to search; report it
            # instead of failing with an AttributeError on None.
            if retriever is None:
                return {"answer": "Retriever missing — cannot search local documents."}
            docs = retriever.invoke(tool_args["query"])
            return {"answer": "\n\n".join(d.page_content for d in docs)}

        if tool_name == "google_doc_reader":
            # Previously this call fell through to Step 3 and the tool was
            # never run. The name may be bound either to the @tool object
            # (which exposes .invoke) or to the plain function that a later
            # notebook cell defines under the same name, so support both.
            run_tool = getattr(google_doc_reader, "invoke", None)
            if run_tool is not None:
                return {"answer": run_tool(tool_args)}
            return {"answer": google_doc_reader(**tool_args)}

    # Step 3 — no (known) tool used → answer directly
    return {"answer": response.content}




#### ⭐ 9. LANGGRAPH WORKFLOW

In [None]:
# Single-node workflow: the "tools" node (tool_agent) is the entry point and
# leads straight to END.
graph = StateGraph(AgentState)

graph.add_node("tools", tool_agent)
graph.set_entry_point("tools")
graph.add_edge("tools", END)

# Compiled graph; invoked below as app.invoke({...}).
app = graph.compile()


#### ⭐ 10. RUN THE AGENT — LOCAL FILE EXAMPLE

In [None]:
# Build the retriever over the local PDF before running the graph.
retr = build_local_retriever("saikrishna_java_fullstack_developer.pdf")

# Run the workflow with the question and the retriever as the initial state.
result = app.invoke({
    "question": "the google document talking about?",
    "retriever": retr
})

print(result)


In [None]:
def google_doc_reader(doc_id: str) -> str:
    """Read a Google Doc by ID and return all its text.

    Authenticates with the service-account key file, fetches the document
    through the Docs v1 API and concatenates the text runs of every
    paragraph in the body. Elements without a ``"paragraph"`` entry are
    skipped.

    Args:
        doc_id: ID of the document, passed as ``documentId`` to the API.

    Returns:
        The concatenated paragraph text; an empty string when the response
        has no body content.
    """
    print(">>>>>>>>>>>>>>>>>>>>>>>",doc_id)

    # Scopes must be OAuth scope URIs, not the URL of a document; read-only
    # access to Docs is all this function needs.
    SCOPES = ["https://www.googleapis.com/auth/documents.readonly"]
    creds = Credentials.from_service_account_file(
        "wired-glyph-481113-m7-4e9b8f1c6579.json", scopes=SCOPES
    )
    service = build("docs", "v1", credentials=creds)
    doc = service.documents().get(documentId=doc_id).execute()

    # Tolerate a response without a body/content instead of failing on None.
    content = (doc.get("body") or {}).get("content") or []

    pieces = []
    for element in content:
        paragraph = element.get("paragraph")
        if not paragraph:
            continue
        for part in paragraph.get("elements", []):
            text = part.get("textRun", {}).get("content")
            if text:
                pieces.append(text)

    return "".join(pieces)

In [None]:
# Manual check of the reader against a second document. This URL carries a
# query string and a fragment after /edit, and its ID differs from the one
# hard-coded in tool_agent's system prompt.
url = "https://docs.google.com/document/d/1d26tMQe4U2YL0gRDLypwat1g1xT0gNR84jxaFkDkWe4/edit?tab=t.0#heading=h.h4jujkcu35n8"
doc_id = extract_doc_id(url)
print(doc_id)
result = google_doc_reader(doc_id)
# Bare expression as the last statement of the cell — presumably there so the
# notebook displays the fetched text as the cell output.
result