In [None]:
!pip install --upgrade --quiet langchain langchain_community langchain-google-genai pypdf faiss-gpu langchain-huggingface googlesearch-python fpdf


In [None]:
# Set API keys
import os
from google.colab import userdata

# Read the Colab secret once and export it under both names:
# GOOGLE_API_KEY is the variable documented by langchain-google-genai, while
# GEMINI_API_KEY (the name this notebook originally set) is kept for compatibility.
_gemini_key = userdata.get('GOOGLE_API_KEY')
for _env_name in ('GOOGLE_API_KEY', 'GEMINI_API_KEY'):
    os.environ[_env_name] = _gemini_key
# Do not leave the raw secret bound to a notebook global.
del _gemini_key, _env_name

In [None]:
# Load the source PDF.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

pdf_pages = PyPDFLoader("/content/Legal Case Studies for RAG Demonstration.pdf").load()

# Cut the loaded documents into 500-character chunks with no overlap; downstream
# cells consume the chunks under the name `documents`.
chunker = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
documents = chunker.split_documents(pdf_pages)

In [None]:
# Load embedding model (embeddings are normalized at encode time)
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5", encode_kwargs={"normalize_embeddings": True})

# Create vectorstore.
# FAISS is imported from langchain_community (already installed and used for the PDF
# loader): `langchain.vectorstores` is only a deprecated re-export of the same class.
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(documents, embeddings)

# Create retriever with the vector store's default search settings
retriever = vectorstore.as_retriever()

In [None]:
# Web search helper backed by the `googlesearch` package
from googlesearch import search


def google_search(query: str, num_results: int = 10):
    """Run a Google search and return the hits as newline-separated text.

    Args:
        query: Search terms forwarded to ``googlesearch.search``.
        num_results: Value passed through as ``num_results`` (default 10).

    Returns:
        A single string made by joining every item yielded by ``search``
        with a newline character.
    """
    return "\n".join(search(query, num_results=num_results))


# Alias under which the rest of the notebook calls the search helper.
web_search_tool = google_search

In [None]:
# Chat model shared by the retrieval chain and the agent
from langchain_google_genai import ChatGoogleGenerativeAI

GEMINI_MODEL = "gemini-2.0-flash-exp"
llm = ChatGoogleGenerativeAI(model=GEMINI_MODEL)

# Define vector search
from langchain.chains import RetrievalQA
def vector_search(query: str) -> str:
    """Answer `query` from the PDF vector store with a RetrievalQA chain.

    Args:
        query: Natural-language question to run against the retriever.

    Returns:
        The answer text produced by the chain.
    """
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    # `Chain.run` is deprecated; `invoke` takes the input under RetrievalQA's
    # "query" key and returns a dict whose answer is stored under "result".
    return qa_chain.invoke({"query": query})["result"]

# Define web search
def web_search(query: str):
    """Delegate `query` to `web_search_tool` and return its result unchanged."""
    search_output = web_search_tool(query)
    return search_output

In [None]:
# Create tool call for vector search and web search
from langchain.tools import tool, Tool

# Decorator-based tool objects, kept so existing references to these names keep working.
@tool
def vector_search_tool(query: str) -> str:
    """Tool for searching the vector store."""
    return vector_search(query)

@tool
def web_search_tool_func(query: str) -> str:
    """Tool for performing web search."""
    return web_search_tool(query)

# Define tools for the agent.
# Each `Tool` wraps the plain Python callable directly. Passing the `@tool` objects as
# `func` would wrap a tool inside another tool, so every call would go through the
# deprecated `BaseTool.__call__` and start a second, nested tool run.
# The names must stay in sync with the tool names referenced in the system prompt.
tools = [
    Tool(
        name="VectorStoreSearch",
        func=vector_search,
        description="Use this to search the vector store for information."
    ),
    Tool(
        name="WebSearch",
        func=web_search_tool,
        description="Use this to perform a web search for information."
    ),
]


In [None]:
# Define system prompt.
# This is the structured-chat agent prompt plus one routing rule (vector store first,
# web search as fallback). The blob schema and the Question/Thought/Action/Observation
# walkthrough are spelled out in full so the model sees the exact format that
# JSONAgentOutputParser expects. Doubled braces are literal braces after formatting;
# {tools} and {tool_names} are the only template variables.
system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools: {tools}
Always try the "VectorStoreSearch" tool first. Only use "WebSearch" if the vector store does not contain the required information.
Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
Valid "action" values: "Final Answer" or {tool_names}
Provide only ONE action per $JSON_BLOB, as shown:
```
{{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}}
```
Follow this format:
Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
  "action": "Final Answer",
  "action_input": "Final response to human"
}}
```
Begin! Reminder to ALWAYS respond with a valid json blob of a single action.
Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation"""

# Human prompt: the user question followed by the running Thought/Action/Observation log.
human_prompt = """{input}
{agent_scratchpad}
(reminder to always respond in a JSON blob)"""


What does JSON_BLOB do?

It specifies the action the agent should take and the input for that action.
For example, if the agent needs to search the vector store, the JSON_BLOB will include the action "VectorStoreSearch" and the query to search for.
{
  "action": "VectorStoreSearch",
  "action_input": "Analyze the breach of fiduciary duty claims in the Lee vs. Kim case"
}

In [None]:
# Assemble the chat prompt: the system instructions first, then the human turn.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt_messages = [("system", system_prompt), ("human", human_prompt)]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [None]:
# Pre-fill the prompt's {tools} and {tool_names} variables from the tool list
from langchain.tools.render import render_text_description_and_args

rendered_tools = render_text_description_and_args(list(tools))
joined_tool_names = ", ".join([each_tool.name for each_tool in tools])
prompt = prompt.partial(tools=rendered_tools, tool_names=joined_tool_names)

# Build the agent chain: scratchpad -> prompt -> LLM -> JSON action parser
from langchain.schema.runnable import RunnablePassthrough
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str


def _scratchpad_from_steps(state):
    """Format the accumulated intermediate steps as the scratchpad text."""
    return format_log_to_str(state["intermediate_steps"])


with_scratchpad = RunnablePassthrough.assign(agent_scratchpad=_scratchpad_from_steps)
chain = with_scratchpad | prompt | llm | JSONAgentOutputParser()


In [None]:
# Wrap the chain in an executor that runs the tool-calling loop
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(
    verbose=True,
    handle_parsing_errors=True,
    tools=tools,
    agent=chain,
)

# Run the agent on the demo question; the returned dict is displayed as the cell output.
demo_question = "Analyze the breach of fiduciary duty claims in the Lee vs. Kim case and compare them with similar cases in California and New South Wales"
agent_executor.invoke({"input": demo_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
  "action": "VectorStoreSearch",
  "action_input": "Analyze the breach of fiduciary duty claims in the Lee vs. Kim case and compare them with similar cases in California and New South Wales"
}
```
[0m[36;1m[1;3mI am sorry, but the provided context does not contain information about the Lee vs. Kim case, nor does it contain information about cases in California and New South Wales. Therefore, I cannot analyze the breach of fiduciary duty claims in the Lee vs. Kim case or compare them with similar cases in California and New South Wales.
[0m[32;1m[1;3m```json
{
  "action": "WebSearch",
  "action_input": "Lee vs. Kim fiduciary duty case analysis and comparison with California and New South Wales cases"
}
```
[0m[33;1m[1;3mhttps://casetext.com/case/lee-v-kim-11
https://casetext.com/case/kim-v-lee-13
https://papers.ssrn.com/sol3/Delivery.cfm/SSRN_ID2928472_code1895056.pdf?abstractid=2928472&mirid=1
https://resea

{'input': 'Analyze the breach of fiduciary duty claims in the Lee vs. Kim case and compare them with similar cases in California and New South Wales',
 'output': "I've conducted a thorough search regarding the Lee v. Kim case and comparable fiduciary duty breaches in California and New South Wales. Unfortunately, the search results reveal that the case information is spread across various documents and legal databases, and there isn't one single source providing a complete analysis. However, here's a summary of what I've found and how it can be helpful:\n\n**Lee v. Kim Case:**\n\nThe search results indicate there are multiple cases related to 'Lee v. Kim' or 'Kim v. Lee', which suggests a complex legal dispute likely involving multiple proceedings and appeals. These cases appear to involve some level of financial dispute, and at least some of them appear to be related to a bankruptcy case. Without more specific details about the exact case you're interested in, a precise analysis of th