In [2]:
%pwd

'c:\\Code\\Legal-Chatbot-Generative-AI'

In [1]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
#Load the PDF
pdf_path = "data/international_law_handbook.pdf"
loader = PyMuPDFLoader(pdf_path)
documents = loader.load()
print(f"Loaded {len(documents)} pages from the PDF.")

Loaded 681 pages from the PDF.


In [6]:
#Split the document into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks.")

Split into 2642 chunks.


In [13]:
from dotenv import load_dotenv
load_dotenv()

True

In [14]:
import os
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [16]:
from langchain.embeddings import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)


In [18]:
query_result = embedding.embed_query("Hello world")
print("length of query result:", len(query_result))

length of query result: 384


  return forward_call(*args, **kwargs)


In [19]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "legalbot"
dimension = 384 

# Create index if it doesn't already exist
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",        # or "gcp"
            region="us-east-1"  # region from Pinecone dashboard
        )
    )





In [43]:
from langchain_pinecone import PineconeVectorStore

vectorstore = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embedding,
    index_name=index_name,
    namespace="legalbot"  # optional
)



In [25]:
# loading Existing index

from langchain_pinecone import PineconeVectorStore
#Embed each chunk and upsert the embedding into your Pinecone index
vectorstore = PineconeVectorStore.from_existing_index(
    embedding=embedding,
    index_name=index_name
)

In [48]:
vectorstore

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x2ab64bc6b30>

In [50]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3, "namespace": "legalbot"})

In [51]:
retrieved_docs = retriever.invoke("What are the functions of a diplomatic mission?")
print(retrieved_docs)


  return forward_call(*args, **kwargs)


[Document(id='544a3ba1-140c-4c2f-bfeb-61ad5249457e', metadata={'author': '', 'creationDate': "D:20170504120101-04'00'", 'creationdate': '2017-05-04T12:01:01-04:00', 'creator': 'Adobe InDesign CS5.5 (7.5)', 'file_path': 'data/international_law_handbook.pdf', 'format': 'PDF 1.6', 'keywords': '', 'modDate': "D:20170505145851-04'00'", 'moddate': '2017-05-05T14:58:51-04:00', 'page': 164.0, 'producer': 'Adobe PDF Library 9.9', 'source': 'data/international_law_handbook.pdf', 'subject': '', 'title': '', 'total_pages': 681.0, 'trapped': ''}, page_content='diplomatic rank;\n(e)\t\na “diplomatic agent” is the head of the mission or a member of the diplomatic staff of the \nmission;\n(f)\t the “members of the administrative and technical staff” are the members of the staff of \nthe mission employed in the administrative and technical service of the mission;\n(g)\t the “members of the service staff” are the members of the staff of the mission in the \ndomestic service of the mission;\n(h)\t a “pri

In [54]:
retrieved_docs = retriever.get_relevant_documents("What is membership in United Nations")
print(len(retrieved_docs))



  return forward_call(*args, **kwargs)


3


In [55]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-pro",
    temperature=0.4,
    max_output_tokens=500,
    google_api_key=GOOGLE_API_KEY
)


In [57]:
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import RetrievalQA
from langchain.schema.runnable import RunnableMap
from langchain_google_genai import ChatGoogleGenerativeAI

In [62]:
prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(
        "You are a knowledgeable and professional legal assistant. Respond using clear, concise, and formal legal language."
    ),
    HumanMessagePromptTemplate.from_template(
        "Answer the following legal question based on the retrieved context:\n\n{context}\n\nQuestion: {input}"
    )
])


In [63]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Step 1: Create a chain to combine documents with prompt
document_chain = create_stuff_documents_chain(llm, prompt)

# Step 2: Combine document chain with retriever
retrieval_chain = create_retrieval_chain(retriever, document_chain)


In [64]:
query = "What is membership in United Nations?"
response = retrieval_chain.invoke({"input": query})

print("\nAnswer:")
print(response["answer"])


  return forward_call(*args, **kwargs)
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 21
}
]