In [None]:
!pip install -U langchain
!pip install -U langchain-community
!pip install -U langchain-google-genai
!pip install -U chromadb
!pip install -U google-generativeai

In [None]:
from google.colab import userdata

# Access the Gemini API key from Colab secrets
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Fail fast with a clear message rather than letting an empty key surface
# later as an opaque authentication error from the embedding/chat clients.
if not GOOGLE_API_KEY:
    raise ValueError(
        "GOOGLE_API_KEY is empty; add it in the Colab secrets panel and "
        "grant this notebook access."
    )

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
import shutil
import os

In [None]:
!pip install pypdf

In [None]:
# Mirror the key into the process environment
# (presumably so Google client libraries can discover it on their own — TODO confirm).
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Source PDF that gets indexed.
DATA_PATH = "/content/data.pdf"

# Directory in which the Chroma vector store is kept.
CHROMA_PATH = "/content/chroma_db"

def main() -> None:
    """Entry point: run the data-store generation pipeline."""
    generate_data_store()

def generate_data_store():
    """Load the source documents, split them into chunks, and store the chunks in Chroma."""
    # Each stage feeds the next: load -> split -> save.
    save_to_chroma(split_text(load_documents()))

def load_documents():
    """Read the PDF at ``DATA_PATH`` with ``PyPDFLoader`` and return the loaded documents."""
    return PyPDFLoader(DATA_PATH).load()

def split_text(documents: list[Document]) -> list[Document]:
    """Split documents into overlapping character chunks.

    Args:
        documents: Documents to split.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` (300-character
        chunks with 100 characters of overlap). ``add_start_index=True`` makes
        the splitter record a ``start_index`` entry in each chunk's metadata.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Print one chunk as a sanity check. Index 10 is kept as the preferred
    # sample, but short inputs may yield fewer than 11 chunks (or none), so
    # clamp the index instead of raising IndexError.
    if chunks:
        sample = chunks[min(10, len(chunks) - 1)]
        print(sample.page_content)
        print(sample.metadata)
    return chunks

def save_to_chroma(chunks: list[Document]) -> None:
    """Rebuild the Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Any existing directory at ``CHROMA_PATH`` is removed first, so the store
    is recreated from scratch on every call.

    Args:
        chunks: Document chunks to embed and store.
    """
    # Clear out the database first.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Create a new DB from the documents.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=GOOGLE_API_KEY
    )
    # No explicit db.persist(): since chromadb 0.4 a store created with
    # persist_directory is written to disk automatically, and the LangChain
    # persist() method is deprecated (calling it only emits a warning).
    # NOTE(review): this relies on chromadb >= 0.4, which the `-U` install provides.
    Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

# Run the pipeline only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()

Split 222 documents into 2300 chunks.
Preamble  ...........................................................................................................................  1 
PART I .................................................................................................................... 3
{'source': '/content/data.pdf', 'page': 2, 'start_index': 258}
Saved 2300 chunks to /content/chroma_db.


  warn_deprecated(


In [None]:
# Query template

# Re-open the store persisted at CHROMA_PATH with the same embedding model
# name that save_to_chroma uses.
db = Chroma(
    embedding_function=GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=GOOGLE_API_KEY,
    ),
    persist_directory=CHROMA_PATH,
)

# Fetch the k most similar chunks for the query.
query = "Who is the supreme power?"
results = db.similarity_search(query, k=3)

# Show each hit's text and metadata, followed by a separator line.
for doc in results:
    print(doc.page_content, doc.metadata, "---", sep="\n")

In [None]:
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

# Load the persisted database
db = Chroma(
    persist_directory=CHROMA_PATH,
    embedding_function=GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=GOOGLE_API_KEY
    ),
)

# Create a retriever
retriever = db.as_retriever()

# Create a ChatGoogleGenerativeAI instance
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

# Create a RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

# Ask a question
# (spelling fixed: a misspelled keyword degrades embedding-based retrieval)
question = "punishment of murder?"

# Chain.run() is deprecated; invoke() takes the chain's input mapping and
# returns an output mapping. RetrievalQA reads the question from "query" and
# puts the answer text under "result", so `response` is still a string.
response = qa_chain.invoke({"query": question})["result"]
print(response)

The provided text focuses on legal protections and doesn't explicitly mention the punishment for murder. It covers topics like compulsory service, retrospective punishment, double punishment, and self-incrimination. 

