In [None]:
# Environment Setup
import os
import warnings
from dotenv import load_dotenv
# Load Multiple PDFs
from langchain.document_loaders import PyPDFLoader
# Split Documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Vector Store
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Chains
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

In [None]:
# Call python-dotenv's load_dotenv with its default arguments so that values
# from a .env file (if one is found) become visible to os.getenv in later cells.
# Left as a bare expression on purpose: its return value is the cell's output.
load_dotenv()

In [None]:
# Read the Google API key that the embedding model, the chat model and the
# genai client are configured with further down.
google_api_key = os.getenv("GOOGLE_API_KEY")

# os.getenv returns None when the variable is missing; without this check the
# problem only shows up later as an authentication error from the first API
# call. Warn instead of raising so setups that rely on other credentials
# still run.
if not google_api_key:
    warnings.warn(
        "GOOGLE_API_KEY is not set (checked the process environment and .env); "
        "Google API calls will fail unless other credentials are configured."
    )

In [None]:
# Folder that is scanned for PDF files by the loading cell
# (a relative path, so it is resolved against the current working directory).
pdf_folder: str = "RAGData"

# Accumulator that the loading cell extends with the documents of each PDF.
documents: list = list()

In [None]:
# Load every PDF found directly inside `pdf_folder` into `documents`.
#
# Start from an empty list so that re-running this cell replaces the previous
# load instead of appending a second copy of every document.
documents = []

# sorted() gives a stable order; os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(pdf_folder)):
    # Compare the extension case-insensitively so files such as "REPORT.PDF"
    # are not silently skipped.
    if not file.lower().endswith(".pdf"):
        continue

    loader = PyPDFLoader(os.path.join(pdf_folder, file))
    file_documents = loader.load()
    documents.extend(file_documents)

    # Report the count for this file and the running total separately; the
    # running total alone would be misleading from the second file onwards.
    print(
        f"Loaded {len(file_documents)} documents from {file} "
        f"({len(documents)} total)"
    )

In [None]:
# Splitter used to cut the loaded documents into small overlapping chunks
# before they are embedded.
# NOTE(review): a chunk_size of 100 is very small for question answering
# context - confirm this value is intentional.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
)

In [None]:
# Split the loaded documents into chunks; `docs` is what gets embedded and
# stored in the vector store.
docs = text_splitter.split_documents(documents=documents)

In [None]:
# Embedding client handed to the Chroma vector store to vectorize the chunks.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=google_api_key,
)

In [None]:
# Build the Chroma index once and reuse it on later runs.
#
# Chroma.from_documents adds the given chunks to whatever collection already
# lives in the persist directory, so calling it on every run stores (and pays
# to embed) the same chunks again, and retrieval starts returning duplicates.
#
# NOTE: the persisted index is reused as-is. Delete ./chroma_db to force a
# rebuild after the PDFs or the splitter settings change.
persist_directory = "./chroma_db"  # Persistent local dir

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # An index was persisted by an earlier run: open it without re-embedding.
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding,
    )
else:
    # First run (or the directory was removed): embed the chunks and persist.
    vectorstore = Chroma.from_documents(
        documents=docs,
        embedding=embedding,
        persist_directory=persist_directory,
    )

In [None]:
# Chat model that the RetrievalQA chain uses to generate the answer.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-pro-latest",
    google_api_key=google_api_key,
)

In [None]:
# Expose the vector store as a retriever for the QA chain. No arguments are
# passed, so the search behaviour is whatever as_retriever uses by default.
retriever = vectorstore.as_retriever()

In [None]:
# Assemble the question-answering chain from the chat model and the retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    # "stuff" is the chain type selected for combining the retrieved chunks.
    chain_type="stuff",
    retriever=retriever,
    # Ask the chain to return the retrieved chunks alongside the answer.
    return_source_documents=True,
)

In [None]:
# Question that is sent to the QA chain in the next cell.
query: str = "What Foreign Jurisdiction means?"

In [None]:
# Run the QA chain on the question. The input is passed under RetrievalQA's
# "query" key explicitly rather than relying on single-input coercion.
response = qa_chain.invoke({"query": query})

# Show the generated answer; without this the cell produces no visible output.
# print() is used so multi-line answers render with real line breaks.
print(response["result"])

In [None]:
# Diagnostic cell: print the name of every model returned by genai.list_models()
# for the configured API key. Useful for checking the model identifiers that
# are hard-coded in the embedding and chat-model cells.
import google.generativeai as genai

# Configure the genai client with the same key used by the LangChain wrappers.
genai.configure(api_key=google_api_key)
models = genai.list_models()
for model in models:
    print(model.name)