In [None]:
# This is a generic chat RAG over PDF documents. Keep your PDF documents in the ./documents folder;
# otherwise, change the folder name used by the loader and keep the PDF documents there.
# It was run on a physics tutorial.

In [None]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

In [None]:
# Collect every file matching *.pdf under ./documents and parse each one with PyPDFLoader.
loader = DirectoryLoader("./documents", glob="*.pdf", loader_cls=PyPDFLoader)

In [None]:
# Run the directory loader; `documents` is what the text splitter consumes later.
documents = loader.load()

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Chunking configuration: target size 1000 with an overlap of 100 between
# neighbouring chunks (units are whatever the splitter's length function measures).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

In [None]:
# Split the loaded documents into the chunks that get embedded and indexed.
chunks = splitter.split_documents(documents)

In [None]:
# Sanity check: report how many chunks the splitter produced.
chunk_count = len(chunks)
print(f"Total chunks: {chunk_count}")

In [None]:
# Print a handful of chunks for a visual check.
# NOTE(review): the slice starts at index 1, so chunk 0 is never shown — confirm this is intended.
preview = chunks[1:5]
for chunk in preview:
    print(chunk)

In [None]:
# Install the Vertex AI LangChain integration and the Chroma backend into the kernel's environment.
# NOTE(review): this runs after cells that already import langchain_community and
# langchain_text_splitters, which this command does not install — presumably they are preinstalled; confirm.
%pip install -qU langchain-google-vertexai chromadb --user

In [None]:
!pip install blosc2

In [None]:
!pip install --upgrade Cython==0.29.21

In [None]:
from langchain_google_vertexai import VertexAIEmbeddings

In [None]:
import os

from google.cloud import aiplatform

# Take the Vertex AI project and region from the environment instead of
# hardcoding identifiers in the notebook:
#   GOOGLE_CLOUD_PROJECT  - required; a missing value raises KeyError here,
#                           before any API call is attempted.
#   GOOGLE_CLOUD_LOCATION - optional; when unset, None is passed so the SDK
#                           falls back to its own default region.
aiplatform.init(
    project=os.environ["GOOGLE_CLOUD_PROJECT"],
    location=os.environ.get("GOOGLE_CLOUD_LOCATION"),
)

In [None]:
# Name of the Vertex AI embedding model, kept in a variable so it is easy to swap.
embedding_model = "text-embedding-004"
# Embedding client handed to the vector store to vectorize chunks and queries.
embeddings = VertexAIEmbeddings(model_name=embedding_model)


In [None]:
# Import Chroma from langchain_community: the `langchain.vectorstores` path is a
# deprecated re-export of the community integrations.
from langchain_community.vectorstores import Chroma

# Embed every chunk and index it in a Chroma collection stored under ./chroma_db.
# NOTE(review): re-running this cell against an existing ./chroma_db presumably
# adds the same chunks again (duplicate hits at retrieval time) — confirm, and
# clear the directory before re-indexing if so.
chroma_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)


In [None]:
# Expose the vector store as a retriever; k=3 asks for three results per query.
retriever = chroma_store.as_retriever(search_kwargs={"k": 3})

In [None]:
# Smoke-test retrieval with a single-word query before wiring up the full chain.
results = retriever.invoke("Force")

In [None]:
# Show the text of each retrieved chunk to judge retrieval quality by eye.
for result in results:
    print(result.page_content)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt for the RAG chain. It has two placeholders:
#   {context}  - the retrieved chunk text
#   {question} - the user's question
# The wording lets the model answer beyond the context as long as it says so.
template = """
Answer the question based only on this context if anything is outside the scope of the provided context go ahead and say it but do mention that it's outside the scope of the context:
{context}

Question: {question}
"""

# Build the chat prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
from langchain.chat_models import init_chat_model

# Chat model that receives the filled-in prompt (retrieved context + question);
# "gemini-2.0-flash-001" accessed through the google_vertexai provider.
llm = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

In [None]:
from langchain_core.runnables import RunnablePassthrough

# RunnablePassthrough forwards its input unchanged to the next step of a chain.
#
# This cell assumes earlier cells have already defined:
# - retriever = chroma_store.as_retriever(...)
# - prompt    = ChatPromptTemplate.from_template(...)
# - llm       = init_chat_model(...)

def format_docs(docs):
    """Concatenate retrieved documents into a single context string.

    Each item in ``docs`` must expose a ``page_content`` attribute. The texts
    are joined in their original order, separated by one blank line. An empty
    ``docs`` yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# First stage: map the incoming question to the two prompt variables.
#   "context"  - the question goes through the retriever, then format_docs
#   "question" - the question is passed through unchanged
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full pipeline: build prompt inputs -> fill the prompt -> call the chat model.
rag_chain = chain_inputs | prompt | llm

In [None]:
# Manual evaluation of the RAG chain on a fixed set of physics questions.

questions = [
    "What is a balanced and an unbalanced force",
    "Is mass the only quantifying factor of inertia",
    "Given a body that has 5N force in the right direction and 4N of force in the left direction is the object in equilibrium (ignore gravity)",
    "What is the Newton's first law, given the second law isn't this obvious why is there a necessity of the first law",
    "What is the formula for gravity",
    "Given the gravitational force using basic laws of force can u calculate the centripetal force required to sustain a circular orbit of radius u",
]

# Visual divider printed after each question/answer pair.
divider = "_" * 50

for question in questions:
    result = rag_chain.invoke(question)
    # One print call emits the same three lines: question, answer, divider.
    print(question, f"Ans : {result.content}", divider, sep="\n")


In [None]:
# NOTE(review): streamlit is not imported anywhere in the visible notebook —
# presumably installed for a follow-up UI; confirm it is needed and pin a version.
%pip install streamlit