In [4]:
import streamlit as st

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from PyPDF2 import PdfReader

from langchain_core.documents import Document

In [2]:
!pip install PYPDF2

Collecting PYPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PYPDF2
Successfully installed PYPDF2-3.0.1



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
def split_documents(docs, chunk_size=500, chunk_overlap=100):
    """Split documents into overlapping chunks suitable for embedding.

    Args:
        docs: Documents to split; handed to the splitter's ``split_documents``.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``
            (defaults to 500, the value previously hard-coded here).
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter (defaults to 100, the value previously hard-coded here).

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents(docs)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [6]:
def load_embeddings():
    """Build the HuggingFace embedding model ("all-MiniLM-L6-v2") used for indexing."""
    model_name = "all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

In [7]:
def create_vectorstore(chunks, embeddings):
    """Build a FAISS vector store from the given chunks and embedding model."""
    vectorstore = FAISS.from_documents(chunks, embeddings)
    return vectorstore

In [8]:
def create_retriever(vectorstore):
    """Wrap the vector store in a retriever configured with ``k = 3``."""
    search_options = {"k": 3}
    retriever = vectorstore.as_retriever(search_kwargs=search_options)
    return retriever

In [9]:
def load_llm():
    """Create the Ollama LLM client for the "mistral" model with temperature 0."""
    llm_options = {"model": "mistral", "temperature": 0}
    return Ollama(**llm_options)

In [10]:
def ask_question(llm, retriever, query):
    """Answer ``query`` using only the passages the retriever returns for it.

    Args:
        llm: Object exposing ``invoke(prompt)``; its return value is passed through.
        retriever: Object exposing ``invoke(query)`` that returns items with a
            ``page_content`` attribute.
        query: The user's question, used for retrieval and embedded in the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    retrieved = retriever.invoke(query)

    # Retrieved passages are separated by a blank line inside the prompt.
    context = "\n\n".join(passage.page_content for passage in retrieved)

    # Assembled piecewise; the resulting text (including the leading and
    # trailing newline) is the prompt sent to the model.
    prompt = (
        "\n"
        "Answer the question using ONLY the context below.\n"
        "If the answer is not in the context, say \"I don't know\".\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        "Question:\n"
        f"{query}\n"
    )

    answer = llm.invoke(prompt)
    return answer


In [11]:
# App header. The title previously contained mojibake (the UTF-8 bytes of the
# "page facing up" emoji, U+1F4C4, decoded with the wrong codec).
st.title("📄 Free RAG Chatbot")

# Only PDF uploads are accepted; `pdf` is checked for truthiness before use.
pdf = st.file_uploader("Upload a PDF", type="pdf")

2026-02-08 13:08:20.643 
  command:

    streamlit run c:\Users\HP\OneDrive\Desktop\RAG\venv\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [12]:
# Seed the session-state slot for the vector index; None means no PDF has
# been indexed yet.
if "vectorstore" not in st.session_state:
    st.session_state["vectorstore"] = None

2026-02-08 13:08:33.804 Session state does not function when running a script without `streamlit run`


In [13]:
# Build the vector index only when a PDF has been uploaded and no index is
# stored in session state yet.
if pdf and st.session_state.vectorstore is None:
    st.info("Processing PDF...")

    reader = PdfReader(pdf)
    # Extract each page exactly once, skip pages that yield no text, and keep
    # a newline between pages so words do not fuse across page boundaries.
    page_texts = [page.extract_text() for page in reader.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    if not text.strip():
        # Nothing to index (e.g. an image-only PDF); do not build an index
        # from zero chunks.
        st.error("No extractable text was found in this PDF.")
    else:
        docs = [Document(page_content=text)]

        chunks = split_documents(docs)
        embeddings = load_embeddings()
        # Use the shared helper so index construction lives in one place.
        st.session_state.vectorstore = create_vectorstore(chunks, embeddings)

        st.success("PDF processed successfully!")
    

In [14]:
# Free-text question box; the value is checked for truthiness before use.
query = st.text_input(label="Ask a question from your documents")



In [15]:
# Answer the question against the index built from the uploaded PDF.
# The previous version called an undefined `load_documents()` and rebuilt the
# embeddings on every query instead of reusing the session-state index.
if query:
    vectorstore = st.session_state.get("vectorstore")
    if vectorstore is None:
        # No index yet: nothing to retrieve from.
        st.warning("Please upload a PDF before asking a question.")
    else:
        retriever = create_retriever(vectorstore)
        llm = load_llm()
        response = ask_question(llm, retriever, query)
        st.subheader("Answer")
        st.write(response)
