<a href="https://colab.research.google.com/github/Ommy11/RAG-LLM/blob/main/Multi_Document_RAG_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
pip install --upgrade langchain langchain-community langchain-huggingface langchain-chroma langchain-ollama pypdf langchain-google-genai huggingface-hub transformers sentence-transformers



In [7]:
import os
from langchain_community.document_loaders import PyPDFLoader
# folder_path = "/content/sample_data/sample_docs"

def load_docs(folder_path:str):
  """Load every PDF found directly inside ``folder_path``.

  Files are visited in sorted name order so the resulting document order is
  reproducible, and the ``.pdf`` extension is matched case-insensitively.
  A file that fails to load is reported and skipped, so one bad PDF does not
  abort the whole run.

  Args:
    folder_path: Directory to scan (sub-folders are not searched).

  Returns:
    A list with everything ``PyPDFLoader(...).load()`` returned for each PDF
    that loaded successfully; an empty list if there were none.

  Raises:
    FileNotFoundError: If ``folder_path`` does not exist.
    NotADirectoryError: If ``folder_path`` exists but is not a directory.
  """
  if not os.path.exists(folder_path):
    raise FileNotFoundError(f"Folder {folder_path} does not exist")
  if not os.path.isdir(folder_path):
    raise NotADirectoryError(f"{folder_path} is not a folder")

  documents = []
  # os.listdir() returns entries in arbitrary order; sort for determinism.
  for file in sorted(os.listdir(folder_path)):
    # Accept ".pdf", ".PDF", ".Pdf", ...
    if not file.lower().endswith(".pdf"):
      continue

    file_path = os.path.join(folder_path,file)
    print(f"Loading {file_path}")
    try:
      loader = PyPDFLoader(file_path)
      documents.extend(loader.load())
    except Exception as e:
      # Best effort: report the failure and carry on with the remaining files.
      print(f"Error loading {file}: {e}")

  return documents

2. Chunking

In [9]:
# We use RecursiveCharacterTextSplitter to cut the documents into chunks.
# It tries to split on paragraph breaks first, then line breaks, then spaces, so related text stays together.

from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_text(documents, chunk_size=1000, chunk_overlap=200):
  """Split loaded documents into overlapping chunks for embedding.

  Args:
    documents: Documents to split, e.g. the list returned by ``load_docs``.
    chunk_size: Maximum size of each chunk, forwarded to
      ``RecursiveCharacterTextSplitter`` (characters, with the splitter's
      default length function).
    chunk_overlap: Amount of text shared between consecutive chunks, so
      content cut at a chunk boundary still appears intact in one of them.

  Returns:
    The list of chunks produced by ``splitter.split_documents``.
  """
  splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
  )

  chunks = splitter.split_documents(documents)
  print(f"Created {len(chunks)} chunks")
  return chunks

3. Embeddings


In [1]:
# Text chunks must be turned into lists of numbers (vectors, a.k.a. embeddings)
# before they can be searched by similarity.
# We use the open-source 'all-MiniLM-L6-v2' sentence-transformers model.

# Import from langchain-huggingface (part of the pip install cell): the copy of
# HuggingFaceEmbeddings in langchain_community.embeddings is deprecated and
# emits a deprecation warning when instantiated.
from langchain_huggingface import HuggingFaceEmbeddings
import sentence_transformers

embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

  embedding_function = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]



vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

4. Vector Store

In [2]:
"""we need somewhere to store these numbers for fast searching.
A regular SQL database isn’t good for this, so we’ll use a Vector Database. We’ll use Chroma"""

from langchain_chroma import Chroma

def new_vector_store(chunks, persist_directory="./chroma_db", collection_name="Sample_rag_docs"):
  """Embed ``chunks`` and store them in a persistent Chroma collection.

  Args:
    chunks: Non-empty list of document chunks to index.
    persist_directory: Folder where Chroma persists the collection.
    collection_name: Name of the Chroma collection to create.

  Returns:
    The ``Chroma`` vector store built from ``chunks`` using the module-level
    ``embedding_function``.

  Raises:
    ValueError: If ``chunks`` is empty. Failing before Chroma is touched
      avoids leaving behind a persist directory with nothing useful in it.
  """
  if not chunks:
    raise ValueError("Cannot build a vector store from an empty list of chunks")

  vector_store = Chroma.from_documents(
      documents = chunks,
      embedding = embedding_function,
      persist_directory=persist_directory,
      collection_name=collection_name
  )

  return vector_store


5. LLM

In [18]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from google.colab import userdata

# The Gemini key is read from the Colab secrets panel; the secret must be
# stored there under the name 'GOOGLE_API_KEY'.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

# Hand the same key to the google.generativeai SDK.
genai.configure(api_key=GOOGLE_API_KEY)

def format_docs(docs):
  """Concatenate the ``page_content`` of each document, separated by a blank line."""
  separator = "\n\n"
  page_texts = [document.page_content for document in docs]
  return separator.join(page_texts)


def query_rag_system(query_text,vector_store, k=3, model="gemini-2.5-flash", temperature=0.3):
  """Answer ``query_text`` using chunks retrieved from ``vector_store``.

  The retriever fetches chunks for the question, ``format_docs`` joins them
  into the prompt's context, and the chat model is instructed to answer
  only from that context.

  Args:
    query_text: The user's question.
    vector_store: Vector store exposing ``as_retriever`` (e.g. a Chroma store).
    k: Number of chunks to retrieve as context.
    model: Name of the Gemini chat model to use.
    temperature: Sampling temperature passed to the chat model.

  Returns:
    The model's answer as a plain string.
  """
  llm = ChatGoogleGenerativeAI(model=model, temperature=temperature, api_key=GOOGLE_API_KEY)
  retriever = vector_store.as_retriever(search_kwargs = {'k':k})
  prompt= ChatPromptTemplate.from_template(
      """
        You are a helpful assistant.
        Answer ONLY using the context below.
        If the answer is not present, say "I don't know."

        Context:
        {context}

        Question:
        {question}
        """
  )

  # The input question is fanned out: once through the retriever to build the
  # context, and once unchanged as the question itself.
  chain = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )
  return chain.invoke(query_text)

In [None]:
def main():
  """Build (or reload) the vector DB, then answer questions interactively.

  On first use the PDFs in the sample folder are loaded, chunked, embedded
  and persisted to ``./chroma_db``; later runs reuse the persisted
  collection. The question loop ends when the user types ``exit`` or sends
  EOF / Ctrl-C.
  """
  folder_path = "/content/sample_data/sample_docs"
  persist_dir = "./chroma_db"

  # An existing but empty directory (e.g. left behind by an earlier failed
  # build) is not a usable DB, so it is rebuilt rather than loaded.
  db_exists = os.path.isdir(persist_dir) and bool(os.listdir(persist_dir))

  if not db_exists:
    print("No vector DB found, creating one")
    docs = load_docs(folder_path)
    chunks = split_text(docs)
    if not chunks:
      # Nothing to index: stop here instead of failing inside the vector store.
      print(f"No PDF content found in {folder_path}; nothing to index")
      return
    vector_store = new_vector_store(chunks)

    print("Vector DB created")
  else:
    print("Loading vector DB")
    vector_store = Chroma(
        persist_directory=persist_dir,
        embedding_function=embedding_function,
        collection_name="Sample_rag_docs"
    )

  while True:
    try:
      query = input("Ask a questions : ").strip()
    except (EOFError, KeyboardInterrupt):
      # Closed stdin or an interrupted cell ends the session cleanly.
      print()
      break

    if not query:
      # Do not spend an LLM call on a blank line.
      continue
    if query.lower() == "exit":
      break

    print("Thinking...")
    try:
      answer = query_rag_system(query,vector_store)
    except Exception as e:
      # Keep the session alive on transient failures (network, quota, ...).
      print(f"Error answering question: {e}")
      continue
    print("\n Answer:  \n",answer)

if __name__ == "__main__":
  main()



Loading vector DB
Ask a questions : what is nlp
Thinking...

 Answer:  
 Natural Language Processing (NLP) is a field of artificial intelligence that allows computers to understand, interpret, and generate human language. It combines techniques from computer science, linguistics, and machine learning to process text or speech data.
Ask a questions : how nlp and gru are different
Thinking...

 Answer:  
 NLP (Natural Language Processing) is a broad field that enables computers to process, analyze, and manipulate text or speech, handling structural aspects of language and having applications like translation and chatbots.

GRU (Gated Recurrent Unit) is a specific type of neural network architecture, often used in NLP/sequence tasks. It is a model that processes sequential data by using a hidden state and gates (Reset Gate and Update Gate) to control how much past information is forgotten or updated at each time step. GRUs cannot directly understand words; words or sentences must be conve