In [None]:
!pip install -q langchain langchain-community pypdf sentence-transformers chromadb openai langchain-text-splitters

In [None]:
import os
from google.colab import userdata

# 1. API key: read the Colab secret named "API_KEY" and expose it to this
#    process through the environment so later cells can pick it up.
os.environ.update(API_KEY=userdata.get("API_KEY"))

# Imports for RAG Pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI

In [None]:
# Embedding model to load; BAAI/bge-m3 is a multilingual model.
EMBEDDING_MODEL_NAME = "BAAI/bge-m3"

print(f"Loading Embedding Model: {EMBEDDING_MODEL_NAME}...")

# model_kwargs: run the model on the CPU (the Colab default runtime).
# encode_kwargs: normalise the output vectors, which is what dot-product /
# cosine-similarity metrics expect.
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=dict(device='cpu'),
    encode_kwargs=dict(normalize_embeddings=True),
)

In [None]:
# 1. Load PDF
pdf_path = "your.pdf"  # Change this to your file name
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# 2. Split text (chunking)
# A chunk size of roughly 500-1000 characters is a reasonable starting point;
# the overlap repeats the tail of each chunk at the head of the next one.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
splits = text_splitter.split_documents(docs)

print(f"Split into {len(splits)} chunks.")

# Fail early with a clear message instead of an obscure error from the vector
# store when the PDF produced no text at all (e.g. an image-only scan).
if not splits:
    raise ValueError(
        f"No text could be extracted from {pdf_path!r}; "
        "is it a scanned/image-only PDF?"
    )

# 3. Store in vector database (ChromaDB), persisted on disk under ./chroma_db
# Deterministic ids keep this cell idempotent: without them every re-run would
# append a second copy of each chunk to the persisted collection and retrieval
# would start returning duplicates. With stable ids a re-run overwrites the
# existing entries instead.
# NOTE: if the chunking settings change so that fewer chunks are produced,
# delete ./chroma_db to drop the stale higher-numbered entries.
chunk_ids = [f"{pdf_path}:{i}" for i in range(len(splits))]
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
    ids=chunk_ids,
    persist_directory="./chroma_db"
)

print("✅ Vector Database Ready!")

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be read from Colab.
# NOTE(review): the earlier cells use the relative paths "your.pdf" and "./chroma_db",
# which are not on Drive. If the PDF or the database is meant to live on Drive, run this
# cell before them and point those paths somewhere under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Generation client: the stock OpenAI SDK pointed at Groq's OpenAI-compatible
# endpoint, authenticated with the key stored in the API_KEY environment variable.
client = OpenAI(
    api_key=os.environ["API_KEY"],
    base_url="https://api.groq.com/openai/v1",
)

def query_rag(question, k=5, model="llama-3.3-70b-versatile", temperature=0):
    """Answer ``question`` with retrieval-augmented generation.

    The ``k`` chunks nearest to the question are fetched from the module-level
    ``vectorstore``, joined into a single context string, and sent together
    with the question to the chat model through the module-level ``client``.

    Args:
        question: The user's question; must be a non-empty string.
        k: Number of nearest chunks to retrieve (must be >= 1).
        model: Name of the chat model to request.
        temperature: Sampling temperature; 0 gives the most deterministic
            output, higher values give more varied wording.

    Returns:
        The content of the first choice in the chat completion response.

    Raises:
        ValueError: If ``question`` is blank / not a string, or ``k`` < 1.
    """
    # Reject unusable input before spending an embedding pass and an API call.
    if not isinstance(question, str) or not question.strip():
        raise ValueError("question must be a non-empty string")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Step 1: Retrieval - the k nearest chunks to the question.
    results = vectorstore.similarity_search(question, k=k)

    # Merge the retrieved chunks into one context block, blank-line separated.
    context_text = "\n\n".join(doc.page_content for doc in results)

    # Step 2: Prompt engineering - restrict the model to the supplied context.
    system_prompt = """
    You are a helpful assistant. Use the provided context to answer the user's question.
    If the answer is not in the context, say you don't know.
    """

    user_prompt = f"""
    Context:
    {context_text}

    Question:
    {question}
    """

    # Step 3: Generation (send to LLM).
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        model=model,
        temperature=temperature,
    )

    return chat_completion.choices[0].message.content

# --- Run ---
# Placeholder question: replace "prompt" with what you want to ask about the PDF.
my_question = "prompt"
answer = query_rag(my_question)

# The robot emoji was stored as mis-decoded UTF-8 bytes; restored here.
print(f"🤖 Answer:\n{answer}")