In [3]:
# 🧠 RAG Pipeline using LangChain + ChromaDB
# 📘 Hey there! This notebook is your guide to building a Retrieval-Augmented Generation (RAG) pipeline.
# 🌟 We'll use free tools to search a vector database and answer questions like a pro!

# Step 1: Install the tools we need
# We're grabbing LangChain for the pipeline, ChromaDB for the vector store, and a free embedding model.
!pip install -q langchain langchain-huggingface langchain-chroma sentence-transformers tiktoken langchain-community

# Step 2: Import our libraries
import torch
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline

# Step 3: Check if we can use a GPU (faster!) or stick with CPU
device = 0 if torch.cuda.is_available() else -1
print(f"🚀 Running on {'GPU' if device == 0 else 'CPU'} — let's make some magic!")

# Step 4: Load a free embedding model
# This turns text into numbers (vectors) for searching. Think of it like a librarian who knows every book!
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Step 5: Create a mini "library" of text snippets
# These are our sample documents — like pages in a book we’ll search later.
doc_texts = [
    "LangChain makes it easy to build LLM-powered applications.",
    "Chroma is a lightweight, open-source vector database.",
    "RAG stands for Retrieval-Augmented Generation.",
    "SentenceTransformers are great for semantic search tasks.",
    "OpenAI provides powerful language models, but we use free ones here."
]
documents = [Document(page_content=txt) for txt in doc_texts]

# Step 6: Build and save a vector store with ChromaDB
# This is like indexing our library so we can find stuff fast.
persist_dir = "chroma_store"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory=persist_dir
)
# No need to call persist() — Chroma auto-saves now!

# Step 7: Load the vector store (in case we restart)
vectordb = Chroma(persist_directory=persist_dir, embedding_function=embedding_model)

# Step 8: Try a similarity search
# Let’s ask a question and find the top 2 matching documents.
query = "What is Chroma?"
results = vectordb.similarity_search(query, k=2)
print("\n🔍 Search Results for 'What is Chroma?':")
for i, doc in enumerate(results):
    print(f"Match {i+1}: {doc.page_content}")

# Step 9: Set up a real LLM for RAG
# Using google/flan-t5-base (free, local) to answer questions based on our search results.
llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-base",
    task="text2text-generation",
    pipeline_kwargs={"max_new_tokens": 100, "do_sample": False},
    device=device
)

# Step 10: Create a RAG pipeline
# This combines our vector search with the LLM to give smart answers.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(search_kwargs={"k": 2})
)

# Step 11: Ask a question and get a RAG-powered answer!
response = qa_chain.invoke({"query": "What is ChromaDB?"})["result"]
print(f"\n🤖 RAG Answer: {response}")

# 📚 Tips for Playing Around
# - Try new queries like "What is RAG?" or "What does LangChain do?".
# - Add more documents to doc_texts to make your "library" bigger.
# - Swap the LLM for google/flan-t5-large if you have more memory.
# - Check out the ChromaDB docs (https://docs.trychroma.com) for advanced tricks!



🚀 Running on CPU — let's make some magic!

🔍 Search Results for 'What is Chroma?':
Match 1: Chroma is a lightweight, open-source vector database.
Match 2: Chroma is a lightweight, open-source vector database.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu



🤖 RAG Answer: a lightweight, open-source vector database
