In [1]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.schema import Document
import os
from dotenv import load_dotenv

In [12]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail early with a readable message if a key is absent. Assigning
# os.getenv(...) straight back into os.environ would raise an opaque
# "TypeError: str expected, not NoneType" for a missing key, and is a
# no-op for a key that is already set.
_required_env_keys = ("GROQ_API_KEY", "GEMINI_API_KEY")
_missing_env_keys = [key for key in _required_env_keys if os.getenv(key) is None]
if _missing_env_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_keys)
        + ". Define them in your shell or in a .env file next to this notebook."
    )

In [13]:
# 1. Sample Wikipedia-style data
# Raw passages are kept separate from the Document wrapper so new text
# can be added without repeating the constructor call.
_sample_texts = (
    "Cats are small, carnivorous mammals that are often kept as pets. They typically live for 13-17 years.",
    "Dogs are domesticated mammals, not natural wild animals. They are known for their loyalty and live about 10-13 years.",
    "Elephants are the largest land animals. They communicate using low-frequency rumbles and can live for 60-70 years.",
)
docs = [Document(page_content=text) for text in _sample_texts]


In [None]:
# 2. Split the text into chunks
# (the stray leading "+" that preceded this comment was a lone unary plus
# with no operand and made the whole cell a SyntaxError)
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
split_docs = splitter.split_documents(docs)

In [15]:
# 3. Generate embeddings
# Instantiate the embedding model that will vectorise every chunk.
embedding_model = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [16]:
# 4. Store in Chroma DB
# The collection is persisted under ./rag_data, so re-running this cell
# without explicit ids would append the same chunks again and the retriever
# would start returning duplicates. Stable, position-based ids turn the
# write into an upsert and keep the cell idempotent across runs.
# NOTE: if ./rag_data already holds chunks written by earlier runs without
# ids, delete the directory once to clear those duplicates.
chunk_ids = [f"chunk-{index}" for index in range(len(split_docs))]
vectorstore = Chroma.from_documents(
    split_docs,
    embedding_model,
    ids=chunk_ids,
    persist_directory="./rag_data",
)

In [None]:
# 5. Load Groq LLM
# Chat model served by Groq; a low temperature and a 512-token cap are
# passed for the generated answers.
llm = ChatGroq(model="gemma2-9b-it", temperature=0.1, max_tokens=512)

In [None]:
# 6. Create the RAG QA chain
# Build the retriever first: it is asked for the 2 best-matching chunks
# per query (search_kwargs k=2).
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Wire the retriever and the LLM together; source documents are returned
# alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [19]:
# 7. Ask some questions
# One question per sample passage, so each answer can be checked against
# the text it should have been retrieved from.
questions = [
    "How do elephants communicate?",           # elephants passage
    "What is the average lifespan of a cat?",  # cats passage
    "What are dogs known for?",                # dogs passage
]

In [20]:
# Run every question through the RAG chain and print the generated answer.
# `.invoke()` replaces calling the chain object directly, which is
# deprecated and emits a deprecation warning on each run.
for q in questions:
    result = rag_chain.invoke({"query": q})
    print(f"\nQ: {q}")
    print(f"A: {result['result']}")

  result = rag_chain({"query": q})



Q: How do elephants communicate?
A: Elephants communicate using low-frequency rumbles. 


Q: What is the average lifespan of a cat?
A: The average lifespan of a cat is 13-17 years. 


Q: What are dogs known for?
A: Dogs are known for their loyalty.  

