In [None]:
pip install langchain langchain-community langchain-text-splitters langchain-huggingface langchain-groq chromadb sentence-transformers unstructured


Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Collecting langchain-groq
  Downloading langchain_groq-0.3.7-py3-none-any.whl.metadata (2.6 kB)
Collecting chromadb
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting unstructured
  Downloading unstructured-0.18.14-py3-none-any.whl.metadata (24 kB)
Collecting requests<3,>=2 (from langchain)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting groq<1,>=0.30.0 (from langchain-groq)
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux

In [None]:
import os
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Read the Groq key from the environment so the secret is never stored in the notebook.
# The placeholder is kept only as a fallback so the cell still runs when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your_api_key")
PERSIST_DIR = "./chroma_db"  # directory handed to Chroma as its persist_directory
URL = "https://en.wikipedia.org/wiki/San_Francisco"  # page that gets loaded and indexed

# 1. Load and prepare documents
# Wikipedia answers requests that carry no User-Agent with the notice
# "Please set a user-agent and respect our robot policy" instead of the article,
# so the loader must identify itself explicitly.
REQUEST_HEADERS = {"User-Agent": "corrective-rag-notebook/0.1 (educational demo)"}
loader = UnstructuredURLLoader(urls=[URL], headers=REQUEST_HEADERS)
docs = loader.load()
if not docs:
    # The loader logs and skips failed URLs by default; stop here with a clear
    # message rather than indexing nothing and failing later in a confusing way.
    raise RuntimeError(f"No content could be loaded from {URL}")

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
# Deterministic ids make this cell safe to re-run: the same chunk position is
# upserted instead of appended again to the on-disk collection.
chunk_ids = [f"{URL}#chunk-{i}" for i in range(len(chunks))]
vectordb.add_documents(chunks, ids=chunk_ids)
# No explicit persist(): with Chroma >= 0.4 a store created with persist_directory
# is written to disk automatically and the manual method is deprecated.

retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Groq-hosted chat model shared by every chain below; temperature 0 keeps sampling
# randomness to a minimum.
llm = ChatGroq(model="qwen/qwen3-32b", temperature=0, api_key=GROQ_API_KEY)

# ---- Corrective RAG ----
# Three prompts drive the flow: a judge that grades the retrieved context, a
# grounded answer prompt, and a fallback prompt used when the context is poor.

# Step A: Judge relevance -- the model is asked for a bare YES/NO verdict.
_JUDGE_TEMPLATE = """
You are an evaluator.
Given a question and retrieved context, decide if the context is sufficient
to answer the question. Respond with "YES" or "NO" only.

Question: {question}
Context: {context}
"""

# Step B: Normal answer -- answer strictly from the supplied context.
_ANSWER_TEMPLATE = """
Use the following context to answer the question.

Context:
{context}

Question: {question}

Answer clearly and concisely:
"""

# Step C: Corrective answer (when context is poor) -- no context is passed in.
_CORRECTIVE_TEMPLATE = """
The retrieved context is insufficient or irrelevant.
Do your best to answer the question using your own knowledge,
but also point out the missing context.

Question: {question}

Answer carefully:
"""

judge_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=_JUDGE_TEMPLATE,
)
answer_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=_ANSWER_TEMPLATE,
)
corrective_prompt = PromptTemplate(
    input_variables=["question"],
    template=_CORRECTIVE_TEMPLATE,
)

# Bind each prompt to the shared model; one chain per step.
judge_chain, answer_chain, corrective_chain = (
    LLMChain(prompt=step_prompt, llm=llm)
    for step_prompt in (judge_prompt, answer_prompt, corrective_prompt)
)

# ---- Run ----
def strip_reasoning(text: str) -> str:
    """Return ``text`` without a leading ``<think>...</think>`` reasoning block.

    The model used here emits its chain of thought inside ``<think>`` tags before
    the actual reply. Everything up to and including the first ``</think>`` is
    dropped; text without that tag is returned unchanged apart from surrounding
    whitespace being stripped.
    """
    _, closing_tag, reply = text.partition("</think>")
    return (reply if closing_tag else text).strip()


query = "Tell me about the climate of San Francisco."

retrieved_docs = retriever.invoke(query)
context = "\n".join(d.page_content for d in retrieved_docs)

# Step A: Judge
# The raw reply starts with "<think>", so the verdict must be extracted before it
# is compared against "YES"; otherwise the check below can never succeed.
judge_result = strip_reasoning(
    judge_chain.invoke({"question": query, "context": context})["text"]
)

# ---- Print the context that was judged ----
print("\n--- Context Passed to Judge ---\n")
print(context[:1000] + ("..." if len(context) > 1000 else ""))  # limit long output

print("\n--- Judge ---\n")
print(judge_result)

# Step B/C
if judge_result.upper().startswith("YES"):
    # Context judged sufficient: answer from the retrieved passages.
    raw_answer = answer_chain.invoke({"question": query, "context": context})["text"]
else:
    # Context judged insufficient: fall back to the model's own knowledge.
    raw_answer = corrective_chain.invoke({"question": query})["text"]
final_answer = strip_reasoning(raw_answer)

print("\n--- Final Answer ---\n")
print(final_answer)



--- Context Passed to Judge ---

Please set a user-agent and respect our robot policy https://w.wiki/4wJS. See also T400119.
Please set a user-agent and respect our robot policy https://w.wiki/4wJS. See also T400119.

--- Judge ---

<think>
</think>

NO

--- Final Answer ---

<think>
Okay, the user is asking about the climate of San Francisco. Let me start by recalling what I know. San Francisco is on the West Coast of the United States, in California. I remember it's known for being foggy and having a cool climate compared to other parts of California, like Los Angeles.

First, I should mention the general climate type. I think it's a Mediterranean climate, which means mild, wet winters and dry summers. But wait, Mediterranean climates usually have hot summers, but San Francisco is more moderate. Maybe it's influenced by the ocean. The Pacific Ocean is nearby, so the marine influence probably keeps temperatures cooler. That's why it's not as hot as inland areas.

Fog is a big part of