## Initialization (2025-12-19-19-35)
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.schema import Document
from langchain.prompts import PromptTemplate

In [1]:
import os
from dotenv import load_dotenv

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate

In [2]:
# ----------------------------------------------------------------------------
# Environment configuration: stop immediately if a required setting is absent
# ----------------------------------------------------------------------------
load_dotenv()

OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = os.environ.get("OPENROUTER_BASE_URL")  # e.g. https://openrouter.ai/api/v1

# Validate in a fixed order (key first, then URL) so the first missing
# setting is the one reported. Empty strings count as missing.
for _setting_name, _setting_value in (
    ("OPENROUTER_API_KEY", OPENROUTER_API_KEY),
    ("OPENROUTER_BASE_URL", OPENROUTER_BASE_URL),
):
    if not _setting_value:
        raise RuntimeError(f"Missing {_setting_name} in .env")

In [3]:
# Connection settings shared by both clients: the OpenRouter credentials and
# endpoint read from the environment.
_openrouter_client_args = {
    "api_key": OPENROUTER_API_KEY,
    "base_url": OPENROUTER_BASE_URL,
}

# Embedding client, used to vectorize the documents and the query.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    **_openrouter_client_args,
)

# Chat client that generates the final answer. temperature=0 asks for the
# most deterministic, focused output the model offers.
chat_model = ChatOpenAI(
    model="openai/gpt-4o",
    temperature=0,
    **_openrouter_client_args,
)

## Indexing pipeline for RAG (2025-12-19-19-35)

In [4]:

# Sample corpus as (text, category) pairs. Each pair becomes one Document
# whose metadata carries the category label.
_sample_corpus = [
    (
        "Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.",
        "science",
    ),
    (
        "Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.",
        "science",
    ),
    (
        "Quantum gates manipulate qubits to perform quantum algorithms. These gates are analogous to classical logic gates but operate on quantum states.",
        "science",
    ),
    ("AI is revolutionizing healthcare with advanced diagnostics.", "technology"),
    ("Stock markets are influenced by global economic policies.", "finance"),
    ("Einstein’s theory of relativity changed our understanding of physics.", "science"),
]

documents = [
    Document(page_content=text, metadata={"category": category})
    for text, category in _sample_corpus
]

In [5]:
# Build the FAISS vector store from the sample documents, using the
# embedding client to vectorize them.
vector_db = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)

## Generation pipeline for RAG (2025-12-19-19-35)

In [6]:
# Expose the vector store as a retriever that returns the two most relevant
# documents for each query.
_top_k = 2
retriever = vector_db.as_retriever(search_kwargs={"k": _top_k})

In [7]:
# Sample question; it is defined here because the retrieval and prompt cells
# below all read it.
query = "How does quantum computing work?"

# Vector form of the question.
# NOTE(review): the retriever call below is given the raw query string, so
# nothing visible in this cell consumes this vector — confirm whether a later
# cell needs it before removing the extra embedding request.
query_embedding = embedding_model.embed_query(query)

# Fetch the documents the retriever ranks as most relevant to the question.
retrieved_docs = retriever.invoke(query)

# Show what was retrieved: one content/metadata pair per document.
print("\n--- Retrieved Documents ---")
for doc in retrieved_docs:
    doc_summary = f"Content: {doc.page_content}\nMetadata: {doc.metadata}"
    print(doc_summary)


--- Retrieved Documents ---
Content: Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.
Metadata: {'category': 'science'}
Content: Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.
Metadata: {'category': 'science'}


In [8]:
# Join the text of the retrieved documents, one per line, into the context
# string that the prompt will embed.
context = "\n".join(doc.page_content for doc in retrieved_docs)

In [9]:
# Instruction text that confines the model to the retrieved context.
# The {context} and {query} placeholders are filled in when the prompt is rendered.
_GROUNDED_QA_TEMPLATE = """You are a helpful assistant that ONLY answers based on the provided context.

STRICT RULES:
- Use ONLY information from the context below
- If the context doesn't contain enough information, say "I don't have enough information in the provided context to answer this fully."
- Do NOT use your general knowledge

Context: {context}

Query: {query}

Answer:"""

prompt_template = PromptTemplate(
    template=_GROUNDED_QA_TEMPLATE,
    input_variables=["query", "context"],
)

# Render the final prompt text from the retrieved context and the question.
final_prompt = prompt_template.format(context=context, query=query)

In [10]:
# Send the rendered prompt to the chat model; the reply text is read from
# response.content in the cells below.
response = chat_model.invoke(input=final_prompt)

In [11]:
# Display the model's answer under a header line.
print("\n--- LLM Response ---", response.content, sep="\n")


--- LLM Response ---
Quantum computing works by leveraging quantum entanglement and superposition. In quantum computing, qubits are used instead of classical bits. Qubits can exist in a state of superposition, meaning they can be both 0 and 1 simultaneously. This allows for parallel processing of information. Additionally, quantum entanglement is utilized, where qubits become correlated, and the state of one qubit can affect others instantly. These properties enable quantum computers to perform complex calculations that classical computers cannot efficiently perform.


In [12]:
# Manual faithfulness review: print the question, the retrieved context and
# the model's answer together so a reader can compare the answer against the
# context by eye.
print(
    "\n--- Faithfulness Check ---",
    f"Query: {query}",
    f"\nRetrieved Context:\n{context}",
    f"\nLLM Response:\n{response.content}",
    "\n⚠️ Does the response only use information from the context above?",
    sep="\n",
)


--- Faithfulness Check ---
Query: How does quantum computing work?

Retrieved Context:
Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.
Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.

LLM Response:
Quantum computing works by leveraging quantum entanglement and superposition. In quantum computing, qubits are used instead of classical bits. Qubits can exist in a state of superposition, meaning they can be both 0 and 1 simultaneously. This allows for parallel processing of information. Additionally, quantum entanglement is utilized, where qubits become correlated, and the state of one qubit can affect others instantly. These properties enable quantum computers to perform complex calcula