## Initialization (2025-12-19-19-35)
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.schema import Document
from langchain.prompts import PromptTemplate

In [1]:
import os
from dotenv import load_dotenv

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate

In [2]:
# ----------------------------------------------------------------------------
# Environment configuration -- stop immediately if a required value is absent
# ----------------------------------------------------------------------------
load_dotenv()  # expected to load variables from a local .env file

OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = os.environ.get("OPENROUTER_BASE_URL")  # e.g. https://openrouter.ai/api/v1

# Both settings are mandatory; an unset or empty value is treated as missing.
if not OPENROUTER_API_KEY:
    raise RuntimeError("Missing OPENROUTER_API_KEY in .env")
if not OPENROUTER_BASE_URL:
    raise RuntimeError("Missing OPENROUTER_BASE_URL in .env")

In [3]:
# Connection settings shared by both OpenRouter-backed clients
_openrouter_connection = {
    "api_key": OPENROUTER_API_KEY,     # credential validated during env setup
    "base_url": OPENROUTER_BASE_URL,   # endpoint the clients send requests to
}

# Embedding client: turns documents and queries into vectors
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    **_openrouter_connection,
)

# Chat client: temperature=0 asks for the most deterministic answers
chat_model = ChatOpenAI(
    model="openai/gpt-4o",
    temperature=0,
    **_openrouter_connection,
)

## Indexing pipeline for RAG (2025-12-19-19-35)

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

DB_FAISS_PATH = "vector_store"  # Directory to save the index
INDEX_NAME = "index"  # Base name of the <name>.faiss / <name>.pkl files
# Built with os.path.join so the path also resolves on non-Windows systems,
# where a backslash is part of the file name rather than a separator.
DATA_FILE_PATH = os.path.join("data", "documents-demo-4.txt")

# Both files must be present for a saved index to be loadable
index_file = os.path.join(DB_FAISS_PATH, f"{INDEX_NAME}.faiss")
pkl_file = os.path.join(DB_FAISS_PATH, f"{INDEX_NAME}.pkl")

if os.path.exists(index_file) and os.path.exists(pkl_file):
    print(f"✓ Loading existing vector store from: {DB_FAISS_PATH}")

    # SECURITY: allow_dangerous_deserialization=True unpickles the .pkl file,
    # which can execute arbitrary code. Only load an index this notebook
    # created itself; never point DB_FAISS_PATH at an untrusted directory.
    vector_db = FAISS.load_local(
        folder_path=DB_FAISS_PATH,
        embeddings=embedding_model,
        index_name=INDEX_NAME,  # keep in sync with the existence check above
        allow_dangerous_deserialization=True,
    )
    print("✓ Vector store loaded successfully!")
else:
    print("⚠ Index not found. Creating new vector store...")

    # 1. LOAD: Read documents from file
    with open(DATA_FILE_PATH, encoding="utf-8") as f:
        content = f.read()

    # Each non-blank line of the file is expected to be a Python
    # `Document(...)` expression, optionally followed by a trailing comma.
    #
    # SECURITY: eval() executes whatever code the line contains -- it is NOT
    # safe for untrusted input. Use it only with a data file you control, and
    # prefer a plain data format (e.g. JSON lines) for anything external.
    documents = []
    for line_number, line in enumerate(content.split('\n'), start=1):
        expression = line.strip().rstrip(',')
        if not expression:
            continue  # blank line (or a lone comma): nothing to evaluate
        try:
            doc = eval(expression)
        except Exception as err:
            # Re-raise with the file position so a malformed line is easy to find
            raise ValueError(
                f"{DATA_FILE_PATH}:{line_number}: could not evaluate document line"
            ) from err
        if not isinstance(doc, Document):
            raise TypeError(
                f"{DATA_FILE_PATH}:{line_number}: expected a Document, "
                f"got {type(doc).__name__}"
            )
        documents.append(doc)

    if not documents:
        # Fail with a clear message instead of an opaque error from FAISS
        raise ValueError(f"No documents found in {DATA_FILE_PATH}")

    print(f"✓ Loaded {len(documents)} documents from {DATA_FILE_PATH}")

    # 2. SPLIT: not applied here -- each document is indexed whole.

    # 3. EMBED & 4. STORE: Create vector store from documents
    vector_db = FAISS.from_documents(documents, embedding_model)

    # Create directory if it doesn't exist
    os.makedirs(DB_FAISS_PATH, exist_ok=True)

    # Save under the same index name that the existence check looks for
    vector_db.save_local(DB_FAISS_PATH, index_name=INDEX_NAME)
    print(f"✓ Vector store saved to: {DB_FAISS_PATH}")

✓ Loading existing vector store from: vector_store
✓ Vector store loaded successfully!


### Print for verification

In [6]:
# Dump the loaded documents for a visual check. `documents` is only assigned
# by the index-building branch, so it may be absent when the store was loaded
# from disk.
if 'documents' not in locals():
    print("Documents were loaded from existing vector store (no 'documents' variable available)")
else:
    divider = '=' * 80
    print(f"\n{divider}")
    print("LOADED DOCUMENTS SUMMARY")
    print(divider)
    for position, document in enumerate(documents, start=1):
        print(f"\nDocument {position}:")
        print(f"  Content: {document.page_content}")
        print(f"  Metadata: {document.metadata}")
    print(f"\n{divider}")


LOADED DOCUMENTS SUMMARY

Document 1:
  Content: Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.
  Metadata: {'category': 'science'}

Document 2:
  Content: Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.
  Metadata: {'category': 'science'}

Document 3:
  Content: Quantum gates manipulate qubits to perform quantum algorithms. These gates are analogous to classical logic gates but operate on quantum states.
  Metadata: {'category': 'science'}

Document 4:
  Content: AI is revolutionizing healthcare with advanced diagnostics.
  Metadata: {'category': 'technology'}

Document 5:
  Content: Stock markets are influenced by global economic policies.
  Metadata: {'category': 'finance'}

Docu

## Generation pipeline for RAG (2025-12-19-09-14)

In [7]:
# 5. RETRIEVE: expose the vector store as a retriever capped at 2 results per query
top_k_search = {"k": 2}
retriever = vector_db.as_retriever(search_kwargs=top_k_search)
print("✓ Retriever initialized")

✓ Retriever initialized


In [8]:
# Sample question; it must be defined before the retrieval and prompt cells run
query = "How does quantum computing work?"

# Vector form of the query. Note: it is not passed to the retriever below,
# which receives the raw query string instead.
query_embedding = embedding_model.embed_query(query)
retrieved_docs = retriever.invoke(query)

# Show what the retriever returned
print("\n--- Retrieved Documents ---")
for hit in retrieved_docs:
    print(f"Content: {hit.page_content}")
    print(f"Metadata: {hit.metadata}")


--- Retrieved Documents ---
Content: Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.
Metadata: {'category': 'science'}
Content: Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.
Metadata: {'category': 'science'}


In [9]:
# Join the text of the retrieved documents, one per line, to form the prompt context
passages = (hit.page_content for hit in retrieved_docs)
context = "\n".join(passages)

In [10]:
# Prompt text instructing the model to answer from the supplied context only.
# {context} and {query} are filled in by PromptTemplate.format below.
_GROUNDED_QA_TEMPLATE = """You are a helpful assistant that ONLY answers based on the provided context.

STRICT RULES:
- Use ONLY information from the context below
- If the context doesn't contain enough information, say "I don't have enough information in the provided context to answer this fully."
- Do NOT use your general knowledge

Context: {context}

Query: {query}

Answer:"""

prompt_template = PromptTemplate(
    template=_GROUNDED_QA_TEMPLATE,
    input_variables=["query", "context"],
)

# Render the prompt with the current question and the retrieved passages
final_prompt = prompt_template.format(context=context, query=query)

In [11]:
# 6. GENERATE: send the rendered prompt to the chat model. The reply object is
# kept whole; its text is read from `response.content` when printed.
response = chat_model.invoke(final_prompt)

### Print for verification

In [12]:
# Show the model's answer text under a labelled banner
print("\n--- LLM Response ---", response.content, sep="\n")


--- LLM Response ---
Quantum computing works by leveraging quantum entanglement and superposition. In quantum computing, qubits are used instead of classical bits. Qubits can exist in a state of superposition, meaning they can be both 0 and 1 simultaneously. This allows for parallel processing of information. Additionally, quantum entanglement is utilized, where qubits become correlated, and the state of one qubit can affect others instantly. This enables quantum computers to perform complex calculations that classical computers cannot efficiently perform.


In [13]:
# Manual faithfulness review: print the query, the retrieved context and the
# answer one after another so a reader can compare them by eye.
review_sections = [
    "\n--- Faithfulness Check ---",
    f"Query: {query}",
    f"\nRetrieved Context:\n{context}",
    f"\nLLM Response:\n{response.content}",
    "\n⚠️ Does the response only use information from the context above?",
]
for section in review_sections:
    print(section)


--- Faithfulness Check ---
Query: How does quantum computing work?

Retrieved Context:
Quantum computers leverage quantum entanglement, where qubits become correlated and the state of one affects others instantly. This allows for complex calculations that classical computers cannot efficiently perform.
Quantum computing uses qubits instead of classical bits. Qubits can exist in superposition, meaning they can be both 0 and 1 simultaneously, enabling parallel processing of information.

LLM Response:
Quantum computing works by leveraging quantum entanglement and superposition. In quantum computing, qubits are used instead of classical bits. Qubits can exist in a state of superposition, meaning they can be both 0 and 1 simultaneously. This allows for parallel processing of information. Additionally, quantum entanglement is utilized, where qubits become correlated, and the state of one qubit can affect others instantly. This enables quantum computers to perform complex calculations that 