In [1]:
# Print a fixed greeting (presumably a kernel smoke test).
print("hello world")

hello world


In [5]:
#llm
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv

# Load key=value pairs from a local .env file into the process environment.
load_dotenv()

# Gemini API key read from the environment; os.getenv returns None when
# GOOGLE_API_KEY is not set. The model constructors in this notebook pass it on.
google_api_key = os.getenv("GOOGLE_API_KEY")
def llm_load():
    """Build the Gemini chat model, falling back to ``None`` on failure.

    Any exception raised while constructing the client is printed and
    swallowed, so callers must check the result before using it.

    Returns:
        The ``ChatGoogleGenerativeAI`` instance, or ``None`` if construction
        raised.
    """
    try:
        # Chat-focused model, authenticated with the module-level API key.
        return ChatGoogleGenerativeAI(
            model="gemini-2.5-flash-lite",
            temperature=0.7,
            google_api_key=google_api_key,
        )
    except Exception as err:
        print(err)
        return None
llm=llm_load()
# llm_load() prints and swallows construction errors, returning None; stop here
# with an explicit message instead of failing on `None.invoke` below.
if llm is None:
    raise RuntimeError(
        "Gemini chat model could not be initialised; "
        "check GOOGLE_API_KEY and the error printed above."
    )

# One-off sanity query; the bare last expression shows the reply text as the cell output.
check_llm=llm.invoke("what is the capital of India")
check_llm.content


E0000 00:00:1759678088.834000  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


'The capital of India is **New Delhi**.'

In [7]:
#embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import numpy as np

class Embedder:
    """Thin wrapper around the Google Generative AI embedding client."""

    def __init__(self):
        # Uses the module-level API key and the text-embedding-004 model.
        self.model = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=google_api_key,
        )

    def embed_texts(self, texts):
        """Embed ``texts`` and return the vectors as a float32 NumPy array.

        Args:
            texts: Sequence of strings handed to ``embed_documents``.

        Returns:
            ``np.ndarray`` of dtype ``float32`` built from the client's result.
        """
        return np.array(self.model.embed_documents(texts), dtype=np.float32)

In [8]:
#retriever
import os
import json
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec


# Load .env values into the environment before reading the settings below.
load_dotenv()

# Pinecone credentials; os.getenv returns None when PINECONE_API_KEY is unset.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# Index to create/query; defaults to "self-critique-index" when INDEX_NAME is unset.
INDEX_NAME = os.getenv("INDEX_NAME", "self-critique-index")

def init_pinecone():
    """Return a handle to the configured Pinecone index, creating it if absent.

    A new client is built from ``PINECONE_API_KEY`` on every call. When
    ``INDEX_NAME`` is not among the existing indexes, a serverless index
    (768 dimensions, cosine metric, AWS us-east-1) is created first.

    Returns:
        The object returned by ``Pinecone.Index(INDEX_NAME)``.
    """
    client = Pinecone(api_key=PINECONE_API_KEY)
    known_names = {entry.name for entry in client.list_indexes()}

    if INDEX_NAME in known_names:
        print(f"✅ Pinecone index already exists: {INDEX_NAME}")
    else:
        print(f"🪶 Creating Pinecone index: {INDEX_NAME}")
        serverless = ServerlessSpec(cloud="aws", region="us-east-1")
        client.create_index(
            name=INDEX_NAME,
            dimension=768,
            metric="cosine",
            spec=serverless,
        )

    return client.Index(INDEX_NAME)

def build_index(dataset_path="self_critique_loop_dataset.json", limit=30):
    """Load dataset, create embeddings with Gemini, and upsert into Pinecone.

    Args:
        dataset_path: JSON file holding a list of records, each with
            ``doc_id``, ``question`` and ``answer_snippet`` keys.
        limit: Maximum number of leading records to index; ``None`` indexes
            every record in the file.

    The ``answer_snippet`` text is what gets embedded. It is also stored in
    the vector metadata (next to ``question``) so a query can return the
    answer text and not only the question it belongs to.
    """
    index = init_pinecone()
    embedder = Embedder()

    # Explicit encoding so the dataset is read the same way on every platform.
    with open(dataset_path, encoding="utf-8") as f:
        data = json.load(f)[:limit]

    if not data:
        # Nothing to embed; skip the embedding and upsert calls entirely.
        print(f"⚠️ No documents found in {dataset_path}; nothing indexed")
        return

    vectors = embedder.embed_texts([d["answer_snippet"] for d in data])

    to_upsert = [
        {
            "id": record["doc_id"],
            "values": vector.tolist(),
            "metadata": {
                "question": record["question"],
                "answer_snippet": record["answer_snippet"],
            },
        }
        for record, vector in zip(data, vectors)
    ]

    index.upsert(vectors=to_upsert)
    print(f"✅ Indexed {len(to_upsert)} documents into Pinecone")

def pinecone_query(query: str, top_k: int = 5):
    """Query top-k snippets from Pinecone.

    Args:
        query: Text to embed and search with.
        top_k: Number of matches requested from the index.

    Returns:
        A list of strings of the form ``"<text> [KB...]"``, one per match.
        ``<text>`` is the match's ``answer_snippet`` metadata when present,
        otherwise its ``question`` metadata.
    """
    index = init_pinecone()
    embedder = Embedder()
    query_vec = embedder.embed_texts([query])[0].tolist()

    res = index.query(vector=query_vec, top_k=top_k, include_metadata=True)

    snippets = []
    for match in res['matches']:
        metadata = match['metadata']
        # Prefer the stored answer text; vectors indexed with question-only
        # metadata still work through the fallback.
        text = metadata.get("answer_snippet") or metadata["question"]
        doc_id = str(match['id'])
        # Stored ids already carry the "KB" prefix (the old output showed
        # "[KBKB003]"), so only add it when it is missing.
        label = doc_id if doc_id.startswith("KB") else f"KB{doc_id}"
        snippets.append(f"{text} [{label}]")
    return snippets

# Runs when this code executes as the main module (which includes a notebook
# cell): announces the run and (re)indexes the dataset via build_index().
if __name__ == "__main__":
    print("🚀 Starting Pinecone setup and indexing...")
    build_index()

🚀 Starting Pinecone setup and indexing...
✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759678212.854133  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759678212.857967  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


✅ Indexed 30 documents into Pinecone


In [37]:
#graph
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from llm_utils import llm
from retriever import pinecone_query

class State(TypedDict):
    """State dictionary passed between the graph nodes."""
    # Question supplied by the caller when the graph is invoked.
    user_query: str
    # Retrieved snippet strings, written by retrieve_kb.
    snippets: list[str]
    # Latest answer text: written by generate_answer, replaced by refine_answer.
    answer: str
    # Routing verdict written by critique_answer: "COMPLETE" or "REFINE".
    critique: str
    # Keywords the critique reported as missing; only written on a REFINE verdict.
    missing_keywords: str

def retrieve_kb(state: State):
    """Look up the five best-matching KB snippets for the user's question.

    Returns:
        A partial state update containing only the ``snippets`` key.
    """
    question = state["user_query"]
    return {"snippets": pinecone_query(question, top_k=5)}

def generate_answer(state: State):
    """Draft an answer to the user's question from the retrieved snippets.

    The snippets are joined one per line into the prompt, the prompt is
    printed for inspection, and the model's reply text becomes the answer.

    Returns:
        A partial state update containing only the ``answer`` key.
    """
    kb_context = "\n".join(state["snippets"])
    # Written with explicit escapes so the significant trailing whitespace of
    # the prompt is visible in the source.
    prompt = (
        f"Answer the question: {state['user_query']}\n"
        f"Using these KB snippets:\n{kb_context}.  \n"
        "Always cite snippets [KBxxx]\n "
    )
    print("prompt", prompt)
    reply = llm.invoke(prompt)
    return {"answer": reply.content}

def critique_answer(state: State):
    """Ask the model whether the current answer is complete.

    The model is told to reply with ``COMPLETE`` or
    ``REFINE: <missing keywords>``. Parsing is tolerant: a leading bullet or
    markdown emphasis is ignored, the verdict is matched case-insensitively,
    and a ``REFINE`` reply without a colon yields empty keywords instead of
    raising.

    Returns:
        ``{"critique": "REFINE", "missing_keywords": <str>}`` for a refine
        verdict, otherwise ``{"critique": "COMPLETE"}``.
    """
    prompt = f"""Critique this answer for completeness:
Question: {state['user_query']}
Answer: {state['answer']}
Respond only with:
- COMPLETE
- REFINE: <missing keywords>"""
    res = llm.invoke(prompt)
    text = res.content.strip()
    # The prompt lists the options as "- ..." bullets, so a reply may echo the
    # bullet (or wrap the verdict in "**"); drop that before matching.
    verdict = text.lstrip("-*• \t")
    if verdict.upper().startswith("REFINE"):
        # partition() returns "" after a missing colon, where split(...)[1]
        # would raise IndexError.
        _, _, keywords = verdict.partition(":")
        return {"critique": "REFINE", "missing_keywords": keywords.strip(" \t\r\n*")}
    return {"critique": "COMPLETE"}

def refine_answer(state: State):
    """Produce an improved answer after a REFINE critique.

    One extra snippet is retrieved for the question plus the missing
    keywords and merged with the snippets already in the state. Each LLM
    call is independent, so the prompt carries the previous answer and the
    missing keywords explicitly; otherwise the model has nothing to refine.

    Returns:
        A partial state update containing only the ``answer`` key.
    """
    missing = state.get("missing_keywords", "")
    new_query = f"{state['user_query']} {missing}".strip()
    new_snippets = pinecone_query(new_query, top_k=1)
    # dict.fromkeys keeps first-seen order while dropping a re-retrieved
    # snippet that is already in the context.
    merged = list(dict.fromkeys(state["snippets"] + new_snippets))
    context = "\n".join(merged)
    prompt = (
        f"Refine your answer to: {state['user_query']}\n"
        f"Previous answer:\n{state.get('answer', '')}\n"
        f"Missing keywords to cover: {missing}\n"
        f"Using this info:\n{context} and Always cite snippets [KBxxx]"
    )
    res = llm.invoke(prompt)
    print("refine_answer",res.content)
    return {"answer": res.content}

def build_graph():
    """Assemble and compile the retrieve -> answer -> critique -> refine graph.

    The flow is linear up to ``critique_answer``; from there the ``critique``
    value in the state routes either straight to ``END`` ("COMPLETE") or
    through ``refine_answer`` ("REFINE") and then to ``END``.

    Returns:
        The compiled graph returned by ``StateGraph.compile()``.
    """
    builder = StateGraph(State)

    # Register every node under the name used by the edges below.
    nodes = {
        "retrieve_kb": retrieve_kb,
        "generate_answer": generate_answer,
        "critique_answer": critique_answer,
        "refine_answer": refine_answer,
    }
    for node_name, node_fn in nodes.items():
        builder.add_node(node_name, node_fn)

    # Unconditional part of the pipeline.
    linear_edges = (
        (START, "retrieve_kb"),
        ("retrieve_kb", "generate_answer"),
        ("generate_answer", "critique_answer"),
    )
    for source, target in linear_edges:
        builder.add_edge(source, target)

    def decide(state):
        # The critique node stores the routing key directly in the state.
        return state["critique"]

    routes = {"COMPLETE": END, "REFINE": "refine_answer"}
    builder.add_conditional_edges("critique_answer", decide, routes)
    builder.add_edge("refine_answer", END)
    return builder.compile()


In [38]:
# Compile the graph once; the query cells that follow reuse this `graph` object.
graph= build_graph()
# Only user_query is supplied up front; the nodes fill in the other state keys.
query= "What are best practices for caching?"
result = graph.invoke({"user_query": query})
print("Response from bot:")
print("\n\n")
# The final answer is read from the state returned by the graph.
print(result['answer'])

✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680624.708402  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680624.792053  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


prompt Answer the question: What are best practices for caching?
Using these KB snippets:
What are best practices for caching? [KBKB003]
What are best practices for caching? [KBKB023]
What are best practices for caching? [KBKB013]
What are best practices for performance tuning? [KBKB012]
What are best practices for performance tuning? [KBKB002].  
Always cite snippets [KBxxx]
 
✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680632.351837  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680632.353691  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


refine_answer Based on the provided KB articles, here are the best practices for caching:

**Core Caching Principles:**

*   **Identify Frequently Accessed Data:** Cache data that is read often but changes infrequently. This is the most impactful use of caching. [KBKB003]
*   **Understand Data Volatility:** Cache items that have a low rate of change. If data is constantly updated, caching it can lead to serving stale information and negate performance benefits. [KBKB003]
*   **Determine Cache Scope:** Decide whether caching is needed at the application level, database level, or browser level. The appropriate scope depends on the specific performance bottleneck. [KBKB003]
*   **Implement Cache Invalidation Strategically:** Have a clear strategy for removing or updating cached data when the underlying data changes. This is crucial to prevent serving stale information. [KBKB003]
*   **Monitor Cache Performance:** Regularly track cache hit rates, miss rates, and memory usage. This helps id

In [39]:
# Ask the compiled graph a CI/CD question and print the answer from the returned state.
query= "How should I set up CI/CD pipelines?"
result = graph.invoke({"user_query": query})
print("Response from bot:")
print("\n\n")
print(result['answer'])

✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680638.517292  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680638.518113  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


prompt Answer the question: How should I set up CI/CD pipelines?
Using these KB snippets:
What are best practices for CI/CD? [KBKB007]
What are best practices for CI/CD? [KBKB027]
What are best practices for CI/CD? [KBKB017]
What are best practices for unit testing? [KBKB016]
What are best practices for unit testing? [KBKB006].  
Always cite snippets [KBxxx]
 
✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680646.453074  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680646.455763  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


refine_answer Setting up effective CI/CD pipelines involves a strategic approach guided by established best practices. Here's a breakdown of how to set them up, drawing from key knowledge base articles:

**1. Foundation: Version Control and Continuous Integration**

*   **Centralized Version Control:** All code must reside in a robust version control system. This is the bedrock of CI/CD, enabling collaboration, history tracking, and automated builds. [KBKB007]
*   **Automated Builds:** Integrate your version control system with an automated build tool. Whenever code is committed, the CI server should automatically trigger a build. This ensures that code is always in a releasable state. [KBKB007]
*   **Frequent Commits:** Encourage developers to commit small, frequent changes. This minimizes the risk of merge conflicts and makes it easier to pinpoint the source of issues. [KBKB007]
*   **Automated Testing (Unit Tests are Crucial):** Integrate comprehensive automated tests, especially un

In [40]:
# Ask the compiled graph a performance-tuning question and print the answer from the returned state.
query= "What are performance tuning tips?"
result = graph.invoke({"user_query": query})
print("Response from bot:")
print("\n\n")
print(result['answer'])

✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680654.228199  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680654.229769  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


prompt Answer the question: What are performance tuning tips?
Using these KB snippets:
What are best practices for performance tuning? [KBKB002]
What are best practices for performance tuning? [KBKB022]
What are best practices for performance tuning? [KBKB012]
What are best practices for caching? [KBKB013]
What are best practices for caching? [KBKB003].  
Always cite snippets [KBxxx]
 
Response from bot:



Performance tuning is a multifaceted process that involves optimizing various aspects of a system to improve its speed and efficiency. Based on the provided knowledge base snippets, here are some key performance tuning tips:

**General Best Practices for Performance Tuning:**

*   **Identify and Address Bottlenecks:** A fundamental principle is to pinpoint the slowest parts of your system, often referred to as bottlenecks, and focus your tuning efforts there [KBKB002, KBKB022, KBKB012]. Without understanding where the performance issues lie, tuning efforts can be misdirected.
*   **

In [41]:
# Ask the compiled graph an API-versioning question and print the answer from the returned state.
query= "How do I version my APIs?"
result = graph.invoke({"user_query": query})
print("Response from bot:")
print("\n\n")
print(result['answer'])

✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680661.636457  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680661.637937  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


prompt Answer the question: How do I version my APIs?
Using these KB snippets:
What are best practices for API versioning? [KBKB005]
What are best practices for API versioning? [KBKB025]
What are best practices for API versioning? [KBKB015]
What are best practices for CI/CD? [KBKB017]
What are best practices for CI/CD? [KBKB007].  
Always cite snippets [KBxxx]
 
✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680669.110144  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680669.111302  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


refine_answer Versioning your APIs is crucial for managing changes and ensuring backward compatibility for your consumers. Here's a refined approach based on best practices:

## Key Principles for API Versioning:

*   **Clarity and Communication:** Make your versioning strategy clear to your developers and API consumers. [KBKB005]
*   **Backward Compatibility:** Aim to maintain backward compatibility whenever possible to avoid breaking existing integrations. [KBKB025]
*   **Granularity:** Choose a versioning approach that balances granularity with ease of use. [KBKB015]
*   **Automation:** Integrate versioning into your CI/CD pipeline for consistent and reliable deployments. [KBKB017]

## Common Versioning Strategies:

There are several popular methods for versioning your APIs. The best choice often depends on your specific needs and the complexity of your API.

1.  **URI Versioning (URL Path Versioning):**
    *   **How it works:** The version number is included directly in the API en

In [42]:
# Ask the compiled graph an error-handling question and print the answer from the returned state.
query= "What should I consider for error handling?"
result = graph.invoke({"user_query": query})
print("Response from bot:")
print("\n\n")
print(result['answer'])

✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680677.359517  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680677.360498  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


prompt Answer the question: What should I consider for error handling?
Using these KB snippets:
What are best practices for error handling? [KBKB009]
What are best practices for error handling? [KBKB029]
What are best practices for error handling? [KBKB019]
What are best practices for debugging? [KBKB011]
What are best practices for debugging? [KBKB001].  
Always cite snippets [KBxxx]
 
✅ Pinecone index already exists: self-critique-index


E0000 00:00:1759680684.779536  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1759680684.780332  173915 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


refine_answer When considering error handling, it's crucial to adopt a systematic approach that prioritizes clarity, maintainability, and user experience. Here's a breakdown of key considerations, drawing upon best practices for both error handling and debugging:

**1. Comprehensive Error Detection and Capture:**

*   **Identify Potential Failure Points:** Proactively think about all the places in your code where things could go wrong. This includes external dependencies (APIs, databases, network calls), user input validation, file operations, and complex business logic.
*   **Catch Specific Exceptions:** Instead of a generic `catch (Exception e)`, aim to catch specific exception types. This allows for more targeted and meaningful handling. For example, distinguish between a `FileNotFoundException` and a `NetworkException`.
*   **Log All Errors:** Even if you handle an error gracefully for the user, ensure it's logged for debugging and monitoring purposes. [KBKB009] emphasizes the impo