In [48]:
# --- Imports and Setup ---
import os
import re
import logging
from typing import List, Dict
from langgraph.graph import StateGraph, END
from openai import AzureOpenAI
from dotenv import load_dotenv
import weaviate
from weaviate.classes.init import Auth
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type

load_dotenv()

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

endpoint = "https://swedencentral.api.cognitive.microsoft.com/"
model_name = "gpt-4o"
deployment = "gpt-4o"

subscription_key = os.getenv("AZURE_OPENAI_API_KEY_3")
api_version = "2024-12-01-preview"

llm_client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)

# Weaviate setup
weaviate_url = os.environ["WEAVIATE_URL"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]
collection_name = "Docs"

weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)
collection = weaviate_client.collections.get(collection_name)

# --- Retry Decorators ---
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), retry=retry_if_exception_type(Exception))
def safe_llm(prompt: str, temperature: float = 0) -> str:
    logging.info(f"Calling LLM with prompt: {prompt[:100]}...")
    completion = llm_client.chat.completions.create(
        model=deployment,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=512,
        stop=None
    )
    return completion.choices[0].message.content.strip()

@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), retry=retry_if_exception_type(Exception))
def safe_weaviate_query(query_vector, limit):
    logging.info("Querying Weaviate for KB hits.")
    results = collection.query.near_vector(
        near=query_vector,
        limit=limit,
        return_metadata=["doc_id", "source", "last_updated", "answer_snippet"],
        return_properties=["doc_id", "source", "last_updated", "answer_snippet"],
    )
    return results

# --- Embedding (for retriever/refiner nodes) ---
from openai import OpenAI

embedding_client = OpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    base_url=os.getenv("MODEL_BASE_URL"),
    default_headers={"api-key": os.getenv("AZURE_OPENAI_API_KEY")},
    default_query={"api-version": os.getenv("AZURE_OPENAI_API_VERSION")}
)

def get_embedding(text):
    logging.info(f"Embedding text for retrieval: {text[:80]}")
    response = embedding_client.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding

# --- Nodes ---
def retrieve_kb_node(state: dict) -> dict:
    user_question = state["user_question"]
    try:
        query_vector = get_embedding(user_question)
        results = safe_weaviate_query(query_vector, 5)
        kb_hits = []
        for res in results.objects:
            kb_hits.append({
                "doc_id": res.properties.get("doc_id"),
                "answer_snippet": res.properties.get("answer_snippet", ""),
                "source": res.properties.get("source", ""),
            })
        logging.info(f"KB Hits: {[h['doc_id'] for h in kb_hits]}")
        return {**state, "kb_hits": kb_hits}
    except Exception as e:
        logging.error(f"KB retrieval failed: {e}")
        return {**state, "kb_hits": []}

def generate_answer_node(state: dict) -> dict:
    user_question = state["user_question"]
    kb_hits = state["kb_hits"]
    kb_snippets = "\n".join(
        f"[{hit['doc_id']}] {hit['answer_snippet']}" for hit in kb_hits
    )
    prompt = f"""
You are a software best-practices assistant.
User Question:
{user_question}
Retrieved Snippets:
{kb_snippets}
Task:
Based on these snippets, write a concise answer to the user’s question.
Cite each snippet you use by its doc_id in square brackets (e.g., [KB004]).
Return only the answer text.
"""
    initial_answer = safe_llm(prompt)
    logging.info(f"Initial answer: {initial_answer}")
    return {**state, "initial_answer": initial_answer}

def critique_answer_node(state: dict) -> dict:
    user_question = state["user_question"]
    initial_answer = state["initial_answer"]
    kb_hits = state["kb_hits"]
    kb_snippets = "\n".join(
        f"[{hit['doc_id']}] {hit['answer_snippet']}" for hit in kb_hits
    )
    prompt = f"""
You are a critical QA assistant. The user asked: {user_question}
Initial Answer:
{initial_answer}
KB Snippets:
{kb_snippets}
Task:
Determine if the initial answer fully addresses the question using only these snippets.
- If it does, respond exactly: COMPLETE
- If it misses any point or cites missing info, respond: REFINE: <short list of missing topic keywords>
Return exactly one line.
"""
    critique_result = safe_llm(prompt)
    logging.info(f"Critique result: {critique_result}")
    return {**state, "critique_result": critique_result}

def refine_answer_node(state: dict) -> dict:
    user_question = state["user_question"]
    initial_answer = state["initial_answer"]
    critique_result = state["critique_result"]

    # Extract missing-topic keywords from critique
    match = re.match(r"REFINE:\s*(.*)", critique_result, re.IGNORECASE)
    missing_keywords = match.group(1).strip() if match else ""
    new_query = f"{user_question} and information on {missing_keywords}" if missing_keywords else user_question

    # Retrieve one extra snippet for refinement
    try:
        extra_vector = get_embedding(new_query)
        results = safe_weaviate_query(extra_vector, 1)
        if results.objects:
            extra_hit = results.objects[0]
            extra_hit_doc_id = extra_hit.properties.get("doc_id", "")
            extra_hit_snippet = extra_hit.properties.get("answer_snippet", "")
        else:
            extra_hit_doc_id, extra_hit_snippet = "", ""
    except Exception as e:
        logging.error(f"Refine retrieval failed: {e}")
        extra_hit_doc_id, extra_hit_snippet = "", ""

    prompt = f"""
You are a software best-practices assistant refining your answer. The user asked: {user_question}
Initial Answer:
{initial_answer}
Critique: {critique_result}
Additional Snippet:
[{extra_hit_doc_id}] {extra_hit_snippet}
Task:
Incorporate this snippet into the answer, covering the missing points.
Cite any snippet you use by doc_id in square brackets.
Return only the final refined answer.
"""
    refined_answer = safe_llm(prompt)
    logging.info(f"Refined answer: {refined_answer}")
    return {**state, "refined_answer": refined_answer}

# --- Build the LangGraph graph ---
graph = StateGraph(dict)
graph.add_node("retrieve_kb", retrieve_kb_node)
graph.add_node("generate_answer", generate_answer_node)
graph.add_node("critique_answer", critique_answer_node)
graph.add_node("refine_answer", refine_answer_node)

graph.add_edge("retrieve_kb", "generate_answer")
graph.add_edge("generate_answer", "critique_answer")

def critique_decision(state: dict):
    critique_result = state.get("critique_result", "")
    if critique_result.strip().upper() == "COMPLETE":
        return END
    elif critique_result.strip().upper().startswith("REFINE"):
        return "refine_answer"
    else:
        return END

graph.add_conditional_edges("critique_answer", critique_decision, {"refine_answer", END})
graph.add_edge("refine_answer", END)
graph.set_entry_point("retrieve_kb")

# --- Pipeline Driver Function ---
def agentic_rag_qa(user_question: str):
    initial_state = {
        "user_question": user_question,
        "kb_hits": [],
        "initial_answer": "",
        "critique_result": "",
        "refined_answer": ""
    }
    runnable = graph.compile()
    final_state = runnable.invoke(initial_state)

    # Output diagnostics
    hits = final_state.get("kb_hits", [])
    print(f"KB Hits: {[h['doc_id'] for h in hits]}")
    print(f"Initial Answer: {final_state.get('initial_answer')}")
    print(f"Critique: {final_state.get('critique_result')}")
    if final_state.get("refined_answer"):
        print("Answer was refined.")
        print(f"Refined Answer: {final_state.get('refined_answer')}")
    else:
        print("Answer was not refined.")

    answer = final_state.get("refined_answer") or final_state.get("initial_answer")
    return {"answer": answer}



2025-06-11 10:23:19,418 | INFO | HTTP Request: GET https://blkxb2wttx6xuwtoqqww.c0.asia-southeast1.gcp.weaviate.cloud/v1/meta "HTTP/1.1 200 OK"
2025-06-11 10:23:19,658 | INFO | HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
            Please make sure to close the connection using `client.close()`.
  collection = weaviate_client.collections.get(collection_name)


In [None]:

question = "How should I set up CI/CD pipelines ? "
response = agentic_rag_qa(question)
print(response)

2025-06-11 10:24:18,993 | INFO | Embedding text for retrieval: How should I set up CI/CD pipelines ? 
2025-06-11 10:24:20,055 | INFO | HTTP Request: POST https://eastus.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-02-01 "HTTP/1.1 200 OK"
2025-06-11 10:24:20,056 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:24:22,057 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:24:24,058 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:24:24,059 | ERROR | KB retrieval failed: RetryError[<Future at 0x792ec01b4b90 state=finished raised TypeError>]
2025-06-11 10:24:24,059 | INFO | Calling LLM with prompt: 
You are a software best-practices assistant.
User Question:
How should I set up CI/CD pipelines ? 
...
2025-06-11 10:24:25,999 | INFO | HTTP Request: POST https://swedencentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2025-06-11 10:24:26,002 | INFO | In

KB Hits: []
Initial Answer: To set up CI/CD pipelines, start by defining clear stages such as build, test, and deploy to ensure code quality and reliability [KB001]. Use version control systems like Git to trigger pipeline workflows automatically upon code changes [KB002]. Incorporate automated testing to catch issues early and ensure consistent deployments [KB003]. Secure your pipeline by managing secrets and access controls effectively [KB005]. Finally, monitor and optimize pipeline performance regularly to adapt to evolving project needs [KB006].
Critique: REFINE: pipeline setup steps
Answer was refined.
Refined Answer: To set up CI/CD pipelines, start by defining clear stages such as build, test, and deploy to ensure code quality and reliability [KB001]. Use version control systems like Git to trigger pipeline workflows automatically upon code changes [KB002]. Incorporate automated testing to catch issues early and ensure consistent deployments [KB003]. Secure your pipeline by mana

In [50]:
question = "What are performance tuning steps ? "
response = agentic_rag_qa(question)
print(response)

2025-06-11 10:25:03,093 | INFO | Embedding text for retrieval: What are performance tuning steps ? 
2025-06-11 10:25:04,071 | INFO | HTTP Request: POST https://eastus.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-02-01 "HTTP/1.1 200 OK"
2025-06-11 10:25:04,072 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:06,073 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:08,075 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:08,076 | ERROR | KB retrieval failed: RetryError[<Future at 0x792ec01b3950 state=finished raised TypeError>]
2025-06-11 10:25:08,077 | INFO | Calling LLM with prompt: 
You are a software best-practices assistant.
User Question:
What are performance tuning steps ? 
Re...
2025-06-11 10:25:09,428 | INFO | HTTP Request: POST https://swedencentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2025-06-11 10:25:09,430 | INFO | Init

KB Hits: []
Initial Answer: Performance tuning steps typically include identifying bottlenecks, optimizing code, improving database queries, caching frequently accessed data, and monitoring system performance. Additionally, profiling tools can be used to analyze resource usage and adjust configurations for better efficiency [KB004].
Critique: REFINE: hardware optimization, load balancing
Answer was refined.
Refined Answer: Performance tuning steps typically include identifying bottlenecks, optimizing code, improving database queries, caching frequently accessed data, and monitoring system performance. Additionally, profiling tools can be used to analyze resource usage and adjust configurations for better efficiency [KB004]. Hardware optimization, such as upgrading CPUs, increasing memory, or using faster storage solutions, can also significantly improve performance. Implementing load balancing ensures that workloads are distributed evenly across servers, reducing the risk of overloadin

In [46]:
question = "how do I version my APIs?"
response = agentic_rag_qa(question)
print(response)

2025-06-11 10:20:52,827 | INFO | Embedding text for retrieval: how do I version my APIs?
2025-06-11 10:20:53,926 | INFO | HTTP Request: POST https://eastus.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-02-01 "HTTP/1.1 200 OK"
2025-06-11 10:20:53,927 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:20:55,928 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:20:57,929 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:20:57,930 | ERROR | KB retrieval failed: RetryError[<Future at 0x792ec3ef9a30 state=finished raised TypeError>]
2025-06-11 10:20:57,931 | INFO | Calling LLM with prompt: 
You are a software best-practices assistant.
User Question:
how do I version my APIs?
Retrieved Sni...
2025-06-11 10:21:00,284 | INFO | HTTP Request: POST https://swedencentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2025-06-11 10:21:00,285 | INFO | Initial answer:

KB Hits: []
Initial Answer: To version your APIs, use a clear and consistent strategy such as including the version number in the URL (e.g., `/v1/resource`) or in the request header. Ensure backward compatibility for existing clients and document changes thoroughly. Semantic versioning (e.g., v1.0.0) can help communicate the scope of changes, with major versions indicating breaking changes. Avoid overloading endpoints with multiple versions to maintain simplicity and clarity [KB004], [KB007].
Critique: REFINE: backward compatibility, endpoint overloading
Answer was refined.
Refined Answer: To version your APIs, use a clear and consistent strategy such as including the version number in the URL (e.g., `/v1/resource`) or in the request header. Ensure backward compatibility by maintaining support for older versions as long as feasible, allowing existing clients to continue functioning without disruption. Avoid overloading endpoints with multiple versions by keeping each version isolated a

In [51]:
question = "How do I version my APIs?"
response = agentic_rag_qa(question)
print(response)

2025-06-11 10:25:35,779 | INFO | Embedding text for retrieval: How do I version my APIs?
2025-06-11 10:25:36,782 | INFO | HTTP Request: POST https://eastus.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-02-01 "HTTP/1.1 200 OK"
2025-06-11 10:25:36,783 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:38,784 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:40,785 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:25:40,785 | ERROR | KB retrieval failed: RetryError[<Future at 0x792ec3ef9af0 state=finished raised TypeError>]
2025-06-11 10:25:40,786 | INFO | Calling LLM with prompt: 
You are a software best-practices assistant.
User Question:
How do I version my APIs?
Retrieved Sni...
2025-06-11 10:25:42,746 | INFO | HTTP Request: POST https://swedencentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2025-06-11 10:25:42,748 | INFO | Initial answer:

KB Hits: []
Initial Answer: To version your APIs, use a clear and consistent strategy such as including the version number in the URL (e.g., `/api/v1/resource`) or in the request header. Ensure backward compatibility for existing clients and document changes thoroughly. Semantic versioning (e.g., major.minor.patch) is recommended to indicate the scope of changes. Major versions should be used for breaking changes, minor for new features, and patch for bug fixes. [KB004] [KB007]
Critique: COMPLETE
Answer was not refined.
{'answer': 'To version your APIs, use a clear and consistent strategy such as including the version number in the URL (e.g., `/api/v1/resource`) or in the request header. Ensure backward compatibility for existing clients and document changes thoroughly. Semantic versioning (e.g., major.minor.patch) is recommended to indicate the scope of changes. Major versions should be used for breaking changes, minor for new features, and patch for bug fixes. [KB004] [KB007]'}


In [52]:
question = "What should I consider for error handling?"
response = agentic_rag_qa(question)
print(response)

2025-06-11 10:26:12,033 | INFO | Embedding text for retrieval: What should I consider for error handling?
2025-06-11 10:26:13,025 | INFO | HTTP Request: POST https://eastus.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-02-01 "HTTP/1.1 200 OK"
2025-06-11 10:26:13,027 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:26:15,029 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:26:17,030 | INFO | Querying Weaviate for KB hits.
2025-06-11 10:26:17,030 | ERROR | KB retrieval failed: RetryError[<Future at 0x792eaeb5e5a0 state=finished raised TypeError>]
2025-06-11 10:26:17,033 | INFO | Calling LLM with prompt: 
You are a software best-practices assistant.
User Question:
What should I consider for error handli...
2025-06-11 10:26:19,005 | INFO | HTTP Request: POST https://swedencentral.api.cognitive.microsoft.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-12-01-preview "HTTP/1.1 200 OK"
2025-06-11 10:26:19,006 | INFO 

KB Hits: []
Initial Answer: To ensure effective error handling, consider the following:

1. **Use clear and descriptive error messages** to help users and developers understand the issue and how to resolve it [KB001].
2. **Log errors appropriately** to aid debugging and monitoring, ensuring sensitive information is excluded [KB002].
3. **Implement graceful degradation** to maintain partial functionality when errors occur [KB003].
4. **Avoid swallowing exceptions**; instead, propagate or handle them meaningfully [KB004].
5. **Validate inputs rigorously** to prevent errors caused by invalid data [KB005].
6. **Test error scenarios** thoroughly to ensure robustness [KB006].
Critique: COMPLETE
Answer was not refined.
{'answer': 'To ensure effective error handling, consider the following:\n\n1. **Use clear and descriptive error messages** to help users and developers understand the issue and how to resolve it [KB001].\n2. **Log errors appropriately** to aid debugging and monitoring, ensuring