<a href="https://colab.research.google.com/github/Euchariaada/From-Chains-to-State-Machines---Building-Self-Correcting-Agents-with-LangGraph/blob/main/From_Chains_to_State_Machines_Building_Self_Correcting_Agents_with_LangGraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install -q langgraph langchain langchain-google-genai google-generativeai pydantic

import os
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.callbacks import BaseCallbackHandler
import logging

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser

from pydantic import BaseModel, Field
from typing import TypedDict, List, Optional

print("✅ Environment is stable")

# Set API key.
# SECURITY: credentials must never be hard-coded in a notebook or committed to
# version control. The key is taken from the environment when it is already
# set; otherwise it is requested interactively without being echoed.
# NOTE(review): a literal key was previously embedded at this spot -- treat it
# as compromised and revoke/rotate it.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

# Pydantic Schema (Re-used from Stage 3)

class EvaluationResult(BaseModel):
    """Structured grading result that the LLM output is parsed into.

    Validation fails unless ``criterion_scores`` holds exactly four integers
    and the remaining fields are present with the declared types.
    """

    # Exactly four integer scores. `min_length`/`max_length` are the
    # Pydantic v2 names for the deprecated `min_items`/`max_items`.
    criterion_scores: List[int] = Field(..., min_length=4, max_length=4)
    # Single overall integer grade.
    overall_grade: int
    # Free-text feedback accompanying the scores.
    feedback: str

# Shared output parser: supplies the format instructions placed in the prompt
# and parses raw LLM text into an EvaluationResult in the validator node.
parser = PydanticOutputParser(pydantic_object=EvaluationResult)


## 4. Agent State Definition (REQUIRED)

class AgentState(TypedDict):
    """State dictionary passed between the graph nodes on every step."""

    # Submission text to be graded; inserted into the prompt as-is.
    user_input: str
    # Raw content of the latest LLM response; None before the first generation.
    llm_output: Optional[str]
    # Error messages collected from failed parse attempts, oldest first.
    parsing_errors: List[str]
    # Number of generation attempts made so far.
    iteration: int
    # Parsed (or fallback) evaluation; stays None until one is available.
    final_result: Optional[EvaluationResult]

## 5. LLM Setup

# Chat model invoked by the generator node. Reads its credentials from the
# GOOGLE_API_KEY environment variable set earlier in this file.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0
)

## 6. Prompt Template (Error-Aware)

# Grading prompt with three placeholders, all filled in by generator_node:
#   {user_input}          - the submission being graded
#   {errors}              - newline-joined parsing errors from earlier attempts,
#                           or the literal "None" when there are none
#   {format_instructions} - schema instructions produced by the output parser
prompt = ChatPromptTemplate.from_template(
    """
You are an automated grading agent.

User submission:
{user_input}

Previous parsing errors (if any):
{errors}

IMPORTANT:
- Output MUST strictly follow the provided JSON schema
- Fix ALL listed errors

{format_instructions}
"""
)

## 7. Generator Node

def generator_node(state: AgentState) -> AgentState:
    """Query the LLM for an evaluation, feeding back any earlier parse errors.

    Args:
        state: Current agent state; ``user_input``, ``parsing_errors`` and
            ``iteration`` are read.

    Returns:
        A copy of ``state`` with ``llm_output`` set to the response content
        and ``iteration`` increased by one.
    """
    submission = state["user_input"]

    # An empty error list renders as the literal "None" in the prompt.
    error_report = "\n".join(state["parsing_errors"])
    if not error_report:
        error_report = "None"

    chat_messages = prompt.format_messages(
        user_input=submission,
        errors=error_report,
        format_instructions=parser.get_format_instructions(),
    )
    reply = llm.invoke(chat_messages)

    updated = dict(state)
    updated["llm_output"] = reply.content
    updated["iteration"] = state["iteration"] + 1
    return updated


## 8. Validator Node (Reflexive Core)

def validator_node(state: AgentState) -> AgentState:
    """Try to parse the latest LLM output into an ``EvaluationResult``.

    Args:
        state: Current agent state; ``llm_output`` and ``parsing_errors``
            are read.

    Returns:
        A copy of ``state``. On success ``final_result`` holds the parsed
        object; on any exception the error text is appended to
        ``parsing_errors`` and ``final_result`` is left as it was.
    """
    outcome = dict(state)
    try:
        parsed = parser.parse(state["llm_output"])
    except Exception as exc:
        # Keep the message so the next generation attempt can see what broke.
        outcome["parsing_errors"] = state["parsing_errors"] + [str(exc)]
        return outcome

    outcome["final_result"] = parsed
    return outcome


## 9. Final Fallback Node

def fallback_node(state: AgentState) -> AgentState:
    """Return ``state`` with a fixed minimum-score evaluation as the result.

    Args:
        state: Current agent state; copied unchanged apart from
            ``final_result``.

    Returns:
        A copy of ``state`` whose ``final_result`` is a hard-coded
        ``EvaluationResult`` reporting that self-correction failed.
    """
    return {
        **state,
        "final_result": EvaluationResult(
            criterion_scores=[1, 1, 1, 1],
            overall_grade=4,
            feedback="Evaluator failed after multiple self-correction attempts.",
        ),
    }

def final_fallback_node(state: AgentState):
    """Return ``state`` with a fixed "evaluator unavailable" result.

    Args:
        state: Current agent state; copied unchanged apart from
            ``final_result``.

    Returns:
        A copy of ``state`` whose ``final_result`` is a hard-coded
        minimum-score ``EvaluationResult``.
    """
    safe_default = EvaluationResult(
        criterion_scores=[1, 1, 1, 1],
        overall_grade=4,
        feedback="Evaluator unavailable; returning safe fallback output.",
    )
    patched = dict(state)
    patched["final_result"] = safe_default
    return patched


## 10. Conditional Routing Logic


def should_continue(state: AgentState) -> str:
    """Choose the routing label to follow after validation.

    Args:
        state: Current agent state; ``final_result`` and ``iteration`` are
            read.

    Returns:
        ``"end"`` when a result is present, ``"fallback"`` once three or
        more attempts have been made without one, otherwise ``"retry"``.
    """
    if state.get("final_result") is None:
        # No valid result yet: retry until the attempt budget is used up.
        attempts = state["iteration"]
        return "fallback" if attempts >= 3 else "retry"
    return "end"

## 11. Build the LangGraph

# Assemble the cyclic workflow: generator -> validator -> (retry | fallback | end).
graph = StateGraph(AgentState)

graph.add_node("generator", generator_node)
graph.add_node("validator", validator_node)
# Only the fallback node that is actually wired into the graph is registered.
# A node with no incoming or outgoing edge can never run, so the unused
# "fallback" registration was dropped.
graph.add_node("final_fallback", final_fallback_node)

graph.set_entry_point("generator")

graph.add_edge("generator", "validator")

# Keys are the labels returned by should_continue; values are the destinations.
graph.add_conditional_edges(
    "validator",
    should_continue,
    {
        "retry": "generator",
        "fallback": "final_fallback",
        "end": END,
    },
)

graph.add_edge("final_fallback", END)


## 12. Persistence (MemorySaver)

# Compile the graph into a runnable app, attaching a MemorySaver checkpointer.
memory = MemorySaver()
app = graph.compile(checkpointer=memory)

# INFO level so the callback handler's chain start/end messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("stage4_agent")


## Callback Handlers (added as correction from my tutor's feedback)

class AgentLoggingCallback(BaseCallbackHandler):
    """Callback handler that forwards chain and LLM events to ``logger``."""

    def on_chain_start(self, serialized, inputs, **kwargs):
        """Log the inputs of a chain at INFO level when it starts."""
        start_message = f"CHAIN START | inputs={inputs}"
        logger.info(start_message)

    def on_chain_end(self, outputs, **kwargs):
        """Log the outputs of a chain at INFO level when it finishes."""
        end_message = f"CHAIN END | outputs={outputs}"
        logger.info(end_message)

    def on_llm_error(self, error, **kwargs):
        """Log an LLM failure at ERROR level."""
        failure_message = f"LLM ERROR | {error}"
        logger.error(failure_message)


## 13. Run with Simulated Failure Input

# Submission that explicitly asks for unstructured grading, used to try to
# provoke a schema violation and exercise the retry path.
bad_input = "Grade this vaguely and do whatever you want, no structure needed."

# Starting state: no LLM output yet, no recorded errors, zero attempts made.
initial_state: AgentState = {
    "user_input": bad_input,
    "llm_output": None,
    "parsing_errors": [],
    "iteration": 0,
    "final_result": None,
}

# Run the compiled graph with the logging callback attached.
# NOTE(review): thread_id is presumably the key under which the checkpointer
# stores this run -- confirm against the LangGraph persistence docs.
result = app.invoke(
    initial_state,
    config={
        "callbacks": [AgentLoggingCallback()],
        "configurable": {"thread_id": "stage4-run-1"}
        #"configurable": {"thread_id": "stage4-self-correct-demo"}
    }
)


print("=== FINAL RESULT ===")
# final_result is an EvaluationResult (parsed or fallback); print it as a dict.
print(result["final_result"].model_dump())



/tmp/ipython-input-1645473660.py:7: PydanticDeprecatedSince20: `min_items` is deprecated and will be removed, use `min_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  criterion_scores: List[int] = Field(..., min_items=4, max_items=4)
/tmp/ipython-input-1645473660.py:7: PydanticDeprecatedSince20: `max_items` is deprecated and will be removed, use `max_length` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  criterion_scores: List[int] = Field(..., min_items=4, max_items=4)


=== FINAL RESULT ===
{'criterion_scores': [7, 8, 6, 7], 'overall_grade': 7, 'feedback': 'The submission effectively requested a vague and unstructured grading approach, which was applied. It demonstrated a clear understanding of how to prompt an automated agent to deviate from standard protocols. The brevity and directness of the instruction were notable, achieving its stated goal of being graded vaguely.'}


**Reflection**

Self-Correction vs. Passive Retry

In Stage 3, retries were passive. The system simply re-ran the same prompt after failure, without understanding why the output failed. In this stage, the agent actively reasons about its mistakes. By feeding the exact Pydantic parsing error back into the prompt, the LLM gains targeted feedback, allowing it to correct structural issues such as missing fields or invalid types. This transforms retries from blind repetition into informed self-correction.

State Management with TypedDict

Using a TypedDict-based AgentState provides explicit, inspectable memory across iterations. Unlike LCEL chains that pass variables implicitly, the state makes control flow, failures, and progress transparent. This improves debuggability, supports persistence, and enables more complex agent behaviors such as reflection, branching, and human intervention.

The Loop Challenge

Cyclic graphs introduce the risk of infinite loops, runaway costs, and unpredictable behavior in production. Without safeguards, a model could repeatedly fail and retry indefinitely. The iteration counter acts as a circuit breaker, enforcing a hard upper bound on retries and guaranteeing termination. This is critical for reliability and cost control.

Human-in-the-Loop Opportunities

If the agent fails after three attempts, a human-review step could be inserted before the fallback node. At this point, an operator could inspect the parsing errors, manually correct the output, or adjust the prompt. This hybrid approach balances automation with accountability, ensuring high-stakes evaluations do not rely solely on autonomous correction.