In [2]:
import json
import os
import warnings
from typing import Annotated, TypedDict, List

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import START, END, StateGraph
from pydantic import BaseModel, Field

print(" All imports successful")

# Read a local .env file (if present) and look up the OpenAI credential
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Stop right away when the key is unset or empty
if openai_api_key is None or openai_api_key == "":
    raise ValueError("OPENAI_API_KEY not found!")

# Chat model shared by the drafting and scoring nodes
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.7,
    api_key=openai_api_key,
)
print(f" LLM initialized: {llm.model_name}")

 All imports successful
 LLM initialized: gpt-4o-mini


In [3]:
# Pydantic models for structured quality scoring
class QualityScore(BaseModel):
    """Quality assessment for a draft with three criteria."""
    # The class docstring and the Field descriptions are surfaced in the
    # model's schema, so their wording is kept stable.

    # Each rating is an integer constrained to the closed range 1..5.
    clarity: int = Field(
        description="How clear and easy to understand is the response? 1=confusing, 5=crystal clear",
        ge=1,
        le=5,
    )
    completeness: int = Field(
        description="Does it cover all aspects of the request? 1=incomplete, 5=comprehensive",
        ge=1,
        le=5,
    )
    accuracy: int = Field(
        description="Is the content factually correct? 1=incorrect, 5=highly accurate",
        ge=1,
        le=5,
    )
    feedback: str = Field(description="Specific feedback on what needs improvement")

    @property
    def average_score(self) -> float:
        """Mean of the clarity, completeness and accuracy ratings."""
        ratings = (self.clarity, self.completeness, self.accuracy)
        return sum(ratings) / 3

    @property
    def all_above_threshold(self) -> bool:
        """True when none of the three ratings is below 4."""
        return min(self.clarity, self.completeness, self.accuracy) >= 4

    def __str__(self) -> str:
        """Render the ratings, their average and the feedback as a multi-line report."""
        verdict = " NEEDS REFINEMENT"
        if self.all_above_threshold:
            verdict = " PASS"
        report_lines = [
            f"Quality Scores: {verdict}",
            f"  Clarity:      {self.clarity}/5",
            f"  Completeness: {self.completeness}/5",
            f"  Accuracy:     {self.accuracy}/5",
            f"  Average:      {self.average_score:.2f}/5",
            f"  Feedback:     {self.feedback}",
        ]
        return "\n".join(report_lines)

# Define state for the graph
class ReflectionState(TypedDict):
    """State for the adaptive reflection process."""
    # Subject supplied by the user; interpolated into the first-draft prompt.
    topic: str
    # Most recent generated text; overwritten every time a draft is (re)generated.
    draft: str
    # Pass counter: the driver seeds it with 1 and the update node adds 1 per loop.
    iteration: int
    # One QualityScore per scoring pass, appended in order (oldest first).
    scores_history: List[QualityScore]
    # Written by the scoring node: True when any criterion scored below 4.
    refinement_needed: bool

# Status banner for this cell, emitted as one call (one item per output line)
print(
    " Pydantic models created",
    "   - QualityScore: Structured scoring model",
    "   - ReflectionState: Graph state management",
    sep="\n",
)

 Pydantic models created
   - QualityScore: Structured scoring model
   - ReflectionState: Graph state management


In [4]:
# Node 1: Initial Draft Generation
def generate_draft(state: ReflectionState) -> ReflectionState:
    """Write the first draft, or rewrite the current one using the latest feedback.

    On iteration 1 the model is prompted with the topic alone. On any other
    iteration it is prompted with the existing draft plus the feedback text of
    the most recent entry in ``scores_history``. The model's reply replaces
    ``state["draft"]`` in place and the same state object is returned.
    """
    if state["iteration"] != 1:
        # Refinement pass: feed the critic's latest feedback back to the writer
        critique = state["scores_history"][-1].feedback
        current_draft = state["draft"]
        prompt = f"""Improve the following draft based on this feedback:

Feedback: {critique}

Original draft:
{current_draft}

Write an improved version that addresses the feedback while maintaining the original structure. Keep it 150-250 words."""
    else:
        # First pass: nothing to refine yet, start from the topic
        topic = state["topic"]
        prompt = f"""You are a professional writer. Write a high-quality response about the following topic:

Topic: {topic}

Write a clear, comprehensive response (150-250 words) that thoroughly addresses the topic."""

    messages = [HumanMessage(content=prompt)]
    state["draft"] = llm.invoke(messages).content
    return state

# Node 2: Quality Scoring (Critic)
def score_draft(state: ReflectionState) -> ReflectionState:
    """Have the LLM critique the current draft and record a structured score.

    The critic is asked to reply with a JSON object. That object is validated
    into a ``QualityScore``, appended to ``state["scores_history"]``, and used
    to set ``state["refinement_needed"]`` (True unless every criterion passes
    the score's threshold check).

    Chat models often wrap JSON in Markdown code fences or surround it with a
    sentence of prose, so the JSON object is extracted from the reply before
    decoding rather than requiring the whole reply to be valid JSON.

    Args:
        state: Current graph state. ``draft`` is read; ``scores_history`` and
            ``refinement_needed`` are updated in place.

    Returns:
        The same ``state`` object after the update.

    Warns:
        UserWarning: when the reply cannot be decoded or validated and the
            neutral 3/3/3 fallback score is recorded instead.
    """
    prompt = f"""You are an expert quality critic. Evaluate this draft on three criteria:
1. Clarity (1-5): How clear and well-written is it?
2. Completeness (1-5): Does it comprehensively cover the topic?
3. Accuracy (1-5): Is the information factually correct?

Draft to evaluate:
{state['draft']}

Respond with JSON containing only these fields:
{{
  "clarity": <int 1-5>,
  "completeness": <int 1-5>,
  "accuracy": <int 1-5>,
  "feedback": "<string with specific improvement suggestions>"
}}"""

    response = llm.invoke([HumanMessage(content=prompt)])

    try:
        score = QualityScore(**_extract_json_object(response.content))
    except (ValueError, TypeError) as exc:
        # ValueError covers malformed JSON and pydantic validation failures
        # (both derive from ValueError); TypeError covers a non-text reply.
        # Anything else is a real bug and is allowed to propagate.
        warnings.warn(
            f"Could not parse critic response ({exc}); using neutral fallback scores"
        )
        score = QualityScore(
            clarity=3,
            completeness=3,
            accuracy=3,
            feedback="Unable to parse response"
        )

    state["scores_history"].append(score)
    state["refinement_needed"] = not score.all_above_threshold

    return state


def _extract_json_object(raw: str) -> dict:
    """Decode the JSON object embedded in ``raw``.

    Everything before the first ``{`` and after the last ``}`` is discarded,
    which tolerates Markdown code fences and prose around the object.

    Raises:
        TypeError: if ``raw`` is not a string.
        ValueError: if no ``{...}`` span exists or the span is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(raw, str):
        raise TypeError(f"expected text from the critic, got {type(raw).__name__}")
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in critic response")
    return json.loads(raw[start:end + 1])

# Node 3: Refinement Decision
def should_refine(state: ReflectionState) -> str:
    """Pick the route to take after a draft has been scored.

    Returns "refine" while the state still asks for refinement and fewer than
    three iterations have run; returns "end" in every other case.
    """
    iteration_cap = 3

    # Nothing left to improve: stop without looking at the counter
    if not state["refinement_needed"]:
        return "end"

    # Refinement wanted: continue only while under the iteration cap
    return "refine" if state["iteration"] < iteration_cap else "end"

# Status banner for this cell, emitted as one call (one item per output line)
print(
    " Graph nodes defined",
    "   - generate_draft: Creates or refines content",
    "   - score_draft: Critic evaluation with scoring",
    "   - should_refine: Decision logic",
    sep="\n",
)

 Graph nodes defined
   - generate_draft: Creates or refines content
   - score_draft: Critic evaluation with scoring
   - should_refine: Decision logic


In [5]:
def update_iteration(state: ReflectionState) -> ReflectionState:
    """Advance the loop counter by one and hand back the same state object."""
    next_iteration = state["iteration"] + 1
    state["iteration"] = next_iteration
    return state

# Build the graph: a generate -> score loop that repeats through "update"
# until should_refine routes to END.
builder = StateGraph(ReflectionState)

# Register the three nodes under the names used by the edges below
builder.add_node("generate", generate_draft)
builder.add_node("score", score_draft)
builder.add_node("update", update_iteration)

# Fixed edges
builder.add_edge(START, "generate")  # Start with draft generation
builder.add_edge("generate", "score")  # Always score after generating

# Conditional edge: should_refine returns "refine" or "end", mapped to a target here
builder.add_conditional_edges(
    "score",
    should_refine,
    {
        "refine": "update",      # If refinement needed, increment and loop
        "end": END               # Otherwise, done
    }
)

# Loop back to generate if refining
builder.add_edge("update", "generate")

# Compile the graph into the runnable used by the test driver
reflection_agent = builder.compile()

print(" Graph built with conditional routing")
# The arrows were mis-encoded ("‚Üí"); restored to the intended "→" character.
print("   Flow: generate → score → [conditional] → update → generate (loop) or END")

 Graph built with conditional routing
   Flow: generate ‚Üí score ‚Üí [conditional] ‚Üí update ‚Üí generate (loop) or END


In [9]:

# Interactive test driver: repeatedly asks for a topic, runs the reflection
# graph on it and prints the final draft, every score and a short summary.
# NOTE: this cell blocks on input(), so "Run All" waits here until 'exit' is entered.
test_count = 0
while True:
    try:
        topic = input("👤 Enter a topic (or 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt: end the session cleanly
        # instead of aborting the cell with a traceback.
        print("\n Test session ended")
        break

    if topic.lower() == "exit":
        print("\n Test session ended")
        break

    if not topic:
        print(" Please enter a topic\n")
        continue

    test_count += 1
    print(f"\n{'='*80}")
    print(f"TEST #{test_count}: {topic.upper()}")
    print(f"{'='*80}\n")

    # Fresh state per topic: iteration starts at 1 so the first pass drafts
    # from the topic rather than refining.
    initial_state: ReflectionState = {
        "topic": topic,
        "draft": "",
        "iteration": 1,
        "scores_history": [],
        "refinement_needed": True
    }

    # Run the reflection loop
    result = reflection_agent.invoke(initial_state)

    # Display results
    print(f"\n{'='*80}")
    print(f" FINAL RESULT (Iteration {result['iteration']})")
    print(f"{'='*80}\n")

    print(" Final Draft:")
    print("-" * 80)
    print(result["draft"])
    print("-" * 80)

    # Display all iterations' scores
    print(f"\n SCORE PROGRESSION ({len(result['scores_history'])} iterations):\n")

    for i, score in enumerate(result["scores_history"], 1):
        print(f"Iteration {i}:")
        print(score)
        print()

    # Summary statistics: compare the first and last scoring passes
    if result["scores_history"]:
        final_score = result["scores_history"][-1]
        avg_scores = [
            result["scores_history"][0].average_score,
            result["scores_history"][-1].average_score
        ]
        improvement = avg_scores[-1] - avg_scores[0]

        print(f"{'='*80}")
        print(" SUMMARY:")
        print(f"  Initial average score: {avg_scores[0]:.2f}/5.0")
        print(f"  Final average score:   {avg_scores[-1]:.2f}/5.0")
        print(f"  Improvement: {improvement:+.2f} points")
        print(f"  Total iterations: {len(result['scores_history'])}")
        print(f"  Status: {' PASSED' if final_score.all_above_threshold else '❌ NEEDS MORE WORK'}")
        print(f"{'='*80}\n")

    print("-" * 80 + "\n")


TEST #1: WHAT IS PYTHON


 FINAL RESULT (Iteration 1)

 Final Draft:
--------------------------------------------------------------------------------
Python is a high-level, interpreted programming language known for its simplicity and readability, making it an excellent choice for both beginners and experienced developers. Created by Guido van Rossum and released in 1991, Python emphasizes code clarity and syntax that allows programmers to express concepts in fewer lines of code compared to languages like C++ or Java.

One of Python's key features is its versatility; it supports multiple programming paradigms, including procedural, object-oriented, and functional programming. This flexibility enables developers to tackle a wide range of applications, from web development and data analysis to artificial intelligence and scientific computing. Python's extensive standard library and a rich ecosystem of third-party packages, such as NumPy for numerical computations and Django for web dev