In [31]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage

In [32]:
# Load variables from a local .env file into the process environment before any
# client is constructed. Presumably this is where the OpenAI API key comes from
# (nothing in this notebook passes a key explicitly) -- TODO confirm.
load_dotenv()

True

In [33]:
# Shared chat model used both as the generator and as the reviewer in the
# reflection loop below.
# NOTE(review): frequency_penalty=1.2 is a strong penalty on repeated tokens;
# source code naturally repeats identifiers and indentation, so this may
# degrade generated code -- confirm this value is intentional.
# NOTE(review): temperature and top_p are both set; OpenAI's guidance is
# generally to tune one or the other -- confirm.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    top_p=0.75,
    frequency_penalty=1.2,
)

In [34]:
# Task description sent to the LLM as the first human message of the
# reflection loop, and repeated to the reviewer alongside each draft.
# The text is consumed verbatim at runtime, so it is kept unchanged.
task_prompt = """
Task: Provide step-by-step instructions for creating a Python program that calculates and outputs the factorial of a number n.

Requirements:
- The program should accept a non-negative integer n as input
- It should calculate the factorial of n (n! = n × (n-1) × (n-2) × ... × 1)
- The factorial of 0 is defined as 1 (0! = 1)
- The program should handle edge cases appropriately
- The output should be the calculated factorial value

Important Information:
- Factorial definition: The product of all positive integers less than or equal to n
- Example: 5! = 5 × 4 × 3 × 2 × 1 = 120
- Base case: 0! = 1 and 1! = 1
- The input should be validated (only non-negative integers are valid)
- Consider both iterative and recursive approaches

Expected Output Format:
Please provide clear, numbered step-by-step instructions that explain:
1. How to structure the program
2. How to handle input
3. How to implement the factorial calculation logic
4. How to handle edge cases and validation
5. How to display the output

The instructions should be detailed enough for a beginner programmer to follow.
"""

In [35]:
#The reflection loop
def run_reflection_loop(max_iterations=3):
    """Generate a response to ``task_prompt`` and refine it through LLM self-critique.

    Each iteration has two phases:

    1. Generate: ``llm`` is invoked with the running conversation
       (``message_history``). On the first pass this is just the task; on later
       passes it also holds every earlier draft, the critique of that draft and
       a request to refine.
    2. Reflect: ``llm`` is invoked again with a reviewer system prompt plus the
       original task and the newest draft. If the review starts with the
       sentinel ``CODE_IS_PERFECT`` the loop stops; otherwise the critique is
       added to the conversation for the next pass.

    Progress, critiques and the final draft are printed to stdout.

    Args:
        max_iterations: Upper bound on generate/reflect rounds. Defaults to 3.

    Returns:
        None. The final draft is printed rather than returned.
    """
    # Must match the sentinel spelled out in the reviewer prompt below.
    stop_token = "CODE_IS_PERFECT"
    current_code = ""
    message_history = [HumanMessage(content=task_prompt)]

    # The reviewer instructions never change between iterations, so build the
    # message once instead of on every pass.
    reviewer_instructions = SystemMessage(content = """
                        You are an expert code reviewer and quality assurance specialist. Your role is to critically analyze Python code against the original task requirements.
                            Your responsibilities:
                            1. Verify that the code fully satisfies all stated requirements
                            2. Check for correctness and logical errors
                            3. Identify edge cases that may not be handled properly
                            4. Evaluate code quality, readability, and best practices
                            5. Check for potential bugs, security issues, or performance problems
                            6. Assess whether error handling and input validation are adequate

                            When reviewing code, provide:
                            - A clear assessment of whether the code meets the task requirements
                            - Specific issues found (if any), with line references
                            - Suggestions for improvements or fixes
                            - Praise for well-implemented aspects
                            - A severity rating for each issue (Critical, Important, Minor)

                            Be thorough, objective, and constructive in your feedback. Focus on:
                            - Functional correctness
                            - Edge case handling
                            - Code clarity and maintainability
                            - Adherence to Python best practices
                            - Input validation and error handling

                            Format your response as a structured review with sections for:
                            1. Overall Assessment (Pass/Needs Revision)
                            2. Issues Found (if any)
                            3. Strengths
                            4. Recommendations

                            STOPPING CONDITION:
                            If the code is perfect and meets all requirements with no issues, start your response with exactly: CODE_IS_PERFECT
                            This signals that no further iterations are needed.
                        """)

    for i in range(max_iterations):
        print(f"Iteration number -  {i+1}")

        #Generate
        is_first_pass = i == 0
        if is_first_pass:
            print("Inital generation")
        else:
            print("Refining")
            message_history.append(HumanMessage(content="Please refine the code using the critiques provided."))

        response = llm.invoke(message_history)
        current_code = response.content
        if not is_first_pass:
            print(f"Code generated for iteration number {i+1}")

        # Record every draft, including the very first one. Without this the
        # first refinement request would contain a critique of a draft the
        # model cannot see in its context.
        message_history.append(response)

        #Reflection
        print("Reflection")
        reflector_prompt = [
            reviewer_instructions,
            HumanMessage(content= f"Original task {task_prompt}\n Code to reivew : {current_code}"),
        ]
        critique_text = llm.invoke(reflector_prompt).content

        # The reviewer is told to *start* its reply with the sentinel, so test
        # the prefix. A plain substring test would also fire when a critique
        # merely mentions the token (e.g. "this is not yet CODE_IS_PERFECT").
        if critique_text.lstrip().startswith(stop_token):
            print("No further changes needed. Code is perfect.")
            break

        print(f"Critique found, adding it to history - {critique_text} ")
        message_history.append(HumanMessage(content=  f"Critique of the previous code - {critique_text} "))

    print("Final result - ")
    print(current_code)
        

In [36]:
# Run the generate -> critique -> refine loop; progress and the final draft are
# printed below. Each run makes live LLM calls.
run_reflection_loop()

Iteration number -  1
Inital generation
Reflection
Critique found, adding it to history - ### Overall Assessment
Needs Revision

### Issues Found
1. **Indentation Errors**: 
   - The indentation in the final code example is inconsistent, particularly in the `main()` function and error handling sections (lines 38-50). This will lead to a `SyntaxError` when executed.
   - Severity: Critical

2. **Incorrect Output Message**:
   - The output message states "Please enter only positive integers." However, it should state "Please enter a non-negative integer." to align with the task requirements (line 30).
   - Severity: Minor

3. **Uncommented Recursive Function**:
   - The recursive function is commented out without any explanation on how to switch between iterative and recursive methods effectively. It would be better if there was guidance on how users can choose which method they want to use.
   - Severity: Minor

4. **Redundant Code for Edge Case Handling**:
   - In both factorial functi