In [2]:
from typing import TypedDict, Annotated, Optional, List, Dict, Any
import operator
from pathlib import Path
import os
import logging
import json
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END
from datetime import datetime
from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.checkpoint.sqlite import SqliteSaver
import transformers
import torch
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.messages import AIMessage
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.outputs import ChatGenerationChunk, ChatResult

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
)
logger = logging.getLogger(__name__)

def create_llama_pipeline(model_id: str = "meta-llama/Llama-3.1-8B"):
    """Create a HuggingFace pipeline for Llama 3.1 with 8-bit quantization"""
    try:
        logger.info(f"Loading model {model_id} with 8-bit quantization")
        
        # Load model with 8-bit quantization
        model = transformers.AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            load_in_8bit=True,  # Enable 8-bit quantization
            torch_dtype=torch.float16,
        )
        
        tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
        
        # Create the pipeline
        pipeline = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device_map="auto",
            trust_remote_code=True
        )
        
        # Log memory usage
        if torch.cuda.is_available():
            memory_allocated = torch.cuda.memory_allocated(0) / 1024**2
            memory_reserved = torch.cuda.memory_reserved(0) / 1024**2
            logger.info(f"GPU Memory Allocated: {memory_allocated:.2f}MB")
            logger.info(f"GPU Memory Reserved: {memory_reserved:.2f}MB")
        
        # Wrap pipeline in LangChain's HuggingFacePipeline
        llm = HuggingFacePipeline(
            pipeline=pipeline,
            model_kwargs={
                "temperature": 0.7,
                "max_length": 4096,
                "top_p": 0.95,
                "repetition_penalty": 1.1
            }
        )
        
        logger.info("Model loaded successfully with 8-bit quantization")
        return llm
        
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}", exc_info=True)
        raise

class TransformersChatModel(BaseChatModel):
    """Wrapper class to make HuggingFacePipeline work with chat interface"""
    
    def __init__(self, llm_model):
        """Initialize with a HuggingFacePipeline."""
        super().__init__()
        self._llm_model = llm_model

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "transformers_chat_model"

    def _generate(self, messages, stop=None, run_manager=None, **kwargs):
        """Generate response from the model"""
        try:
            # Convert chat messages to prompt string
            prompt = ""
            for message in messages:
                if isinstance(message, SystemMessage):
                    prompt += f"System: {message.content}\n\n"
                elif isinstance(message, HumanMessage):
                    prompt += f"Human: {message.content}\n\n"
                else:
                    prompt += f"{message.content}\n\n"
            
            # Get response from model
            response_text = self._llm_model.invoke(prompt, stop=stop)
            
            # Create generation chunks
            generation = ChatGenerationChunk(
                message=AIMessage(content=response_text),
                generation_info={"finish_reason": "stop"}
            )
            
            # Return a ChatResult
            return ChatResult(generations=[generation])
            
        except Exception as e:
            logger.error(f"Error in model generation: {str(e)}", exc_info=True)
            # Return empty response in case of error
            return ChatResult(generations=[
                ChatGenerationChunk(
                    message=AIMessage(content="Error generating response"),
                    generation_info={"finish_reason": "error"}
                )
            ])

class CodeGenerationState(TypedDict):
    """State management for code generation process"""
    messages: Annotated[list[AnyMessage], operator.add]
    current_code: Optional[str]
    validation_status: Optional[bool]
    error_messages: Optional[list[str]]
    attempt_number: int

class CodeGenerator:
    """Main class for generating, validating, and correcting code"""
    
    def __init__(self, model: TransformersChatModel, checkpointer, base_output_dir: str):
        self.model = model
        self.base_output_dir = base_output_dir
        self.current_attempt = 0
        self.max_attempts = 15
        self.max_tokens = 4000
        
        # Create timestamped output directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.output_dir = os.path.join(base_output_dir, f"generation_{timestamp}")
        os.makedirs(self.output_dir, exist_ok=True)
        
        # Set up logging for this generation
        self.setup_logging()
        
        # Initialize graph
        self.init_graph(checkpointer)

    def setup_logging(self):
        """Set up logging for this generation instance"""
        self.log_file = os.path.join(self.output_dir, "generation.log")
        file_handler = logging.FileHandler(self.log_file)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s'
        ))
        logger.addHandler(file_handler)
        
    def init_graph(self, checkpointer):
        """Initialize the state graph"""
        graph = StateGraph(CodeGenerationState)
        
        # Add nodes
        graph.add_node("developer", self.developer)
        graph.add_node("validator", self.validator)
        graph.add_node("correction", self.correction)
        
        # Add edges
        graph.add_edge("developer", "validator")
        graph.add_conditional_edges(
            "validator",
            lambda state: self.validator(state)["validation_status"],
            {
                True: END,
                False: "correction"
            }
        )
        graph.add_edge("correction", "validator")
        
        # Set entry point
        graph.set_entry_point("developer")
        self.graph = graph.compile(checkpointer=checkpointer)

    def save_code_attempt(self, code: str, attempt_number: int, status: str = "initial") -> str:
        """Save code attempt and return directory path"""
        attempt_dir = os.path.join(self.output_dir, f"attempt_{attempt_number}_{status}")
        os.makedirs(attempt_dir, exist_ok=True)
        
        # Save code
        code_file = os.path.join(attempt_dir, "code.py")
        with open(code_file, 'w') as f:
            f.write(code)
        
        # Save summary
        summary_file = os.path.join(attempt_dir, "summary.txt")
        with open(summary_file, 'w') as f:
            f.write(f"Attempt: {attempt_number}\n")
            f.write(f"Status: {status}\n")
            f.write(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        
        logger.info(f"Saved code attempt {attempt_number} to {code_file}")
        return attempt_dir

    def save_validation_results(self, validation_result: Dict[str, Any], attempt_number: int) -> None:
        """Save validation results"""
        attempt_dir = os.path.join(self.output_dir, f"attempt_{attempt_number}_validation")
        os.makedirs(attempt_dir, exist_ok=True)
        
        # Save JSON results
        validation_file = os.path.join(attempt_dir, "validation.json")
        with open(validation_file, 'w') as f:
            json.dump(validation_result, f, indent=2)
        
        # Save human-readable summary
        summary_file = os.path.join(attempt_dir, "validation_summary.txt")
        with open(summary_file, 'w') as f:
            f.write(f"Validation Results for Attempt {attempt_number}\n")
            f.write("=" * 50 + "\n\n")
            f.write(f"Valid: {validation_result['valid']}\n\n")
            if validation_result.get('errors'):
                f.write("Errors Found:\n")
                for error in validation_result['errors']:
                    f.write(f"- [{error['severity']}] {error['description']}\n")
        
        logger.info(f"Saved validation results to {validation_file}")

    def developer(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Generate initial code or process corrections"""
        try:
            messages = state['messages']
            requirements = json.loads(messages[0].content)
            
            developer_prompt = f"""
Role: Python Developer for Image Processing System
Task: Generate complete Python code for:

Title: {requirements.get('Title')}
Description: {requirements.get('Description')}

Requirements:
{chr(10).join(f'- {criterion}' for criterion in requirements.get('AcceptanceCriteria', []))}

Include:
1. All necessary imports (numpy, cv2, etc.)
2. Complete class implementation with:
   - Configuration management (dataclass)
   - Image processing methods
   - Error handling
   - Type hints
   - Logging
3. Example usage

Return ONLY the complete Python code.
"""
            
            messages = [SystemMessage(content=developer_prompt)]
            
            # Generate code
            try:
                response = self.model._generate(messages)
                if response and response.generations:
                    generated_code = response.generations[0].message.content
                else:
                    raise ValueError("No response generated")
                
                # Save code
                self.current_attempt += 1
                self.save_code_attempt(generated_code, self.current_attempt)
                
                return {
                    'messages': messages,
                    'current_code': generated_code,
                    'validation_status': None,
                    'error_messages': [],
                    'attempt_number': self.current_attempt
                }
                
            except Exception as e:
                logger.error(f"Error generating code: {str(e)}", exc_info=True)
                return {
                    'messages': messages,
                    'error_messages': [str(e)],
                    'attempt_number': self.current_attempt
                }
                
        except Exception as e:
            logger.error(f"Error in developer node: {str(e)}", exc_info=True)
            return {
                'messages': messages if 'messages' in locals() else [],
                'error_messages': [str(e)],
                'attempt_number': self.current_attempt
            }

    def validator(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Validate generated code"""
        try:
            current_code = state.get("current_code", "")
            
            validator_prompt = """
Role: Code Reviewer
Task: Validate the provided Python code.

Check:
1. All imports and classes present
2. Proper error handling
3. Type hints and docstrings
4. Functionality implementation
5. Code quality

Return validation result as JSON:
{
    "valid": boolean,
    "errors": [
        {
            "description": "Issue description",
            "severity": "high|medium|low"
        }
    ]
}
"""
            
            messages = [
                SystemMessage(content=validator_prompt),
                HumanMessage(content=current_code)
            ]
            
            try:
                # Get validation feedback
                response = self.model._generate(messages)
                if response and response.generations:
                    validation_text = response.generations[0].message.content
                    validation_result = json.loads(validation_text)
                else:
                    raise ValueError("No validation response generated")
                
                # Save results
                self.save_validation_results(validation_result, state['attempt_number'])
                self.save_code_attempt(
                    current_code, 
                    state['attempt_number'],
                    f"validated_{'pass' if validation_result['valid'] else 'fail'}"
                )
                
                return {
                    'messages': messages,
                    'current_code': current_code,
                    'validation_status': validation_result['valid'],
                    'error_messages': [e['description'] for e in validation_result.get('errors', [])],
                    'attempt_number': state['attempt_number']
                }
                
            except Exception as e:
                logger.error(f"Error in validation: {str(e)}", exc_info=True)
                return {
                    'messages': messages if 'messages' in locals() else [],
                    'validation_status': False,
                    'error_messages': [str(e)],
                    'attempt_number': state['attempt_number']
                }
                
        except Exception as e:
            logger.error(f"Error in validator node: {str(e)}", exc_info=True)
            return {
                'messages': [],
                'validation_status': False,
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

    def correction(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Correct code based on validation feedback"""
        try:
            if self.current_attempt >= self.max_attempts:
                logger.error("Maximum correction attempts reached")
                return {
                    'messages': [],
                    'validation_status': False,
                    'error_messages': ["Maximum correction attempts reached"],
                    'attempt_number': state['attempt_number']
                }
            
            error_messages = state.get('error_messages', [])
            current_code = state.get('current_code', '')
            
            correction_prompt = f"""
Role: Python Developer
Task: Fix the code. Issues found:

{chr(10).join(f'- {error}' for error in error_messages)}

Important:
1. Keep existing structure
2. Fix all issues
3. Ensure code is complete
4. Add proper documentation

Return the complete corrected code.
"""
            
            messages = [
                SystemMessage(content=correction_prompt),
                HumanMessage(content=current_code)
            ]
            
            try:
                # Generate corrected code
                response = self.model._generate(messages)
                if response and response.generations:
                    corrected_code = response.generations[0].message.content
                else:
                    raise ValueError("No correction response generated")
                
                # Save correction attempt
                self.current_attempt += 1
                self.save_code_attempt(corrected_code, self.current_attempt, "correction")
                
                return {
                    'messages': messages,
                    'current_code': corrected_code,
                    'validation_status': None,
                    'error_messages': [],
                    'attempt_number': self.current_attempt
                }
                
            except Exception as e:
                logger.error(f"Error in correction: {str(e)}", exc_info=True)
                return {
                    'messages': messages if 'messages' in locals() else [],
                    'error_messages': [str(e)],
                    'attempt_number': state['attempt_number']
                }
                
        except Exception as e:
            logger.error(f"Error in correction node: {str(e)}", exc_info=True)
            return {
                'messages': [],
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

def generate_code(requirements: Dict[str, Any], output_dir: str = "code_generation") -> Dict[str, Any]:
    """Main function to generate code based on requirements"""
    
    try:
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)
        
        # Check CUDA availability and print GPU info
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
            logger.info(f"Using GPU: {gpu_name} with {gpu_memory:.2f}GB VRAM")
        else:
            logger.warning("No GPU detected - using CPU only")
        
        # Initialize the Llama pipeline with 8-bit quantization
        logger.info("Initializing Llama pipeline with 8-bit quantization...")
        llm = create_llama_pipeline()
        model = TransformersChatModel(llm_model=llm)
        
        # Create a results summary file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_dir = os.path.join(output_dir, f"run_{timestamp}")
        os.makedirs(results_dir, exist_ok=True)
        
        summary_file = os.path.join(results_dir, "generation_summary.txt")
        
        with open(summary_file, 'w') as f:
            f.write("Code Generation Summary\n")
            f.write("=" * 50 + "\n\n")
            f.write("Requirements:\n")
            f.write(json.dumps(requirements, indent=2) + "\n\n")
        
        # Initialize code generator
        with SqliteSaver.from_conn_string(":memory:") as checkpointer:
            logger.info("Initializing code generator...")
            generator = CodeGenerator(model, checkpointer, results_dir)
            
            # Create initial message with requirements
            user_message = HumanMessage(content=json.dumps(requirements))
            
            # Generate and validate code
            thread = {"configurable": {"thread_id": "1"}}
            final_result = None
            
            logger.info("Starting code generation process...")
            for event in generator.graph.stream({"messages": [user_message]}, thread):
                final_result = event
                
                # Update summary file
                with open(summary_file, 'a') as f:
                    f.write(f"\nAttempt {event['attempt_number']}:\n")
                    if event.get('error_messages'):
                        f.write("Issues found:\n")
                        for error in event['error_messages']:
                            f.write(f"- {error}\n")
                    
                    if event.get('validation_status') == True:
                        f.write("\nSuccessfully generated valid code!\n")
                
                if event.get('validation_status') == True:
                    logger.info(f"Successfully generated valid code on attempt {event['attempt_number']}")
                    
                    # Save final code to root of results directory
                    final_code_path = os.path.join(results_dir, "final_code.py")
                    with open(final_code_path, 'w') as f:
                        f.write(event['current_code'])
                    
                    logger.info(f"Final code saved to: {final_code_path}")
                    break
            
            return {
                'success': final_result.get('validation_status', False),
                'attempts': final_result.get('attempt_number', 0),
                'output_directory': results_dir,
                'final_code_path': final_code_path if final_result.get('validation_status') else None
            }
            
    except Exception as e:
        logger.error(f"Error in code generation: {str(e)}", exc_info=True)
        return {
            'success': False,
            'error': str(e),
            'output_directory': results_dir if 'results_dir' in locals() else None
        }

if __name__ == "__main__":
    # Example requirements
    requirements = {
        "Title": "Image Preprocessing as per Model",
        "Description": "As a user, I want the system to perform image preprocessing (standardizing spatial and bit resolution through resizing and normalization) so that input images are ready for AI model training or inferencing (AI prediction).",
        "AcceptanceCriteria": [
            "Pre-processing should successfully be implemented on images with the following bit representations: 1-bit, 8-bit,16-bit, 24-bit, 32-bit.",
            "The pre-processing system shall be able to resize the input images into the resolution required by the AI model.",
            "The pre-processing system shall be able to normalize the input image data into appropriate pixel datatype required by the AI model.",
            "When pre-processing is successful for the training dataset, the data shall be sent further for batch creation.",
            "When pre-processing is successful for inferencing (prediction) dataset, the data shall be sent for model inferencing."
        ]
    }
    
    # Generate code
    result = generate_code(requirements)
    
    # Print final results
    if result['success']:
        logger.info("Code generation successful!")
        logger.info(f"Output directory: {result['output_directory']}")
        logger.info(f"Final code: {result['final_code_path']}")
    else:
        logger.error("Failed to generate valid code")
        if 'error' in result:
            logger.error(f"Error: {result['error']}")

2025-02-22 13:44:23,113 - INFO - Using GPU: NVIDIA GeForce RTX 4090 with 23.61GB VRAM
2025-02-22 13:44:23,113 - INFO - Initializing Llama pipeline with 8-bit quantization...
2025-02-22 13:44:23,113 - INFO - Loading model meta-llama/Llama-3.1-8B with 8-bit quantization


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
2025-02-22 13:44:23,734 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 4/4 [02:34<00:00, 38.67s/it]
Device set to use cuda:0
2025-02-22 13:46:59,650 - INFO - GPU Memory Allocated: 8665.77MB
2025-02-22 13:46:59,651 - INFO - GPU Memory Reserved: 8718.00MB
  llm = HuggingFacePipeline(
2025-02-22 13:46:59,651 - INFO - Model loaded successfully with 8-bit quantization
2025-02-22 13:46:59,651 - INFO - Initializing code generator...
2025-02-22 13:46:59,652 - INFO - Starting code generation process...
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
2025-02-22 13:47:00,540 - ERROR - Error in

In [None]:
from typing import TypedDict, Annotated, Optional, List, Dict, Any
import operator
from pathlib import Path
import os
import logging
import json
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END
from datetime import datetime
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langgraph.checkpoint.sqlite import SqliteSaver

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

class CodeGenerationState(TypedDict):
    """State management for code generation process"""
    messages: Annotated[list[AnyMessage], operator.add]
    current_code: Optional[str]
    validation_status: Optional[bool]
    error_messages: Optional[list[str]]
    attempt_number: int

class CodeGenerator:
    """Main class for generating, validating, and correcting code"""
    
    def __init__(self, model, checkpointer, base_output_dir: str):
        self.model = model
        self.base_output_dir = base_output_dir
        self.current_attempt = 0
        self.max_attempts = 15
        self.max_tokens = 4000  # Set max tokens for safety
        
        # Create base output directory with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.base_output_dir = os.path.join(base_output_dir, f"generation_{timestamp}")
        os.makedirs(self.base_output_dir, exist_ok=True)
        
        # Initialize graph
        graph = StateGraph(CodeGenerationState)
        
        # Add nodes
        graph.add_node("developer", self.developer)
        graph.add_node("validator", self.validator)
        graph.add_node("correction", self.correction)
        
        # Add edges
        graph.add_edge("developer", "validator")
        graph.add_conditional_edges(
            "validator",
            lambda state: self.validator(state)["validation_status"],
            {
                True: END,
                False: "correction"
            }
        )
        graph.add_edge("correction", "validator")
        
        # Set entry point
        graph.set_entry_point("developer")
        self.graph = graph.compile(checkpointer=checkpointer)

    def save_code_attempt(self, code: str, attempt_number: int, status: str = "initial") -> str:
        """Save code attempt to file and return the directory path"""
        attempt_dir = os.path.join(self.base_output_dir, f"attempt_{attempt_number}_{status}")
        os.makedirs(attempt_dir, exist_ok=True)
        
        code_file = os.path.join(attempt_dir, "code.py")
        with open(code_file, 'w') as f:
            f.write(code)
        
        logger.info(f"Saved code attempt {attempt_number} to {code_file}")
        return attempt_dir

    def save_validation_results(self, validation_result: Dict[str, Any], attempt_number: int) -> None:
        """Save validation results to file"""
        validation_file = os.path.join(
            self.base_output_dir, 
            f"attempt_{attempt_number}_validation.json"
        )
        with open(validation_file, 'w') as f:
            json.dump(validation_result, f, indent=2)
        
        logger.info(f"Saved validation results to {validation_file}")

    def developer(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Generate initial code or process corrections"""
        try:
            messages = state['messages']
            requirements = json.loads(messages[0].content)
            
            developer_prompt = f"""
Role: Python Developer for Image Processing System
Task: Generate complete Python code for:

Title: {requirements.get('Title')}
Description: {requirements.get('Description')}

Requirements:
{chr(10).join(f'- {criterion}' for criterion in requirements.get('AcceptanceCriteria', []))}

Include:
1. All necessary imports (numpy, cv2, etc.)
2. Complete class implementation with:
   - Configuration management (dataclass)
   - Image processing methods
   - Error handling
   - Type hints
   - Logging
3. Example usage

Return ONLY the complete Python code.
"""
            
            messages = [SystemMessage(content=developer_prompt)]
            
            # Generate code
            response = self.model.invoke(messages)
            generated_code = response.content
            
            # Save initial code
            self.current_attempt += 1
            self.save_code_attempt(generated_code, self.current_attempt)
            
            return {
                'messages': messages,
                'current_code': generated_code,
                'validation_status': None,
                'error_messages': [],
                'attempt_number': self.current_attempt
            }
            
        except Exception as e:
            logger.error(f"Error in developer node: {str(e)}")
            return {
                'messages': messages,
                'error_messages': [str(e)],
                'attempt_number': self.current_attempt
            }

    def validator(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Validate generated code"""
        try:
            current_code = state.get("current_code", "")
            
            validator_prompt = """
Role: Code Reviewer
Task: Validate the provided Python code.

Check:
1. All imports and classes present
2. Proper error handling
3. Type hints and docstrings
4. Functionality implementation
5. Code quality

Return validation result as JSON:
{
    "valid": boolean,
    "errors": [
        {
            "description": "Issue description",
            "severity": "high|medium|low"
        }
    ]
}
"""
            
            messages = [
                SystemMessage(content=validator_prompt),
                HumanMessage(content=current_code)
            ]
            
            # Get validation feedback
            response = self.model.invoke(messages)
            validation_result = json.loads(response.content)
            
            # Save validation results
            self.save_validation_results(validation_result, state['attempt_number'])
            self.save_code_attempt(
                current_code, 
                state['attempt_number'],
                f"validated_{'pass' if validation_result['valid'] else 'fail'}"
            )
            
            return {
                'messages': messages,
                'current_code': current_code,
                'validation_status': validation_result['valid'],
                'error_messages': [e['description'] for e in validation_result.get('errors', [])],
                'attempt_number': state['attempt_number']
            }
            
        except Exception as e:
            logger.error(f"Error in validator node: {str(e)}")
            return {
                'messages': [],
                'validation_status': False,
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

    def correction(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Correct code based on validation feedback"""
        try:
            if self.current_attempt >= self.max_attempts:
                logger.error("Maximum correction attempts reached")
                return {
                    'messages': [],
                    'validation_status': False,
                    'error_messages': ["Maximum correction attempts reached"],
                    'attempt_number': state['attempt_number']
                }
            
            error_messages = state.get('error_messages', [])
            current_code = state.get('current_code', '')
            
            correction_prompt = f"""
Role: Python Developer
Task: Fix the code. Issues found:

{chr(10).join(f'- {error}' for error in error_messages)}

Important:
1. Keep existing structure
2. Fix all issues
3. Ensure code is complete
4. Add proper documentation

Return the complete corrected code.
"""
            
            messages = [
                SystemMessage(content=correction_prompt),
                HumanMessage(content=current_code)
            ]
            
            # Generate corrected code
            response = self.model.invoke(messages)
            corrected_code = response.content
            
            # Save correction attempt
            self.current_attempt += 1
            self.save_code_attempt(corrected_code, self.current_attempt, "correction")
            
            return {
                'messages': messages,
                'current_code': corrected_code,
                'validation_status': None,
                'error_messages': [],
                'attempt_number': self.current_attempt
            }
            
        except Exception as e:
            logger.error(f"Error in correction node: {str(e)}")
            return {
                'messages': [],
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

def generate_code(requirements: Dict[str, Any], output_dir: str = "code_generation"):
    """Main function to generate code based on requirements"""
    
    # Initialize the model
    llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        task="text-generation",
        max_new_tokens=4000,
        do_sample=False,
        temperature=0.7
    )
    
    model = ChatHuggingFace(llm=llm, verbose=True)
    
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Initialize code generator
    with SqliteSaver.from_conn_string(":memory:") as checkpointer:
        generator = CodeGenerator(model, checkpointer, output_dir)
        
        # Create initial message with requirements
        user_message = HumanMessage(content=json.dumps(requirements))
        
        # Generate and validate code
        thread = {"configurable": {"thread_id": "1"}}
        final_result = None
        
        for event in generator.graph.stream({"messages": [user_message]}, thread):
            final_result = event
            
            # Log progress
            if event.get('error_messages'):
                logger.info(f"Attempt {event['attempt_number']} - Issues found:")
                for error in event['error_messages']:
                    logger.info(f"- {error}")
            
            if event.get('validation_status') == True:
                logger.info(f"Successfully generated valid code on attempt {event['attempt_number']}")
                break
        
        return final_result

if __name__ == "__main__":
    # Example requirements
    requirements = {
        "Title": "Image Preprocessing as per Model",
        "Description": "As a user, I want the system to perform image preprocessing (standardizing spatial and bit resolution through resizing and normalization) so that input images are ready for AI model training or inferencing (AI prediction).",
        "AcceptanceCriteria": [
            "Pre-processing should successfully be implemented on images with the following bit representations: 1-bit, 8-bit,16-bit, 24-bit, 32-bit.",
            "The pre-processing system shall be able to resize the input images into the resolution required by the AI model.",
            "The pre-processing system shall be able to normalize the input image data into appropriate pixel datatype required by the AI model.",
            "When pre-processing is successful for the training dataset, the data shall be sent further for batch creation.",
            "When pre-processing is successful for inferencing (prediction) dataset, the data shall be sent for model inferencing."
        ]
    }
    
    # Generate code
    result = generate_code(requirements)
    
    if result and result.get('validation_status'):
        logger.info("Code generation successful!")
        logger.info(f"Check the generated code in the output directory")
    else:
        logger.error("Failed to generate valid code after maximum attempts")

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from typing import TypedDict, Annotated, Optional, List, Dict, Any
import operator
from pathlib import Path
import os
import logging
import json
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END
from datetime import datetime
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langgraph.checkpoint.sqlite import SqliteSaver

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

class CodeGenerationState(TypedDict):
    """State management for code generation process"""
    messages: Annotated[list[AnyMessage], operator.add]
    current_code: Optional[str]
    validation_status: Optional[bool]
    error_messages: Optional[list[str]]
    attempt_number: int

class CodeGenerator:
    """Main class for generating, validating, and correcting code"""
    
    def __init__(self, model, checkpointer, base_output_dir: str):
        self.model = model
        self.base_output_dir = base_output_dir
        self.current_attempt = 0
        self.max_attempts = 15
        self.max_tokens = 4000  # Set max tokens for safety
        
        # Create base output directory with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.base_output_dir = os.path.join(base_output_dir, f"generation_{timestamp}")
        os.makedirs(self.base_output_dir, exist_ok=True)
        
        # Initialize graph
        graph = StateGraph(CodeGenerationState)
        
        # Add nodes
        graph.add_node("developer", self.developer)
        graph.add_node("validator", self.validator)
        graph.add_node("correction", self.correction)
        
        # Add edges
        graph.add_edge("developer", "validator")
        graph.add_conditional_edges(
            "validator",
            lambda state: self.validator(state)["validation_status"],
            {
                True: END,
                False: "correction"
            }
        )
        graph.add_edge("correction", "validator")
        
        # Set entry point
        graph.set_entry_point("developer")
        self.graph = graph.compile(checkpointer=checkpointer)

    def save_code_attempt(self, code: str, attempt_number: int, status: str = "initial") -> str:
        """Save code attempt to file and return the directory path"""
        attempt_dir = os.path.join(self.base_output_dir, f"attempt_{attempt_number}_{status}")
        os.makedirs(attempt_dir, exist_ok=True)
        
        code_file = os.path.join(attempt_dir, "code.py")
        with open(code_file, 'w') as f:
            f.write(code)
        
        logger.info(f"Saved code attempt {attempt_number} to {code_file}")
        return attempt_dir

    def save_validation_results(self, validation_result: Dict[str, Any], attempt_number: int) -> None:
        """Save validation results to file"""
        validation_file = os.path.join(
            self.base_output_dir, 
            f"attempt_{attempt_number}_validation.json"
        )
        with open(validation_file, 'w') as f:
            json.dump(validation_result, f, indent=2)
        
        logger.info(f"Saved validation results to {validation_file}")

    def developer(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Generate initial code or process corrections"""
        try:
            messages = state['messages']
            requirements = json.loads(messages[0].content)
            
            developer_prompt = f"""
Role: Python Developer for Image Processing System
Task: Generate complete Python code for:

Title: {requirements.get('Title')}
Description: {requirements.get('Description')}

Requirements:
{chr(10).join(f'- {criterion}' for criterion in requirements.get('AcceptanceCriteria', []))}

Include:
1. All necessary imports (numpy, cv2, etc.)
2. Complete class implementation with:
   - Configuration management (dataclass)
   - Image processing methods
   - Error handling
   - Type hints
   - Logging
3. Example usage

Return ONLY the complete Python code.
"""
            
            messages = [SystemMessage(content=developer_prompt)]
            
            # Generate code
            response = self.model.invoke(messages)
            generated_code = response.content
            
            # Save initial code
            self.current_attempt += 1
            self.save_code_attempt(generated_code, self.current_attempt)
            
            return {
                'messages': messages,
                'current_code': generated_code,
                'validation_status': None,
                'error_messages': [],
                'attempt_number': self.current_attempt
            }
            
        except Exception as e:
            logger.error(f"Error in developer node: {str(e)}")
            return {
                'messages': messages,
                'error_messages': [str(e)],
                'attempt_number': self.current_attempt
            }

    def validator(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Validate generated code"""
        try:
            current_code = state.get("current_code", "")
            
            validator_prompt = """
Role: Code Reviewer
Task: Validate the provided Python code.

Check:
1. All imports and classes present
2. Proper error handling
3. Type hints and docstrings
4. Functionality implementation
5. Code quality

Return validation result as JSON:
{
    "valid": boolean,
    "errors": [
        {
            "description": "Issue description",
            "severity": "high|medium|low"
        }
    ]
}
"""
            
            messages = [
                SystemMessage(content=validator_prompt),
                HumanMessage(content=current_code)
            ]
            
            # Get validation feedback
            response = self.model.invoke(messages)
            validation_result = json.loads(response.content)
            
            # Save validation results
            self.save_validation_results(validation_result, state['attempt_number'])
            self.save_code_attempt(
                current_code, 
                state['attempt_number'],
                f"validated_{'pass' if validation_result['valid'] else 'fail'}"
            )
            
            return {
                'messages': messages,
                'current_code': current_code,
                'validation_status': validation_result['valid'],
                'error_messages': [e['description'] for e in validation_result.get('errors', [])],
                'attempt_number': state['attempt_number']
            }
            
        except Exception as e:
            logger.error(f"Error in validator node: {str(e)}")
            return {
                'messages': [],
                'validation_status': False,
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

    def correction(self, state: CodeGenerationState) -> Dict[str, Any]:
        """Correct code based on validation feedback"""
        try:
            if self.current_attempt >= self.max_attempts:
                logger.error("Maximum correction attempts reached")
                return {
                    'messages': [],
                    'validation_status': False,
                    'error_messages': ["Maximum correction attempts reached"],
                    'attempt_number': state['attempt_number']
                }
            
            error_messages = state.get('error_messages', [])
            current_code = state.get('current_code', '')
            
            correction_prompt = f"""
Role: Python Developer
Task: Fix the code. Issues found:

{chr(10).join(f'- {error}' for error in error_messages)}

Important:
1. Keep existing structure
2. Fix all issues
3. Ensure code is complete
4. Add proper documentation

Return the complete corrected code.
"""
            
            messages = [
                SystemMessage(content=correction_prompt),
                HumanMessage(content=current_code)
            ]
            
            # Generate corrected code
            response = self.model.invoke(messages)
            corrected_code = response.content
            
            # Save correction attempt
            self.current_attempt += 1
            self.save_code_attempt(corrected_code, self.current_attempt, "correction")
            
            return {
                'messages': messages,
                'current_code': corrected_code,
                'validation_status': None,
                'error_messages': [],
                'attempt_number': self.current_attempt
            }
            
        except Exception as e:
            logger.error(f"Error in correction node: {str(e)}")
            return {
                'messages': [],
                'error_messages': [str(e)],
                'attempt_number': state['attempt_number']
            }

def generate_code(requirements: Dict[str, Any], output_dir: str = "code_generation"):
    """Main function to generate code based on requirements"""
    
    # Initialize the model
    llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        task="text-generation",
        max_new_tokens=4000,
        do_sample=False,
        temperature=0.7
    )
    
    model = ChatHuggingFace(llm=llm, verbose=True)
    
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Initialize code generator
    with SqliteSaver.from_conn_string(":memory:") as checkpointer:
        generator = CodeGenerator(model, checkpointer, output_dir)
        
        # Create initial message with requirements
        user_message = HumanMessage(content=json.dumps(requirements))
        
        # Generate and validate code
        thread = {"configurable": {"thread_id": "1"}}
        final_result = None
        
        for event in generator.graph.stream({"messages": [user_message]}, thread):
            final_result = event
            
            # Log progress
            if event.get('error_messages'):
                logger.info(f"Attempt {event['attempt_number']} - Issues found:")
                for error in event['error_messages']:
                    logger.info(f"- {error}")
            
            if event.get('validation_status') == True:
                logger.info(f"Successfully generated valid code on attempt {event['attempt_number']}")
                break
        
        return final_result

if __name__ == "__main__":
    # Example requirements
    requirements = {
        "Title": "Image Preprocessing as per Model",
        "Description": "As a user, I want the system to perform image preprocessing (standardizing spatial and bit resolution through resizing and normalization) so that input images are ready for AI model training or inferencing (AI prediction).",
        "AcceptanceCriteria": [
            "Pre-processing should successfully be implemented on images with the following bit representations: 1-bit, 8-bit,16-bit, 24-bit, 32-bit.",
            "The pre-processing system shall be able to resize the input images into the resolution required by the AI model.",
            "The pre-processing system shall be able to normalize the input image data into appropriate pixel datatype required by the AI model.",
            "When pre-processing is successful for the training dataset, the data shall be sent further for batch creation.",
            "When pre-processing is successful for inferencing (prediction) dataset, the data shall be sent for model inferencing."
        ]
    }
    
    # Generate code
    result = generate_code(requirements)
    
    if result and result.get('validation_status'):
        logger.info("Code generation successful!")
        logger.info(f"Check the generated code in the output directory")
    else:
        logger.error("Failed to generate valid code after maximum attempts")

  from .autonotebook import tqdm as notebook_tqdm
2025-02-22 13:40:59,313 - INFO - Saved code attempt 1 to code_generation/generation_20250222_134057/attempt_1_initial/code.py
2025-02-22 13:41:03,730 - ERROR - Error in validator node: Expecting value: line 1 column 1 (char 0)
2025-02-22 13:41:03,965 - ERROR - Error in validator node: Expecting value: line 1 column 1 (char 0)
2025-02-22 13:41:06,687 - INFO - Saved code attempt 2 to code_generation/generation_20250222_134057/attempt_2_correction/code.py
2025-02-22 13:41:07,040 - ERROR - Error in validator node: Expecting value: line 1 column 1 (char 0)
2025-02-22 13:41:07,274 - ERROR - Error in validator node: Expecting value: line 1 column 1 (char 0)
2025-02-22 13:41:07,580 - INFO - Saved code attempt 3 to code_generation/generation_20250222_134057/attempt_3_correction/code.py
2025-02-22 13:41:07,959 - ERROR - Error in validator node: Expecting value: line 1 column 1 (char 0)
2025-02-22 13:41:08,194 - ERROR - Error in validator node: Ex

GraphRecursionError: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT