In [7]:
import getpass
import json
import os
import re
import subprocess
import sys
import tempfile

from langchain.agents import Tool
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

# Set up Gemini API key
# Resolution order: existing GOOGLE_API_KEY -> Colab secret -> interactive prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    try:
        from google.colab import userdata
        os.environ["GOOGLE_API_KEY"] = userdata.get('GEMINI_API_KEY')  # Update secret name
    except Exception:
        # Reached when google.colab cannot be imported, when the secret lookup
        # raises, or when it returns a non-string (os.environ rejects it with
        # TypeError). `except Exception` instead of a bare `except` lets
        # KeyboardInterrupt / SystemExit propagate instead of triggering a prompt.
        os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter Google API Key: ")

class PlannerAgent:
    """Ask the Gemini model to break a task into dependency-annotated subtasks."""

    def __init__(self):
        # Initialize Gemini model
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash-latest",
            temperature=0.3,
            convert_system_message_to_human=True
        )

        self.system_prompt = """
        As an expert solution architect, decompose complex problems into executable sub-tasks.
        Use this JSON structure:
        {
            "subtasks": [
                {
                    "id": <unique integer>,
                    "desc": "<clear description>",
                    "dependencies": [<list of prerequisite task IDs>]
                }
            ]
        }
        """

    def decompose(self, task):
        """Return a plan dict ``{"subtasks": [...]}`` for *task*.

        The model reply is parsed and validated by ``_parse_plan``. This method
        never raises: any failure (model call, unparseable reply, invalid plan
        structure) is reported with ``print`` and the static plan from
        ``_fallback_plan`` is returned instead.
        """
        try:
            # Create message sequence
            messages = [
                SystemMessage(content=self.system_prompt),
                HumanMessage(content=f"TASK: {task}")
            ]

            # Get structured JSON response
            llm_response = self.llm.invoke(messages)

            return self._parse_plan(llm_response.content)

        except json.JSONDecodeError:
            # Must come before the generic handler: JSONDecodeError is a
            # ValueError subclass and gets its own message.
            print("Could not recover valid JSON. Using fallback plan.")
            return self._fallback_plan(task)

        except Exception as e:
            print(f"Decomposition error: {str(e)}")
            return self._fallback_plan(task)

    @staticmethod
    def _parse_plan(response_content):
        """Parse and validate a plan from the raw model reply.

        Accepts bare JSON, JSON inside a ```json fence, or JSON embedded in
        surrounding prose (recovered via the outermost ``{...}`` span). The
        recovered plan goes through the same validation as a directly parsed
        one, so callers never receive a plan without a ``subtasks`` list.

        Raises:
            json.JSONDecodeError: no parseable JSON could be found.
            ValueError: the JSON does not have the expected plan structure.
        """
        # Handle potential formatting variations
        fenced = re.search(r'```json(.*?)```', response_content, re.DOTALL)
        if fenced:
            response_content = fenced.group(1).strip()

        try:
            plan = json.loads(response_content)
        except json.JSONDecodeError:
            print("Failed to parse JSON response. Attempting recovery...")
            # Fallback: Extract JSON from text
            json_match = re.search(r'\{.*\}', response_content, re.DOTALL)
            if json_match is None:
                raise
            plan = json.loads(json_match.group())

        # Validate structure
        if not isinstance(plan, dict) or not isinstance(plan.get("subtasks"), list):
            raise ValueError("Invalid response format: Missing 'subtasks' list")

        for subtask in plan["subtasks"]:
            if not isinstance(subtask, dict) or "id" not in subtask or "desc" not in subtask:
                raise ValueError("Subtask missing required fields: 'id' or 'desc'")

        return plan

    def _fallback_plan(self, task):
        """Fallback plan for error recovery: a fixed four-step linear plan."""
        return {
            "subtasks": [
                {
                    "id": 1,
                    "desc": f"Analyze requirements: {task}",
                    "dependencies": []
                },
                {
                    "id": 2,
                    "desc": f"Design solution architecture for {task}",
                    "dependencies": [1]
                },
                {
                    "id": 3,
                    "desc": f"Implement core functionality for {task}",
                    "dependencies": [2]
                },
                {
                    "id": 4,
                    "desc": f"Test and validate solution for {task}",
                    "dependencies": [3]
                }
            ]
        }

class ExecutorAgent:
    """Generate Python code with the Gemini model and execute it in a subprocess."""

    def __init__(self):
        # Initialize Gemini model
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash-latest",
            temperature=0.2,
            convert_system_message_to_human=True
        )
        # LangChain Tool wrappers around the two methods below.
        self.tools = [
            Tool(
                name="CodeWriter",
                func=self.write_code,
                description="Generates code for given requirements"
            ),
            Tool(
                name="TestRunner",
                func=self.run_tests,
                description="Executes test cases and reports results"
            )
        ]

    def write_code(self, requirements: str) -> dict:
        """
        Uses the LLM to generate Python code for the given requirements.

        The first fenced code block of the reply is used; if the reply has no
        fence, the whole reply is treated as code. The code is saved as
        ``main.py`` in a fresh temporary directory, which is NOT removed here
        because ``run_tests`` needs the file afterwards.

        Returns a dict with 'code' (the Python source) and 'files' (list of filenames).
        """
        prompt = (
            f"You are an expert Python developer. "
            f"Write complete, self-contained Python code to {requirements}. "
            f"Return only the code block, wrapped in triple backticks for Python."
        )
        llm_response = self.llm.invoke(prompt).content

        # Extract code from triple backticks
        match = re.search(r"```(?:python)?\n([\s\S]*?)```", llm_response)
        code = match.group(1) if match else llm_response

        # Save code to a temp file. Python source defaults to UTF-8, so write it
        # as UTF-8 explicitly instead of relying on the locale encoding.
        tmp_dir = tempfile.mkdtemp()
        file_path = os.path.join(tmp_dir, "main.py")
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(code)

        return {"code": code, "files": [file_path]}

    def run_tests(self, code_info: dict, timeout: float = 10) -> dict:
        """
        Executes the first generated Python file in a child process.

        NOTE: the child process is not sandboxed; it runs model-generated code
        with the privileges of the current user.

        Args:
            code_info: dict as returned by ``write_code``; only 'files' is read.
            timeout: seconds to wait before the run is aborted and reported as failed.

        Returns:
            ``{"passed": bool, "errors": [str, ...]}``. 'passed' is True only
            when the process exits with status 0. On failure 'errors' holds
            stderr, or stdout / the exit code when stderr is empty, or the
            exception text when the process could not be run to completion.
        """
        files = code_info.get("files", [])
        if not files:
            return {"passed": False, "errors": ["No file to execute."]}

        file_to_run = files[0]

        # Run the Python file with the interpreter running this program, so the
        # child sees the same environment/packages; a bare "python" may be
        # missing from PATH or point at a different installation.
        try:
            result = subprocess.run(
                [sys.executable or "python", file_to_run],
                capture_output=True,
                text=True,
                timeout=timeout
            )
        except Exception as e:
            # Includes subprocess.TimeoutExpired and OSError (interpreter not found).
            return {"passed": False, "errors": [str(e)]}

        if result.returncode == 0:
            return {"passed": True, "errors": []}

        details = (
            result.stderr.strip()
            or result.stdout.strip()
            or f"Process exited with code {result.returncode}"
        )
        return {"passed": False, "errors": [details]}

class ReviewerAgent:
    """Ask the Gemini model to review generated code together with its test output."""

    def __init__(self):
        # Initialize Gemini model
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash-latest",
            temperature=0.2,
            convert_system_message_to_human=True
        )

    def analyze(self, code_result: dict, test_result: dict) -> dict:
        """
        Analyze the generated code and test results, providing feedback and critical issues.

        Args:
            code_result: dict with the source under 'code'.
            test_result: dict with 'errors' (list of messages) and optionally 'passed'.

        Returns a dict with:
          - 'feedback': A summary string of overall quality
          - 'critical_issues': A list of any issues that must be addressed
        """

        code = code_result.get("code", "")
        # Drop empty messages and coerce the rest to str so join() cannot fail.
        errors = [str(err) for err in (test_result.get("errors") or []) if err]
        if errors:
            tests_output = "\n".join(errors)
        elif test_result.get("passed", True):
            tests_output = "All tests passed."
        else:
            # A failed run with no error text must not be presented as a pass.
            tests_output = "Tests failed without producing any error output."

        # Construct LLM prompt requesting JSON output
        prompt = (
            "You are a senior software engineer and code reviewer. "
            "Review the following Python code snippet and its test results. "
            "Provide a JSON object with two keys:\n"
            " - feedback: A concise summary of overall code quality.\n"
            " - critical_issues: A list of strings describing any bugs, logical errors, "
            "or style issues that should be fixed.\n\n"
            f"Code:\n```python\n{code}\n```\n\n"
            f"Test Results:\n{tests_output}\n\n"
            "Output JSON only."
        )

        llm_response = self.llm.invoke(prompt).content
        return self._parse_review(llm_response)

    @staticmethod
    def _parse_review(llm_response: str) -> dict:
        """Turn the raw model reply into ``{'feedback', 'critical_issues'}``.

        The first JSON object in the reply is decoded, ignoring anything before
        it (e.g. a code fence) and anything after it. If no object can be
        decoded, the stripped raw reply is returned as 'feedback' with no issues.
        'critical_issues' is always a list: a missing/empty value becomes ``[]``
        and a single non-list value is wrapped in a list.
        """
        parsed = None
        start = llm_response.find("{")
        if start != -1:
            try:
                # raw_decode stops at the end of the object, so trailing text
                # containing braces cannot break parsing.
                parsed, _ = json.JSONDecoder().raw_decode(llm_response, start)
            except json.JSONDecodeError:
                parsed = None

        if not isinstance(parsed, dict):
            # Fallback: return the raw response as feedback
            return {"feedback": llm_response.strip(), "critical_issues": []}

        critical_issues = parsed.get("critical_issues", [])
        if not critical_issues:
            critical_issues = []
        elif not isinstance(critical_issues, list):
            critical_issues = [critical_issues]

        return {
            "feedback": parsed.get("feedback", ""),
            "critical_issues": critical_issues
        }

class Orchestrator:
    """Drive the planner -> executor -> reviewer workflow for one user request."""

    def __init__(self):
        self.planner = PlannerAgent()
        self.executor = ExecutorAgent()
        self.reviewer = ReviewerAgent()

    def compile_final_output(self):
        """Return the fixed completion message reported at the end of a run."""
        return "Project execution completed."

    @staticmethod
    def _order_subtasks(subtasks):
        """Return *subtasks* ordered so each one follows the subtasks it depends on.

        The ordering is stable: among subtasks whose dependencies are satisfied,
        the planner's original order is kept, so an already well-ordered plan is
        returned unchanged. Dependencies on ids that do not exist in the plan
        are ignored. If a dependency cycle blocks progress, or ids are
        unhashable, the remaining subtasks keep their original order.
        """
        def deps_of(task):
            deps = task.get("dependencies") if isinstance(task, dict) else None
            return deps if isinstance(deps, list) else []

        try:
            known_ids = {t["id"] for t in subtasks if isinstance(t, dict) and "id" in t}
            pending = list(subtasks)
            ordered = []
            completed = set()
            while pending:
                ready_at = next(
                    (
                        i for i, t in enumerate(pending)
                        if all(d in completed or d not in known_ids for d in deps_of(t))
                    ),
                    None,
                )
                if ready_at is None:
                    # Cycle: nothing can be scheduled, run the rest as given.
                    ordered.extend(pending)
                    break
                task = pending.pop(ready_at)
                ordered.append(task)
                if isinstance(task, dict) and "id" in task:
                    completed.add(task["id"])
            return ordered
        except TypeError:
            # Unhashable id/dependency values: fall back to the planner's order.
            return list(subtasks)

    def execute_project(self, user_request):
        """Plan *user_request*, then generate, run and (on failure) review each subtask.

        Progress is reported with ``print``. Returns the message from
        ``compile_final_output``, or a failure message when the planner does
        not return a dict containing a 'subtasks' list.
        """
        # Agent collaboration workflow
        plan = self.planner.decompose(user_request)

        # Check if the plan is valid before proceeding
        if not isinstance(plan, dict) or not isinstance(plan.get("subtasks"), list):
            print("Planner returned an invalid plan.")
            return "Project execution failed: Invalid plan."

        # Honour the declared dependencies instead of trusting emission order.
        for subtask in self._order_subtasks(plan["subtasks"]):
            # Add checks for subtask format if necessary
            if not isinstance(subtask, dict) or "desc" not in subtask:
                print(f"Skipping invalid subtask: {subtask}")
                continue

            print(f"\n🔧 Executing subtask: {subtask['desc']}")
            code_result = self.executor.write_code(subtask["desc"])

            # Check if code_result is valid before proceeding
            if not code_result or "code" not in code_result:
                print(f"Executor failed to generate code for subtask: {subtask['desc']}")
                continue

            print("🧪 Running tests...")
            test_result = self.executor.run_tests(code_result)

            # Check if test_result is valid
            if not test_result or "passed" not in test_result:
                print(f"Executor failed to run tests for subtask: {subtask['desc']}")
                continue

            # The reviewer is consulted only for failing runs.
            if not test_result["passed"]:
                print("🔍 Reviewing failed tests...")
                review = self.reviewer.analyze(code_result, test_result)
                print(f"📝 Review feedback: {review.get('feedback', 'No feedback')}")
                if review.get("critical_issues"):
                    print(f"❌ Critical issues: {review['critical_issues']}")

        return self.compile_final_output()

# Usage
# Demo run: build the orchestrator (which constructs the planner, executor and
# reviewer agents), execute one sample request, and print the returned status
# message. `system` and `response` stay bound at module level afterwards.
if __name__ == "__main__":
    system = Orchestrator()
    response = system.execute_project("Build a REST API for user management")
    print(f"\n✅ {response}")




🔧 Executing subtask: Define API specifications (endpoints, request/response formats, authentication methods)




🧪 Running tests...

🔧 Executing subtask: Choose a suitable framework/library (e.g., Spring Boot, Flask, Node.js with Express)




🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code is functional but has several areas for improvement.
❌ Critical issues: ['The application uses an in-memory data store which is not suitable for production.  A persistent database (e.g., SQLite, PostgreSQL) should be used.', 'ID generation is simplistic and prone to issues if IDs are not strictly sequential.  A more robust ID generation strategy (e.g., UUIDs) is needed for production.', 'Error handling is basic. More detailed and informative error messages should be provided to the client.', 'The code lacks input validation beyond checking for the presence of required fields.  Input sanitization and validation should be more comprehensive to prevent security vulnerabilities (e.g., SQL injection if a database were used).', 'The timeout during testing suggests performance issues, likely due to the in-memory data store and lack of optimization for larger datasets.  Profiling and optimization are necessary.', 'The `debug=True` setting should be removed from prod



🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code has a good structure and attempts to handle errors. However, it currently fails due to a runtime issue.
❌ Critical issues: ["The `venv` creation fails with a non-zero exit status (1).  The code catches and re-raises this error, but doesn't provide any information about *why* `venv` failed.  The error message should be improved to include the output and stderr from the `subprocess.run` call to help diagnose the problem.  Possible causes include insufficient permissions, a problem with the Python installation, or a missing `venv` module.", 'The error handling for `subprocess.run` is redundant. The `except subprocess.CalledProcessError as e:` block simply re-raises the exception without adding any value.  Consider logging the error details or providing a more user-friendly message before re-raising.', 'The code assumes that `python3` is available. It might be more robust to use `sys.executable` to get the path to the currently running Python interpreter, ensuri



🧪 Running tests...

🔧 Executing subtask: Implement database interactions (CRUD operations) using an ORM or database driver




🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code has a critical bug preventing the database table from being created.
❌ Critical issues: ["The `_create_table` method is not reliably creating the 'users' table.  The test results show a `sqlite3.OperationalError: no such table: users`. This is likely because the `self._create_table()` call in the `__init__` method might not be executing correctly or the database connection is failing.  Ensure the database connection is established and the table creation command is executed successfully before any other database operations."]

🔧 Executing subtask: Implement user registration endpoint (POST /users)




🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code has a good structure and uses appropriate libraries for security (password hashing). However, it relies on an in-memory database which is unsuitable for production and lacks comprehensive error handling and input validation.
❌ Critical issues: ['Uses an in-memory dictionary `users` for storing user data, making it unsuitable for production.  A persistent database (e.g., SQLite, PostgreSQL) is required.', "Error handling is insufficient. While a generic `except Exception` block catches errors, it doesn't provide specific error handling or logging for different types of exceptions.  More granular exception handling is needed.", 'Input validation is minimal.  More robust validation is needed for usernames and passwords (e.g., checking for special characters, SQL injection prevention, etc.).', 'The test result indicates a port conflict.  The application should handle this gracefully, perhaps by allowing the port to be specified as a command-line argument or conf



🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code is functional but has security and scalability issues.
❌ Critical issues: ['The in-memory `users` dictionary is insecure and not scalable.  User credentials should be stored in a database with appropriate security measures (salting, hashing, etc.).', 'The placeholder token is extremely insecure.  A robust authentication mechanism like JWTs should be implemented.', 'The broad `except Exception` clause is bad practice.  Specific exceptions should be caught and handled appropriately.  Unexpected errors should be logged with sufficient detail for debugging.', 'The `debug=True` setting should be removed from production deployments.  It exposes sensitive information and creates security vulnerabilities.', "Error handling could be improved by providing more specific error messages to the client.  Generic error messages like 'Invalid username or password' don't help the user.", 'The application lacks input validation for password strength.  Passwords should meet min



🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code is well-structured and mostly correct, but has a minor redundancy and a critical deployment issue.
❌ Critical issues: ["The type validation for `user_id` inside the `get_user` function is redundant because Flask's route definition (`/users/<int:user_id>`) already handles type checking and conversion.  Remove the `if not isinstance(user_id, int):` block.", 'The test result indicates a port conflict. The application should be configured to use a different port or handle port conflicts gracefully, perhaps by checking for availability before binding to the port.  The `debug=True` setting should also be removed from production deployments.']

🔧 Executing subtask: Implement user update endpoint (PUT /users/{id})




🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code has a good structure and includes important security considerations like password hashing and input validation. However, there are critical issues that prevent it from running and need to be addressed.
❌ Critical issues: ["The code fails to run due to a missing dependency ('flask_httpauth').  The test environment needs to be set up correctly by installing the required package using `pip install flask-httpauth`.", "The use of a hardcoded secret key ('app.config['SECRET_KEY'] = 'thisissecret'') is a major security risk.  In a production environment, a strong, randomly generated key must be used.  Consider using environment variables to store sensitive information.", "The in-memory storage of users ('users' dictionary) is not suitable for production. A persistent database should be used to store user data.", 'Error handling could be improved. While the code handles `json.JSONDecodeError`, a more generic `Exception` catch-all is too broad and could mask other im



🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: The code has a critical dependency issue and a potential security vulnerability.  The in-memory database is unsuitable for production.
❌ Critical issues: ["The code fails to run due to a missing dependency ('flask_httpauth').  The test environment needs to be properly configured to include this library.", 'The use of an in-memory SQLite database is inappropriate for production.  A persistent database solution (e.g., PostgreSQL, MySQL) must be implemented.', "The sample user ('admin'/'admin') should be removed from production code.  A secure user creation and management system is required.", 'The code uses a global `users` dictionary which is redundant given the database. This should be removed.', 'Error handling is good, but the exception messages revealed to the client might leak sensitive information in a production environment. Consider logging the error details instead of returning them directly to the client.']

🔧 Executing subtask: Implement authentication and 



🧪 Running tests...
🔍 Reviewing failed tests...




📝 Review feedback: ```json
{
  "feedback": "The code has several critical security and design flaws that need to be addressed before deployment.  The use of hardcoded secrets and an in-memory database is unacceptable for production.",
  "critical_issues": [
    "**Hardcoded Secrets:** JWT_SECRET_KEY, CLIENT_ID, and CLIENT_SECRET are hardcoded.  This is a major security vulnerability.  These values must be stored securely, ideally using environment variables.",
    "**In-Memory Database:** The `users` dictionary is an in-memory database. This is not suitable for production; a persistent database (e.g., PostgreSQL, MySQL, MongoDB) is required.",
    "**Password Storage:** Passwords are stored in plain text. This is extremely insecure.  Passwords must be securely hashed using a strong, one-way hashing algorithm (e.g., bcrypt, Argon2) before storage.",
    "**Error Handling:** While the code handles some JWT exceptions, more robust error handling is needed throughout the application.  Cons



🧪 Running tests...

🔧 Executing subtask: Implement error handling and logging




🧪 Running tests...

🔧 Executing subtask: Write unit and integration tests




🧪 Running tests...

🔧 Executing subtask: Deploy the API to a staging environment




🧪 Running tests...

🔧 Executing subtask: Perform staging testing and bug fixing




🧪 Running tests...

🔧 Executing subtask: Deploy the API to production




🧪 Running tests...

🔧 Executing subtask: Implement monitoring and logging for production




🧪 Running tests...

✅ Project execution completed.
