In [119]:
import anthropic
import os
import json
import subprocess
import glob
import logging
import getpass
from typing import List, Dict
from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceEndpoint
from langchain_mistralai import ChatMistralAI


load_dotenv()

True

In [37]:
# Setup logging
# Configure logging
logging.basicConfig(filename="code_agent.log", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [70]:
ANTHROPIC_API_KEY = getpass.getpass('ANTHROPIC_API_KEY')

In [None]:
from mistralai import Mistral

api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

In [77]:
# Setup 
model_name = "claude-3-sonnet-20240229"
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
mixtral = HuggingFaceEndpoint(repo_id=repo_id, temperature=0.3)

llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0.3,
    max_retries=2,
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Spectra\.cache\huggingface\token
Login successful


In [78]:
model_name = "claude-3-sonnet-20240229"
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

In [80]:
client.messages.create(
    model=model_name,
    messages=[{"role": "user", "content":"Tell me your name"}],
    max_tokens=4096
)

Message(id='msg_011DRvk3DaGCqN9bH2yJJ74h', content=[TextBlock(text="My name is Claude. It's nice to meet you!", type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=11, output_tokens=15))

## Define Tools

In [147]:
# Define Functions
def clone_github_repo(url: str, folder: str):
    """
    Clone a GitHub repository to a specified folder.
    """
    try:
        logging.info(f"Cloning GitHub repository from {url} to {folder}...")
        subprocess.run(["git", "clone", url, folder], check=True)
        logging.info("Repository cloned successfully.")
        return {"success": True, "folder": f"repo successfully cloned into {folder}"}
    except subprocess.CalledProcessError as e:
        logging.error(f"Failed to clone repository: {e}")
        return {"success": False, "folder": f"repo unsuccessfully cloned into {folder}"}
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        return {"success": False, "folder": f"repo unsuccessfully cloned into {folder}"}
def save_folder_structure(folder: str, output_file: str):
    """
    Save the folder structure to a markdown file.
    """
    try:
        logging.info(f"Saving folder structure of {folder} to {output_file}...")
        with open(output_file, "w") as f:
            for root, dirs, files in os.walk(folder):
                level = root.replace(folder, "").count(os.sep)
                indent = " " * 4 * level
                f.write(f"{indent}{os.path.basename(root)}/\n")
                sub_indent = " " * 4 * (level + 1)
                for file in files:
                    f.write(f"{sub_indent}{file}\n")
        logging.info("Folder structure saved successfully.")
        return {"success": True, "output_file": f"Folder structure successfully written into {output_file}"}
    except Exception as e:
        logging.error(f"An error occurred while saving folder structure: {e}")
        return {"success": False, "output_file": f"Folder structure unsuccessfully written into {output_file}"}
def list_files(folder: str, patterns: list):
    """
    List all files in the specified folder that match the given patterns.
    """
    matching_files = []
    for pattern in patterns:
        matching_files.extend(glob.glob(os.path.join(folder, pattern), recursive=True))
    return matching_files

def summarize_files(folder: str, output_file: str):
    """
    Summarize code and markdown files in the folder and append to the output file.
    """
    file_patterns = ["**/*.py", "**/*.js", "**/*.java", "**/*.cpp", "**/*.md"]
    try:
        # Step 1: List all files that match the patterns
        files_to_summarize = list_files(folder, file_patterns)

        logging.info(f"Summarizing files in {folder} and appending to {output_file}...")

        with open(output_file, "a") as f:
            f.write("\n## File Summaries\n\n")

            # Step 2: Read and summarize each file
            for file_path in files_to_summarize:
                try:
                    with open(file_path, "r") as file:
                        content = file.read()
                        if not content.strip():
                            logging.warning(f"File {file_path} is empty. Skipping.")
                            f.write(f"### {file_path}\nWarning: File is empty. No summary generated.\n\n")
                            continue

                        # Summarize the content
                        summary = summarize_content(content)
                        if summary.get("success"):
                            f.write(f"### {file_path}\n{summary['summary']}\n\n")
                        else:
                            logging.error(f"Summary generation failed for {file_path}.")
                            f.write(f"### {file_path}\nWarning: Summary generation failed.\n\n")
                
                except Exception as file_error:
                    logging.error(f"An error occurred while reading file {file_path}: {file_error}")
                    f.write(f"### {file_path}\nWarning: Could not read the file due to an error.\n\n")
        
        logging.info("File summaries saved successfully.")
        return {"success": True, "output_file": f"Summary successfully written into {output_file}"}
    except Exception as e:
        logging.error(f"An error occurred while summarizing files: {e}")
        return {"success": False, "output_file": f"Summary could not be written into {output_file}"}

def summarize_content(content: str) -> str:
    """
    Use an LLM to summarize the content of a file.
    """
    try:
        system = """
        You are an advanced code assistant specialized in summarizing technical content, including source code and documentation. Your task is:
            - Given the content of a code file or documentation, generate a concise yet informative summary that highlights the primary purpose and key components.
            - Ensure that the summary is well-written and can be understood by someone who needs an overview without diving into the details of the code.
            - The summary should be between 2 to 5 sentences, focusing on the main purpose and key functions provided by the code or document.
            - Return a success or failure message depending on whether a summary could be generated, and include error handling for unexpected content formats.

        """
        messages = [
            ("system", system),
            ("human", f"Please summarize the following content:\n\n{content}"),
        ]
        logging.info("Requesting LLM to summarize content...")
        ai_msg = llm.invoke(messages)
        return {"success": True, "summary": f"Summary successfully generated {ai_msg.content}"}
    except Exception as e:
        logging.error(f"An error occurred while summarizing content: {e}")
        return {"success": False, "summary": f"Summary not generated"}

def generate_documentation(input_file: str, output_file: str):
    """
    Generate comprehensive documentation based on the preliminary analysis.
    """
    try:
        logging.info(f"Generating documentation based on {input_file}...")
        with open(input_file, "r") as f:
            content = f.read()
        system = """
        You are a highly knowledgeable technical assistant that generates comprehensive documentation for software codebases. Your task is:
            - Given a preliminary analysis of the codebase, generate detailed documentation that is suitable for a README file.
            - The documentation should cover an overview of the project, its purpose, features, dependencies, installation instructions, and usage.
            - Focus on making the documentation informative, easy to follow, and clear, ensuring it provides sufficient information for a new developer or user to understand the project.
            - Do not include any code—only include narrative text that explains the project.
            - The documentation should be well-structured, using headers and subheaders where appropriate, and save it to the specified output file.
        """
        messages = [
            ("system", system),
            ("human", f"Based on the following preliminary analysis, generate comprehensive documentation for the entire codebase:\n\n{content}"),
        ]
        ai_msg = llm.invoke(messages)
        with open(output_file, "w") as f:
            f.write(ai_msg.content)
        logging.info("Documentation generated successfully.")
        return {"success": True, "output": f"Documentation written in {output_file}"}
    except Exception as e:
        logging.error(f"An error occurred while generating documentation: {e}")
        return {"success": False, "output": f"Documentation not written in {output_file}"}
def generate_instructions(input_file: str, output_file: str):
    """
    Convert documentation into clear instructions for recreating the codebase.
    """
    try:
        logging.info(f"Generating instructions from {input_file}...")
        with open(input_file, "r") as f:
            content = f.read()
        system =""" 
        You are an expert assistant in converting software documentation into actionable instructions. Your task is:
            - Based on the provided documentation, generate a set of clear, step-by-step instructions for recreating the codebase.
            - The instructions should be written clearly enough for a software engineer to recreate the entire codebase from scratch.
            - Do not include any actual code—focus entirely on the steps required, such as "Create a file named X", "Include Y dependencies", "Write function Z to do A", etc.
            - Ensure the instructions are comprehensive, cover all steps in detail, and are structured in a logical sequence.
        """
        messages = [
            ("system", system),
            ("human", f"Based on the following documentation, provide clear instructions to recreate the codebase:\n\n{content}"),
        ]
        ai_msg = llm.invoke(messages)
        with open(output_file, "w") as f:
            f.write(ai_msg.content)
        logging.info("Instructions generated successfully.")
        return {"success": True, "output": f"Instructions written in {output_file}"}
    except Exception as e:
        logging.error(f"An error occurred while generating instructions: {e}")
        return {"success": False, "output": f"Instructions not written in {output_file}"}


def recreate_codebase(instructions_file: str, output_folder: str):
    """
    Recreate the codebase based on the instructions.
    """
    try:
        logging.info(f"Recreating codebase based on {instructions_file}...")
        
        # Read the instructions file
        with open(instructions_file, "r") as f:
            instructions = f.read()
        system = """ 
        You are an expert software developer responsible for recreating software codebases based on given instructions. Your task is:
            - Based on the provided instructions, generate the full codebase.
            - For each file, start by indicating the file name using the format 'file: <filename>', followed by the file content.
            - Only generate the exact content that belongs in each file—do not include additional commentary, instructions, or explanatory notes.
            - Ensure that the output is structured clearly so that it can be directly parsed and saved into the respective files.
            - Return the generated code in a format that can be used to create all necessary files for the project.

        
        """
        # Send instructions to LLM to generate the codebase
        messages = [
                    ("system", system),
                    ("human", f"Please recreate the codebase based on these instructions:\n\n{instructions}"),
                ]

        ai_msg = llm.invoke(messages)
        generated_code = ai_msg.content
        print(generated_code)
        # Check if any code was generated
        if not generated_code.strip():
            logging.error("No code was generated from the instructions.")
            return {"success": False, "output": "No code generated."}
        
        logging.info("Generated code successfully received from LLM.")

        # Parse the generated code and create files
        current_file = None
        current_content = []

        for line in generated_code.split("\n"):
            # Start of a new file
            if line.startswith("file:") and ":" in line:
                # Write the previous file's content (if any)
                if current_file:
                    file_path = os.path.join(output_folder, current_file)
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    with open(file_path, "w") as f:
                        f.write("\n".join(current_content))
                    logging.info(f"Created file: {file_path}")
                
                # Prepare to capture the new file
                current_file = line.split(":", 1)[1].strip()
                current_content = []

            # Continue capturing the content of the current file
            elif current_file:
                current_content.append(line)

        # Write the final file's content (if any)
        if current_file:
            file_path = os.path.join(output_folder, current_file)
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w") as f:
                f.write("\n".join(current_content))
            logging.info(f"Created file: {file_path}")

        logging.info("Codebase recreated successfully.")
        return {"success": True, "output": f"Code recreated in {output_folder}"}

    except Exception as e:
        logging.error(f"An error occurred while recreating the codebase: {e}")
        return {"success": False, "output": f"Code not recreated in {output_folder}"}



In [123]:
tools = [
    {
        "name": "clone_github_repo",
        "description": "Clones a GitHub repository to a specified folder.",
        "input_schema": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL of the GitHub repository to clone."
                },
                "folder": {
                    "type": "string",
                    "description": "The folder where the repository will be cloned."
                }
            },
            "required": ["url", "folder"]
        }
    },
    {
        "name": "save_folder_structure",
        "description": "Saves the folder structure of a specified folder to a markdown file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "folder": {
                    "type": "string",
                    "description": "The folder to be analyzed for its structure."
                },
                "output_file": {
                    "type": "string",
                    "description": "The output markdown file to save the folder structure."
                }
            },
            "required": ["folder", "output_file"]
        }
    },
    {
        "name": "summarize_files",
        "description": "Summarizes code and markdown files in a folder and appends the summary to an output file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "folder": {
                    "type": "string",
                    "description": "The folder containing the files to summarize."
                },
                "output_file": {
                    "type": "string",
                    "description": "The output file where summaries will be appended."
                }
            },
            "required": ["folder", "output_file"]
        }
    },
    {
        "name": "generate_documentation",
        "description": "Generates comprehensive documentation based on the preliminary analysis of a given input file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "input_file": {
                    "type": "string",
                    "description": "The input file containing the preliminary analysis."
                },
                "output_file": {
                    "type": "string",
                    "description": "The output file where the generated documentation will be saved."
                }
            },
            "required": ["input_file", "output_file"]
        }
    },
    {
        "name": "generate_instructions",
        "description": "Generates clear instructions for recreating the codebase based on the provided documentation.",
        "input_schema": {
            "type": "object",
            "properties": {
                "input_file": {
                    "type": "string",
                    "description": "The input file containing the codebase documentation."
                },
                "output_file": {
                    "type": "string",
                    "description": "The output file where the generated instructions will be saved."
                }
            },
            "required": ["input_file", "output_file"]
        }
    },
    {
        "name": "recreate_codebase",
        "description": "Recreates the codebase based on the instructions from the input file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "instructions_file": {
                    "type": "string",
                    "description": "The input file containing instructions to recreate the codebase."
                },
                "output_folder": {
                    "type": "string",
                    "description": "The folder where the recreated codebase will be saved."
                }
            },
            "required": ["instructions_file", "output_folder"]
        }
    }
]


In [124]:
# Handle tool call function

def process_tool_call(tool_block):
    tool_name = tool_block.name
    arguments = tool_block.input if tool_block.input else {}
    if tool_name == "clone_github_repo":
        return clone_github_repo(arguments["url"], arguments["folder"])
    elif tool_name == "save_folder_structure":
        return save_folder_structure(arguments["folder"], arguments["output_file"])
    elif tool_name == "summarize_files":
        return summarize_files(arguments["folder"], arguments["output_file"])
    elif tool_name == "generate_documentation":
        return generate_documentation(arguments["input_file"], arguments["output_file"])
    elif tool_name == "generate_instructions":
        return generate_instructions(arguments["input_file"], arguments["output_file"])
    elif tool_name == "recreate_codebase":
        return recreate_codebase(arguments["instructions_file"], arguments["output_folder"])
    else:
        raise ValueError(f"Tool '{tool_name}' is not recognized.")


In [148]:
def single_llm_invocation(github_url: str):
    # State dictionary to track which steps have been completed
    state = {
        "repo_cloned": False,
        "structure_saved": False,
        "files_summarized": False,
        "documentation_generated": False,
        "instructions_generated": False,
        "codebase_recreated": False
    }

    # System prompt describing the overall context and goal
    system_prompt = f"""
    You are a helpful assistant that handles software development workflows. You can use tools to interact with GitHub repositories, summarize code, generate documentation, and recreate codebases. Your task is to guide the process by invoking the necessary tools in sequence to complete the workflow based on the user's request. You should:
    You are provided with the following GitHub repository URL: {github_url}.

    Your goal is to execute the following steps in sequence using the available tools:
    1. Clone the GitHub repository into the specified folder.
    2. Save the folder structure of the cloned repository into a markdown file.
    3. Summarize the content of the relevant code and markdown files and append these summaries to an output file.
    4. Generate comprehensive documentation for the codebase based on the summaries.
    5. Convert the generated documentation into clear instructions for recreating the codebase.
    6. Recreate the codebase into a separate specified folder using the generated instructions.

    You are allowed to use the following folder names and file paths for each step:

    - **Cloning the Repository**:
      - GitHub URL: `{github_url}`
      - Target folder: `cloned_repo`

    - **Saving Folder Structure**:
      - Folder to analyze: `cloned_repo`
      - Output markdown file: `folder_structure.md`

    - **Summarizing Files**:
      - Folder to summarize: `cloned_repo`
      - Output file for summaries: `file_summaries.md`

    - **Generating Documentation**:
      - Input file for analysis: `file_summaries.md`
      - Output documentation file: `documentation.md`

    - **Generating Instructions for Recreating Codebase**:
      - Input documentation file: `documentation.md`
      - Output instructions file: `instructions.md`

    - **Recreating the Codebase**:
      - Input instructions file: `instructions.md`
      - Output folder for recreated codebase: `recreated_codebase`

    At each step, make sure to use the available tools logically and complete each part of the workflow before moving on to the next. Continue until the entire workflow is completed successfully.
    """

    user_prompt = f"""
    Recreate the code for me: The GitHub URL is {github_url}
    """

    # Initial messages list without system prompt
    messages = [
        {"role": "user", "content": user_prompt}
    ]

    # Loop until all steps are completed
    while not all(state.values()):
        # Invoke the LLM to determine the flow
        response = client.messages.create(
            model=model_name,
            max_tokens=4096,
            tools=tools,  # The list of tool definitions provided earlier
            system=system_prompt,  # Pass system prompt separately
            messages=messages
        )

        # Process the response and tool invocations until the entire workflow is complete
        if response.stop_reason == "tool_use":
            tool_use = next((block for block in response.content if block.type == "tool_use"), None)
            if tool_use:
                tool_name = tool_use.name
                tool_input = tool_use.input

                # Invoke the tool and get the output
                tool_output = process_tool_call(tool_use)
                print(f"Tool Output for '{tool_name}': {tool_output}")

                # Update state based on the tool used
                if tool_name == "clone_github_repo":
                    state["repo_cloned"] = True
                elif tool_name == "save_folder_structure":
                    state["structure_saved"] = True
                elif tool_name == "summarize_files":
                    state["files_summarized"] = True
                elif tool_name == "generate_documentation":
                    state["documentation_generated"] = True
                elif tool_name == "generate_instructions":
                    state["instructions_generated"] = True
                elif tool_name == "recreate_codebase":
                    state["codebase_recreated"] = True

                # Append the tool's result to messages to provide context to the LLM
                messages.append({
                    "role": "user",
                    "content": f"Tool '{tool_name}' was used successfully. Here is the output: {json.dumps(tool_output)}. Please proceed to the next step."
                })

            else:
                print("No valid tool use was found in the response content.")
        else:
            # Break the loop if no more tools are needed
            break

    # Final response after tool usage is complete
    print("\n===== Final Output from LLM =====")
    print(response.content)

In [149]:
GITHUB_REPO_URL = "https://github.com/KimaniKibuthu/iris-flowers-app.git"
single_llm_invocation(GITHUB_REPO_URL)

Tool Output for 'clone_github_repo': {'success': True, 'folder': 'repo successfully cloned into cloned_repo'}
Tool Output for 'save_folder_structure': {'success': True, 'output_file': 'Folder structure successfully written into folder_structure.md'}
Tool Output for 'summarize_files': {'success': True, 'output_file': 'Summary successfully written into file_summaries.md'}
Tool Output for 'generate_documentation': {'success': True, 'output': 'Documentation written in documentation.md'}
Tool Output for 'generate_instructions': {'success': True, 'output': 'Instructions written in instructions.md'}
```
file: app.py
import streamlit as st
import pandas as pd
import joblib

model = joblib.load('iris_model.pkl')

st.title('Iris Flowers Classification App')

# Input features using sliders
sepal_length = st.slider('Sepal Length', 0.0, 10.0, 5.0)
sepal_width = st.slider('Sepal Width', 0.0, 10.0, 5.0)
petal_length = st.slider('Petal Length', 0.0, 10.0, 5.0)
petal_width = st.slider('Petal Width', 0.