In [8]:
import os
from dotenv import load_dotenv

In [6]:
import os
import re
from typing import List, Optional

def is_path_matched(path: str, patterns: List[str]) -> bool:
    """
    Check if a path matches any of the given patterns.
    """
    return any(re.match(pattern, path) for pattern in patterns)

def convert_to_regex(pattern: str) -> str:
    """
    Convert a gitignore-like pattern to a regex pattern.
    """
    pattern = pattern.replace(".", r"\.")
    pattern = pattern.replace("*", ".*")
    pattern = pattern.replace("?", ".")
    return f"^{pattern}$"

def process_dir_patterns(dir_patterns: Optional[List[str]]) -> List[str]:
    """
    Process directory patterns and convert them to regex patterns.
    """
    if dir_patterns is None:
        return []
    return [convert_to_regex(pattern) for pattern in dir_patterns]

def should_process_directory(dirpath: str, include_patterns: List[str], exclude_patterns: List[str]) -> bool:
    """
    Determine if a directory should be processed based on include and exclude patterns.
    """
    if include_patterns and not is_path_matched(dirpath, include_patterns):
        return False
    if exclude_patterns and is_path_matched(dirpath, exclude_patterns):
        return False
    return True

def read_repository_files(
    path: str,
    include_dir: Optional[List[str]] = None,
    exclude_dir: Optional[List[str]] = None
) -> List[str]:
    """
    Read all files in a repository, optionally filtering by included or excluded directories.

    Args:
    path (str): The path of the repository.
    include_dir (Optional[List[str]]): List of directories to include (supports regex-like patterns).
    exclude_dir (Optional[List[str]]): List of directories to exclude (supports regex-like patterns).

    Returns:
    List[str]: A list of file paths found in the repository.
    """
    include_patterns = process_dir_patterns(include_dir)
    exclude_patterns = process_dir_patterns(exclude_dir)
    
    files = []

    for dirpath, dirnames, filenames in os.walk(path):
        if should_process_directory(dirpath, include_patterns, exclude_patterns):
            for filename in filenames:
                files.append(os.path.join(dirpath, filename))

    return files

In [12]:
with open("../.git/objects/01/142595d7c2fd0a24d10427e56f9d885d51451f", "r") as f:
    print(f.read())

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x88 in position 18: invalid start byte

In [9]:
read_repository_files("..", exclude_dir=[".*"])

[]

In [9]:
load_dotenv()

True

In [5]:
d = {"a":1,"b":2}
d.pop(list(d.keys())[0])
d

{'b': 2}

In [10]:
# fix asyncio in notebook
import nest_asyncio
nest_asyncio.apply()

In [11]:
# reload module in notebook
%load_ext autoreload
%autoreload 2

In [252]:
from codeag.core.agent import Agent
from codeag.core.configs.api_params import GPT35_BASE_PARAMS, GPT4_BASE_PARAMS, GPT4_NO_JSON

In [246]:
agent = Agent(repo_path="../../SWE-agent")

# Documentation

### Extract documentation labels

In [13]:
label_extractor = """
I want to generate some documentation for an entire repository.
In order to do that, I need to define which sections the documentation should have based on the repository content.
Given that the repository is very large, I first want to give each file a label that reflects its content.
Based on these labels, I will define the sections of the documentation. 
Here are some examples of relevant labels that could be used to define the sections of the documentation:
configuration, deployment, security, authentification, front-end, back-end, database, testing, CI/CD, etc.
These are just examples, don't hesitate to include additional labels more specific to what the file does

*IMPORTANT*:
Each file can have multiple labels.
If you think a file is not relevant for the documentation, don't give it any label. 

Define the labels for the following file:
{files_content}

Return your answer in JSON format as such:
{{"labels": ["label1", "label2", "label3"]}}
"""

In [14]:
messages = agent.get_messages(prompt=label_extractor)

In [15]:
agent.calculate_cost(messages, openai_params=GPT35_BASE_PARAMS)

0.046892

In [16]:
responses = agent.generate_responses(messages, GPT35_BASE_PARAMS)

In [17]:
files_labels = {}
for path, response in zip(messages.keys(),responses):
    files_labels[path] = eval(response["content"])["labels"]

def retrieve_files(labels):
    files = []
    for path, labels_ in files_labels.items():
        if any(label in labels_ for label in labels):
            files.append(path)
    return files

In [18]:
retrieve_files(["documentation"])

['../../SWE-agent/sweagent/agent/commands.py']

In [19]:
def count_labels(responses):
    label_counts = {}
    for response in responses:
        labels = eval(response["content"])['labels']
        for label in labels:
            if label in label_counts:
                label_counts[label] += 1
            else:
                label_counts[label] = 1
    return label_counts

label_count = count_labels(responses)


In [20]:
sorted_label_count = {k: v for k, v in sorted(label_count.items(), key=lambda item: item[1], reverse=True)}

In [21]:
sorted_label_count

{'deployment': 29,
 'testing': 28,
 'front-end': 21,
 'back-end': 20,
 'configuration': 14,
 'UI': 7,
 'CI/CD': 6,
 'React': 5,
 'security': 4,
 'react': 4,
 'scripting': 2,
 'routing': 2,
 'component': 2,
 'docker': 1,
 'bash': 1,
 'bash script': 1,
 'performance': 1,
 'state management': 1,
 'javascript': 1,
 'syntax-highlighting': 1,
 'authentification': 1,
 'data-processing': 1,
 'parsing': 1,
 'dataclass': 1,
 'abstract class': 1,
 'json parsing': 1,
 'error handling': 1,
 'documentation': 1,
 'socketio': 1,
 'web interface': 1,
 'data processing': 1,
 'data analysis': 1,
 'csv generation': 1,
 'experiment results': 1,
 'data visualization': 1}

In [22]:
label_count_str = ""

for label, count in sorted_label_count.items():
    label_count_str += f"{label}: {count}\n"

### Extract descriptions

In [209]:
extract_descriptions = """
I want to generate some documentation for an entire repository.
In order to do that, I need to define which sections the documentation should have based on the repository content.
Given that the repository is very large, I first want to extract short and long descriptions about what each file in the repo does.
Based on these descriptions, I will define the sections of the documentation.

*IMPORTANT*:
The short descriptions should be as concise as possible, with a maximum of around 50 tokens.
The longer descriptions should use bullet points to list the main points of the file content.

Write the descriptions for following file:
{files_content}

Return your answer in JSON format as such:
{{
    "description": "short description here",
    "details": "long description here"
}}
"""

In [310]:
descriptions_messages = agent.get_messages(prompt=extract_descriptions)

In [211]:
agent.calculate_cost(descriptions_messages, openai_params=GPT35_BASE_PARAMS)

0.0453485

In [319]:
responses = agent.generate_responses(descriptions_messages, GPT35_BASE_PARAMS)

In [320]:
descriptions_responses = {}
for path, response in zip(descriptions_messages.keys(),responses):
    descriptions_responses[path] = eval(response["content"])

In [335]:
def get_full_descriptions(paths = None):
    full_descriptions_str = ""
    for path, response in descriptions_responses.items():
        if paths and path not in paths:
            pass
        else:
            full_descriptions_str += f'{path}:\n{response["description"]}\n{response["details"]}\n\n'
    return full_descriptions_str

In [71]:
count_tokens(full_descriptions_str, model=GPT35_BASE_PARAMS["model"])

6323

### Document directories 

In [174]:
document_directories = """
I want to document the main directories found inside a repository.
A description for each file found inside this repository has been generated.

Here are the descriptions for each file:
{descriptions_str}

Write a brief description for the most relevant directories found in the repository.

**IMPORTANT**:
The descriptions should be as concise as possible, with a maximum of around 50 tokens.
Include as many directories and subdirectories as you can.

Return your answer in JSON format as such:
"directory_name_1": "directory description here", 
"directory_name_2": "directory description here"
"""

In [199]:
document_directories_prompt = document_directories.format(descriptions_str=descriptions_str)

In [200]:
messages = agent.get_messages(prompt=document_directories_prompt)

In [177]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)

In [178]:
print(responses["content"])

{
    "SWE-agent": "Root directory containing scripts, configuration files, and main components for the SWE-agent.",
    "SWE-agent/docker": "Contains helper scripts for Docker setup and management.",
    "SWE-agent/config/commands": "Shell scripts for various command functionalities like editing, searching, and cursor manipulation.",
    "SWE-agent/inspector": "Files for handling HTTP requests and viewing file content in the trajectory viewer.",
    "SWE-agent/tests": "Test files and fixtures for validating different functionalities of the SWE-agent.",
    "SWE-agent/tests/test_data/trajectories": "Contains test trajectory files used for testing.",
    "SWE-agent/sweagent/frontend/src": "Source files for the frontend React application.",
    "SWE-agent/sweagent/frontend/src/components": "React components for the frontend UI.",
    "SWE-agent/sweagent/frontend/src/components/panels": "React components for displaying different panels in the frontend.",
    "SWE-agent/sweagent/frontend/s

### Define documentation sections

In [23]:
doc_sections = """
I want to generate some documentation for an entire repository.
To do so, I want to define the sections of the documentation based on some labels that were generated for each file.
Here are the labels that were generated and the number of files that have each label:
{label_count}

Define the sections of the documentation that you think are most relevant to the codebase based on the labels.
Don't use all of the labels, only those you think are relevant to writing the documentation.

Return your results in JSON format as follows:
"sections": ["section1", "section2", "section3"]
"""

In [24]:
doc_sections_prompt = doc_sections.format(label_count=label_count_str)

In [25]:
messages = agent.get_messages(prompt=doc_sections_prompt)

In [26]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)

In [27]:
sections = eval(responses["content"])["sections"]

In [28]:
sections_str = ""
for section in sections:
    sections_str += f"- {section}\n"

In [29]:
print(sections_str)

- Deployment
- Testing
- Front-End
- Back-End
- Configuration
- UI
- CI/CD
- Security
- Scripting
- Routing
- Components
- Performance
- State Management
- Documentation



### Search for files with specific labels

In [30]:
search_files = """
I want to document different sections from a repository.
In order to do so, I need to retrieve the relevant files for each section using the labels that were generated for each file.

Here are the different labels that were generated and the number of files that have each label:
{label_count_str}

Here are the different sections that were defined for the documentation:
{sections_str}

For each section, identify the relevant labels that should be used for searching the files that belong to that section.

**IMPORTANT**:
Different sections may use the same labels.

Return your answer in JSON format as such:
section1: ["label1", "label2", "label3"],
section2: ["label4", "label5", "label1"]
"""

In [31]:
search_files_prompt = search_files.format(label_count_str=label_count_str, sections_str=sections_str)

In [33]:
messages = agent.get_messages(prompt=search_files_prompt)

In [34]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)

In [37]:
section_labels = eval(responses["content"])

In [39]:
def get_section_files(section):
    labels = section_labels[section]
    return retrieve_files(labels)

In [87]:
def get_sections_contents():
    section_contents = {}
    for section in section_labels.keys():
        section_content_str = ""
        file_paths = get_section_files(section)
        section_content_str = get_full_descriptions(file_paths)
        section_contents[section] = section_content_str
    return section_contents
    

In [88]:
section_contents = get_sections_contents()

In [89]:
print(section_contents["Front-End"])

../../SWE-agent/inspector/fileViewer.js:
JavaScript file containing functions to fetch and display files and their content in a web viewer
['Defines variables to store current file name, directory, and timeout IDs for pending operations', 'Includes functions to get the base URL, fetch files list, get role text, view file content, refresh current file, and fetch directory info', 'Utilizes fetch API to retrieve data from server and display file content in a structured manner', 'Implements role mapping for different types of agents and content display', 'Handles error cases and updates UI elements accordingly', 'Executes necessary functions on window load to initialize file fetching and directory info retrieval']

../../SWE-agent/inspector/static.py:
Python module containing functions for generating static HTML viewers for trajectory files.
- Defines a template HTML structure for displaying conversation history from trajectory files.
- Loads style sheet and handles exceptions if style she

In [50]:
from codeag.core.utils.costs import count_tokens

In [86]:
count_tokens(section_contents["Front-End"],model=GPT35_BASE_PARAMS["model"])

1939

### Generate documentation sections

In [121]:
document_sections = """
I want to generate some documentation for an entire repository.
I want to do this by generating each section of the documentation separately.

Here are the different sections that were defined for the documentation:
{sections_str}

Generate the documentation for the following section:
{section}

Here are some file information that should be relevant for that section:
{section_content}

**IMPORTANT**:
Not all files are necessarily relevant to the section.
Don't try to include all of the file's information inside the documentation, only focus on those that are relevant.
The documentation you generate should be broken down into different subsections whenever necessary.
Follow an html-like structure for the documentation, using "h1", "h2", "h3" as section titles and "p" as content.

Return your answer in JSON format following this structure:
"0 - h1": "section title here",
"1 - h2": "subsection title here",
"2 - p": "content here"
"3 - h2": "subsection title here",
"4 - h3": "subsubsection title here",
"5 - p": "content here"

NOTE: the 0 - 1 - 2 - 3 - 4 - 5 are just indexes to make the JSON keys unique.

If you think that not enough context is provided to generate the documentation, return the answer as follows:
"error": "not enough context provided"
"""

In [122]:
messages = {}
for section in sections:
    messages[section] = agent.get_messages(prompt=document_sections.format(sections_str=sections_str, section=section, section_content=section_contents[section]))
    # messages = agent.get_messages(prompt=document_sections_prompt)
    # responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)
    # print(responses["content"])

In [123]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)


In [124]:
def parse_json_to_md(response_content):
    for tag, content in response_content.items():
        if "h1" in tag:
            print(f"# {content}")
        elif "h2" in tag:
            print(f"## {content}")
        elif "h3" in tag:
            print(f"### {content}")
        elif "p" in tag:
            print(f"{content}\n")
        else:
            print("Error: unrecognized tag")

In [127]:
parse_json_to_md(eval(responses[0]["content"]))

# Deployment
## Overview
This section covers the deployment process for the SWE-agent, including setting up Docker images, building and pushing official Docker images, and running the software agent. The deployment scripts ensure that the software is correctly built, packaged, and deployed to the appropriate environments.

## Setup Script
### setup.sh
The `setup.sh` script is used to build Docker images for the SWE-agent and evaluation components. It follows bash strict mode and sets up the Docker image for SWE-agent by building it with the specified Dockerfile and setting the TARGETARCH variable. It then proceeds to set up the Docker image for evaluation using a separate Dockerfile. Finally, the script prints 'Done with setup!' to indicate completion.

## Building and Pushing Docker Images
### release_dockerhub.sh
The `release_dockerhub.sh` script is responsible for building official Docker images and pushing them to DockerHub after user confirmation. It builds images for `swe-agent`,

## TESTING

### Identify files to test

In [146]:
count_tokens(full_descriptions_str, model=GPT35_BASE_PARAMS["model"])

6323

In [159]:
identify_test_files = """
I want to generate some tests for a repository.
In order to do that, I first need to identify which files from the repository are relevant for testing.

The relevance of these tests should be categorized into three main categories:
- High: files that are critical for the application and should be tested thoroughly
- Medium: files that are important for the application but not critical
- Low: files that are not critical and can be tested with less priority

Here are the descriptions for each file:
{full_descriptions_str}

**IMPORTANT**:
Focus on files which are relevant for testing and relatively easy to test (i.e unit testing).
Files that are more difficult to test (UI, integration tests, scripts, etc.) should be excluded or ranked as low priority.
IGNORE TEST FILES.

Return your answer in JSON format as such:
High: ["file1", "file2", "file3"],
Medium: ["file4", "file5", "file6"],
Low: ["file7", "file8", "file9"]
"""

In [160]:
identify_test_files_prompt = identify_test_files.format(full_descriptions_str=full_descriptions_str)

In [161]:
messages = agent.get_messages(prompt=identify_test_files_prompt)

In [162]:
agent.calculate_cost(messages, openai_params=GPT4_BASE_PARAMS)



0.03267

In [163]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)

In [164]:
files_to_test = eval(responses["content"])

In [165]:
for priority, files in files_to_test.items():
    print(f"{priority}")
    for file in files:
        print(f"\t- {file}")

High
	- ../../SWE-agent/run.py
	- ../../SWE-agent/inspector/server.py
	- ../../SWE-agent/sweagent/agent/models.py
	- ../../SWE-agent/sweagent/agent/agents.py
	- ../../SWE-agent/sweagent/agent/parsing.py
	- ../../SWE-agent/sweagent/environment/swe_env.py
	- ../../SWE-agent/sweagent/environment/utils.py
Medium
	- ../../SWE-agent/sweagent/api/server.py
	- ../../SWE-agent/sweagent/api/hooks.py
	- ../../SWE-agent/sweagent/api/utils.py
	- ../../SWE-agent/sweagent/utils/config.py
	- ../../SWE-agent/sweagent/agent/commands.py
	- ../../SWE-agent/sweagent/agent/history_processors.py
	- ../../SWE-agent/evaluation/aggregate_results.py
	- ../../SWE-agent/evaluation/evaluation.py
Low
	- ../../SWE-agent/setup.sh
	- ../../SWE-agent/release_dockerhub.sh
	- ../../SWE-agent/docker/getconda.sh
	- ../../SWE-agent/build_deploy.sh
	- ../../SWE-agent/config/commands/edit_linting.sh
	- ../../SWE-agent/config/commands/defaults.sh
	- ../../SWE-agent/config/commands/search.sh
	- ../../SWE-agent/config/commands/cu


### Extract test cases

In [178]:
extract_test_cases = """
I want to generate some tests for an entire repository.
For each of the file in the repository, I first want to define the different test cases (i.e. behaviors to cover) that should be implemented.
The tests will then be generated based on these test cases.

Define some test cases for the following file:
{file_content}

**IMPORTANT**:
Test cases should be as specific as possible and written in a concise way with a maximum of around 50 tokens.

Return your answer in JSON format as such:
{{
    "test_file": "yes",
    "test_cases": {{
        "Class or Function name 1": ["test case description", "test case description", "test case description"],
        "Class or Function name 2 ": ["test case description", "test case description", "test case description"],
    }}
}}

In case you think the file is not relevant for testing, return it as such:
{{
    "test_file": "no",
    "test_cases": {{}}
}}
"""

In [179]:
extract_test_cases.format(file_content=agent.context.codebase.get_file_content(path))



In [182]:
messages = {}
for priority, files in files_to_test.items():
    if priority != "Low":
        for path in files:
            prompt = extract_test_cases.format(file_content=agent.context.codebase.get_file_content(path))
            messages[path] = agent.get_messages(prompt=prompt, add_context=False)
            # responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)
            # print(responses["content"])

In [185]:
agent.calculate_cost(messages, openai_params=GPT35_BASE_PARAMS)

0.026674

In [186]:
responses = agent.generate_responses(messages, GPT35_BASE_PARAMS)

In [199]:
cases = {}
for path, response in zip(messages.keys(), responses):
    cases[path] = {}
    for class_or_function, test_cases in eval(response["content"])["test_cases"].items():
        cases[path][class_or_function] = test_cases

In [188]:

for path, response in zip(messages.keys(), responses):
    print(path)
    for class_or_function, test_cases in eval(response["content"])["test_cases"].items():
        print(f"\t{class_or_function}")
        for test_case in test_cases:
            print(f"\t\t- {test_case}")

../../SWE-agent/run.py
	ActionsArguments
		- Test that the post-init method raises a ValueError if push_gh_repo_url is provided
	ScriptArguments
		- Test the run_name property to ensure it generates a unique name based on the arguments
	SaveApplyPatchHook
		- Test the _save_patch method to ensure it saves patch files correctly
		- Test the _apply_patch method to ensure it applies patches to a local directory correctly
	OpenPRHook
		- Test the should_open_pr method to determine if opening a PR makes sense based on the submission and issue status
../../SWE-agent/inspector/server.py
	append_exit
		- should return content as is if last entry role is 'system'
		- should append submission to history if exit status starts with 'submitted'
		- should raise ValueError if no submission in history or info
	append_patch
		- should append gold patch if exit status is not None and instance ID in gold patches
		- should append test patch if exit status is not None and instance ID in test patches
	app

### Prioritize test cases

In [347]:
prioritize_test_cases = """I want to generate some tests for an entire repository.
The issue is that the repository is very large and I need to prioritize which tests should be generated first.
I have already identified some test cases for the most important files in the repository and need you to prioritize which test cases should be implemented first.

Here are the test cases that were generated for the most important files in the repository:
{test_cases}

Here are some descriptions of the content of these files:
{full_descriptions_str}

Based on the test cases and the file descriptions, prioritize which test cases should be implemented first.

Return your answer in JSON format using the IDs from the test cases numbering as such:
"High": [0, 1, 2],
"Medium": [3, 4, 5],
"Low": [6, 7, 8]
"""

In [348]:
high_prio_files = files_to_test["High"]

In [349]:
high_prio_test_cases = {k: v for k, v in cases.items() if k in high_prio_files}

AttributeError: 'list' object has no attribute 'items'

In [345]:
high_prio_test_cases_ids = ""
id_nr = 0
for path, test_cases_dict in high_prio_test_cases.items():
    high_prio_test_cases_ids += f"{path}:\n"
    for class_or_function, test_cases in test_cases_dict.items():
        high_prio_test_cases_ids += f"\t{class_or_function}:\n"
        for case in test_cases:
            high_prio_test_cases_ids += f"\t\t{id_nr} - {case}\n"
            id_nr += 1

In [337]:
high_prio_descriptions = get_full_descriptions(high_prio_files)

In [350]:
prioritize_test_cases = prioritize_test_cases.format(test_cases=high_prio_test_cases_ids, full_descriptions_str=high_prio_descriptions)

In [351]:
messages = agent.get_messages(prompt=prioritize_test_cases, add_context=False)

In [352]:
agent.calculate_cost(messages, openai_params=GPT4_BASE_PARAMS)



0.015965

In [353]:
responses = agent.generate_responses(messages, GPT4_BASE_PARAMS)

In [354]:
print(responses["content"])

{
  "High": [
    1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 20, 21, 26, 27, 28, 29, 53, 54, 68, 69, 70, 71, 72, 73, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 99, 100, 101, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145
  ],
  "Medium": [
    0, 6, 7, 8, 9, 12, 13, 14, 15, 19, 22, 23, 24, 25, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 74, 75, 76, 77, 102, 103, 104, 105, 106, 107
  ],
  "Low": [
    9, 12, 13, 14, 15, 19, 22, 23, 24, 25, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 74, 75, 76, 77, 102, 103, 104, 105, 106, 107
  ]
}


### Define testing guidelines 

In [269]:
define_guidelines = """I want to generate a set of tests for an entire code repository.
In order for these tests to be somewhat standardized, I first want to define some guidelines on how to generate these tests.
These guidelines should include the testing framework to be used as well as any other information you think is relevant (naming, structure, etc.).

Define the guidelines for the following files:
{descriptions_and_tests}

**IMPORTANT**:
ONLY WRITE THE GUIDELINES. Be as concised as possible. Write the guidelines as bullet points. Try and limit them to 5-10 points.
If you think multiple frameworks or guidelines should be used for different types of files, explain when each should be used.
DO NOT write guidelines for each file separately, write them as general guidelines for the entire repository.
Try not to exceed 200-300 tokens.
"""

In [270]:
descriptions_and_tests = ""
for path in files_to_test["High"]:
    descriptions_and_tests += path + ":\n"
    descriptions_and_tests += "Description:\n\t" + descriptions_responses[path]["description"] + "\nTest cases:\n"
    for class_or_function, test_cases in cases[path].items():
        descriptions_and_tests += f"\t{class_or_function}:\n"
        for test_case in test_cases:
            descriptions_and_tests += f"\t\t- {test_case}\n"
    descriptions_and_tests += "\n"

In [271]:
define_guidelines_prompt = define_guidelines.format(descriptions_and_tests=descriptions_and_tests)

In [272]:
messages = agent.get_messages(prompt=define_guidelines_prompt)

In [273]:
responses = agent.generate_responses(messages, GPT4_NO_JSON)

In [275]:
test_guidelines = responses["content"]

In [277]:
print(test_guidelines)

- **Testing Framework**: Use `pytest` for all test cases. Utilize `unittest.mock` for mocking dependencies.
- **Naming Conventions**: 
  - Test files should be named `test_<module_name>.py`.
  - Test functions should be named `test_<functionality>`.
- **Test Structure**:
  - Group related tests using classes or modules.
  - Use fixtures for setup and teardown.
- **Assertions**: Use `assert` statements for checking expected outcomes.
- **Coverage**: Aim for high code coverage; ensure critical paths and edge cases are tested.
- **Mocking**: Mock external dependencies like API calls, file I/O, and subprocesses.
- **Documentation**: Include docstrings in test functions to describe the purpose of the test.
- **Isolation**: Ensure tests are independent and can run in any order.
- **Error Handling**: Test both successful outcomes and expected failures/exceptions.
- **CI Integration**: Ensure tests are integrated with Continuous Integration (CI) pipelines for automated testing.


### Generate tests

In [281]:
generare_tests = """I want to generate a set of tests for a code repository.
I have already generated the test cases for each file in the repository.

Here is the file I want to write the tests for:
{file_content}

Generate tests that cover the following test cases:
{test_cases}

Use the following testing guidelines:
{test_guidelines}

**IMPORTANT**:
Include the test case as docsctring in the corresponding test.
ONLY WRITE THE CODE. Your output will directly be written to files as part of the repository, and will be executed as such.
"""

In [279]:
path = '../../SWE-agent/run.py'

In [282]:
file_content = agent.context.codebase.get_file_content(path)

In [290]:
test_cases_str = ""
for class_or_function, test_cases in cases[path].items():
    test_cases_str += f"{class_or_function}:\n"
    for test_case in test_cases:
        test_cases_str += f"\t- {test_case}\n"

In [291]:
print(test_cases_str)

ActionsArguments:
	- Test that the post-init method raises a ValueError if push_gh_repo_url is provided
ScriptArguments:
	- Test the run_name property to ensure it generates a unique name based on the arguments
SaveApplyPatchHook:
	- Test the _save_patch method to ensure it saves patch files correctly
	- Test the _apply_patch method to ensure it applies patches to a local directory correctly
OpenPRHook:
	- Test the should_open_pr method to determine if opening a PR makes sense based on the submission and issue status



In [292]:
generare_tests_prompt = generare_tests.format(file_content=file_content, test_cases=test_cases_str, test_guidelines=test_guidelines)

In [294]:
messages = agent.get_messages(prompt=generare_tests_prompt, add_context=False)

In [295]:
agent.calculate_cost(messages, openai_params=GPT4_NO_JSON)



0.02405

In [296]:
responses = agent.generate_responses(messages, GPT4_NO_JSON)

In [297]:
print(responses["content"])

```python
# test_run_dev.py

import pytest
from unittest.mock import patch, MagicMock, mock_open
from run_dev import ActionsArguments, ScriptArguments, SaveApplyPatchHook, OpenPRHook, Main, get_args
from pathlib import Path
import subprocess
import json
import os

# Test for ActionsArguments
def test_ActionsArguments_post_init():
    """Test that the post-init method raises a ValueError if push_gh_repo_url is provided"""
    with pytest.raises(ValueError, match="push_gh_repo_url is obsolete. Use repo_path instead"):
        ActionsArguments(push_gh_repo_url="some_url")

# Test for ScriptArguments
def test_ScriptArguments_run_name():
    """Test the run_name property to ensure it generates a unique name based on the arguments"""
    args = ScriptArguments(
        environment=MagicMock(data_path="data_path", install_environment=True),
        agent=MagicMock(
            model=MagicMock(
                model_name="model_name",
                temperature=0.5,
                top_p=0.9,

# Improve code