In [4]:
import sys
sys.path.append("/datadrive05/huypn16/focalcoder/")
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from pathlib import Path
from SWE_bench.inference.make_datasets.bm25_retrieval import clone_repo, ContextManager
from git import Repo
import os
import logging
import subprocess
import jedi

# Route log records at INFO and above to the notebook output with a timestamp.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# SWE-bench dataset from the Hugging Face hub; the cells below read its "test" split.
dataset = load_dataset("princeton-nlp/SWE-bench")

# Model loading is disabled; nothing in the visible cells uses `tokenizer` or `model`.
# tokenizer = AutoTokenizer.from_pretrained("princeton-nlp/SWE-Llama-13b",)
# model = AutoModelForCausalLM.from_pretrained("princeton-nlp/SWE-Llama-13b").cuda()

# Show one raw record to see which fields an instance carries.
print(dataset["test"][0])

[2023-11-09 03:47:13,827] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [5]:
# Issue text of test instance 1000; run_preliminary_search embeds it in its
# prompt as the worked example that precedes the real bug report.
bug_report_example = dataset["test"][1000]["problem_statement"]

In [6]:
print(bug_report_example)
# Hand-written "names to search" answer paired with bug_report_example in the
# prompt. It is a plain string formatted like a list, not a Python list.
example_names = "[sca, plt.sca, pyplot.py, SubFigure, axarr]"

[Bug]: Can't use `plt.sca()` on axes created using subfigures
### Bug summary

Using `plt.sca()` on an axes created using subfigures result in an error.

### Code for reproduction

```python
import matplotlib.pyplot as plt

fig = plt.figure()
subfigs = fig.subfigures(2, 1)

axarr = subfigs[0].subplots(2, 2)

plt.sca(axarr[0, 0])
```


### Actual outcome

```
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/me/.local/lib/python3.8/site-packages/matplotlib/pyplot.py", line 1085, in sca
    figure(ax.figure)
  File "/home/me/.local/lib/python3.8/site-packages/matplotlib/pyplot.py", line 773, in figure
    num = int(num)  # crude validation of num argument
TypeError: int() argument must be a string, a bytes-like object or a number, not 'SubFigure'
```


### Expected outcome

It should set the given ax as the current axe, as done by the code below:
```

import matplotlib.pyplot as plt

fig, axarr = plt.subplots(2, 2)

plt.sca(axarr[0, 0])
```

### Addit

In [7]:
# Directory (relative to the working directory) that holds the repository checkouts.
root_dir = "data/repos"
# GitHub token taken from the environment; None when GITHUB_TOKEN is not set.
gh_token = os.environ.get("GITHUB_TOKEN", None)

def clone_repo(repo, commit, root_dir, token):
    """
    Clones a GitHub repository and pins the working tree to one commit.

    The checkout lives in ``<root_dir>/repo__<owner>__<name>__commit__<commit>``.
    When that directory already exists it is reused as-is and nothing is
    cloned. This definition shadows the ``clone_repo`` imported from
    ``bm25_retrieval`` at the top of the notebook.

    Args:
        repo (str): The GitHub repository to clone, as ``owner/name``.
        commit (str): The commit to checkout.
        root_dir (str): The root directory to clone the repository to.
        token (str | None): The GitHub personal access token to use for
            authentication. When empty or ``None`` the clone is anonymous.

    Returns:
        str: ``root_dir + "/" + <checkout directory name>``. Callers concatenate
        this value with other strings, so it is deliberately not a ``Path``.

    Raises:
        subprocess.CalledProcessError: If ``git reset`` or ``git clean`` fails.
        Exception: Whatever ``Repo.clone_from`` raises is propagated unchanged.
    """
    import shutil  # local import: only needed for cleanup on failure

    repo_dir = Path(root_dir, f"repo__{repo.replace('/', '__')}__commit__{commit}")

    if not repo_dir.exists():
        # Only embed credentials when a token was supplied; otherwise the URL
        # would literally contain "None@".
        credentials = f"{token}@" if token else ""
        repo_url = f"https://{credentials}github.com/{repo}.git"
        logger.info(f"Cloning {repo} {os.getpid()}")
        try:
            Repo.clone_from(repo_url, repo_dir)
            # Argument lists + cwd instead of a shell string: no quoting or
            # injection issues with unusual paths or commit strings.
            for git_args in (["reset", "--hard", commit], ["clean", "-fdxq"]):
                subprocess.run(
                    ["git", *git_args],
                    cwd=repo_dir,
                    check=True,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
        except Exception:
            # The existence check above is the cache test, so a half-prepared
            # directory must not survive or later calls would silently reuse it.
            shutil.rmtree(repo_dir, ignore_errors=True)
            raise
    return root_dir + "/" + repo_dir.name

In [8]:
# Work with the first test instance and check its repository out at the
# instance's base commit (reused from data/repos when already present).
sample_instance = dataset["test"][0]
repo_dir = clone_repo(sample_instance["repo"], sample_instance["base_commit"], root_dir, gh_token)

def get_code_jedi(definition, verbose=False):
    """Return the source text spanned by a jedi definition.

    Args:
        definition: Object exposing ``get_definition_start_position()``,
            ``get_definition_end_position()`` and ``get_line_code(after=...)``.
        verbose (bool): When true, prefix every line with its line number.
            Numbers are the definition's start line minus one, counting up.

    Returns:
        str: The raw code, or the numbered code when ``verbose`` is set.
    """
    first_row = definition.get_definition_start_position()[0]
    last_row = definition.get_definition_end_position()[0]
    source_text = definition.get_line_code(after=last_row - first_row)

    if not verbose:
        return source_text

    # jedi rows start at 1; the displayed numbering is shifted down by one.
    numbered = (
        f"{row} {text}"
        for row, text in enumerate(source_text.split("\n"), start=first_row - 1)
    )
    return "\n".join(numbered)

def _take_matches(candidates, limit, to_record, require_definition=True):
    """Turn at most ``limit`` search hits into result records (``None`` = no cap)."""
    records = []
    for candidate in candidates:
        if limit is not None and len(records) >= limit:
            break
        if require_definition and not candidate.is_definition():
            continue
        records.append(to_record(candidate))
    return records


def search_preliminary_inside_project(
    names,
    repo_path,
    num_result=2,
    verbose=False,
    environment_path="/datadrive05/huypn16/anaconda3/envs/knn-llm/bin/python3",
):
    """Get all matched identifiers from a repo

    For every entry in ``names`` three jedi project searches are run
    (``class <name>``, ``def <name>`` and the bare name) and up to
    ``num_result`` hits are kept from each of them. Entries ending in ``.py``
    are treated as module names instead: the full text of every matching
    module file is returned.

    Args:
        names (list[str]): Identifiers (or ``*.py`` file names) to look up.
        repo_path (str): The path to the repo
        num_result (int | None): Maximum hits kept per search kind for each
            name; ``None`` keeps everything. Not applied to ``.py`` entries.
        verbose (bool): Forwarded to ``get_code_jedi``; when true the returned
            implementations carry line-number prefixes.
        environment_path (str): Python interpreter handed to ``jedi.Project``.
            NOTE(review): the default is a machine-specific absolute path.

    Returns:
        dict[str, list[dict]]: For each requested name, the list of extracted
        records (``name``, ``full_name``, ``documentation``,
        ``implementation``; ``.py`` entries only have ``name`` and
        ``implementation``).
    """
    output_dict = {name: [] for name in names}
    project = jedi.Project(repo_path, environment_path=environment_path)

    def describe_definition(definition):
        # Class / function hit: include its docstring and source text.
        return {
            "name": definition.name,
            "full_name": definition.full_name,
            "documentation": definition._get_docstring(),
            "implementation": get_code_jedi(definition, verbose),
        }

    def describe_reference(definition):
        # Bare-name hit: only jedi's short description is kept.
        return {
            "name": definition.name,
            "full_name": definition.full_name,
            "documentation": None,
            "implementation": definition.description,
        }

    for name in names:
        if name.endswith(".py"):
            # Strip only the trailing suffix, not every ".py" occurrence.
            for definition in project.search(name[: -len(".py")]):
                if definition.module_path is None:
                    # No file on disk to read for this hit.
                    continue
                with open(definition.module_path, "r") as f:
                    implementation = f.read()
                output_dict[name].append(
                    {"name": name, "implementation": implementation}
                )
            continue

        matches = output_dict[name]
        matches.extend(
            _take_matches(
                project.search(f"class {name}", all_scopes=True),
                num_result,
                describe_definition,
            )
        )
        matches.extend(
            _take_matches(
                project.search(f"def {name}", all_scopes=True),
                num_result,
                describe_definition,
            )
        )
        matches.extend(
            _take_matches(
                project.search(name, all_scopes=True),
                num_result,
                describe_reference,
                require_definition=False,
            )
        )

    return output_dict

In [9]:
# Smoke test: look up one class with line-numbered source in the result.
search_preliminary_inside_project(["Gaussian1DKernel"], repo_path=repo_dir, num_result=2, verbose=True)

{'Gaussian1DKernel': [{'name': 'Gaussian1DKernel',
   'full_name': 'astropy.convolution.kernels.Gaussian1DKernel',
   'documentation': '1D Gaussian filter kernel.\n\nThe Gaussian filter is a filter with great smoothing properties. It is\nisotropic and does not produce artifacts.\n\nThe generated kernel is normalized so that it integrates to 1.',
   'implementation': '28 class Gaussian1DKernel(Kernel1D):\n29     """\n30     1D Gaussian filter kernel.\n31 \n32     The Gaussian filter is a filter with great smoothing properties. It is\n33     isotropic and does not produce artifacts.\n34 \n35     The generated kernel is normalized so that it integrates to 1.\n36     """\n37     _separable = True\n38     _is_bool = False\n39 \n40     def __init__(self, stddev, **kwargs):\n41         self._model = models.Gaussian1D(1. / (np.sqrt(2 * np.pi) * stddev),\n42                                         0, stddev)\n43         self._default_size = _round_integer(8 * stddev)\n44         super().__init_

In [10]:
# Show the issue text of the sample instance.
print(sample_instance["problem_statement"])

'WCS.all_world2pix' failed to converge when plotting WCS with non linear distortions
<!-- This comments are hidden when you submit the issue,
so you do not need to remove them! -->

<!-- Please be sure to check out our contributing guidelines,
https://github.com/astropy/astropy/blob/master/CONTRIBUTING.md .
Please be sure to check out our code of conduct,
https://github.com/astropy/astropy/blob/master/CODE_OF_CONDUCT.md . -->

<!-- Please have a search on our GitHub repository to see if a similar
issue has already been posted.
If a similar issue is closed, have a quick look to see if you are satisfied
by the resolution.
If not please go ahead and open an issue! -->

<!-- Please check that the development version still produces the same bug.
You can install development version with
pip install git+https://github.com/astropy/astropy
command. -->

### Description
When trying to plot an image with a WCS as projection that contains non linear Distortions it fails with a `NoConvergence` erro

In [11]:
# Only the last expression of a cell is displayed, so the result of the first
# call is computed and then discarded.
search_preliminary_inside_project(["Cosmology"], repo_dir)
search_preliminary_inside_project(['NoConvergence', 'WCS', 'SIP', 'Sip'], repo_dir)

{'NoConvergence': [{'name': 'NoConvergence',
   'full_name': 'astropy.wcs.wcs.NoConvergence',
   'documentation': 'An error class used to report non-convergence and/or divergence\nof numerical methods. It is used to report errors in the\niterative solution used by\nthe :py:meth:`~astropy.wcs.WCS.all_world2pix`.\n\nAttributes\n----------\n\nbest_solution : `numpy.ndarray`\n    Best solution achieved by the numerical method.\n\naccuracy : `numpy.ndarray`\n    Accuracy of the ``best_solution``.\n\nniter : `int`\n    Number of iterations performed by the numerical method\n    to compute ``best_solution``.\n\ndivergent : None, `numpy.ndarray`\n    Indices of the points in ``best_solution`` array\n    for which the solution appears to be divergent. If the\n    solution does not diverge, ``divergent`` will be set to `None`.\n\nslow_conv : None, `numpy.ndarray`\n    Indices of the solutions in ``best_solution`` array\n    for which the solution failed to converge within the\n    specified maxi

In [12]:
from LSP import LSPToolKit
# LSP helper rooted at the checked-out repository; GoToDefinitionTool calls it.
# NOTE(review): the absolute prefix is machine-specific and repeats the path
# appended to sys.path in the first cell; consider deriving both from one place.
lsptools = LSPToolKit(root_path="/datadrive05/huypn16/focalcoder/" + repo_dir)

2023-11-09 03:47:30,293 INFO Loaded pylsp plugin autopep8 from <module 'pylsp.plugins.autopep8_format' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/autopep8_format.py'>
2023-11-09 03:47:30,294 INFO Loaded pylsp plugin flake8 from <module 'pylsp.plugins.flake8_lint' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/flake8_lint.py'>
2023-11-09 03:47:30,295 INFO Loaded pylsp plugin folding from <module 'pylsp.plugins.folding' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/folding.py'>
2023-11-09 03:47:30,295 INFO Loaded pylsp plugin jedi_completion from <module 'pylsp.plugins.jedi_completion' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/jedi_completion.py'>
2023-11-09 03:47:30,296 INFO Loaded pylsp plugin jedi_definition from <module 'pylsp.plugins.definition' from '/datadrive05/huypn16/anaconda3/envs/knn-l

In [13]:
import openai
import json

# Never commit API keys to a notebook: read the key from the environment.
# The key that used to be hard-coded on this line is exposed in the file
# history and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

def run_preliminary_search(bug_report, repo_dir):
    """Ask GPT-4 which identifiers matter for an issue and look them up in the repo.

    The prompt is a one-shot example built from the notebook globals
    ``bug_report_example`` / ``example_names``, followed by ``bug_report``.
    The model is offered two functions, but only
    ``search_preliminary_inside_project`` can be executed here.

    Args:
        bug_report (str): Issue text to analyse.
        repo_dir (str): Path of the checked-out repository to search.

    Returns:
        dict | None: The result of ``search_preliminary_inside_project`` for
        the names chosen by the model. ``None`` when the model did not request
        a function call, requested one this function cannot dispatch, or sent
        arguments that are not valid JSON / lack a ``names`` list.
    """
    # Step 1: send the conversation and available functions to GPT
    messages = [
        {"role": "system", "content": "I'm a codebase search agent. I'm responsible to find the most relevant materials in the codebase to help you"},
        {"role": "user", "content": f"""Given a github issue, finding the most relevant variable names, function names, class names that can be used to resolve the github issue.
        Bug report: {bug_report_example}
        Names to search: {example_names}
        Bug report: {bug_report}
        Names to search: """}]
    functions = [
        {
            "name": "search_preliminary_inside_project",
            "description": "Getting all matched identifiers (variable, function, class name) from a python repository",
            "parameters": {
                "type": "object",
                "properties": {
                    "names": {
                        "type": "array",
                        "items": {
                            "type": "string",
                        },
                        "description": "The names of the identifiers to search",
                    },
                },
                "required": ["names"],
            }
        },
        {
            "name": "go_to_definition",
            "description": "Getting the definition of an identifier",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "The name of the identifier to search",
                    },
                },
                "required": ["name"],

            }
        }
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        functions=functions,
        function_call="auto",  # auto is default, but we'll be explicit
    )
    logger.debug("Raw chat completion response: %s", response)
    response_message = response["choices"][0]["message"]

    # Step 2: check if GPT wanted to call a function
    function_call = response_message.get("function_call")
    if not function_call:
        logger.info("Model answered without requesting a function call")
        return None

    # Step 3: call the function
    available_functions = {
        "search_preliminary_inside_project": search_preliminary_inside_project,
    }  # go_to_definition is advertised above but has no implementation here
    function_name = function_call["name"]
    function_to_call = available_functions.get(function_name)
    if function_to_call is None:
        logger.warning("Model requested unsupported function %r", function_name)
        return None

    # The arguments are model-generated text and are not guaranteed to be valid JSON.
    try:
        function_args = json.loads(function_call["arguments"])
    except json.JSONDecodeError as exc:
        logger.warning("Could not parse arguments for %s: %s", function_name, exc)
        return None

    names = function_args.get("names") if isinstance(function_args, dict) else None
    if not isinstance(names, list):
        logger.warning("Arguments for %s lack a 'names' list: %r", function_name, function_args)
        return None

    logger.info("Searching repository for names: %s", names)
    return function_to_call(
        names=names,
        repo_path=repo_dir,
    )

        # Step 4: send the info on the function call and function response to GPT
        # messages.append(response_message)  # extend conversation with assistant's reply
        # messages.append(
        #     {
        #         "role": "function",
        #         "name": function_name,
        #         "content": function_response,
        #     }
        # )  # extend conversation with function response
        # second_response = openai.ChatCompletion.create(
        #     model="gpt-4",
        #     messages=messages,
        # )  # get a new response from GPT where it can see the function response
        # return second_response

# preliminary_results = run_preliminary_search(sample_instance["problem_statement"], repo_dir)
# print(preliminary_results)

In [14]:
# ReAct-style format instructions for an agent prompt. `{tool_names}` is a
# placeholder meant to be filled in (e.g. via str.format) before use.
# NOTE(review): no other visible cell references this constant — confirm it is still needed.
openapi_format_instructions = """Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: what to instruct the AI Action representative.
Observation: The Agent's response
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer. User can't see any of my observations, API responses, links, or tools.
Final Answer: the final answer to the original input question with the right amount of detail

When responding with your Final Answer, remember that the person you are responding to CANNOT see any of your Thought/Action/Action Input/Observations, so if there is any relevant information there you need to include it explicitly in your response."""

In [15]:
# Look up a single name with the default settings (num_result=2, verbose=False).
search_preliminary_inside_project(["WCS"], repo_dir)

{'WCS': [{'name': 'wcs',
   'full_name': 'astropy.nddata.ccddata.CCDData.wcs',
   'documentation': '',
   'implementation': '    def wcs(self):\n        return self._wcs\n'},
  {'name': 'wcs',
   'full_name': 'astropy.nddata.ccddata.CCDData.wcs',
   'documentation': '',
   'implementation': '    def wcs(self, value):\n        if value is not None and not isinstance(value, WCS):\n            raise TypeError("the wcs must be a WCS instance.")\n        self._wcs = value\n'},
  {'name': 'wcs',
   'full_name': 'astropy.nddata.nddata.NDData.wcs',
   'documentation': 'any type : A world coordinate system (WCS) for the dataset, if any.',
   'implementation': '    def wcs(self):\n        """\n        any type : A world coordinate system (WCS) for the dataset, if any.\n        """\n        return self._wcs\n'},
  {'name': 'wcs',
   'full_name': 'astropy.nddata.nddata.NDData.wcs',
   'documentation': '',
   'implementation': '    def wcs(self, wcs):\n        if self._wcs is not None and wcs is no

In [16]:
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool, StructuredTool, Tool, tool

In [38]:
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools import BaseTool

# Argument schema for CodeSearchTool: the only model-supplied input is the list
# of identifier names. (Comment rather than a docstring so the generated
# pydantic schema stays exactly as before.)
class CodeSearchArgs(BaseModel):
    names: list[str] = Field(..., description="The names of the identifiers to search")

# LangChain tool exposing search_preliminary_inside_project to the agent.
class CodeSearchTool(BaseTool):
    name = "search_preliminary_inside_project"
    description = "Useful when you want to find all matched identifiers (variable, function, class name) from a python repository, primarily used for class, function search"
    args_schema: Type[BaseModel] = CodeSearchArgs

    def _run(self, names: list[str], repo_path: str = repo_dir, verbose: bool = True):
        """Search the repository for ``names`` and return the per-name matches.

        ``repo_path`` defaults to the value ``repo_dir`` had when this class
        was defined; re-run this cell after changing ``repo_dir``.
        """
        return search_preliminary_inside_project(names, repo_path, verbose=verbose)

    async def _arun(self, names: list[str], repo_path: str = repo_dir):
        """Async execution is not supported.

        Raises:
            NotImplementedError: Always. Raised (not returned) so an async
                caller gets an error instead of an exception object as result.
        """
        raise NotImplementedError("Code Search Tool is not available for async run")

# Argument schema for GoToDefinitionTool: a symbol plus the file and line where
# it is used. (Comment rather than a docstring so the generated pydantic schema
# stays exactly as before.)
class GoToDefinitionArgs(BaseModel):
    word: str = Field(..., description="The name of the symbol to search")
    line: int = Field(..., description="The line number of the symbol to search")
    relative_path: str = Field(..., description="The relative path of the file containing the symbol to search")
    
# LangChain tool that resolves a symbol's definition through the notebook-level
# `lsptools` helper.
class GoToDefinitionTool(BaseTool):
    name = "go_to_definition"
    # The worked example now points at line 4, the line of the numbered snippet
    # that actually contains plt.figure(); it previously said 6.
    description = """Useful when you want to find the definition of a symbol inside a code snippet if the current context is not cleared enough such as 
    0 import matplotlib.pyplot as plt
    1 class Directory(object):
    2
    3    def add_member(self, id, name):
    4        self.members[id] = plt.figure() we might want to find the definition of plt.figure() invoke with params ("figure", 4, 'test.py')"""
    args_schema = GoToDefinitionArgs

    def _run(self, word: str, line: int, relative_path: str):
        """Return ``lsptools.get_definition`` output for ``word`` at ``line`` of ``relative_path``."""
        return lsptools.get_definition(word, relative_path, line, verbose=True)

    async def _arun(self, word: str, line: int, relative_path: str):
        """Async execution is not supported.

        Raises:
            NotImplementedError: Always. Raised (not returned) so an async
                caller gets an error instead of an exception object as result.
        """
        raise NotImplementedError("Go To Definition Tool is not available for async run")

In [39]:
# Initialize the OpenAI language model.
# The API key is read from the OPENAI_API_KEY environment variable; never paste
# a key into the notebook. The key that used to be hard-coded on this line is
# exposed in the file history and should be revoked.
llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview", openai_api_key=os.environ["OPENAI_API_KEY"])
# Question put to the agent in the final cell.
question = "how parent of Gaussian1DKernel is initilized?"
# Tools the agent may call: repository-wide name search and LSP go-to-definition.
tools = [CodeSearchTool(), GoToDefinitionTool()]


In [40]:
# Text placed after / before the tool list in the agent prompt (passed to
# initialize_agent as `suffix` and `prefix`).
# NOTE(review): `UFFIX` looks like a leftover of a mistyped `SUFFIX`; the chained
# assignment creates a second, otherwise unused global with the same string.
suffix = UFFIX = "Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary, prioritize using go-to-definition after search tool to have more precies go to definition. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:."
prefix = "You are an expert in programming, you're so good at code navigation and search inside large repository. Respond to the human as helpfully and accurately as possible. Consider use other tools if the results returned is not cleared enough for the query. You have access to the following tools:"

In [41]:
# Build the structured-chat ReAct agent over the two tools, overriding the
# default prompt prefix/suffix with the custom ones defined above.
# verbose=True makes the executor print each step as it runs.
mrkl = initialize_agent(
    tools, llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True,
    prefix=prefix,
    suffix=suffix,
)

In [42]:
# Run the agent on the question; this calls the OpenAI API and the tools above.
mrkl.run(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```json
{
  "action": "search_preliminary_inside_project",
  "action_input": {
    "names": ["Gaussian1DKernel"]
  }
}
```[0m
Observation: [36;1m[1;3m{'Gaussian1DKernel': [{'name': 'Gaussian1DKernel', 'full_name': 'astropy.convolution.kernels.Gaussian1DKernel', 'documentation': '1D Gaussian filter kernel.\n\nThe Gaussian filter is a filter with great smoothing properties. It is\nisotropic and does not produce artifacts.\n\nThe generated kernel is normalized so that it integrates to 1.', 'implementation': '28 class Gaussian1DKernel(Kernel1D):\n29     """\n30     1D Gaussian filter kernel.\n31 \n32     The Gaussian filter is a filter with great smoothing properties. It is\n33     isotropic and does not produce artifacts.\n34 \n35     The generated kernel is normalized so that it integrates to 1.\n36     """\n37     _separable = True\n38     _is_bool = False\n39 \n40     def __init__(self, stddev, **kwargs):\n41      

2023-11-09 03:59:13,511 INFO Loaded pylsp plugin autopep8 from <module 'pylsp.plugins.autopep8_format' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/autopep8_format.py'>
2023-11-09 03:59:13,513 INFO Loaded pylsp plugin flake8 from <module 'pylsp.plugins.flake8_lint' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/flake8_lint.py'>
2023-11-09 03:59:13,513 INFO Loaded pylsp plugin folding from <module 'pylsp.plugins.folding' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/folding.py'>
2023-11-09 03:59:13,514 INFO Loaded pylsp plugin jedi_completion from <module 'pylsp.plugins.jedi_completion' from '/datadrive05/huypn16/anaconda3/envs/knn-llm/lib/python3.10/site-packages/pylsp/plugins/jedi_completion.py'>
2023-11-09 03:59:13,514 INFO Loaded pylsp plugin jedi_definition from <module 'pylsp.plugins.definition' from '/datadrive05/huypn16/anaconda3/envs/knn-l

[32;1m[1;3mThe search results indicate that `Gaussian1DKernel` is a class within the `astropy.convolution.kernels` module. The class is initialized with a standard deviation parameter `stddev` and possibly other keyword arguments. The initialization of the parent class (`Kernel1D`) is done through the `super().__init__(**kwargs)` call in line 44 of the implementation.

To provide more details on how the parent class `Kernel1D` is initialized, I would need to look at the definition of `Kernel1D`. Let's use the `go_to_definition` tool to find the definition of `Kernel1D`.

Action:
```json
{
  "action": "go_to_definition",
  "action_input": {
    "word": "Kernel1D",
    "line": 28,
    "relative_path": "astropy/convolution/kernels.py"
  }
}
```[0m
Observation: [33;1m[1;3mParent Name: None
Name: Kernel1D
Type: Class
Definition: class Kernel1D(Kernel):
    """
    Base class for 1D filter kernels.

    Parameters
    ----------
    model : `~astropy.modeling.FittableModel`
        Mode

'The `Gaussian1DKernel` class initializes its parent class `Kernel1D` by calling `super().__init__(**kwargs)`. The `Kernel1D` class can be initialized with a model, an array, and other keyword arguments. If a model is provided, it is discretized to create the kernel array. If an array is provided, it is used directly. The `super().__init__(array)` call in `Kernel1D` suggests there is another level of inheritance, but the specific details of the next parent class are not provided in the given context.'