Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements to PR Code Suggestions Handling and Configuration #546

Merged
merged 5 commits into from
Dec 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/IMPROVE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/
- `num_code_suggestions`: number of code suggestions provided by the 'improve' tool. Default is 4.
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
- `rank_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is false.
- `include_improved_code`: if set to true, the tool will include an improved code implementation in the suggestion. Default is true.

#### params for '/improve --extended' mode
- `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'improve' tool, per chunk. Default is 8.
Expand Down
84 changes: 46 additions & 38 deletions pr_agent/algo/pr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,65 +264,73 @@ def _get_all_deployments(all_models: List[str]) -> List[str]:

def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
relevant_file: str,
relevant_line_in_file: str) -> Tuple[int, int]:
"""
Find the line number and absolute position of a relevant line in a file.

Args:
diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files.
relevant_file (str): The name of the file where the relevant line is located.
relevant_line_in_file (str): The content of the relevant line.

Returns:
Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file.
"""
relevant_line_in_file: str,
absolute_position: int = None) -> Tuple[int, int]:
position = -1
absolute_position = -1
if absolute_position is None:
absolute_position = -1
re_hunk_header = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

for file in diff_files:
if file.filename and (file.filename.strip() == relevant_file):
patch = file.patch
patch_lines = patch.splitlines()

# try to find the line in the patch using difflib, with some margin of error
matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
patch_lines, n=3, cutoff=0.93)
if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
relevant_line_in_file = matches_difflib[0]

delta = 0
start1, size1, start2, size2 = 0, 0, 0, 0
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if relevant_line_in_file in line and line[0] != '-':
position = i
absolute_position = start2 + delta - 1
break

if position == -1 and relevant_line_in_file[0] == '+':
no_plus_line = relevant_line_in_file[1:].lstrip()
if absolute_position != -1: # matching absolute to relative
for i, line in enumerate(patch_lines):
# new hunk
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if no_plus_line in line and line[0] != '-':
# The model might add a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
#
absolute_position_curr = start2 + delta - 1

if absolute_position_curr == absolute_position:
position = i
break
else:
# try to find the line in the patch using difflib, with some margin of error
matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
patch_lines, n=3, cutoff=0.93)
if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
relevant_line_in_file = matches_difflib[0]


for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if relevant_line_in_file in line and line[0] != '-':
position = i
absolute_position = start2 + delta - 1
break

if position == -1 and relevant_line_in_file[0] == '+':
no_plus_line = relevant_line_in_file[1:].lstrip()
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if no_plus_line in line and line[0] != '-':
# The model might add a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
position = i
absolute_position = start2 + delta - 1
break
return position, absolute_position


Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/azuredevops_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,6 @@ def remove_initial_comment(self):
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Azure DevOps provider does not support publishing inline comment yet")

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Azure DevOps provider does not support creating inline comments yet")

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError("Azure DevOps provider does not support publishing inline comments yet")

Expand Down
6 changes: 4 additions & 2 deletions pr_agent/git_providers/bitbucket_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ def remove_comment(self, comment):
get_logger().exception(f"Failed to remove comment, error: {e}")

# funtion to create_inline_comment
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), relevant_file.strip('`'), relevant_line_in_file)
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(),
relevant_file.strip('`'),
relevant_line_in_file, absolute_position)
if position == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
Expand Down
7 changes: 5 additions & 2 deletions pr_agent/git_providers/bitbucket_server_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,14 @@ def remove_comment(self, comment):
pass

# funtion to create_inline_comment
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):

position, absolute_position = find_line_number_of_relevant_line_in_file(
self.get_diff_files(),
relevant_file.strip('`'),
relevant_line_in_file
relevant_line_in_file,
absolute_position
)
if position == -1:
if get_settings().config.verbosity_level >= 2:
Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/codecommit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,6 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html
raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("CodeCommit provider does not support creating inline comments yet")

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

Expand Down
5 changes: 0 additions & 5 deletions pr_agent/git_providers/gerrit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,11 +380,6 @@ def publish_inline_comment(self, body: str, relevant_file: str,
'Publishing inline comments is not implemented for the gerrit '
'provider')

def create_inline_comment(self, body: str, relevant_file: str,
relevant_line_in_file: str):
raise NotImplementedError(
'Creating inline comments is not implemented for the gerrit '
'provider')

def publish_labels(self, labels):
# Not applicable to the local git provider,
Expand Down
6 changes: 3 additions & 3 deletions pr_agent/git_providers/git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ def publish_persistent_comment(self, pr_comment: str, initial_header: str, updat
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass

@abstractmethod
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):
raise NotImplementedError("This git provider does not support creating inline comments yet")

@abstractmethod
def publish_inline_comments(self, comments: list[dict]):
Expand Down
8 changes: 6 additions & 2 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,12 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])


def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files, relevant_file.strip('`'), relevant_line_in_file)
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files,
relevant_file.strip('`'),
relevant_line_in_file,
absolute_position)
if position == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
Expand Down
2 changes: 1 addition & 1 deletion pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
raise NotImplementedError("Gitlab provider does not support creating inline comments yet")

def create_inline_comments(self, comments: list[dict]):
Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/local_git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ def publish_comment(self, pr_comment: str, is_temporary: bool = False):
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError('Creating inline comments is not implemented for the local git provider')

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

Expand Down
1 change: 1 addition & 0 deletions pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ include_generated_by_header=true
[pr_code_suggestions] # /improve #
num_code_suggestions=4
summarize = false
include_improved_code = true
extra_instructions = ""
rank_suggestions = false
# params for '/improve --extended' mode
Expand Down
72 changes: 24 additions & 48 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ __old hunk__

Specific instructions:
- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions.
- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancments, performance, and other aspects.
- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on enhancment, best practice, performance, maintainability, and other aspects.
- Don't suggest to add docstring, type hints, or comments.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
Expand All @@ -49,65 +49,41 @@ Extra instructions from the user:
======
{%- endif %}

The output must be a YAML object equivalent to type PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
existing_code: str = Field(description="a code snippet, showing the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers")
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'bug', 'performance', 'enhancement', 'possible issue', 'best practice', 'maintainability', etc. Other labels are also allowed")

class PRCodeSuggestions(BaseModel):
code_suggestions: List[CodeSuggestion]
=====

You must use the following YAML schema to format your answer:
```yaml
Code suggestions:
type: array
minItems: 1
maxItems: {{ num_code_suggestions }}
uniqueItems: true
items:
relevant file:
type: string
description: the relevant file full path
suggestion content:
type: string
description: |-
a concrete suggestion for meaningfully improving the new PR code.
existing code:
type: string
description: |-
a code snippet showing the relevant code lines from a '__new hunk__' section.
It must be contiguous, correctly formatted and indented, and without line numbers.
relevant lines start:
type: integer
description: |-
The relevant line number from a '__new hunk__' section where the suggestion starts (inclusive).
Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
relevant lines end:
type: integer
description: |-
The relevant line number from a '__new hunk__' section where the suggestion ends (inclusive).
Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
improved code:
type: string
description: |-
a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
```

Example output:
```yaml
Code suggestions:
- relevant file: |-
code_suggestions:
- relevant_file: |-
src/file1.py
suggestion content: |-
suggestion_content: |-
Add a docstring to func1()
existing code: |-
existing_code: |-
def func1():
relevant lines start: |-
12
relevant lines end: |-
12
improved code: |-
relevant_lines_start: 12
relevant_lines_end: 12
improved_code: |-
...
label: |-
...
...
```


Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
"""

user="""PR Info:
Expand Down
Loading
Loading