From fed0ea349a3f4ad2f6164615862307bb0eef1acb Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sat, 5 Aug 2023 10:34:09 +0300 Subject: [PATCH 1/2] find_line_number_of_relevant_line_in_file find_line_number_of_relevant_line_in_file --- pr_agent/algo/pr_processing.py | 57 ++++++++++++++++++++-- pr_agent/algo/utils.py | 13 ++--- pr_agent/git_providers/github_provider.py | 44 ++++++++++------- pr_agent/git_providers/gitlab_provider.py | 2 +- pr_agent/settings/configuration.toml | 4 +- pr_agent/settings/pr_reviewer_prompts.toml | 30 ++++++------ pr_agent/tools/pr_reviewer.py | 52 ++++++++++++-------- 7 files changed, 138 insertions(+), 64 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 1a84f7361..d48b4bde9 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -1,8 +1,9 @@ from __future__ import annotations +import re +import difflib import logging -from typing import Callable, Tuple - +from typing import Callable, Tuple, List, Any, Sequence from github import RateLimitExceededException from pr_agent.algo import MAX_TOKENS @@ -10,7 +11,7 @@ from pr_agent.algo.language_handler import sort_files_by_main_languages from pr_agent.algo.token_handler import TokenHandler from pr_agent.config_loader import get_settings -from pr_agent.git_providers.git_provider import GitProvider +from pr_agent.git_providers.git_provider import GitProvider, FilePatchInfo DELETED_FILES_ = "Deleted files:\n" @@ -217,3 +218,53 @@ async def retry_with_fallback_models(f: Callable): logging.warning(f"Failed to generate prediction with {model}: {e}") if i == len(all_models) - 1: # If it's the last iteration raise # Re-raise the last exception + + +def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], relevant_file: str, + relevant_line_in_file: str) -> Tuple[int, int]: + position = -1 + absolute_position = -1 + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@[ ]?(.*)") + for file in diff_files: + if file.filename.strip() == relevant_file: + patch = file.patch + patch_lines = patch.splitlines() + + # try to find the line in the patch using difflib, with some margin of error + matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, + file.patch.splitlines(), n=3, cutoff=0.95) + if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): + relevant_line_in_file = matches_difflib[0] + + delta = 0 + for i, line in enumerate(patch_lines): + + if line.startswith('@@'): + delta = 0 + match = RE_HUNK_HEADER.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if relevant_line_in_file in line and line[0] != '-': + position = i + absolute_position = start2 + delta - 1 + break + if position == -1: + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + delta = 0 + match = RE_HUNK_HEADER.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line and line[ + 0] != '-': + # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally + # it's a context line + position = i + absolute_position = start2 + delta - 1 + break + return position, absolute_position diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 2f4466137..264575bb2 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -40,7 +40,7 @@ def convert_to_markdown(output_data: dict) -> str: "Security concerns": "🔒", "General PR suggestions": "💡", "Insights from user's answers": "📝", - "Code suggestions": "🤖", + "Code feedback": "🤖", } for key, value in output_data.items(): @@ -50,12 +50,12 @@ def convert_to_markdown(output_data: dict) -> str: markdown_text += f"## {key}\n\n" markdown_text += convert_to_markdown(value) elif isinstance(value, list): - if key.lower() == 'code suggestions': + if key.lower() == 'code feedback': markdown_text += "\n" # just looks nicer with additional line breaks emoji = emojis.get(key, "") markdown_text += f"- {emoji} **{key}:**\n\n" for item in value: - if isinstance(item, dict) and key.lower() == 'code suggestions': + if isinstance(item, dict) and key.lower() == 'code feedback': markdown_text += parse_code_suggestion(item) elif item: markdown_text += f" - {item}\n" @@ -100,7 +100,7 @@ def try_fix_json(review, max_iter=10, code_suggestions=False): Args: - review: A string containing the JSON message to be fixed. - max_iter: An integer representing the maximum number of iterations to try and fix the JSON message. - - code_suggestions: A boolean indicating whether to try and fix JSON messages with code suggestions. + - code_suggestions: A boolean indicating whether to try and fix JSON messages with code feedback. Returns: - data: A dictionary containing the parsed JSON data. @@ -108,7 +108,7 @@ def try_fix_json(review, max_iter=10, code_suggestions=False): The function attempts to fix broken or incomplete JSON messages by parsing until the last valid code suggestion. If the JSON message ends with a closing bracket, the function calls the fix_json_escape_char function to fix the message. 
- If code_suggestions is True and the JSON message contains code suggestions, the function tries to fix the JSON + If code_suggestions is True and the JSON message contains code feedback, the function tries to fix the JSON message by parsing until the last valid code suggestion. The function uses regular expressions to find the last occurrence of "}," with any number of whitespaces or newlines. @@ -128,7 +128,8 @@ def try_fix_json(review, max_iter=10, code_suggestions=False): else: closing_bracket = "]}}" - if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0: + if (review.rfind("'Code feedback': [") > 0 or review.rfind('"Code feedback": [') > 0) or \ + (review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0) : last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1 valid_json = False iter_count = 0 diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index c72010377..f3018e866 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -1,4 +1,6 @@ import logging +import hashlib + from datetime import datetime from typing import Optional, Tuple from urllib.parse import urlparse @@ -10,6 +12,7 @@ from .git_provider import FilePatchInfo, GitProvider, IncrementalPR from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file from ..config_loader import get_settings from ..servers.utils import RateLimitExceeded @@ -148,22 +151,9 @@ def publish_comment(self, pr_comment: str, is_temporary: bool = False): def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)]) + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): - diff_files = self.get_diff_files() - position = -1 - for file in diff_files: - if file.filename.strip() == relevant_file: - patch = file.patch - patch_lines = patch.splitlines() - for i, line in enumerate(patch_lines): - if relevant_line_in_file in line: - position = i - break - elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line: - # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally - # it's a context line - position = i - break + position = find_line_number_of_relevant_line_in_file(self.diff_files, relevant_file.strip('`'), relevant_line_in_file) if position == -1: if get_settings().config.verbosity_level >= 2: logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}") @@ -171,8 +161,6 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_ else: subject_type = "LINE" path = relevant_file.strip() - # placeholder for future API support (already supported in single inline comment) - # return dict(body=body, path=path, position=position, subject_type=subject_type) return dict(body=body, path=path, position=position) if subject_type == "LINE" else {} def publish_inline_comments(self, comments: list[dict]): @@ -384,3 +372,25 @@ def get_commit_messages(self) -> str: except: commit_messages_str = "" return commit_messages_str + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant file'] + relevant_line_str = suggestion['relevant line'] 
+ position, absolute_position = find_line_number_of_relevant_line_in_file \ + (self.diff_files, relevant_file.strip('`'), relevant_line_str) + + if absolute_position != -1: + # # link to right file only + # link = f"https://github.com/{self.repo}/blob/{self.pr.head.sha}/{relevant_file}" \ + # + "#" + f"L{absolute_position}" + + # link to diff + sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest() + link = f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{absolute_position}" + return link + except Exception as e: + if get_settings().config.verbosity_level >= 2: + logging.info(f"Failed adding line link, error: {e}") + + return "" \ No newline at end of file diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index d28d59b85..14d1d8835 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -344,4 +344,4 @@ def get_commit_messages(self) -> str: commit_messages_str = "\n".join([f"{i + 1}. {message}" for i, message in enumerate(commit_messages_list)]) except: commit_messages_str = "" - return commit_messages_str + return commit_messages_str \ No newline at end of file diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6a41b2b4d..cb9af7752 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -13,8 +13,8 @@ require_focused_review=true require_score_review=false require_tests_review=true require_security_review=true -num_code_suggestions=0 -inline_code_comments = true +num_code_suggestions=3 +inline_code_comments = false ask_and_reflect=false extra_instructions = "" diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 9a6babf44..613f13602 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -1,9 +1,9 @@ [pr_review_prompt] system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines). -- Provide up to {{ num_code_suggestions }} code suggestions. {%- if num_code_suggestions > 0 %} -- Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices. +- Provide up to {{ num_code_suggestions }} code suggestions. +- Try to focus on the most important suggestions, like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices. - Suggestions should focus on improving the new added code lines. - Make sure not to provide suggestions repeating modifications already implemented in the new PR code (the '+' lines). 
{%- endif %} @@ -24,7 +24,7 @@ You must use the following JSON schema to format your answer: }, "Type of PR": { "type": "string", - "enum": ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"] + "enum": ["Bug fix", "Tests", "Refactoring", "Enhancement", "Documentation", "Other"] }, {%- if require_score %} "Score": { @@ -47,17 +47,17 @@ You must use the following JSON schema to format your answer: {%- if require_focused %} "Focused PR": { "type": "string", - "description": "Is this a focused PR, in the sense that it has a clear and coherent title and description, and all PR code diff changes are properly derived from the title and description? Explain your response." + "description": "Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly." } }, {%- endif %} "PR Feedback": { - "General PR suggestions": { + "General suggestions": { "type": "string", - "description": "General suggestions and feedback for the contributors and maintainers of this PR. May include important suggestions for the overall structure, primary purpose, best practices, critical bugs, and other aspects of the PR. Explain your suggestions." + "description": "General suggestions and feedback for the contributors and maintainers of this PR. May include important suggestions for the overall structure, primary purpose, best practices, critical bugs, and other aspects of the PR. Don't address PR title and description, or lack of tests. Explain your suggestions." }, {%- if num_code_suggestions > 0 %} - "Code suggestions": { + "Code feedback": { "type": "array", "maxItems": {{ num_code_suggestions }}, "uniqueItems": true, @@ -66,13 +66,13 @@ You must use the following JSON schema to format your answer: "type": "string", "description": "the relevant file full path" }, - "suggestion content": { + "suggestion": { "type": "string", "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like. }, - "relevant line in file": { + "relevant line": { "type": "string", - "description": "an authentic single code line from the PR git diff section, to which the suggestion applies." + "description": "a single code line taken from the relevant file, to which the suggestion applies. The line should be a '+' line. Make sure to output the line exactly as it appears in the relevant file" } } }, @@ -80,8 +80,8 @@ You must use the following JSON schema to format your answer: {%- if require_security %} "Security concerns": { "type": "string", - "description": "yes\\no question: does this PR code introduce possible security concerns or issues, like SQL injection, XSS, CSRF, and others ? explain your answer" - ? explain your answer" + "description": "yes\\no question: does this PR code introduce possible security concerns or issues, like SQL injection, XSS, CSRF, and others ? If answered 'yes', explain your answer shortly" + ? 
explain your answer shortly" } {%- endif %} } @@ -109,11 +109,11 @@ Example output: { "General PR suggestions": "..., `xxx`...", {%- if num_code_suggestions > 0 %} - "Code suggestions": [ + "Code feedback": [ { "relevant file": "directory/xxx.py", - "suggestion content": "xxx [important]", - "relevant line in file": "xxx", + "suggestion": "xxx [important]", + "relevant line": "xxx", }, ... ] diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index e4d526b5c..3f8671f8d 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -7,7 +7,8 @@ from jinja2 import Environment, StrictUndefined from pr_agent.algo.ai_handler import AiHandler -from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \ + find_line_number_of_relevant_line_in_file from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import convert_to_markdown, try_fix_json from pr_agent.config_loader import get_settings @@ -160,27 +161,38 @@ def _prepare_pr_review(self) -> str: the feedback. """ review = self.prediction.strip() - + try: data = json.loads(review) except json.decoder.JSONDecodeError: data = try_fix_json(review) # Move 'Security concerns' key to 'PR Analysis' section for better display - if 'PR Feedback' in data and 'Security concerns' in data['PR Feedback']: - val = data['PR Feedback']['Security concerns'] - del data['PR Feedback']['Security concerns'] - data['PR Analysis']['Security concerns'] = val - - # Filter out code suggestions that can be submitted as inline comments - if get_settings().config.git_provider != 'bitbucket' and get_settings().pr_reviewer.inline_code_comments \ - and 'Code suggestions' in data['PR Feedback']: - data['PR Feedback']['Code suggestions'] = [ - d for d in data['PR Feedback']['Code suggestions'] - if any(key not in d for key in ('relevant file', 'relevant line in file', 'suggestion content')) - ] - if not data['PR Feedback']['Code suggestions']: - del data['PR Feedback']['Code suggestions'] + pr_feedback = data.get('PR Feedback', {}) + security_concerns = pr_feedback.get('Security concerns') + if security_concerns: + del pr_feedback['Security concerns'] + data.setdefault('PR Analysis', {})['Security concerns'] = security_concerns + + # + if 'Code feedback' in pr_feedback: + code_feedback = pr_feedback['Code feedback'] + + # Filter out code suggestions that can be submitted as inline comments + if get_settings().pr_reviewer.inline_code_comments: + del pr_feedback['Code feedback'] + else: + for suggestion in code_feedback: + relevant_line_str = suggestion['relevant line'].split('\n')[0] + + # removing '+' + suggestion['relevant line'] = relevant_line_str.lstrip('+').strip() + + # try to add line numbers link to code suggestions + if hasattr(self.git_provider, 'generate_link_to_relevant_line_number'): + link = self.git_provider.generate_link_to_relevant_line_number(suggestion) + if link: + suggestion['relevant line'] = f"[{suggestion['relevant line']}]({link})" # Add incremental review section if self.incremental.is_incremental: @@ -205,7 +217,7 @@ def _prepare_pr_review(self) -> str: # Log markdown response if verbosity level is high if get_settings().config.verbosity_level >= 2: logging.info(f"Markdown response:\n{markdown_text}") - + return markdown_text def _publish_inline_code_comments(self) -> None: @@ -222,10 +234,10 @@ def _publish_inline_code_comments(self) -> None: data = try_fix_json(review) comments: 
List[str] = [] - for suggestion in data.get('PR Feedback', {}).get('Code suggestions', []): + for suggestion in data.get('PR Feedback', {}).get('Code feedback', []): relevant_file = suggestion.get('relevant file', '').strip() - relevant_line_in_file = suggestion.get('relevant line in file', '').strip() - content = suggestion.get('suggestion content', '') + relevant_line_in_file = suggestion.get('relevant line', '').strip() + content = suggestion.get('suggestion', '') if not relevant_file or not relevant_line_in_file or not content: logging.info("Skipping inline comment with missing file/line/content") continue From 7367c62cf917edf006986a9ccd8f6c8855cdaa2d Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 6 Aug 2023 08:31:15 +0300 Subject: [PATCH 2/2] TestFindLineNumberOfRelevantLineInFile --- pr_agent/algo/pr_processing.py | 36 +++++++--- ...nd_line_number_of_relevant_line_in_file.py | 68 +++++++++++++++++++ 2 files changed, 93 insertions(+), 11 deletions(-) create mode 100644 tests/unittest/test_find_line_number_of_relevant_line_in_file.py diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index d48b4bde9..fb66583d5 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -3,7 +3,7 @@ import re import difflib import logging -from typing import Callable, Tuple, List, Any, Sequence +from typing import Callable, Tuple, List, Any from github import RateLimitExceededException from pr_agent.algo import MAX_TOKENS @@ -220,12 +220,25 @@ async def retry_with_fallback_models(f: Callable): raise # Re-raise the last exception -def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], relevant_file: str, +def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], + relevant_file: str, relevant_line_in_file: str) -> Tuple[int, int]: + """ + Find the line number and absolute position of a relevant line in a file. + + Args: + diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files. + relevant_file (str): The name of the file where the relevant line is located. + relevant_line_in_file (str): The content of the relevant line. + + Returns: + Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file. + """ position = -1 absolute_position = -1 - RE_HUNK_HEADER = re.compile( + re_hunk_header = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@[ ]?(.*)") + for file in diff_files: if file.filename.strip() == relevant_file: patch = file.patch @@ -233,16 +246,16 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r # try to find the line in the patch using difflib, with some margin of error matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, - file.patch.splitlines(), n=3, cutoff=0.95) + patch_lines, n=3, cutoff=0.93) if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): relevant_line_in_file = matches_difflib[0] delta = 0 + start1, size1, start2, size2 = 0, 0, 0, 0 for i, line in enumerate(patch_lines): - if line.startswith('@@'): delta = 0 - match = RE_HUNK_HEADER.match(line) + match = re_hunk_header.match(line) start1, size1, start2, size2 = map(int, match.groups()[:4]) elif not line.startswith('-'): delta += 1 @@ -251,18 +264,19 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r position = i absolute_position = start2 + delta - 1 break - if position == -1: + + if position == -1 and relevant_line_in_file[0] == '+': + no_plus_line = relevant_line_in_file[1:].lstrip() for i, line in enumerate(patch_lines): if line.startswith('@@'): delta = 0 - match = RE_HUNK_HEADER.match(line) + match = re_hunk_header.match(line) start1, size1, start2, size2 = map(int, match.groups()[:4]) elif not line.startswith('-'): delta += 1 - if relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line and line[ - 0] != '-': - # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally + if no_plus_line in line and line[0] != '-': + # The model might add a '+' to the beginning of the relevant_line_in_file even if originally # it's a context line position = i absolute_position = start2 + delta - 1 diff --git a/tests/unittest/test_find_line_number_of_relevant_line_in_file.py b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py new file mode 100644 index 000000000..7488c6dff --- /dev/null +++ b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py @@ -0,0 +1,68 @@ + +# Generated by CodiumAI +from pr_agent.git_providers.git_provider import FilePatchInfo +from pr_agent.algo.pr_processing import find_line_number_of_relevant_line_in_file + + +import pytest + +class TestFindLineNumberOfRelevantLineInFile: + # Tests that the function returns the correct line number and absolute position when the relevant line is found in the patch + def test_relevant_line_found_in_patch(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+line2\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (3, 2) # (position in patch, absolute_position in new file) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns the correct line number and absolute position when a similar line is found using difflib + def test_similar_line_found_using_difflib(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line in file similar match\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = '+relevant_line in file similar match ' # note the space at the end. 
This is to simulate a similar line found using difflib + expected = (2, 1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant line is not found in the patch and no similar line is found using difflib + def test_relevant_line_not_found(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'not_found' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant file is not found in any of the patches + def test_relevant_file_not_found(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file2') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant_line_in_file is an empty string + def test_empty_relevant_line(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = '' + expected = (0, 0) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant_line_in_file is found in the patch but it is a deleted line + def test_relevant_line_found_but_deleted(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,2 +1,1 @@\n-line1\n-relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected \ No newline at end of file
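
A minimal usage sketch of the helper introduced above, assuming the import paths and the FilePatchInfo constructor exactly as they appear in the test file; the repository name, PR number, file name and patch text below are invented for illustration, and the link format mirrors the one built in generate_link_to_relevant_line_number:

import hashlib

from pr_agent.git_providers.git_provider import FilePatchInfo
from pr_agent.algo.pr_processing import find_line_number_of_relevant_line_in_file

# A one-hunk patch in which 'new_line' is an added line that becomes line 2 of the new file.
diff_files = [
    FilePatchInfo(base_file='a.py', head_file='a.py', filename='a.py',
                  patch='@@ -1,2 +1,3 @@\n line1\n+new_line\n line2\n')
]

# position          -> index of the matching line inside the patch text (used for review-API comments)
# absolute_position -> line number in the new version of the file (used for permalinks)
position, absolute_position = find_line_number_of_relevant_line_in_file(
    diff_files, relevant_file='a.py', relevant_line_in_file='new_line')
# expected here: position == 2, absolute_position == 2

if absolute_position != -1:
    repo = "org/example-repo"   # hypothetical values for the sketch
    pr_num = 123                # hypothetical
    # GitHub's "Files changed" view anchors each file by the sha256 of its path,
    # with an R<line> suffix selecting the right-hand (new) side of the diff.
    sha_file = hashlib.sha256('a.py'.encode('utf-8')).hexdigest()
    link = f"https://github.com/{repo}/pull/{pr_num}/files#diff-{sha_file}R{absolute_position}"
    print(position, absolute_position, link)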