feat: Add support for regex, prefix, and suffix markers in Marker class

elifarley · elifarley · commit 08cc4cb77523 · 2024-10-27T15:34:00.000-03:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
 ]
 keywords = ["cedarscript", "code-editing", "refactoring", "code-analysis", "sql-like", "ai-assisted-development"]
 dependencies = [
-    "cedarscript-ast-parser==0.2.11",
+    "cedarscript-ast-parser==0.3.0",
     "grep-ast==0.3.3",
     "tree-sitter-languages==1.10.2",
 ]
diff --git a/src/cedarscript_editor/__init__.py b/src/cedarscript_editor/__init__.py
@@ -7,6 +7,7 @@
     "__version__", "find_commands", "CEDARScriptEditor"
 ]
 
+
 # TODO Move to cedarscript-ast-parser
 def find_commands(content: str):
     # Regex pattern to match CEDARScript blocks
diff --git a/src/text_manipulation/indentation_kit.py b/src/text_manipulation/indentation_kit.py
@@ -20,6 +20,10 @@
 from typing import NamedTuple
 
 
+def get_line_indent_count_from_lines(lines: Sequence[str], index: int) -> int:
+    return get_line_indent_count(lines[index])  # Indent count of the line at `index`
+
+
 def get_line_indent_count(line: str) -> int:
     """
     Count the number of leading whitespace characters in a line.
diff --git a/src/text_manipulation/range_spec.py b/src/text_manipulation/range_spec.py
@@ -17,7 +17,7 @@
 
 from cedarscript_ast_parser import Marker, RelativeMarker, RelativePositionType, MarkerType, BodyOrWhole
 
-from .indentation_kit import get_line_indent_count
+from .indentation_kit import get_line_indent_count_from_lines
 
 MATCH_TYPES = ('exact', 'stripped', 'normalized', 'partial')
 
@@ -190,25 +190,71 @@ def from_line_marker(
             f"must be less than or equal to line count ({len(lines)})"
         )
 
-        for i in range(search_start_index, search_end_index):
-            line = lines[i]
-            reference_indent = get_line_indent_count(line)
+        marker_subtype = (search_term.marker_subtype or "").casefold()
+
+        match marker_subtype:
+            case 'number':  # Simple case: a line number
+                index = int(stripped_search) - 1
+                assert 0 <= index < len(lines), (
+                    f"Line number {stripped_search} out of bounds "
+                    f"(must be in interval [1, {len(lines)}])"
+                )
+                reference_indent = get_line_indent_count_from_lines(lines, index)
+                index += calc_index_delta_for_relative_position(search_term)
+                return cls(index, index, reference_indent)
 
-            # Check for exact match
-            if search_line == line:
-                matches['exact'].append((i, reference_indent))
+            case 'regex':
+                try:
+                    pattern = re.compile(search_line)
+                except re.error as e:
+                    raise ValueError(f"Invalid regex pattern '{search_line}': {e}")
 
-            # Check for stripped match
-            elif stripped_search == line.strip():
-                matches['stripped'].append((i, reference_indent))
+            case _:
+                pattern = None
 
-            # Check for normalized match
-            elif normalized_search_line == cls.normalize_line(line):
-                matches['normalized'].append((i, reference_indent))
+        # Not a line number, so we need to find all line matches
+        for i in range(search_start_index, search_end_index):
+            reference_indent = get_line_indent_count_from_lines(lines, i)
 
-            # Dangerous! Last resort!
-            elif normalized_search_line.casefold() in cls.normalize_line(line).casefold():
-                matches['partial'].append((i, reference_indent))
+            line = lines[i]
+            match marker_subtype:
+
+                case 'regex':
+                    if pattern.search(line) or pattern.search(line.strip()):
+                        matches['exact'].append((i, reference_indent))
+
+                case 'prefix':
+                    # Check for stripped prefix match
+                    if line.strip().startswith(stripped_search):
+                        matches['exact'].append((i, reference_indent))
+                    # Check for normalized prefix match
+                    elif cls.normalize_line(line).startswith(normalized_search_line):
+                        matches['normalized'].append((i, reference_indent))
+
+                case 'suffix':
+                    # Check for stripped suffix match
+                    if line.strip().endswith(stripped_search):
+                        matches['exact'].append((i, reference_indent))
+                    # Check for normalized suffix match
+                    elif cls.normalize_line(line).endswith(normalized_search_line):
+                        matches['normalized'].append((i, reference_indent))
+
+                case _:
+                    # Check for exact match
+                    if search_line == line:
+                        matches['exact'].append((i, reference_indent))
+
+                    # Check for stripped match
+                    elif stripped_search == line.strip():
+                        matches['stripped'].append((i, reference_indent))
+
+                    # Check for normalized match
+                    elif normalized_search_line == cls.normalize_line(line):
+                        matches['normalized'].append((i, reference_indent))
+
+                    # Dangerous! Last resort!
+                    elif normalized_search_line.casefold() in cls.normalize_line(line).casefold():
+                        matches['partial'].append((i, reference_indent))
 
         offset = search_term.offset or 0
         max_match_count = max([len(m) for m in matches.values()])
@@ -241,16 +287,7 @@ def from_line_marker(
                     case 'partial':
                         print(f"Note: Won't accept {match_type} match at index {index} for {search_term}")
                         continue
-                if isinstance(search_term, RelativeMarker):
-                    match search_term.qualifier:
-                        case RelativePositionType.BEFORE:
-                            index += -1
-                        case RelativePositionType.AFTER:
-                            index += 1
-                        case RelativePositionType.AT:
-                            pass
-                        case _ as invalid:
-                            raise ValueError(f"Not implemented: {invalid}")
+                index += calc_index_delta_for_relative_position(search_term)
                 return cls(index, index, reference_indent)
 
         return None
@@ -259,12 +296,29 @@ def from_line_marker(
 RangeSpec.EMPTY = RangeSpec(0, -1, 0)
 
 
+def calc_index_delta_for_relative_position(marker: Marker) -> int:
+    """Return the amount to add to a matched line index for `marker`: -1, 0 or 1."""
+    match marker:
+        case RelativeMarker(qualifier=RelativePositionType.BEFORE):
+            return -1
+        case RelativeMarker(qualifier=RelativePositionType.AFTER):
+            return 1
+        case RelativeMarker(qualifier=RelativePositionType.AT):
+            return 0
+        case RelativeMarker(qualifier=invalid):  # Any qualifier not handled above
+            raise ValueError(f"Not implemented: {invalid}")
+        case _:  # Anything that is not a RelativeMarker leaves the index unchanged
+            return 0
+
+
 class ParentInfo(NamedTuple):
     parent_name: str
     parent_type: str
 
+
 ParentRestriction: TypeAlias = RangeSpec | str | None
 
+
 class IdentifierBoundaries(NamedTuple):
     """
     Represents the boundaries of an identifier in code, including its whole range and body range.
diff --git a/tests/corpus/x.update.identifier.insert.after.line-number/1.py b/tests/corpus/x.update.identifier.insert.after.line-number/1.py
@@ -0,0 +1,22 @@
+class A:
+    def a(self):
+        pass
+    def calculate(self,
+        a,
+        b,
+        c,
+        d,
+        e
+    ):
+        pass
+class B:
+    def a(self):
+        pass
+    def calculate(self,
+        a,
+        b,
+        c,
+        d,
+        e
+    ):
+        pass
diff --git a/tests/corpus/x.update.identifier.insert.after.line-number/chat.xml b/tests/corpus/x.update.identifier.insert.after.line-number/chat.xml
@@ -0,0 +1,11 @@
+<no-train>
+```CEDARScript
+UPDATE CLASS "B"
+FROM FILE "1.py"
+REPLACE LINE 1
+WITH CONTENT '''
+@0:def calculate(self, line_1,
+@1:line_2,
+''';
+```
+</no-train>
diff --git a/tests/corpus/x.update.identifier.insert.after.line-number/expected.1.py b/tests/corpus/x.update.identifier.insert.after.line-number/expected.1.py
@@ -0,0 +1,23 @@
+class A:
+    def a(self):
+        pass
+    def calculate(self,
+        a,
+        b,
+        c,
+        d,
+        e
+    ):
+        pass
+class B:
+    def a(self):
+        pass
+    def calculate(self, line_1,
+        line_2,
+        a,
+        b,
+        c,
+        d,
+        e
+    ):
+        pass

Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ classifiers = [`
`22`	`22`	`]`
`23`	`23`	`keywords = ["cedarscript", "code-editing", "refactoring", "code-analysis", "sql-like", "ai-assisted-development"]`
`24`	`24`	`dependencies = [`
`25`		`- "cedarscript-ast-parser==0.2.11",`
	`25`	`+ "cedarscript-ast-parser==0.3.0",`
`26`	`26`	`"grep-ast==0.3.3",`
`27`	`27`	`"tree-sitter-languages==1.10.2",`
`28`	`28`	`]`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@`
`7`	`7`	`"__version__", "find_commands", "CEDARScriptEditor"`
`8`	`8`	`]`
`9`	`9`
	`10`	`+`
`10`	`11`	`# TODO Move to cedarscript-ast-parser`
`11`	`12`	`def find_commands(content: str):`
`12`	`13`	`# Regex pattern to match CEDARScript blocks`