Skip to content

Commit c59bb6f

Browse files
committed
indentation_kit.py: fix char_count calculation.
Now analyses the deltas between the sorted space counts first, and only then applies the GCD check
1 parent 11b0c21 commit c59bb6f

File tree

5 files changed

+336
-55
lines changed

5 files changed

+336
-55
lines changed

src/cedarscript_editor/cedarscript_editor.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,8 @@ def _update_command(self, cmd: UpdateCommand):
146146
content_range = restrict_search_range_for_marker(
147147
region, action, lines, RangeSpec.EMPTY, identifier_finder
148148
)
149-
content = content_range.read(lines)
150-
content = IndentationInfo.from_content(lines).update_min_indent_level(content).shift_indentation(
151-
content, search_range.indent, relindent_level
149+
content = IndentationInfo.shift_indentation(
150+
content_range.read(lines), lines, search_range.indent, relindent_level
152151
)
153152
content = (region, content)
154153
case _:
@@ -158,9 +157,8 @@ def _update_command(self, cmd: UpdateCommand):
158157
# region, action, lines, RangeSpec.EMPTY, identifier_finder
159158
# )
160159
# TODO Are the 3 lines above needed?
161-
content = move_src_range.read(lines)
162-
content = IndentationInfo.from_content(lines).update_min_indent_level(content).shift_indentation(
163-
content, search_range.indent, relindent_level
160+
content = IndentationInfo.shift_indentation(
161+
move_src_range.read(lines), lines, search_range.indent, relindent_level
164162
)
165163
case DeleteClause():
166164
pass

src/text_manipulation/indentation_kit.py

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,54 @@ class IndentationInfo(NamedTuple):
110110
def default(cls) -> 'IndentationInfo':
111111
return cls(4, ' ', 0)
112112

113+
@classmethod
114+
def shift_indentation(cls,
115+
content: Sequence[str], target_lines: Sequence[str], target_reference_indentation_count: int,
116+
relindent_level: int | None
117+
) -> list[str]:
118+
"""
119+
Returns 'content' with shifted indentation based on a relative indent level and a reference indentation count.
120+
121+
This method adjusts the indentation of each non-empty line in the input sequence.
122+
It calculates the difference between the target base indentation and the minimum
123+
indentation found in the content, then applies this shift to all lines.
124+
125+
Args:
126+
content (Sequence[str]): A sequence of strings representing the lines to be adjusted.
127+
target_reference_indentation_count (int): The target base indentation count to adjust to.
128+
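relindent_level (int|None): Number of indentation levels (each 'char_count' characters wide) added to the target reference indentation count; None is treated as 0.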
129+
130+
Returns:
131+
list[str]: A new list of strings with adjusted indentation.
132+
133+
Note:
134+
- Empty lines and lines with only whitespace are preserved as-is.
135+
- The method builds an IndentationInfo from 'content' and replaces its
136+
char_count with the char_count detected in 'target_lines'.
137+
- This method is useful for uniformly adjusting indentation across all lines.
138+
139+
Example:
140+
>>> info = IndentationInfo(4, ' ', 1, True)
141+
>>> lines = [" def example():", " print('Hello')"]
142+
>>> info.shift_indentation(content, 8)
143+
[' def example():', ' print('Hello')']
144+
"""
145+
context_indent_char_count = cls.from_content(target_lines).char_count
146+
return (cls.
147+
from_content(content).
148+
_replace(char_count=context_indent_char_count).
149+
_shift_indentation(
150+
content, target_lines, target_reference_indentation_count, relindent_level
151+
)
152+
)
153+
154+
def _shift_indentation(self,
155+
content: Sequence[str], target_lines: Sequence[str], target_base_indentation_count: int, relindent_level: int | None
156+
) -> list[str]:
157+
target_base_indentation_count += self.char_count * (relindent_level or 0)
158+
raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count)
159+
return [raw_line_adjuster(line) for line in content]
160+
113161
@classmethod
114162
def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
115163
"""
@@ -132,14 +180,16 @@ def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
132180
character count by analyzing patterns and using GCD.
133181
"""
134182
# TODO Always send str?
135-
lines = [x.lstrip() for x in content.splitlines() if x.strip()] if isinstance(content, str) else content
183+
lines = [x for x in content.splitlines() if x.strip()] if isinstance(content, str) else content
136184

137185
indentations = [extract_indentation(line) for line in lines if line.strip()]
186+
has_zero_indent = any((i == '' for i in indentations))
187+
indentations = [indent for indent in indentations if indent]
138188

139189
if not indentations:
140190
return cls(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).")
141191

142-
indent_chars = Counter(indent[0] for indent in indentations if indent)
192+
indent_chars = Counter(indent[0] for indent in indentations)
143193
dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t'
144194

145195
indent_lengths = [len(indent) for indent in indentations]
@@ -148,20 +198,26 @@ def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
148198
char_count = 1
149199
else:
150200
# For spaces, determine the most likely char_count
151-
space_counts = [sc for sc in indent_lengths if sc % 2 == 0 and sc > 0]
201+
space_counts = [sc for sc in indent_lengths if sc % 2 == 0]
152202
if not space_counts:
153203
char_count = 2 # Default to 2 if no even space counts
154204
else:
155-
# Sort top 5 space counts and find the largest GCD
156-
sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True)
157-
char_count = sorted_counts[0]
158-
for i in range(1, len(sorted_counts)):
159-
new_gcd = gcd(char_count, sorted_counts[i])
160-
if new_gcd <= 1:
161-
break
162-
char_count = new_gcd
163-
164-
min_indent_chars = min(indent_lengths) if indent_lengths else 0
205+
unique_space_counts = sorted(set(space_counts))
206+
deltas = sorted([b - a for a, b in zip(unique_space_counts, unique_space_counts[1:])], reverse=True)
207+
most_common_deltas = Counter(deltas).most_common(5)
208+
ratio_most_common = most_common_deltas[0][1] / len(deltas)
209+
if ratio_most_common > .6:
210+
char_count = most_common_deltas[0][0]
211+
else:
212+
char_count = deltas[0]
213+
# find the largest GCD
214+
for i in range(1, len(most_common_deltas)):
215+
new_gcd = gcd(char_count, most_common_deltas[i][0])
216+
if new_gcd <= 1:
217+
break
218+
char_count = new_gcd
219+
220+
min_indent_chars = 0 if has_zero_indent else min(indent_lengths) if indent_lengths else 0
165221
min_indent_level = min_indent_chars // char_count
166222

167223
consistency = all(len(indent) % char_count == 0 for indent in indentations if indent)
@@ -217,42 +273,6 @@ def level_to_chars(self, level: int) -> str:
217273
"""
218274
return level * self.char_count * self.char
219275

220-
# TODO Revise
221-
def shift_indentation(
222-
self, lines: Sequence[str], target_base_indentation_count: int, relindent_level: int | None
223-
) -> list[str]:
224-
"""
225-
Shift the indentation of a sequence of lines based on a target base indentation count.
226-
227-
This method adjusts the indentation of each non-empty line in the input sequence.
228-
It calculates the difference between the target base indentation and the minimum
229-
indentation found in the content, then applies this shift to all lines.
230-
231-
Args:
232-
lines (Sequence[str]): A sequence of strings representing the lines to be adjusted.
233-
target_base_indentation_count (int): The target base indentation count to adjust to.
234-
relindent_level (int|None):
235-
236-
Returns:
237-
list[str]: A new list of strings with adjusted indentation.
238-
239-
Note:
240-
- Empty lines and lines with only whitespace are preserved as-is.
241-
- The method uses the IndentationInfo of the instance to determine
242-
the indentation character and count.
243-
- This method is useful for uniformly adjusting indentation across all lines.
244-
245-
Example:
246-
>>> info = IndentationInfo(4, ' ', 1, True)
247-
>>> lines = [" def example():", " print('Hello')"]
248-
>>> info.shift_indentation(lines, 8)
249-
[' def example():', ' print('Hello')']
250-
"""
251-
target_base_indentation_count += self.char_count * (relindent_level or 0)
252-
raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count)
253-
# Return the transformed lines
254-
return [raw_line_adjuster(line) for line in lines]
255-
256276
def _shift_indentation_fun(self, target_base_indentation_count: int):
257277
# Calculate the indentation difference
258278
level_difference = self.level_difference(target_base_indentation_count)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# RemovedInDjango50Warning
2+
# Copyright (c) 2010 Guilherme Gondim. All rights reserved.
3+
# Copyright (c) 2009 Simon Willison. All rights reserved.
4+
# Copyright (c) 2002 Drew Perttula. All rights reserved.
5+
#
6+
# License:
7+
# Python Software Foundation License version 2
8+
#
9+
# See the file "LICENSE" for terms & conditions for usage, and a DISCLAIMER OF
10+
# ALL WARRANTIES.
11+
#
12+
# This Baseconv distribution contains no GNU General Public Licensed (GPLed)
13+
# code so it may be used in proprietary projects just like prior ``baseconv``
14+
# distributions.
15+
#
16+
# All trademarks referenced herein are property of their respective holders.
17+
#
18+
19+
"""
20+
Convert numbers from base 10 integers to base X strings and back again.
21+
22+
Sample usage::
23+
24+
>>> base20 = BaseConverter('0123456789abcdefghij')
25+
>>> base20.encode(1234)
26+
'31e'
27+
>>> base20.decode('31e')
28+
1234
29+
>>> base20.encode(-1234)
30+
'-31e'
31+
>>> base20.decode('-31e')
32+
-1234
33+
>>> base11 = BaseConverter('0123456789-', sign='$')
34+
>>> base11.encode(-1234)
35+
'$-22'
36+
>>> base11.decode('$-22')
37+
-1234
38+
39+
"""
40+
import warnings
41+
42+
from django.utils.deprecation import RemovedInDjango50Warning
43+
44+
warnings.warn(
45+
"The django.utils.baseconv module is deprecated.",
46+
category=RemovedInDjango50Warning,
47+
stacklevel=2,
48+
)
49+
50+
BASE2_ALPHABET = "01"
51+
BASE16_ALPHABET = "0123456789ABCDEF"
52+
BASE56_ALPHABET = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnpqrstuvwxyz"
53+
BASE36_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
54+
BASE62_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
55+
BASE64_ALPHABET = BASE62_ALPHABET + "-_"
56+
57+
58+
class BaseConverter:
59+
decimal_digits = "0123456789"
60+
61+
def __init__(self, digits, sign="-"):
62+
self.sign = sign
63+
self.digits = digits
64+
if sign in self.digits:
65+
raise ValueError("Sign character found in converter base digits.")
66+
67+
def __repr__(self):
68+
return "<%s: base%s (%s)>" % (
69+
self.__class__.__name__,
70+
len(self.digits),
71+
self.digits,
72+
)
73+
74+
def encode(self, i):
75+
neg, value = self.convert(i, self.decimal_digits, self.digits, "-")
76+
if neg:
77+
return self.sign + value
78+
return value
79+
80+
def decode(self, s):
81+
neg, value = self.convert(s, self.digits, self.decimal_digits, self.sign)
82+
if neg:
83+
value = "-" + value
84+
return int(value)
85+
86+
def convert(self, number, from_digits, to_digits, sign):
87+
if str(number)[0] == sign:
88+
number = str(number)[1:]
89+
neg = 1
90+
else:
91+
neg = 0
92+
93+
# make an integer out of the number
94+
x = 0
95+
for digit in str(number):
96+
x = x * len(from_digits) + from_digits.index(digit)
97+
98+
# create the result in base 'len(to_digits)'
99+
if x == 0:
100+
res = to_digits[0]
101+
else:
102+
res = ""
103+
while x > 0:
104+
digit = x % len(to_digits)
105+
res = to_digits[digit] + res
106+
x = int(x // len(to_digits))
107+
return neg, res
108+
109+
110+
base2 = BaseConverter(BASE2_ALPHABET)
111+
base16 = BaseConverter(BASE16_ALPHABET)
112+
base36 = BaseConverter(BASE36_ALPHABET)
113+
base56 = BaseConverter(BASE56_ALPHABET)
114+
base62 = BaseConverter(BASE62_ALPHABET)
115+
base64 = BaseConverter(BASE64_ALPHABET, sign="$")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<no-train>
2+
```CEDARScript
3+
-- Move the `convert` method from the `BaseConverter` class, placing it at the top level, just before the line where its class starts
4+
UPDATE CLASS "BaseConverter"
5+
FROM FILE "baseconv.py"
6+
MOVE FUNCTION "convert"
7+
INSERT BEFORE CLASS "BaseConverter"
8+
RELATIVE INDENTATION 0;
9+
10+
-- Update the copied function to remove references to `self`
11+
UPDATE FUNCTION "convert"
12+
FROM FILE "baseconv.py"
13+
REPLACE LINE "def convert(self, number, from_digits, to_digits, sign):"
14+
WITH CONTENT '''
15+
@0:def convert(number, from_digits, to_digits, sign):
16+
''';
17+
18+
-- Update ALL call sites of the method `convert` to call the new top-level function with the same name
19+
UPDATE FUNCTION "encode"
20+
FROM FILE "baseconv.py"
21+
REPLACE LINE "neg, value = self.convert(i, self.decimal_digits, self.digits, \"-\")"
22+
WITH CONTENT '''
23+
@0:neg, value = convert(i, self.decimal_digits, self.digits, "-")
24+
''';
25+
UPDATE FUNCTION "decode"
26+
FROM FILE "baseconv.py"
27+
REPLACE LINE "neg, value = self.convert(s, self.digits, self.decimal_digits, self.sign)"
28+
WITH CONTENT '''
29+
@0:neg, value = convert(s, self.digits, self.decimal_digits, self.sign)
30+
''';
31+
32+
```
33+
</no-train>

0 commit comments

Comments
 (0)