--codedit_str.lua
--string module for codedit by Cosmin Apreutesei (unlicensed).
--deals specifically with tabs, spaces, lines and words.
local glue = require'glue'
local utf8 = require'utf8'
local str = glue.update({}, utf8)
--tabs and whitespace ----------------------------------------------------------------------------------------------------
--test whether the byte at index i of s equals the first byte of c.
--compares byte values directly, so no substring is created.
--raises 'out of range' unless 1 <= i <= #s.
function str.isascii(s, i, c)
	local in_range = i >= 1 and i <= #s
	assert(in_range, 'out of range')
	local wanted = c:byte(1)
	return s:byte(i) == wanted
end
--true if the byte at index i of s is a tab character
function str.istab(s, i)
	local tab = '\t'
	return str.isascii(s, i, tab)
end
--true if the byte at index i of s is a plain space character (tabs are not counted here)
function str.isspacechar(s, i)
	local space = ' '
	return str.isascii(s, i, space)
end
--true if the byte at index i of s is whitespace, i.e. either a space or a tab
function str.isspace(s, i)
	if str.isspacechar(s, i) then
		return true
	end
	return str.istab(s, i)
end
--find the char index of the first non-whitespace char located after char index after_ci.
--when after_ci is omitted the search covers the whole string.
--returns nothing if there is no such char.
function str.next_nonspace(s, after_ci)
	local skip = after_ci or 0
	local char_pos = 0
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		if char_pos > skip then
			if not str.isspace(s, byte_pos) then
				return char_pos
			end
		end
	end
end
--find the char index of the second char of the first run of two consecutive whitespace chars
--that both lie after char index after_ci (the whole string is searched if after_ci is omitted).
--returns nothing if there is no such run.
function str.next_double_space(s, after_ci)
	local skip = after_ci or 0
	local char_pos = 0
	local prev_was_space = false
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		--chars at or before after_ci never count as spaces, so a run cannot start there
		local space_here = char_pos > skip and str.isspace(s, byte_pos)
		if space_here and prev_was_space then
			return char_pos
		end
		prev_was_space = space_here
	end
end
--find the char index of the last non-whitespace char located before char index before_ci.
--when before_ci is omitted the whole string is considered.
--returns nil if there is no such char.
function str.prev_nonspace(s, before_ci)
	local limit = before_ci or 1/0 --no limit: compare against infinity
	local char_pos = 0
	local last_seen
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		if char_pos >= limit then
			break
		end
		if not str.isspace(s, byte_pos) then
			last_seen = char_pos
		end
	end
	return last_seen
end
--strip leading spaces and tabs; a string made only of whitespace (or empty) yields ''
function str.ltrim(s)
	local first = str.next_nonspace(s)
	if first then
		return str.sub(s, first) or ''
	end
	return ''
end
--strip trailing spaces and tabs; a string made only of whitespace (or empty) yields ''
function str.rtrim(s)
	local last = str.prev_nonspace(s)
	if last then
		return str.sub(s, 1, last) or ''
	end
	return ''
end
--count the tabs and the spaces in the leading whitespace of s.
--counting stops at the first char that is neither a tab nor a space.
--returns: tabs, spaces
--TODO: use this
function str.indent_counts(s)
	local ntabs, nspaces = 0, 0
	for byte_pos in str.byte_indices(s) do
		if str.istab(s, byte_pos) then
			ntabs = ntabs + 1
		else
			--tabs were handled above, so whitespace here can only be a space
			if not str.isspace(s, byte_pos) then
				break
			end
			nspaces = nspaces + 1
		end
	end
	return ntabs, nspaces
end
--lines ------------------------------------------------------------------------------------------------------------------
--stateless line iterator step: given the byte index i where a line starts (default 1), return
--  nexti, first, last
--where first..last are the byte indices of the line's contents (last = first-1 for an empty line)
--and nexti is the iteration control value to pass back in (unimportant to callers).
--a line terminator is an optional '\r' followed by an optional '\n'.
--nexti is set to infinity once the last line has been produced, which makes the next call return nothing.
--the last line is the substring after the last line terminator to the end of the string (see tests).
function str.next_line_indices(s, i)
	local len = #s
	local start = i or 1
	if start > len then
		if start == len + 1 then
			--string ended with a newline, or string is empty: iterate one more empty line
			return 1/0, start, start - 1
		end
		return
	end
	local stop, nexti = s:match('^[^\r\n]*()\r?\n?()', start)
	--string ends without a newline: mark that by yielding infinity as the next index
	local unterminated = nexti > len and stop == nexti
	return unterminated and 1/0 or nexti, start, stop - 1
end
--generic-for iterator over the lines of s.
--each step yields: nexti (iteration control value, unimportant), first byte index, last byte index.
function str.line_indices(s)
	local step = str.next_line_indices
	return step, s
end
--stateless line iterator step: given the byte index i where a line starts, return
--  nexti, line
--where line is the line's contents without its terminator and nexti is the iteration control value.
--returns nothing when there are no more lines.
--the last line is the substring after the last line terminator to the end of the string (see tests).
function str.next_line(s, i)
	local nexti, first, last = str.next_line_indices(s, i)
	if nexti then
		return nexti, s:sub(first, last)
	end
end
--generic-for iterator over the lines of s.
--each step yields: nexti (iteration control value, unimportant), line contents.
function str.lines(s)
	local step = str.next_line
	return step, s
end
--count the lines of s as produced by str.line_indices()
function str.line_count(s)
	local count = 0
	--drive the iterator by hand, the same way a generic for would
	local step, state, pos = str.line_indices(s)
	while true do
		pos = step(state, pos)
		if pos == nil then
			break
		end
		count = count + 1
	end
	return count
end
--words ------------------------------------------------------------------------------------------------------------------
--true if the Lua pattern word_chars matches in s when searching from byte index i.
--NOTE(review): string.find scans forward from i, so this only tests the char at i itself
--if word_chars is anchored with '^' -- presumably callers pass an anchored pattern; verify.
function str.isword(s, i, word_chars)
	local found_at = s:find(word_chars, i)
	return found_at ~= nil
end
--from a char index, search forwards for:
--1) 1..n spaces followed by a non-space char
--2) 1..n word chars or non-word chars followed by case 1
--3) 1..n word chars followed by a non-word char
--4) 1..n non-word chars followed by a word char
--returns the char index of the break.
--returns 1 if first_ci < 1, and nothing if str.byte_index() finds no byte index for first_ci.
--if the scan runs out of chars without finding a break, returns the char index after the last char visited.
function str.next_word_break(s, first_ci, word_chars)
	if first_ci < 1 then
		return 1
	end
	local firsti = str.byte_index(s, first_ci)
	if not firsti then
		return
	end

	--class of a char that is already known not to be whitespace
	local function word_class(bi)
		return str.isword(s, bi, word_chars) and 'word' or 'nonword'
	end

	--class of the starting char decides what kind of run is being skipped
	local expect
	if str.isspace(s, firsti) then
		expect = 'space'
	else
		expect = word_class(firsti)
	end

	local ci = first_ci
	for bi in str.byte_indices(s, firsti) do
		ci = ci + 1
		local at_space = str.isspace(s, bi)
		if expect == 'space' then --case 1
			if not at_space then --case 1 exit
				return ci
			end
		elseif at_space then --case 2 -> case 1
			expect = 'space'
		elseif word_class(bi) ~= expect then --case 3 and 4 exit
			return ci
		end
	end
	return ci + 1
end
--from a char index, search backwards for:
--1) 1..n spaces followed by 1..n words or non-words
--2) 1 words or non-words followed by case 1
--3) 2..n words or non-words followed by a char of a different class
--in other words: look back until the char type changes from the type at firsti or of the prev. char, and skip spaces.
--if the prev. break should be on a different line, return nil.
--NOTE(review): the only path in this function that returns nothing is first_ci <= 1; confirm the
--statement above against callers.
--returns the char index right after the char where the class changed, or 1 if the scan ran out of chars.
function str.prev_word_break(s, first_ci, word_chars)
--nothing precedes the first char
if first_ci <= 1 then return end
local firsti = str.byte_index(s, first_ci)
--'prev' is a placeholder that equals none of 'space'/'word'/'nonword': it is used when byte_index()
--gave no byte index for first_ci, so that the first char visited below decides the class.
local expect = not firsti and 'prev' or
(str.isspace(s, firsti) and 'space' or str.isword(s, firsti, word_chars) and 'word' or 'nonword')
--lasti tracks the byte index visited on the previous step; it equals firsti only until the first step completes
--(presumably byte_indices_reverse() never yields firsti itself -- TODO confirm)
local lasti = firsti
local ci = first_ci
for i in str.byte_indices_reverse(s, firsti) do
ci = ci - 1
if expect == 'space' then
--skipping spaces: the first non-space char met decides the class to look for next
if not str.isspace(s, i) then
expect = str.isword(s, i, word_chars) and 'word' or 'nonword'
end
elseif expect ~= (str.isspace(s, i) and 'space' or str.isword(s, i, word_chars) and 'word' or 'nonword') then
if lasti == firsti then
--class changed on the very first step back: adopt the class of this char and keep scanning
expect =
str.isspace(s, i) and 'space' or
str.isword(s, i, word_chars) and 'word' or 'nonword'
else
--class changed after at least one step: the break is right after this char
return ci + 1
end
end
lasti = i
end
--no class change found while scanning: break at the first char
return 1
end
--tests ------------------------------------------------------------------------------------------------------------------
--self-tests: only run when `...` is empty, i.e. the file is executed without arguments
if not ... then

	--assert fn(input) == expected for every {input, expected} pair; expected may be nil
	local function check(fn, cases)
		for k = 1, #cases do
			local input, expected = cases[k][1], cases[k][2]
			assert(fn(input) == expected)
		end
	end

	check(str.next_nonspace, {
		{'', nil},
		{' ', nil},
		{' x', 2},
		{' x ', 2},
		{'x ', 1},
	})

	check(str.prev_nonspace, {
		{'', nil},
		{' ', nil},
		{'x', 1},
		{'x ', 1},
		{' x ', 2},
	})

	check(str.rtrim, {
		{'abc \t ', 'abc'},
		{' \t abc x \t ', ' \t abc x'},
		{'abc', 'abc'},
		{' ', ''},
		{'', ''},
	})

	--assert that iterating str.lines(text) yields exactly the strings in expected, in order
	local function assert_lines(text, expected)
		local got = {}
		for _, line in str.lines(text) do
			local n = #got + 1
			assert(expected[n] == line, n .. ': "' .. line .. '" ~= "' .. tostring(expected[n]) .. '"')
			got[n] = line
		end
		assert(#got == #expected, #got .. ' ~= ' .. #expected .. ': ' .. table.concat(got, ', '))
	end

	local line_cases = {
		{'', {''}},
		{' ', {' '}},
		{'x\ny', {'x', 'y'}},
		{'x\ny\n', {'x', 'y', ''}},
		{'x\n\ny', {'x', '', 'y'}},
		{'\n', {'', ''}},
		{'\n\r\n', {'','',''}},
		{'\r\n\n', {'','',''}},
		{'\n\r', {'','',''}},
		{'\n\r\n\r', {'','','',''}},
		{'\n\n\r', {'','','',''}},
	}
	for k = 1, #line_cases do
		assert_lines(line_cases[k][1], line_cases[k][2])
	end

	--TODO: next_word_break, prev_word_break
end
--when the chunk received no arguments in `...` (same condition as the self-tests above),
--also load the codedit_demo module (not shown in this file).
if not ... then require'codedit_demo' end
--export the module table
return str