/
lex-filt.l
482 lines (413 loc) · 10.9 KB
/
lex-filt.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
/*
 * Scanner options.  "%s" declares the start conditions used by the rules
 * below.  The %a/%n/%o/%p lines are table-size declarations -- presumably
 * kept for lex implementations other than flex; confirm before changing.
 */
%pointer
%s RULES RULE1 RULEX RULER STATES ACTIONS ACTION0 ACTION1 ACTION2 ACTION3 CODE CODE1 COMMENT
%a 20000
%n 10000
%o 30000
%p 25000
%{
/*
* $Id: lex-filt.l,v 1.87 2016/12/17 01:54:26 tom Exp $
*
* Filter to add vile "attribution" sequences to selected bits of LEX program.
*/
#include <filters.h>
#include <fltstack.h>
DefineFilter(lex);
#define NAME_LEX_PATTERN "LexPattern"
#define NAME_LEX_SECTION "LexSection"
#define NAME_LEX_STATES "LexStates"
static char *Action_attr;
static char *Comment_attr;
static char *Error_attr;
static char *Ident_attr;
static char *Keyword_attr;
static char *Number_attr;
static char *Preproc_attr;
static char *String_attr;
static char *Pattern_attr;
static char *Section_attr;
static char *States_attr;
static int section = 0;
static int nesting = 0;
static int bracket = 0;
static void end_action(void);
static void set_rules(void);
static void set_state(void);
static void write_patterns(char *text, int len);
static void write_states(char *text, int len);
%}
/*
 * Named sub-patterns used by the rules.
 *
 * UNQUOTED0 and PATTERN0 exclude '<' and '>' from unquoted text, while
 * UNQUOTED1 and PATTERN1 allow them.  PATTERNS is matched in the RULEX state
 * (right-hand side of a name definition); PATTERN is matched in the ACTIONS,
 * ACTION0 and ACTION1 states (pattern of a rule).  STATES matches a
 * "<name,...>" or "<*>" start-condition prefix.
 */
SPACE [[:blank:]]
IDENT [[:alpha:]_][[:alnum:]_]*
DIRECTIVE ^%([{}+*-]|{IDENT})
INTEGER [-+]?([[:digit:]]+)
SSTRING \'(\\.|[^'\\])*\'
DSTRING \"(\\.|[^"\\]|\\\r?\n)*\"
STRINGS ({SSTRING}|{DSTRING})
ESCAPED (\\[^\r\n])
UNQUOTED0 ([^{}"\\[\][:space:]<>])
UNQUOTED1 ([^{}"\\[\][:space:]])
QUOTED (\"([^"\\\r\n]|{ESCAPED})*\")
CCLASS ("[:"{IDENT}":]")
RANGE0 (\^[^\r\n])
RANGE1 ([^\r\n\]]|{CCLASS}|{ESCAPED})
RANGE ("["{RANGE0}?{RANGE1}*"]")
LIMITED ([[:digit:]]+([,][[:digit:]]+)*)
BRACED ("{"({IDENT}|{LIMITED}+)"}")
/*
* Combining all of these pieces makes the lex filter much
* larger than the other lex-based filters.
*/
PATTERN0 ({ESCAPED}|{BRACED}|{QUOTED}|{RANGE}|{UNQUOTED0})
PATTERN1 ({ESCAPED}|{BRACED}|{QUOTED}|{RANGE}|{UNQUOTED1})
PATTERN (({PATTERN0}|"("{PATTERN1}")")+|"<<EOF>>")
PATTERNS (({PATTERN0}|{PATTERN1}|"("{PATTERN1}")")+|"<<EOF>>")
STATES ("<"("*"|({IDENT}|\,)+)">")
%%
/*
* An entirely blank line should not affect the state.
*/
^{SPACE}+$ { ECHO; }
/*
* Lines in the patterns section beginning with whitespace
* are passed through to the output as-is. The main use for
* that is to allow inline comments. However, variables also
* can be declared.
*/
<RULES>^{SPACE}+ {
/* the rest of the line is scanned as code; CODE1 ends at the newline */
ECHO;
new_state(CODE1);
}
/*
* Handle comments. flex actually pays attention only to
* newlines and C-style comments. We handle "//" here to
* help with compiling its output.
*/
<RULES,ACTION0,ACTION3,CODE,CODE1>"//"[^\r\n]* {
WriteToken(Comment_attr);
}
<RULES,ACTION0,ACTION3,CODE,CODE1>"/*" {
/* switch to the COMMENT state; its rules collect the body up to the closing delimiter */
PushQuote(COMMENT, Comment_attr);
}
<RULES,ACTIONS,CODE>^"%%"{SPACE}*[^[:space:]]+[^\r\n]* {
/* "%%" followed by other text on the same line: shown as a comment, section count unchanged */
WriteToken(Comment_attr);
}
<RULES,ACTIONS,CODE>^"%%"{SPACE}* |
<RULES,ACTIONS,CODE>{DIRECTIVE} {
/*
 * A "%%" separator or a "%..." directive at the start of a line.
 * yytext[0] is '%'; the character after it selects what happens next.
 */
WriteToken(Section_attr);
switch(yytext[1]) {
case '%':
/* section separator: count it and choose the state for the new section */
section++;
set_state();
break;
case '{':
/* "%{" opens a block of code */
new_state(CODE);
break;
case '}':
/* "%}" closes it; return to the state appropriate for this section */
set_state();
break;
case 'S': /* FALLTHRU */
case 's': /* FALLTHRU */
case 'X': /* FALLTHRU */
case 'x':
/* directive name starts with s/S/x/X (such as "%s" or "%x"): state names follow */
new_state(STATES);
break;
case '+':
case '-':
case '*':
break;
default:
/* any other directive is only highlighted */
break;
}
}
<STATES>{IDENT} {
/*
 * A name declared on a start-condition line.  If it is not already a
 * class name, register it as a keyword so write_1state() can later
 * find it with get_keyword_attr(); otherwise flag it as an error.
 */
const char *attr = class_attr(yytext);
if (attr == 0) {
insert_keyword(yytext, NAME_LEX_STATES, 0);
attr = get_keyword_attr(yytext);
} else {
attr = Error_attr;
flt_error("Keyword \"%s\" is already a classname", yytext);
}
WriteToken(attr);
}
<STATES>{SPACE} { ECHO; }
<STATES>[^\r\n[:blank:]] { WriteToken(Error_attr);
/* anything that is neither a name nor a blank is unexpected here */
flt_error("Expected newline or blanks");
}
<STATES>[\n] { ECHO; set_state(); /* end of the declaration line */ }
<RULES>^{IDENT} {
/*
 * Start of a "name pattern" definition line.  When the LexPattern
 * symbol table can be selected, the name is recorded there (unless it
 * collides with a class name) so that "{name}" references in later
 * patterns can be recognized by write_patterns().
 */
if (set_symbol_table(NAME_LEX_PATTERN)) {
const char *attr = class_attr(yytext);
if (attr == 0) {
insert_keyword(yytext, Pattern_attr, 0);
attr = get_keyword_attr(yytext);
} else {
attr = Error_attr;
flt_error("Keyword \"%s\" is already a classname", yytext);
}
WriteToken(attr);
set_symbol_table(default_table);
} else {
WriteToken(Ident_attr);
}
new_state(RULE1);
}
<RULE1>{SPACE}+ { ECHO; new_state(RULEX); /* blanks after the name: the pattern follows */ }
<RULEX>{PATTERNS} { write_patterns(yytext, yyleng); new_state(RULER); /* the definition's pattern */ }
<RULER>[^\r\n]* { WriteToken(Error_attr);
/* nothing but the newline may follow the pattern */
flt_error("Expected newline");
}
<RULER>[\n] { ECHO; new_state(RULES); /* definition complete */ }
<ACTIONS>^{SPACE}+ { ECHO; new_state(CODE1); /* indented line: scan the rest of the line as code */ }
<ACTIONS>{SPACE}+ { ECHO; }
<ACTIONS>^{STATES} { write_states(yytext, yyleng); new_state(ACTION1); /* "<state,...>" prefix */ }
<ACTIONS>^{PATTERN} { write_patterns(yytext, yyleng); new_state(ACTION2); /* pattern without a prefix */ }
<ACTION0>^{SPACE}+ { ECHO; /* ACTION0 is used while a "<states>{" group is open */ }
<ACTION0>{STATES} { write_states(yytext, yyleng); new_state(ACTION1); }
<ACTION0>{PATTERN} { write_patterns(yytext, yyleng); new_state(ACTION2); }
<ACTION1>"{" { WriteToken(Action_attr); ++bracket; new_state(ACTION0); /* "{" right after a prefix opens a group */ }
<ACTIONS,ACTION0>"}" { if (bracket) { --bracket; WriteToken(Action_attr); end_action(); } else ECHO; /* closes a group only if one is open */ }
<ACTION1>{PATTERN} { write_patterns(yytext, yyleng); new_state(ACTION2); /* pattern following a prefix */ }
<ACTION1>{SPACE}+ { ECHO; new_state(ACTION3); }
<ACTION1>[\n] { ECHO; end_action(); }
<ACTION2>{SPACE}+ { ECHO; new_state(ACTION3); /* blanks after the pattern: the action follows */ }
<ACTION2>[\n] { ECHO; end_action(); /* pattern with no action on this line */ }
<ACTION3>{IDENT} { WriteToken(get_keyword_attr(yytext)); new_state(CODE1); /* unbraced action: code up to the newline */ }
<ACTION3>\{ { ECHO; nesting = 1; new_state(CODE); /* braced action: code until the braces balance */ }
<ACTION3>[\n] { ECHO; end_action(); }
<CODE,CODE1>{IDENT} { WriteToken(get_keyword_attr(yytext)); }
<CODE,CODE1>{STRINGS} { WriteToken(String_attr); }
<CODE,CODE1>{INTEGER} { WriteToken(Number_attr); }
<CODE1>[\n] { ECHO; set_state(); /* CODE1 covers a single line */ }
<CODE>\{ { ECHO; nesting++; }
<CODE>\} { ECHO; if (--nesting <= 0) { nesting = 0; set_rules(); } /* outermost brace closed: leave the action */ }
<COMMENT>[^*]* { flt_bfr_append(yytext, yyleng); }
<COMMENT>"*"+[^*/]* { flt_bfr_append(yytext, yyleng); }
<COMMENT>"*"+"/" { PopQuote(); /* end of the comment: return to the state that was pushed */ }
<RULES,CODE>^{SPACE}*#{SPACE}*{IDENT}({SPACE}+(\<[^>]+\>|\"[^"]+\"))? {
/* a preprocessor line, optionally with its <file> or "file" operand */
WriteToken(Preproc_attr);
/* seen in the definitions section, it switches the scanner to CODE */
if (FLT_STATE == RULES)
new_state(CODE);
}
%%
#include <fltstack.h>
/*
 * Called when a rule's action is finished.  While a "<states>{" group is
 * still open (bracket is nonzero) resume in ACTION0, otherwise go back to
 * ACTIONS.
 */
static void
end_action(void)
{
    if (!bracket) {
        new_state(ACTIONS);
        return;
    }
    new_state(ACTION0);
}
/*
 * Select the scanner state that matches the current section of the input:
 * before the first "%%" use RULES, after the first use the action states
 * (via end_action), and after the second use CODE.
 */
static void
set_state(void)
{
    if (section < 1) {
        new_state(RULES);
    } else if (section < 2) {
        end_action();
    } else {
        new_state(CODE);
    }
}
/*
 * Leave a braced block of code.  Before the first "%%" this does nothing;
 * afterwards it defers to set_state().
 */
static void
set_rules(void)
{
    if (section < 1)
        return;
    set_state();
}
/*
 * Emit one start-condition name (text is NUL-terminated, len characters
 * long) using the attribute registered for it.  The lone "*" wildcard is
 * shown as a keyword; any other unregistered name is reported as an error.
 */
static void
write_1state(char *text, int len)
{
    const char *attr = get_keyword_attr(text);

    if (attr != 0) {
        /* known state name: use its own attribute */
    } else if (len == 1 && text[0] == '*') {
        attr = Keyword_attr;
    } else {
        attr = Error_attr;
        flt_error("Unknown state name \"%s\"", text);
    }
    flt_bfr_embed(text, len, attr);
}
/*
 * FIXME: do this with lex states
 *
 * Emit a "<name,name,...>" start-condition prefix.  The angle brackets and
 * commas are written with the keyword attribute and each name is passed to
 * write_1state().  The text is split in place by temporarily overwriting the
 * separators with NUL, and every separator is restored before returning.
 * The length argument is not needed because the text is NUL-terminated.
 */
static void
write_states(char *text, int len)
{
    char *closing;
    char *comma;

    (void) len;

    /* only happens if we have {STATES} */
    if (text[0] != '<')
        return;

    flt_bfr_begin(Keyword_attr);
    flt_bfr_append(text, 1);
    ++text;

    /* cut the string at '>' so the comma search stays inside the brackets */
    closing = strchr(text, '>');
    if (closing != 0)
        *closing = 0;

    for (comma = strchr(text, ','); comma != 0; comma = strchr(text, ',')) {
        *comma = 0;
        write_1state(text, (int) (comma - text));
        *comma = ',';
        flt_bfr_append(comma, 1);
        text = comma + 1;
    }

    /* ...or is confused with a {PATTERN} */
    if (closing != 0) {
        write_1state(text, (int) (closing - text));
        *closing = '>';
        flt_bfr_append(closing, 1);
    }
    flt_bfr_finish();
}
/*
 * Decide whether a scan that started at index 'first' and stopped at index
 * 'value' found a complete "{...}" item: at least one character must have
 * been consumed, the scan must have stopped before 'last', and it must have
 * stopped on the closing curly brace.
 *
 * Returns the index just past the closing brace, or -1.
 */
static int
ok_to_embed(char *text, int first, int last, int value)
{
    if (value == first)
        return -1;
    if (value >= last)
        return -1;
    if (text[value] != R_CURLY)
        return -1;
    return value + 1;
}
/*
 * Scan an identifier starting at text[first] (the character after a '{') and
 * check that it is followed by the closing '}' before index 'last'.
 *
 * The accepted syntax matches the scanner's IDENT definition: a letter or
 * underscore, followed by letters, digits or underscores.  A leading
 * underscore used to be rejected here, so "{_name}" references were never
 * highlighted even though the scanner accepts them.
 *
 * Returns the index just past the closing '}', or -1 if this is not a
 * "{name}" reference.
 */
static int
parse_ident(char *text, int first, int last)
{
    int n;

    for (n = first; n < last; ++n) {
        int ch = CharOf(text[n]);
        int ok;

        if (ch == '_') {
            ok = 1;
        } else if (n == first) {
            ok = isalpha(ch);
        } else {
            ok = isalnum(ch);
        }
        if (!ok)
            break;
    }
    return ok_to_embed(text, first, last, n);
}
/*
 * Scan a repeat count such as "3" or "2,5" starting at text[first] (the
 * character after a '{'): a digit, followed by digits or commas, and then
 * the closing '}' before index 'last'.
 *
 * Returns the index just past the closing '}', or -1 if the text does not
 * have that form.
 */
static int
parse_limits(char *text, int first, int last)
{
    int pos = first;

    while (pos < last) {
        int ch = CharOf(text[pos]);

        /* a comma is accepted anywhere except as the first character */
        if (!isdigit(ch) && (pos == first || ch != ','))
            break;
        ++pos;
    }
    return ok_to_embed(text, first, last, pos);
}
/*
 * FIXME: do this with lex states
 *
 * Emit a lex pattern of 'len' characters.  The pattern as a whole is written
 * with the string attribute; "{name}" references are embedded with the
 * attribute of the named definition (or the error attribute if the name is
 * unknown) and "{m,n}" repeat counts with the number attribute.
 *
 * Curly braces are only interpreted outside of double-quoted strings and
 * bracketed ranges.  A backslash escapes the next character in every
 * context, matching the scanner's QUOTED and RANGE definitions, which both
 * include ESCAPED.  Escapes used to be honoured only outside quotes and
 * ranges, so an escaped quote or escaped ']' ended that context too early.
 *
 * The text is modified temporarily (a NUL replaces the closing brace during
 * the name lookup) and restored before returning.
 */
static void
write_patterns(char *text, int len)
{
    const char *attr;
    int quoted = 0;
    int escape = 0;
    int ranges = 0;
    int first, last, next;

    set_symbol_table(NAME_LEX_PATTERN);
    flt_bfr_begin(String_attr);
    /* 'first' is the start of the text not yet written, 'last' the scan position */
    for (first = last = 0; last < len; ++last) {
        int ch = CharOf(text[last]);

        if (escape) {
            /* this character was escaped: it has no special meaning */
            escape = 0;
        } else if (ch == '\\') {
            /* checked before the quoted/ranges cases so that it applies there too */
            escape = 1;
        } else if (quoted) {
            if (ch == DQUOTE)
                quoted = 0;
        } else if (ranges) {
            if (ch == L_BLOCK) {
                ++ranges;
            } else if (ch == R_BLOCK) {
                --ranges;
            }
        } else if (ch == DQUOTE) {
            quoted = 1;
        } else if (ch == L_BLOCK) {
            ranges = 1;
        } else if (ch == L_CURLY) {
            if ((next = parse_ident(text, last + 1, len)) > 0) {
                /* isolate the name by overwriting the closing brace */
                int save = text[next - 1];
                text[next - 1] = 0;
                /*
                 * flex accepts forward-references to names, but this
                 * is a one-pass highlighter and cannot tell if a failure
                 * is a forward reference. But show an error anyway since
                 * it is more likely to be useful.
                 */
                flt_bfr_append(text + first, last - first);
                if ((attr = get_keyword_attr(text + last + 1)) == 0) {
                    attr = Error_attr;
                    flt_error("Undefined name \"%s\"", text + last + 1);
                }
                text[next - 1] = (char) save;
                flt_bfr_embed(text + last, next - last, attr);
                first = next;
            } else if ((next = parse_limits(text, last + 1, len)) >= 0) {
                flt_bfr_append(text + first, last - first);
                flt_bfr_embed(text + last, next - last, Number_attr);
                first = next;
            }
        }
    }
    /* whatever follows the last embedded item */
    flt_bfr_append(text + first, len - first);
    flt_bfr_finish();
    set_symbol_table(default_table);
}
/*
 * Filter initialization hook.  This filter has nothing to set up, so the
 * argument is only discarded.
 */
static void
init_filter(int before GCC_UNUSED)
{
(void) before;
}
static void
do_filter(FILE *inputs)
{
InitLEX(inputs);
section = 0;
nesting = 0;
Action_attr = class_attr(NAME_ACTION);
Comment_attr = class_attr(NAME_COMMENT);
Error_attr = class_attr(NAME_ERROR);
Ident_attr = class_attr(NAME_IDENT);
Keyword_attr = class_attr(NAME_KEYWORD);
Number_attr = class_attr(NAME_NUMBER);
Preproc_attr = class_attr(NAME_PREPROC);
String_attr = class_attr(NAME_LITERAL);
if ((Section_attr = class_attr(NAME_LEX_SECTION)) == 0)
Section_attr = Keyword_attr;
if ((States_attr = class_attr(NAME_LEX_STATES)) == 0)
insert_keyword(NAME_LEX_STATES, Keyword_attr, 0);
flt_make_symtab(NAME_LEX_PATTERN);
set_symbol_table(NAME_LEX_PATTERN);
if ((Pattern_attr = class_attr(NAME_LEX_PATTERN)) == 0)
Pattern_attr = String_attr;
set_symbol_table(default_table);
begin_state(RULES);
RunLEX();
flt_bfr_error();
end_state();
}
#if NO_LEAKS
/*
 * Cleanup hook, compiled only when NO_LEAKS is set.  The body is the
 * USE_LEXFREE macro -- presumably it releases the scanner's buffers;
 * confirm against the header that defines it.
 */
static void
free_filter(void)
{
USE_LEXFREE;
}
#endif