Skip to content

Commit 9a4472c

Browse files
committed
[mlir] Add basic tree-sitter grammar file
Add a tree-sitter grammar file that tries to closely match LangRef (it could use some tweaking and cleanup, but is kept fairly basic). Also updated LangRef in places where issues were found while doing the nearly direct transcription. This only adds a grammar file, not all the other parts (npm etc.) that accompany it. Those I'll propose for a separate repo, like we do for the vscode extension. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D124352
1 parent c38344d commit 9a4472c

File tree

3 files changed

+276
-4
lines changed

3 files changed

+276
-4
lines changed

mlir/docs/LangRef.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ Syntax:
201201
bare-id ::= (letter|[_]) (letter|digit|[_$.])*
202202
bare-id-list ::= bare-id (`,` bare-id)*
203203
value-id ::= `%` suffix-id
204+
alias-name ::= bare-id
204205
suffix-id ::= (digit+ | ((letter|id-punct) (letter|id-punct|digit)*))
205206
206207
symbol-ref-id ::= `@` (suffix-id | string-literal) (`::` symbol-ref-id)?
@@ -295,7 +296,7 @@ custom-operation ::= bare-id custom-operation-format
295296
op-result-list ::= op-result (`,` op-result)* `=`
296297
op-result ::= value-id (`:` integer-literal)?
297298
successor-list ::= `[` successor (`,` successor)* `]`
298-
successor ::= caret-id (`:` bb-arg-list)?
299+
successor ::= caret-id (`:` block-arg-list)?
299300
region-list ::= `(` region (`,` region)* `)`
300301
dictionary-attribute ::= `{` (attribute-entry (`,` attribute-entry)*)? `}`
301302
trailing-location ::= (`loc` `(` location `)`)?
@@ -645,9 +646,12 @@ type-list-parens ::= `(` `)`
645646
646647
// This is a common way to refer to a value with a specified type.
647648
ssa-use-and-type ::= ssa-use `:` type
649+
ssa-use ::= value-use
648650
649651
// Non-empty list of names and types.
650652
ssa-use-and-type-list ::= ssa-use-and-type (`,` ssa-use-and-type)*
653+
654+
function-type ::= (type | type-list-parens) `->` (type | type-list-parens)
651655
```
652656

653657
### Type Aliases
@@ -693,10 +697,9 @@ pretty-dialect-item-contents ::= pretty-dialect-item-body
693697
| '(' pretty-dialect-item-contents+ ')'
694698
| '[' pretty-dialect-item-contents+ ']'
695699
| '{' pretty-dialect-item-contents+ '}'
696-
| '[^[<({>\])}\0]+'
700+
| '[^\[<({\]>)}\0]+'
697701
698-
dialect-type ::= '!' opaque-dialect-item
699-
dialect-type ::= '!' pretty-dialect-item
702+
dialect-type ::= '!' (opaque-dialect-item | pretty-dialect-item)
700703
```
701704

702705
Dialect types can be specified in a verbose form, e.g. like this:

mlir/utils/tree-sitter-mlir/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
tree-sitter-mlir
2+
================
3+
4+
Basic [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for
5+
MLIR following the [lang-ref](https://mlir.llvm.org/docs/LangRef/).
6+
7+
Note: the directory in the [LLVM repo](https://github.com/llvm/llvm-project/)
8+
merely contains the grammar file(s) and not the NPM/generated code.
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
module.exports = grammar({
2+
name : 'mlir',
3+
extras : $ => [/\s/,
4+
$.comment,
5+
],
6+
conflicts : $ => [],
7+
rules : {
8+
// Top level production:
9+
// (operation | attribute-alias-def | type-alias-def)
10+
toplevel : $ => seq(choice(
11+
$.operation,
12+
$.attribute_alias_def,
13+
$.type_alias_def,
14+
)),
15+
16+
// Common syntax (lang-ref)
17+
// digit ::= [0-9]
18+
// hex_digit ::= [0-9a-fA-F]
19+
// letter ::= [a-zA-Z]
20+
// id-punct ::= [$._-]
21+
//
22+
// integer-literal ::= decimal-literal | hexadecimal-literal
23+
// decimal-literal ::= digit+
24+
// hexadecimal-literal ::= `0x` hex_digit+
25+
// float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
26+
// string-literal ::= `"` [^"\n\f\v\r]* `"` TODO: define escaping rules
27+
//
28+
_digit : $ => /[0-9]/,
29+
_hex_digit : $ => /[0-9a-fA-F]/,
30+
integer_literal : $ => choice($._decimal_literal, $._hexadecimal_literal),
31+
_decimal_literal : $ => repeat1($._digit),
32+
_hexadecimal_literal : $ => seq('0x', repeat1($._hex_digit)),
33+
float_literal : $ => token(
34+
seq(optional(/[-+]/), repeat1(/[0_9]/),
35+
optional(seq('.', repeat(/[0-9]/),
36+
optional(seq(/[eE]/, optional(/[-+]/),
37+
repeat1(/[0-9]/))))))),
38+
string_literal : $ => seq(
39+
'"',
40+
repeat(token.immediate(prec(1, /[^\\"\n\f\v\r]+/))),
41+
'"',
42+
),
43+
44+
// Identifiers
45+
// bare-id ::= (letter|[_]) (letter|digit|[_$.])*
46+
// bare-id-list ::= bare-id (`,` bare-id)*
47+
// value-id ::= `%` suffix-id
48+
// suffix-id ::= (digit+ | ((letter|id-punct) (letter|id-punct|digit)*))
49+
// alias-name ::= bare-id
50+
//
51+
// symbol-ref-id ::= `@` (suffix-id | string-literal) (`::`
52+
// symbol-ref-id)?
53+
// value-id-list ::= value-id (`,` value-id)*
54+
//
55+
// // Uses of value, e.g. in an operand list to an operation.
56+
// value-use ::= value-id
57+
// value-use-list ::= value-use (`,` value-use)*
58+
bare_id : $ => seq(token(/[a-zA-Z_]/),
59+
token.immediate(repeat(/[a-zA-Z0-9_$]/))),
60+
bare_id_list : $ => seq($.bare_id, repeat(seq(',', $.bare_id))),
61+
value_id : $ => seq('%', $._suffix_id),
62+
alias_name : $ => $.bare_id,
63+
_suffix_id : $ => choice(repeat1(/[0-9]/),
64+
seq(/[a-zA-Z_$.]/, repeat(/[a-zA-Z0-9_$.]/))),
65+
symbol_ref_id : $ => seq('@', choice($._suffix_id, $.string_literal),
66+
optional(seq('::', $.symbol_ref_id))),
67+
value_use : $ => $.value_id,
68+
value_use_list : $ => seq($.value_use, repeat(seq(',', $.value_use))),
69+
70+
// Operations
71+
// operation ::= op-result-list? (generic-operation |
72+
// custom-operation)
73+
// trailing-location?
74+
// generic-operation ::= string-literal `(` value-use-list? `)`
75+
// successor-list?
76+
// region-list? dictionary-attribute? `:`
77+
// function-type
78+
// custom-operation ::= bare-id custom-operation-format
79+
// op-result-list ::= op-result (`,` op-result)* `=`
80+
// op-result ::= value-id (`:` integer-literal)?
81+
// successor-list ::= `[` successor (`,` successor)* `]`
82+
// successor ::= caret-id (`:` block-arg-list)?
83+
// region-list ::= `(` region (`,` region)* `)`
84+
// dictionary-attribute ::= `{` (attribute-entry (`,` attribute-entry)*)?
85+
// `}`
86+
// trailing-location ::= (`loc` `(` location `)`)?
87+
operation : $ => seq(optional($.op_result_list),
88+
choice($.generic_operation, $.custom_operation),
89+
optional($.trailing_location)),
90+
generic_operation : $ =>
91+
seq($.string_literal, '(', optional($.value_use_list),
92+
')', optional($.successor_list),
93+
optional($.region_list),
94+
optional($.dictionary_attribute), ':',
95+
$.function_type),
96+
// custom-operation rule is defined later in the grammar, post the generic.
97+
op_result_list : $ => seq($.op_result, repeat(seq(',', $.op_result)), '='),
98+
op_result : $ => seq($.value_id, optional(seq(':', $.integer_literal))),
99+
successor_list : $ => seq('[', $.successor, repeat(seq(',', $.successor)),
100+
']'),
101+
successor : $ => seq($.caret_id, optional(seq(':', $.block_arg_list))),
102+
region_list : $ => seq('(', $.region, repeat(seq(',', $.region)), ')'),
103+
dictionary_attribute : $ => seq(
104+
'{',
105+
optional(seq($.attribute_entry,
106+
repeat(seq(',', $.attribute_entry)))),
107+
'}'),
108+
trailing_location : $ => seq('loc(', $.location, ')'),
109+
// TODO: Complete location forms.
110+
location : $ => $.string_literal,
111+
112+
// Blocks
113+
// block ::= block-label operation+
114+
// block-label ::= block-id block-arg-list? `:`
115+
// block-id ::= caret-id
116+
// caret-id ::= `^` suffix-id
117+
// value-id-and-type ::= value-id `:` type
118+
//
119+
// // Non-empty list of names and types.
120+
// value-id-and-type-list ::= value-id-and-type (`,` value-id-and-type)*
121+
//
122+
// block-arg-list ::= `(` value-id-and-type-list? `)`
123+
block : $ => seq($.block_label, repeat1($.operation)),
124+
block_label : $ => seq($._block_id, optional($.block_arg_list), ':'),
125+
_block_id : $ => $.caret_id,
126+
caret_id : $ => seq('^', $._suffix_id),
127+
value_id_and_type : $ => seq($.value_id, ':', $.type),
128+
value_id_and_type_list : $ => seq($.value_id_and_type,
129+
repeat(seq(',', $.value_id_and_type))),
130+
block_arg_list : $ => seq('(', optional($.value_id_and_type_list), ')'),
131+
132+
// Regions
133+
// region ::= `{` entry-block? block* `}`
134+
// entry-block ::= operation+
135+
region : $ => seq('{', optional($.entry_block), repeat($.block), '}'),
136+
entry_block : $ => repeat1($.operation),
137+
138+
// Types
139+
// type ::= type-alias | dialect-type | builtin-type
140+
//
141+
// type-list-no-parens ::= type (`,` type)*
142+
// type-list-parens ::= `(` type-list-no-parens? `)`
143+
//
144+
// // This is a common way to refer to a value with a specified type.
145+
// ssa-use-and-type ::= ssa-use `:` type
146+
// ssa-use ::= value-use
147+
//
148+
// // Non-empty list of names and types.
149+
// ssa-use-and-type-list ::= ssa-use-and-type (`,` ssa-use-and-type)*
150+
//
151+
// function-type ::= (type | type-list-parens) `->` (type |
152+
// type-list-parens)
153+
type : $ => choice($.type_alias, $.dialect_type, $.builtin_type),
154+
type_list_no_parens : $ => seq($.type, repeat(seq(',', $.type))),
155+
type_list_parens : $ => seq('(', optional($.type_list_no_parens), ')'),
156+
ssa_use_and_type : $ => seq($.ssa_use, ':', $.type),
157+
ssa_use : $ => $.value_use,
158+
ssa_use_and_type_list : $ => seq($.ssa_use_and_type,
159+
repeat(seq(',', $.ssa_use_and_type))),
160+
function_type : $ => seq(choice($.type, $.type_list_parens), '->',
161+
choice($.type, $.type_list_parens)),
162+
163+
// Type aliases
164+
// type-alias-def ::= '!' alias-name '=' 'type' type
165+
// type-alias ::= '!' alias-name
166+
type_alias_def : $ => seq('!', $.alias_name, '=', 'type', $.type),
167+
type_alias : $ => seq('!', $.alias_name),
168+
169+
// Dialect Types
170+
// dialect-namespace ::= bare-id
171+
//
172+
// opaque-dialect-item ::= dialect-namespace '<' string-literal '>'
173+
//
174+
// pretty-dialect-item ::= dialect-namespace '.'
175+
// pretty-dialect-item-lead-ident
176+
// pretty-dialect-item-body?
177+
//
178+
// pretty-dialect-item-lead-ident ::= '[A-Za-z][A-Za-z0-9._]*'
179+
// pretty-dialect-item-body ::= '<' pretty-dialect-item-contents+ '>'
180+
// pretty-dialect-item-contents ::= pretty-dialect-item-body
181+
// | '(' pretty-dialect-item-contents+ ')'
182+
// | '[' pretty-dialect-item-contents+ ']'
183+
// | '{' pretty-dialect-item-contents+ '}'
184+
// | '[^\[<({\]>)}\0]+'
185+
//
186+
// dialect-type ::= '!' (opaque-dialect-item | pretty-dialect-item)
187+
dialect_type : $ => seq(
188+
'!', choice($.opaque_dialect_item, $.pretty_dialect_item)),
189+
dialect_namespace : $ => $.bare_id,
190+
opaque_dialect_item : $ => seq($.dialect_namespace, '<', $.string_literal,
191+
'>'),
192+
pretty_dialect_item : $ => seq($.dialect_namespace, '.',
193+
$.pretty_dialect_item_lead_ident,
194+
optional($.pretty_dialect_item_body)),
195+
pretty_dialect_item_lead_ident : $ => $.bare_id,
196+
pretty_dialect_item_body : $ => seq('<',
197+
repeat1($.pretty_dialect_item_contents),
198+
'>'),
199+
// TODO: not sure why prec.left (setting left-associated parsing) needed
200+
// here,
201+
// left-associated way avoids an ambiguity flagged by generator. It may not
202+
// be needed and be only papering over an issue.
203+
pretty_dialect_item_contents : $ => prec.left(choice(
204+
$.pretty_dialect_item_body,
205+
seq('(',
206+
repeat1(
207+
$.pretty_dialect_item_contents),
208+
')'),
209+
seq('[',
210+
repeat1(
211+
$.pretty_dialect_item_contents),
212+
']'),
213+
seq('{',
214+
repeat1(
215+
$.pretty_dialect_item_contents),
216+
'}'),
217+
repeat1(/[^\[<({>\])}\\0]/))),
218+
// NOTE: `dialect_type` is defined once, directly below its production comment
// in the "Dialect Types" section. Repeating the key here would be a duplicate
// object property that silently shadows that definition.
220+
221+
// Builtin types
222+
builtin_type : $ => choice(
223+
// TODO: Add builtin types
224+
seq('i', repeat1(/[0-9]/))),
225+
226+
// Attributes
227+
// attribute-entry ::= (bare-id | string-literal) `=` attribute-value
228+
// attribute-value ::= attribute-alias | dialect-attribute |
229+
// builtin-attribute
230+
attribute_entry : $ => seq(choice($.bare_id, $.string_literal), '=',
231+
$.attribute_value),
232+
attribute_value : $ => choice($.attribute_alias, $.dialect_attribute,
233+
$.builtin_attribute),
234+
235+
// Attribute Value Aliases
236+
// attribute-alias-def ::= '#' alias-name '=' attribute-value
237+
// attribute-alias ::= '#' alias-name
238+
attribute_alias_def : $ => seq('#', $.alias_name, '=', $.attribute_value),
239+
attribute_alias : $ => seq('#', $.alias_name),
240+
241+
// Dialect Attribute Values
242+
dialect_attribute : $ => seq('#', choice($.opaque_dialect_item,
243+
$.pretty_dialect_item)),
244+
245+
// Builtin Attribute Values
246+
builtin_attribute : $ => choice(
247+
// TODO
248+
$.function_type,
249+
$.string_literal,
250+
),
251+
252+
// Comment (standard BCPL)
253+
comment : $ => token(seq('//', /.*/)),
254+
255+
custom_operation : $ => choice(
256+
// TODO: Just basic/incomplete instance.
257+
seq('func', field('name', $.symbol_ref_id),
258+
$.block_arg_list, '->', $.type, $.region),
259+
),
260+
}
261+
});

0 commit comments

Comments
 (0)