-
Notifications
You must be signed in to change notification settings - Fork 2
/
types.ts
111 lines (105 loc) · 2.86 KB
/
types.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import { tokenize, rest, Token } from "./core.js"
/**
 * The output of tokenization: a sequence of plain strings and {@link Token} objects.
 *
 * Most functions (mostly internal ones) that process token streams assume two
 * invariants about them:
 *
 * 1. No two adjacent elements are strings.
 * 2. No element is the empty string.
 *
 * The single exception is the stream that consists of exactly one empty string
 * and nothing else.
 */
export type TokenStream = Array<string | Token>
/**
 * The set of standard token names — names that language grammars conventionally
 * assign to matched tokens (e.g. `'keyword'`, `'string'`, `'comment'`).
 *
 * See {@link TokenName} for the type that also admits arbitrary custom names.
 */
export type StandardTokenName =
	| 'atrule'
	| 'attr-name'
	| 'attr-value'
	| 'bold'
	| 'boolean'
	| 'builtin'
	| 'cdata'
	| 'char'
	| 'class-name'
	| 'comment'
	| 'constant'
	| 'deleted'
	| 'doctype'
	| 'entity'
	| 'function'
	| 'important'
	| 'inserted'
	| 'italic'
	| 'keyword'
	| 'namespace'
	| 'number'
	| 'operator'
	| 'prolog'
	| 'property'
	| 'punctuation'
	| 'regex'
	| 'selector'
	| 'string'
	| 'symbol'
	| 'tag'
	| 'url'
/**
 * The name of a token: one of the {@link StandardTokenName}s, or any custom string.
 *
 * `string & {}` is used instead of plain `string` so the union does not collapse
 * to `string` — this keeps the standard names available as editor autocomplete
 * suggestions while still accepting arbitrary names.
 */
export type TokenName = string & {} | StandardTokenName
/**
 * The expansion of a simple `RegExp` literal to support additional properties.
 *
 * Anywhere a grammar accepts a plain `RegExp`, a `GrammarToken` can be used
 * instead to attach extra matching behavior (lookbehind, greediness, aliases,
 * nested grammars) to the pattern.
 */
export interface GrammarToken {
	/**
	 * The regular expression of the token.
	 */
	pattern: RegExp
	/**
	 * If `true`, then the first capturing group of `pattern` will (effectively)
	 * behave as a lookbehind group meaning that the captured text will not be
	 * part of the matched text of the new token.
	 *
	 * @default false
	 */
	lookbehind?: boolean
	/**
	 * Whether the token is greedy.
	 *
	 * @default false
	 */
	greedy?: boolean
	/**
	 * An optional alias. Multiple aliases are separated by spaces.
	 */
	alias?: TokenName
	/**
	 * The nested grammar of this token.
	 *
	 * The `inside` grammar will be used to tokenize the text value of each token
	 * of this kind.
	 *
	 * This can be used to make nested and even recursive language definitions.
	 *
	 * NOTE(review): a `string` value here presumably refers to a grammar by name
	 * (same as the `string` form of {@link GrammarSymbols}' `rest`) — confirm
	 * against core.js.
	 *
	 * Note: This can cause infinite recursion. Be careful when you embed
	 * different languages or even the same language into each another.
	 */
	inside?: Grammar | string | null
	/**
	 * A property to make the types {@link GrammarToken} and {@link RegExp}
	 * non-overlapping.
	 *
	 * Since {@link GrammarToken} requires `exec` to be `undefined` and
	 * {@link RegExp} requires it to be a function, there can be no object that
	 * is both a {@link GrammarToken} and a {@link RegExp}.
	 */
	readonly exec?: never
}
/**
 * A custom tokenizer for a grammar.
 *
 * Stored under a grammar's {@link tokenize} symbol; see that symbol for more info.
 *
 * @param code A string with the code this grammar needs to tokenize.
 * @param grammar The grammar that carries this custom tokenizer.
 * @returns A token stream representing the matched code.
 */
export type CustomTokenizer = (code: string, grammar: Grammar) => TokenStream
/**
 * The string-keyed part of a grammar: maps each token name to the pattern(s)
 * that match it — a single `RegExp`, a single {@link GrammarToken}, or an array
 * of either. Every entry is optional.
 */
export type GrammarTokens = {
	[Name in TokenName]?: RegExp | GrammarToken | (RegExp | GrammarToken)[]
}
/**
 * The symbol-keyed part of a grammar.
 *
 * Well-known symbols (imported from core.js) are used as keys so these entries
 * can never collide with the string token names in {@link GrammarTokens}.
 */
export type GrammarSymbols = {
	// NOTE(review): presumably a grammar (or grammar name) whose tokens are
	// matched in addition to this grammar's own — confirm against core.js.
	[rest]?: Grammar | string | null
	// Optional replacement tokenizer for this grammar; see {@link CustomTokenizer}.
	[tokenize]?: CustomTokenizer | null
}
/**
 * A full grammar: the string-keyed token definitions ({@link GrammarTokens})
 * combined with the symbol-keyed extras ({@link GrammarSymbols}).
 */
export type Grammar = GrammarTokens & GrammarSymbols