-
Notifications
You must be signed in to change notification settings - Fork 0
/
Parser.ts
189 lines (178 loc) · 5.61 KB
/
Parser.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/**
 * Collects the names of every rule in a grammar.
 * @param g the grammar whose rule names are wanted
 * @returns the rule names, in the order the rules appear in the grammar
 */
const rule_names = <T>(g: Grammar<T>): string[] => {
  const names: string[] = [];
  for (const rule of g) {
    names.push(rule.name);
  }
  return names;
};
/**
 * Gets a specific grammar rule from a grammar
 * where the rule name is provided
 * @param g the grammar to get the rule from
 * @param r the name of the rule to find
 * @returns the grammar rule whose name is r
 * @throws If the rule name r is not in grammar g
 */
const get_rule = <T>(g: Grammar<T>, r: string): GrammarRule<T> => {
  const gr = g.find((val) => val.name === r);
  if (gr) {
    return gr;
  }
  // Interpolating `g` directly would stringify each rule object as
  // "[object Object]"; list the known rule names so the error is actionable.
  throw new Error(
    `Rule '${r}' does not exist in grammar. Known rules: ${g
      .map((val) => val.name)
      .join(", ")}`
  );
};
/**
 * A LL(k) parser for a specific grammar rules pattern
 * @param k the number of lookahead tokens for this parser
 * @param ts the stream of tokens to parse
 * @param g the grammar to use for parsing
 * @param r the top-level grammar rule to attempt to parse
 * @param p the specific pattern to attempt to parse
 * @param final whether or not this parser should consume all remaining tokens
 * @returns a tuple of [the accumulated rule match, number of tokens consumed],
 * or undefined when the match fails within the first k tokens (a recoverable
 * "bail out" so the caller can try another pattern)
 * @throws if the match fails only after k tokens were consumed — under the
 * LL(k) contract the grammar itself is then at fault, not the input
 */
const LL_pattern = <T>(
  k: number,
  ts: Tokens,
  g: Grammar<T>,
  r: GrammarRule<T>,
  p: GrammarPattern,
  final: boolean
): [Match<T>, number] | undefined => {
  /** We are attempting to force match pattern "p" with tokens "ts"
  We have to match "k" tokens, and then if they all match,
  consume the rest of the pattern.
  If we finish our match before "k" tokens,
  then we can fully consume that as the pattern is applicable
  If we have matched as we reach "k" tokens,
  then continue going. (If we fail later, the grammar is not LL(k)
  which will not be our fault)
  */
  const ruleNames = rule_names(g);
  // The in-progress match for this rule. The callback closes over
  // running_rule itself, so when invoked later it sees the complete
  // match list accumulated below.
  const running_rule: RuleMatch<T> = {
    type: "Rule",
    name: r.name,
    callback: (context?: any) => r.callback(running_rule, context),
    match: [],
  };
  // Checking the first "k" tokens.
  // tokenInd only advances when a terminal token is consumed (directly or
  // via a recursive rule match); patternInd advances once per iteration.
  let tokenInd = 0;
  for (
    let patternInd = 0;
    patternInd < p.length && tokenInd < ts.length;
    patternInd++
  ) {
    const tokI = ts[tokenInd];
    const patternI = p[patternInd];
    if (patternI === "EMPTY") {
      // The empty production matches without consuming any input.
      const emptyMatch: TokenMatch<T> = {
        type: "Token",
        name: "EMPTY",
        match: "",
      };
      running_rule.match.push(emptyMatch);
      // NOTE(review): this reports 0 tokens consumed even if earlier
      // pattern elements advanced tokenInd. That is fine when EMPTY is a
      // pattern's only element, but loses consumed tokens otherwise —
      // confirm EMPTY only ever appears as a standalone pattern.
      return [running_rule, 0];
    }
    if (tokI.name === patternI) {
      // This match at point 'i': the pattern element is a terminal and
      // the current token matches it.
      const tokenMatch: TokenMatch<T> = {
        type: "Token",
        name: tokI.name,
        match: tokI.match,
      };
      running_rule.match.push(tokenMatch);
      tokenInd++;
    } else if (ruleNames.includes(patternI)) {
      // pattern[i] is a separate rule, recurse down to match
      // If our pattern is the last in the list, it should consume all
      const consumeAll = final && patternInd === p.length - 1;
      const patRule = get_rule(g, patternI);
      const matchedRule = LL_rule(
        k,
        ts.slice(tokenInd),
        g,
        patRule,
        consumeAll
      );
      if (matchedRule) {
        // We did not have an undefined traversal.
        // Add the matches; matchedRule[1] is how many tokens the
        // sub-rule consumed from the sliced stream.
        running_rule.match.push(matchedRule[0]);
        tokenInd += matchedRule[1];
      } else {
        if (tokenInd < k) {
          // Matched rule was undefined, and this is the first "k"
          // tokens, so we have to bail out
          return undefined;
        } else {
          throw new Error("We made it 'k' tokens, but it failed afterwards");
        }
      }
    } else {
      // pattern[i] is neither the current terminal nor a known rule name.
      if (tokenInd < k) {
        // So we didn't consume "k" tokens and failed,
        // so this is not a true error, just a bail out
        return undefined;
      } else {
        throw new Error("We made it 'k' tokens, but it failed afterwards");
      }
    }
  }
  // We have consumed all the way we need to.
  // NOTE(review): the loop also exits here when the token stream runs out
  // while pattern elements remain unmatched — that case is still reported
  // as success; confirm this is the intended end-of-input behavior.
  return [running_rule, tokenInd];
};
/**
* A LL(k) parser for a specific grammar rule
* @param k the number of lookahead tokens for this parser
* @param ts the stream of tokens to parse
* @param g the grammar to use for parsing
* @param r the top-level grammar rule to attempt to parse
* @param final whether or not this parser should consume all remaining tokens
*/
const LL_rule = <T>(
k: number,
ts: Tokens,
g: Grammar<T>,
r: GrammarRule<T>,
final: boolean
): [Match<T>, number] | undefined => {
// While we still have tokens to consume, try the rules patterns
for (const pattern of r.pattern) {
// For each possible pattern
// Try to apply that pattern,
const patternReturn = LL_pattern(k, ts, g, r, pattern, final);
if (patternReturn) {
const [patternTry, ind] = patternReturn;
// If we have validly reached this point,
// the entire pattern has been consumed!
if (final && ind < ts.length) {
// If this is supposed to consume all, but it didnt
continue;
// This cannot be the pattern, so continue
}
return [patternTry, ind];
}
}
if (final) {
throw new Error("All patterns exhausted, and none validly parsed");
}
return undefined;
};
/**
 * Parses the token stream with an LL(k) parser
 * using the provided Grammar and a top-level grammar rule
 * that the token stream must conform to.
 * @param k the number of the LL(k) parser
 * @param tokStream the stream of tokens to parse
 * @param langGrammar the grammar to use for parsing
 * @param topLevelRule the top-level rule that the token stream must conform to
 * @returns the match tree covering the entire token stream
 * @throws if the token stream cannot be fully parsed as topLevelRule
 */
export const Parser = <T>(
  k: number,
  tokStream: Tokens,
  langGrammar: Grammar<T>,
  topLevelRule: GrammarRule<T>
): Match<T> => {
  // final=true: the top-level rule must consume every token in the stream.
  const ruleOut = LL_rule(k, tokStream, langGrammar, topLevelRule, true);
  if (ruleOut) {
    return ruleOut[0];
  }
  // LL_rule throws on failure when final=true, so this is a defensive
  // fallback for an undefined return.
  throw new Error("Failed to parse!");
};