Skip to content

Commit 25b4797

Browse files
committed
refactor: integrate lexer into parser and simplify API
- Refactor Parser to use on-demand lexing instead of pre-tokenization
- Remove complex Token** parameter passing in parser methods
- Add token management methods (nextToken, match, check, expect)
- Simplify main.cpp by removing the manual tokenization loop
- Fix linker warning by adding a .note.GNU-stack section to the generated assembly

This change makes the parser cleaner and more maintainable by:
1. Eliminating the need to tokenize the entire input upfront
2. Removing error-prone Token** Rest parameters
3. Following standard recursive descent parser patterns
1 parent a6bae67 commit 25b4797

4 files changed

Lines changed: 121 additions & 86 deletions

File tree

include/Parser.h

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,31 +7,42 @@
77
namespace chibcc {
88

99
//===----------------------------------------------------------------------===//
10-
// Parser
10+
// Parser - Recursive descent parser with integrated lexer
1111
//===----------------------------------------------------------------------===//
1212

1313
class Parser {
1414
private:
15+
Lexer &Lex;
16+
std::unique_ptr<Token> CurTok; // Current token
17+
18+
// Helper methods for AST node creation
1519
std::unique_ptr<Node> newNode(NodeKind Kind);
1620
std::unique_ptr<Node> newBinary(NodeKind Kind, std::unique_ptr<Node> Lhs,
1721
std::unique_ptr<Node> Rhs);
1822
std::unique_ptr<Node> newUnary(NodeKind Kind, std::unique_ptr<Node> Expr);
1923
std::unique_ptr<Node> newNum(int Val);
2024

21-
std::unique_ptr<Node> expr(Token **Rest, Token *Tok);
22-
std::unique_ptr<Node> equality(Token **Rest, Token *Tok);
23-
std::unique_ptr<Node> relational(Token **Rest, Token *Tok);
24-
std::unique_ptr<Node> add(Token **Rest, Token *Tok);
25-
std::unique_ptr<Node> mul(Token **Rest, Token *Tok);
26-
std::unique_ptr<Node> unary(Token **Rest, Token *Tok);
27-
std::unique_ptr<Node> primary(Token **Rest, Token *Tok);
28-
29-
Lexer &Lex;
25+
// Token management
26+
void nextToken(); // Advance to next token
27+
bool match(const char *Op); // Check and consume if matches
28+
bool match(tok::TokenKind Kind); // Check and consume if matches
29+
void expect(const char *Op); // Consume or error
30+
bool check(const char *Op); // Check without consuming
31+
bool check(tok::TokenKind Kind); // Check without consuming
32+
33+
// Grammar rules
34+
std::unique_ptr<Node> expr();
35+
std::unique_ptr<Node> equality();
36+
std::unique_ptr<Node> relational();
37+
std::unique_ptr<Node> add();
38+
std::unique_ptr<Node> mul();
39+
std::unique_ptr<Node> unary();
40+
std::unique_ptr<Node> primary();
3041

3142
public:
3243
explicit Parser(Lexer &L) : Lex(L) {}
3344

34-
std::unique_ptr<Node> parse(Token *Tok);
45+
std::unique_ptr<Node> parse();
3546
};
3647

3748
} // namespace chibcc

main.cpp

Lines changed: 3 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -20,29 +20,11 @@ int main(int Argc, char **Argv) {
2020
// Create lexer
2121
Lexer Lex(Input, Input + strlen(Input), Diags);
2222

23-
// Tokenize all input into a linked list
24-
std::unique_ptr<Token> Head = std::make_unique<Token>();
25-
Token *Current = Head.get();
26-
27-
while (true) {
28-
auto Tok = Lex.lex();
29-
Current->Next = std::move(Tok);
30-
Current = Current->Next.get();
31-
if (Current->Kind == tok::eof) {
32-
break;
33-
}
34-
}
35-
36-
// Check for lexical errors
37-
if (Diags.hasErrorOccurred()) {
38-
return 1;
39-
}
40-
41-
// Parse tokens into AST
23+
// Parse input into AST (lexer is called on-demand during parsing)
4224
Parser P(Lex);
43-
auto Ast = P.parse(Head->Next.get());
25+
auto Ast = P.parse();
4426

45-
// Check for parse errors
27+
// Check for errors
4628
if (Diags.hasErrorOccurred()) {
4729
return 1;
4830
}

src/CodeGenerator.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,9 @@ void CodeGenerator::codegen(Node *N) {
7979
genExpr(N);
8080
printf(" ret\n");
8181

82+
// Add GNU stack note to prevent executable stack warning
83+
printf(" .section .note.GNU-stack,\"\",%%progbits\n");
84+
8285
assert(Depth == 0);
8386
}
8487

src/Parser.cpp

Lines changed: 93 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,44 @@ namespace chibcc {
66
// Parser Implementation
77
//===----------------------------------------------------------------------===//
88

9+
// Token management methods
10+
11+
void Parser::nextToken() {
12+
CurTok = Lex.lex();
13+
}
14+
15+
bool Parser::match(const char *Op) {
16+
if (check(Op)) {
17+
nextToken();
18+
return true;
19+
}
20+
return false;
21+
}
22+
23+
bool Parser::match(tok::TokenKind Kind) {
24+
if (check(Kind)) {
25+
nextToken();
26+
return true;
27+
}
28+
return false;
29+
}
30+
31+
void Parser::expect(const char *Op) {
32+
if (!match(Op)) {
33+
errorTok(CurTok.get(), "expected '%s'", Op);
34+
}
35+
}
36+
37+
bool Parser::check(const char *Op) {
38+
return CurTok && Lexer::equal(CurTok.get(), Op);
39+
}
40+
41+
bool Parser::check(tok::TokenKind Kind) {
42+
return CurTok && CurTok->Kind == Kind;
43+
}
44+
45+
// AST node creation helpers
46+
947
std::unique_ptr<Node> Parser::newNode(NodeKind Kind) {
1048
return std::make_unique<Node>(Kind);
1149
}
@@ -32,137 +70,138 @@ std::unique_ptr<Node> Parser::newNum(int Val) {
3270
return N;
3371
}
3472

73+
// Grammar rules
74+
3575
// expr = equality
36-
std::unique_ptr<Node> Parser::expr(Token **Rest, Token *Tok) {
37-
return equality(Rest, Tok);
76+
std::unique_ptr<Node> Parser::expr() {
77+
return equality();
3878
}
3979

4080
// equality = relational ("==" relational | "!=" relational)*
41-
std::unique_ptr<Node> Parser::equality(Token **Rest, Token *Tok) {
42-
auto N = relational(&Tok, Tok);
81+
std::unique_ptr<Node> Parser::equality() {
82+
auto N = relational();
4383

4484
for (;;) {
45-
if (Lexer::equal(Tok, "==")) {
46-
N = newBinary(NodeKind::Eq, std::move(N),
47-
relational(&Tok, Tok->Next.get()));
85+
if (match("==")) {
86+
N = newBinary(NodeKind::Eq, std::move(N), relational());
4887
continue;
4988
}
5089

51-
if (Lexer::equal(Tok, "!=")) {
52-
N = newBinary(NodeKind::Ne, std::move(N),
53-
relational(&Tok, Tok->Next.get()));
90+
if (match("!=")) {
91+
N = newBinary(NodeKind::Ne, std::move(N), relational());
5492
continue;
5593
}
5694

57-
*Rest = Tok;
5895
return N;
5996
}
6097
}
6198

6299
// relational = add ("<" add | "<=" add | ">" add | ">=" add)*
63-
std::unique_ptr<Node> Parser::relational(Token **Rest, Token *Tok) {
64-
auto N = add(&Tok, Tok);
100+
std::unique_ptr<Node> Parser::relational() {
101+
auto N = add();
65102

66103
for (;;) {
67-
if (Lexer::equal(Tok, "<")) {
68-
N = newBinary(NodeKind::Lt, std::move(N), add(&Tok, Tok->Next.get()));
104+
if (match("<")) {
105+
N = newBinary(NodeKind::Lt, std::move(N), add());
69106
continue;
70107
}
71108

72-
if (Lexer::equal(Tok, "<=")) {
73-
N = newBinary(NodeKind::Le, std::move(N), add(&Tok, Tok->Next.get()));
109+
if (match("<=")) {
110+
N = newBinary(NodeKind::Le, std::move(N), add());
74111
continue;
75112
}
76113

77-
if (Lexer::equal(Tok, ">")) {
78-
N = newBinary(NodeKind::Lt, add(&Tok, Tok->Next.get()), std::move(N));
114+
if (match(">")) {
115+
N = newBinary(NodeKind::Lt, add(), std::move(N));
79116
continue;
80117
}
81118

82-
if (Lexer::equal(Tok, ">=")) {
83-
N = newBinary(NodeKind::Le, add(&Tok, Tok->Next.get()), std::move(N));
119+
if (match(">=")) {
120+
N = newBinary(NodeKind::Le, add(), std::move(N));
84121
continue;
85122
}
86123

87-
*Rest = Tok;
88124
return N;
89125
}
90126
}
91127

92128
// add = mul ("+" mul | "-" mul)*
93-
std::unique_ptr<Node> Parser::add(Token **Rest, Token *Tok) {
94-
auto N = mul(&Tok, Tok);
129+
std::unique_ptr<Node> Parser::add() {
130+
auto N = mul();
95131

96132
for (;;) {
97-
if (Lexer::equal(Tok, "+")) {
98-
N = newBinary(NodeKind::Add, std::move(N), mul(&Tok, Tok->Next.get()));
133+
if (match("+")) {
134+
N = newBinary(NodeKind::Add, std::move(N), mul());
99135
continue;
100136
}
101137

102-
if (Lexer::equal(Tok, "-")) {
103-
N = newBinary(NodeKind::Sub, std::move(N), mul(&Tok, Tok->Next.get()));
138+
if (match("-")) {
139+
N = newBinary(NodeKind::Sub, std::move(N), mul());
104140
continue;
105141
}
106142

107-
*Rest = Tok;
108143
return N;
109144
}
110145
}
111146

112147
// mul = unary ("*" unary | "/" unary)*
113-
std::unique_ptr<Node> Parser::mul(Token **Rest, Token *Tok) {
114-
auto N = unary(&Tok, Tok);
148+
std::unique_ptr<Node> Parser::mul() {
149+
auto N = unary();
115150

116151
for (;;) {
117-
if (Lexer::equal(Tok, "*")) {
118-
N = newBinary(NodeKind::Mul, std::move(N), unary(&Tok, Tok->Next.get()));
152+
if (match("*")) {
153+
N = newBinary(NodeKind::Mul, std::move(N), unary());
119154
continue;
120155
}
121156

122-
if (Lexer::equal(Tok, "/")) {
123-
N = newBinary(NodeKind::Div, std::move(N), unary(&Tok, Tok->Next.get()));
157+
if (match("/")) {
158+
N = newBinary(NodeKind::Div, std::move(N), unary());
124159
continue;
125160
}
126161

127-
*Rest = Tok;
128162
return N;
129163
}
130164
}
131165

132166
// unary = ("+" | "-") unary
133167
// | primary
134-
std::unique_ptr<Node> Parser::unary(Token **Rest, Token *Tok) {
135-
if (Lexer::equal(Tok, "+"))
136-
return unary(Rest, Tok->Next.get());
168+
std::unique_ptr<Node> Parser::unary() {
169+
if (match("+"))
170+
return unary();
137171

138-
if (Lexer::equal(Tok, "-"))
139-
return newUnary(NodeKind::Neg, unary(Rest, Tok->Next.get()));
172+
if (match("-"))
173+
return newUnary(NodeKind::Neg, unary());
140174

141-
return primary(Rest, Tok);
175+
return primary();
142176
}
143177

144178
// primary = "(" expr ")" | num
145-
std::unique_ptr<Node> Parser::primary(Token **Rest, Token *Tok) {
146-
if (Lexer::equal(Tok, "(")) {
147-
auto N = expr(&Tok, Tok->Next.get());
148-
*Rest = Lexer::skip(Tok, ")");
179+
std::unique_ptr<Node> Parser::primary() {
180+
if (match("(")) {
181+
auto N = expr();
182+
expect(")");
149183
return N;
150184
}
151185

152-
if (Tok->Kind == tok::numeric_constant) {
153-
auto N = newNum(Tok->IntegerValue);
154-
*Rest = Tok->Next.get();
186+
if (check(tok::numeric_constant)) {
187+
auto N = newNum(CurTok->IntegerValue);
188+
nextToken();
155189
return N;
156190
}
157191

158-
errorTok(Tok, "expected an expression");
192+
errorTok(CurTok.get(), "expected an expression");
159193
return nullptr; // Never reached
160194
}
161195

162-
std::unique_ptr<Node> Parser::parse(Token *Tok) {
163-
auto N = expr(&Tok, Tok);
164-
if (Tok->Kind != tok::eof)
165-
errorTok(Tok, "extra token");
196+
std::unique_ptr<Node> Parser::parse() {
197+
// Initialize by reading first token
198+
nextToken();
199+
200+
auto N = expr();
201+
202+
if (!check(tok::eof))
203+
errorTok(CurTok.get(), "extra token");
204+
166205
return N;
167206
}
168207

0 commit comments

Comments
 (0)