/
lexer.c
180 lines (150 loc) · 3.9 KB
/
lexer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#include "lexer.h"
#include "token.h"
#include "util.h"
#include "alloc-list.h"
#include <ctype.h>
#include <string.h>
// Step over leading blanks (' ', '\t' and '\n' only).
// Returns a pointer to the first character that is not one of those,
// or NULL when the end of the string (EOF) is reached first.
static char*
skip_space(char *src)
{
    for (; *src != '\0'; src++) {
        switch (*src) {
        case ' ':
        case '\t':
        case '\n':
            // still in the blank run; keep advancing
            continue;
        default:
            return src;
        }
    }
    return NULL;
}
// Scan a decimal number (digits with at most one '.') at the head of src.
//
// On success, *buf receives a NUL-terminated copy of the number text
// obtained from al_malloc(), and the position right after the number is
// returned.
//
// Returns NULL and leaves *buf untouched when:
//   - src starts with neither a digit nor '.' (this includes EOF),
//   - the number contains more than one '.',
//   - the number starts or ends with '.',
//   - the copy buffer cannot be allocated.
static char*
get_digit(char *src, char **buf)
{
    size_t len = 0;
    // Position of the '.' if one was seen. A pointer is used instead of an
    // index so that a '.' at index 0 is distinguishable from "no '.' yet".
    const char *dot = NULL;

    for (; src[len] != '\0'; len++) {
        if (src[len] == '.') {
            if (dot != NULL) {
                // src has '.' already
                WARN2("parsing error near [%s]", src);
                return NULL;
            }
            dot = src + len;
        }
        // <ctype.h> functions require a value representable as unsigned char.
        else if (! isdigit((unsigned char)src[len])) {
            break;
        }
    }

    if (len == 0) {
        return NULL;
    }
    // Check against the bounds of the scanned number, not of the whole
    // input: "1. + 2" must be rejected just like a bare "1.".
    if (dot != NULL && (dot == src || dot == src + len - 1)) {
        WARN("'.' at the head or tail of src");
        return NULL;
    }

    char *digits = al_malloc(len + 1);
    if (! digits) {
        WARN("cannot allocate memory for digit buffer");
        return NULL;
    }
    memcpy(digits, src, len);
    digits[len] = '\0';
    *buf = digits;
    return src + len;
}
// Read one token from the head of src.
//
//   src          NUL-terminated input. NULL is treated as EOF.
//   next_pos     On success, receives the position right after the token.
//                Not written when NULL is returned.
//   allow_signed When true, '+' or '-' followed by a digit (spaces between
//                the sign and the digit are allowed) is read as a single
//                signed TOK_DIGIT token instead of an operator.
//   error        Set to true on syntax error, false otherwise.
//
// If EOF or syntax error: return NULL
// Otherwise, Return allocated token.
Token*
lexer_get_token(char *src, char **next_pos, bool allow_signed, bool *error)
{
    char *tok_buf = NULL;
    char *sign_pos = NULL;
    bool is_operator = true;
    Token *tok_result;
    TokenType tok_type;

    // Set true when syntax error.
    *error = false;

    // Return when eof.
    if (src == NULL) {
        return NULL;
    }
    src = skip_space(src);
    if (src == NULL) {
        return NULL;
    }

    // Step 1: classify the leading character.
    // TODO Use table
    switch (*src) {
    case '(': tok_type = TOK_LPAREN;   break;
    case ')': tok_type = TOK_RPAREN;   break;
    case '*': tok_type = TOK_MULTIPLY; break;
    case '/': tok_type = TOK_DIVIDE;   break;
    case '%': tok_type = TOK_MODULO;   break;
    case '^': tok_type = TOK_UP_ALLOW; break;
    case '+':
    case '-':
        tok_type = (*src == '+') ? TOK_PLUS : TOK_MINUS;
        if (allow_signed) {
            // Allow '+<digit>' or '-<digit>'.
            // NOTE: Allow spaces between sign and digit.
            // skip_space() returns NULL when the sign is the last
            // non-space character; treat that as a plain operator.
            char *digit_pos = skip_space(src + 1);
            if (digit_pos != NULL && isdigit((unsigned char)*digit_pos)) {
                sign_pos = src;
                src = digit_pos;
                is_operator = false;
            }
        }
        break;
    default:
        is_operator = false;
        break;
    }

    // Step 2: collect the token text into tok_buf.
    if (is_operator) {
        tok_buf = al_malloc(2);
        if (! tok_buf) {
            DIE("cannot allocate memory for token buffer!");
        }
        tok_buf[0] = src[0];
        tok_buf[1] = '\0';
        src++;
    }
    else if (isdigit((unsigned char)*src)) {
        char *after_pos = get_digit(src, &tok_buf);
        if (after_pos == NULL) {
            WARN2("malformed digit [%s]", src);
            *error = true;
            return NULL;
        }
        src = after_pos;
        tok_type = TOK_DIGIT;
    }
    else {
        // syntax error
        WARN3("syntax error near [%c]%s", *src, src + 1);
        *error = true;
        return NULL;
    }

    // Step 3: build the Token; a signed digit gets its sign prepended.
    tok_result = al_malloc(sizeof(Token));
    if (! tok_result) {
        DIE("cannot allocate memory for Token!");
    }
    token_init(tok_result);

    size_t buf_len = strlen(tok_buf);
    size_t alloc_num = (sign_pos == NULL ? 0 : 1) + buf_len + 1;
    token_alloc(tok_result, alloc_num);
    tok_result->type = tok_type;

    char *dst = tok_result->str;
    if (sign_pos != NULL) {
        *dst++ = sign_pos[0];
    }
    // buf_len + 1 copies the terminating NUL as well.
    memcpy(dst, tok_buf, buf_len + 1);

    *next_pos = src;
    return tok_result;
}