/
tokenizer.l
231 lines (199 loc) · 5.17 KB
/
tokenizer.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
%{
/* Standard headers used by the rule actions and the diagnostic helpers in the user-code section. */
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
/* Project headers. NOTE(review): the token codes, yylval and the ANSI_COLOR_* macros used below
 * are not defined in this file and presumably come from these headers - confirm. */
#include "util/AST.h"
#include "util/symbol.h"
#include "y.tab.h"
/* Size in bytes of the buffer that holds a string literal while it is being scanned. */
#define MAX_STR_CONST 1000
/* Make the scanner return EOF_TOKEN to its caller when it terminates. */
#define yyterminate() return EOF_TOKEN
/* Executed by flex before each rule action: advance the column counter past the matched text. */
#define YY_USER_ACTION \
col += yyleng;
/* Name of the input file; defined elsewhere, printed by yyerror() and reopened by getlineat(). */
extern char *currentFileName;
/* Column counter: starts at 1, is reset to 1 by the newline rules, and is advanced by YY_USER_ACTION. */
int col = 1;
/* Line counter: starts at 1 and is incremented by the rules that consume a newline. */
int lineno = 1;
/* Prints a diagnostic for message s and exits the program. */
int yyerror(char*s);
/* Echoes line `lineno` (0-based index) of the input file to stderr and marks the span from..to. */
void point_at_in_line(int lineno, int from, int to);
%}
/* Named character classes referenced as {name} by the rules. */
alpha [a-zA-Z]
alphanum [a-zA-Z0-9]
digit [0-9]
nonzero [1-9]
/* Exclusive start conditions: str is entered on an opening double quote,
 * comment on a block-comment opener, and single_line_comment on "//". */
%x str
%x comment single_line_comment
%%
    /* Scratch storage for the string literal currently being scanned. This
     * indented code precedes the first rule, so flex copies it to the top of
     * yylex() where it becomes a pair of local variables. That is sufficient
     * because the <str> rules only return once the literal is complete or an
     * error has been reported. */
    char string_buf[MAX_STR_CONST];
    char *string_buf_ptr;

    /* Keywords. Listed before the identifier rule so that they win when both
     * rules match text of the same length. */
"int"       {return K_INT;}
"float"     {return K_FLOAT;}
"char"      {return K_CHAR;}
"for"       {return FOR;}
"while"     {return WHILE;}
"else"      {return ELSE;}
"if"        {return IF;}
"switch"    {return SWITCH;}
"case"      {return CASE;}
"return"    {return RETURN;}
"continue"  {return CONTINUE;}
"break"     {return BREAK;}
"default"   {return DEFAULT;}
"#include".*(\n) {
    /* The directive is consumed together with its newline and only reported. */
    col = 1; lineno++;
    fprintf(stderr, ANSI_COLOR_BOLD ANSI_COLOR_YELLOW "Note: " ANSI_COLOR_RESET "Include directives are ignored\n");
    /* lineno now names the following line (1-based), so the directive itself
     * is at 0-based index lineno-2. Columns 0..8 cover the word "#include". */
    point_at_in_line(lineno-2, 0, 8);
}

    /* Symbols. Single-character operators and punctuation are returned as
     * their own character code; the two-character operators below win by
     * longest match. */
[-+*/%]     return *yytext;
[;,:()]     return *yytext;
[\{\}\[\]]  return *yytext;
[=!<>]      return *yytext;
    /* [&|] return *yytext; */
"=="        {return EQ;}
">="        {return GE;}
"<="        {return LE;}
"!="        {return NE;}
"&&"        {return AND;}
"||"        {return OR;}
"++"        {return INC;}
"--"        {return DEC;}
"+="        {return PLUS_ASSIGN;}
"-="        {return MINUS_ASSIGN;}
"*="        {return STAR_ASSIGN;}
"/="        {return SLASH_ASSIGN;}
"%="        {return MOD_ASSIGN;}

    /* Identifiers. The name is copied into freshly allocated memory and the
     * token's source position is recorded alongside it. */
(_|{alpha})((_|{alphanum}))* {
    yylval.id.name = malloc(strlen(yytext)+1);
    if (yylval.id.name == NULL) {
        yyerror("Out of memory");
    }
    yylval.id.src.line = lineno;
    /* col was already advanced past the match, so step back to its start. */
    yylval.id.src.col = col - yyleng;
    strcpy(yylval.id.name, yytext);
    return IDENTIFIER;
}

    /* Integers. A literal with an exponent and no decimal point is still an
     * INTEGER; it is evaluated as a double and rounded. The result is not
     * range-checked against int. */
0           {yylval.iValue = 0; return INTEGER;}
{nonzero}({digit})*([eE][-+]?[0-9]+)? {yylval.iValue = (int)round(atof(yytext)); return INTEGER;}

    /* Floats. The integer part may be a lone 0, so that literals such as 0.5
     * are accepted as well as 1.5 or 12.e3. */
(0|{nonzero}({digit})*)"."({digit})*([eE][-+]?[0-9]+)? {yylval.fValue = atof(yytext); return FLOAT;}

    /* Characters: one letter or digit between single quotes. */
"\'"({alpha}|{digit})"\'" {yylval.cValue = yytext[1]; return CHARACTER;}

    /* Comments */
    /* "//"[^\n]*\n { lineno++; } */
"//"        BEGIN(single_line_comment);
<single_line_comment>"\n"    {col = 1; lineno++; BEGIN(INITIAL);}
<single_line_comment><<EOF>> {BEGIN(INITIAL); return EOF_TOKEN;}
<single_line_comment>[^\n]+  ;
"/*"        BEGIN(comment);
<comment>"\n"    {col = 1; lineno++;}
<comment>"*/"    {BEGIN(INITIAL);}
<comment><<EOF>> {yyerror("Unclosed comment found\n");}
<comment>.       ;

    /* Strings. Characters are accumulated in string_buf; the only escape
     * sequence that is translated is backslash-n. */
\"          { string_buf_ptr = string_buf; BEGIN(str);}
<str>\" {
    /* Closing quote: terminate the buffer and hand the parser its own copy. */
    BEGIN(INITIAL);
    *string_buf_ptr = '\0';
    yylval.sValue = malloc(strlen(string_buf)+1);
    if (yylval.sValue == NULL) {
        yyerror("Out of memory");
    }
    strcpy(yylval.sValue, string_buf);
    return STRING;
}
<str>\n     {yyerror("Unterminated string.\n"); return ERROR;}
<str>\\n {
    /* Keep one byte free for the terminating NUL written at the closing quote. */
    if (string_buf_ptr - string_buf >= MAX_STR_CONST - 1) {
        yyerror("String constant too long");
        return ERROR;
    }
    *string_buf_ptr++ = '\n';
}
<str>[^\n] {
    /* Keep one byte free for the terminating NUL written at the closing quote. */
    if (string_buf_ptr - string_buf >= MAX_STR_CONST - 1) {
        yyerror("String constant too long");
        return ERROR;
    }
    *string_buf_ptr++ = *yytext;
}

    /* Whitespace */
[ \t\r]     ;
\n          {col = 1; lineno++;}
    /* <<EOF>> return EOF_TOKEN; */

    /* Error: anything not matched above. */
.           {yyerror("Error: Invalid character"); return ERROR;}
%%
/*
 * End-of-input hook called by the generated scanner.
 *
 * flex only tests the result for truth: non-zero means there is no further
 * input to switch to, so the scanner proceeds with its end-of-file handling.
 * Returns 1 unconditionally.
 */
int yywrap(void){
    return 1;
}
/*
 * Report an error and terminate.
 *
 * Writes the file name, current line, a column and the message `s` to stderr
 * together with the current token text, echoes the source line with a span
 * marked via point_at_in_line(), then exits with status 1. The function never
 * returns; the int return type only matches the prototype.
 */
int yyerror(char *s){
    /* All three positions are derived from the running column counter. */
    const int shown_col = col - 1;
    const int span_to = col - 2;
    const int span_from = col - yyleng - 2;

    fprintf(stderr,
            ANSI_COLOR_BOLD "%s[%d:%d] "ANSI_COLOR_RED "%s" ANSI_COLOR_RESET " at '%s'\n",
            currentFileName, lineno, shown_col, s, yytext);

    /* point_at_in_line() takes a 0-based line index, hence lineno - 1. */
    point_at_in_line(lineno - 1, span_from, span_to);
    exit(1);
}
/*
 * Fetch one line of the file named by currentFileName.
 *
 * lineno is a 0-based line index. The file is opened and read from the start
 * on every call, and closed again before returning.
 *
 * Returns the line, including its trailing newline if it has one, in a buffer
 * allocated by getline(); the caller owns it and must free() it. Returns NULL
 * if lineno is negative, the file cannot be opened, or the file has fewer
 * than lineno + 1 lines.
 */
char *getlineat(int lineno)
{
    if (lineno < 0) {
        return NULL;
    }

    FILE *file = fopen(currentFileName, "r");
    if (file == NULL) {
        return NULL;
    }

    char *line = NULL;
    size_t cap = 0;
    char *found = NULL;
    int i = 0;
    while (getline(&line, &cap, file) != -1) {
        if (i == lineno) {
            found = line;
            break;
        }
        i++;
    }

    /* getline() may have allocated a buffer even when the line was not reached. */
    if (found == NULL) {
        free(line);
    }
    fclose(file);
    return found;
}
/*
 * Echo one source line to stderr and mark a span of it.
 *
 * lineno   0-based index of the line to show (printed as lineno + 1).
 * from, to 0-based character offsets of the span [from, to). They are
 *          swapped if given in the wrong order and clamped to [0, length of
 *          the line], so out-of-range or negative values are tolerated.
 *
 * The span is printed in bold red; a second line places a caret under `from`
 * and tildes under the rest of the span. If the line cannot be read, or is
 * empty, a short end-of-file notice is printed instead.
 */
void point_at_in_line(int lineno, int from, int to)
{
    char *line = getlineat(lineno);
    if (!line || strlen(line) == 0) {
        fprintf(stderr, "\tError at [End of file]\n");
        free(line);
        return;
    }
    const int len = (int)strlen(line);

    /* Recover silently and gracefully from invalid inputs. */
    if (from > to) {
        int temp = from;
        from = to;
        to = temp;
    }
    /* Clamp in signed arithmetic: comparing a negative int against a size_t
     * would convert it to a huge unsigned value and clamp it to len. */
    if (from < 0) {
        from = 0;
    }
    if (to < 0) {
        to = 0;
    }
    if (from > len) {
        from = len;
    }
    if (to > len) {
        to = len;
    }

    /* Text before the span, the span itself in bold red, then the remainder. */
    fprintf(stderr, " %d | %.*s", lineno + 1, from, line);
    fprintf(stderr, "\033[31;1m%.*s\033[0m", to - from, line + from);
    fprintf(stderr, "%s", line + to);
    /* The last line of a file may lack its newline. */
    if (line[len - 1] != '\n') {
        fprintf(stderr, "\n");
    }

    /* Left padding of the marker line: one space per digit of the printed
     * line number, plus three. */
    int digits = 0;
    for (int shown = lineno + 1; shown > 0; shown /= 10) {
        digits++;
    }
    fprintf(stderr, "%*s | %*s", digits + 3, "", from, "");

    /* Caret under the first character of the span, tildes under the rest. */
    fprintf(stderr, "\033[32;1m" ANSI_COLOR_RED "^");
    for (int i = from; i < to - 1; i++) {
        fprintf(stderr, "~");
    }
    fprintf(stderr, ANSI_COLOR_RESET "\n");

    free(line);
}