Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 482 lines (435 sloc) 10.143 kB
02bdd61 @NotFound First published release
authored
1 // token.cpp
07da7c7 @NotFound escaped double quotes in stage 0
authored
2 // Revision 27-jan-2010
02bdd61 @NotFound First published release
authored
3
4 #include "token.h"
5 #include "errors.h"
6
2d6aa37 @NotFound less abuse of Token::str
authored
7 #include <sstream>
8
d19552a @NotFound assume utf8 also in single quote in non ascii string literals
authored
9
10 static std::string unsinglequote (const std::string &s);
11
02bdd61 @NotFound First published release
authored
12 //**********************************************************************
13
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored
// Formats an integer as decimal text by streaming it into a string buffer.
static std::string tostring(int n)
{
    std::ostringstream text;
    text << n;
    return text.str();
}
20
21 //**********************************************************************
22
02bdd61 @NotFound First published release
authored
23 Token::Token () :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
24 ttype(TokenTUnknown), ln(0)
02bdd61 @NotFound First published release
authored
25 { }
26
27 Token::Token (TokenType tt) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
28 ttype(tt), ln(0)
02bdd61 @NotFound First published release
authored
29 { }
30
31 Token::Token (TokenType tt, const std::string &file) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
32 ttype(tt), ln(0), filename(file)
02bdd61 @NotFound First published release
authored
33 {
34 }
35
36 Token::Token (const std::string &ss, unsigned int linenum,
6fe994f @NotFound add -= operator and reformat token.cpp
authored
37 const std::string &file) :
38 ttype(TokenTUnknown), s(ss), ln(linenum), filename(file)
02bdd61 @NotFound First published release
authored
39 { }
40
41 Token::Token (TokenType type, const std::string &ss, unsigned int linenum,
6fe994f @NotFound add -= operator and reformat token.cpp
authored
42 const std::string &file) :
43 ttype(type), s(ss), ln(linenum), filename(file)
02bdd61 @NotFound First published release
authored
44 { }
45
1b6fdb5 @NotFound more cleaning of optimize functions
authored
46 Token::Token (TokenType type, const std::string &ss, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
47 ttype(type), s(ss),
48 ln(base.linenum()), filename(base.file())
1b6fdb5 @NotFound more cleaning of optimize functions
authored
49 { }
50
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored
51 Token::Token (bool value, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
52 ttype(TokenTInteger), s(value ? "1" : "0"),
53 ln(base.linenum()), filename(base.file())
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored
54 {
55 }
56
57 Token::Token (int value, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
58 ttype(TokenTInteger), s(tostring(value)),
59 ln(base.linenum()), filename(base.file())
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored
60 {
61 }
62
02bdd61 @NotFound First published release
authored
63 bool Token::empty () const
64 { return s.empty(); }
65
2d6aa37 @NotFound less abuse of Token::str
authored
66 int Token::getinteger() const
67 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
68 if (ttype == TokenTInteger)
69 {
70 std::istringstream iss(s);
71 int n;
72 iss >> n;
73 return n;
74 }
75 else
76 throw Expected("integer number", *this);
2d6aa37 @NotFound less abuse of Token::str
authored
77 }
78
79 #if 1
02bdd61 @NotFound First published release
authored
80 std::string Token::str() const
81 { return s; }
2d6aa37 @NotFound less abuse of Token::str
authored
82 #endif
83
84 std::string Token::identifier() const
85 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
86 if (ttype == TokenTIdentifier)
87 return s;
88 else
89 throw Expected("identifier", *this);
2d6aa37 @NotFound less abuse of Token::str
authored
90 }
91
92 std::string Token::pirliteralstring() const
93 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
94 switch (ttype) {
95 case TokenTSingleQuoted:
d19552a @NotFound assume utf8 also in single quote in non ascii string literals
authored
96 return unsinglequote(s);
6fe994f @NotFound add -= operator and reformat token.cpp
authored
97 case TokenTQuoted:
98 return unquote(s);
99 default:
100 throw Expected("literal string", *this);
101 }
2d6aa37 @NotFound less abuse of Token::str
authored
102 }
02bdd61 @NotFound First published release
authored
103
104 std::string Token::describe() const
105 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
106 switch (ttype) {
107 case TokenTEOF:
108 return "*End of file*";
109 case TokenTSingleQuoted:
110 return "'" + s + "'";
111 case TokenTQuoted:
112 return "\"" + unquote(s) + "\"";
113 case TokenTOperator:
114 return "'" + s + "'";
115 case TokenTUnknown:
116 return "*Unknown*";
117 default:
118 return s;
119 }
02bdd61 @NotFound First published release
authored
120 }
121
122 unsigned int Token::linenum() const
123 { return ln; }
124
125 std::string Token::file() const
126 { return filename; }
127
128 bool Token::isidentifier() const
129 { return ttype == TokenTIdentifier; }
130
131 bool Token::isinteger() const
132 { return ttype == TokenTInteger; }
133
134 bool Token::issinglequoted() const
135 { return ttype == TokenTSingleQuoted; }
136
137 bool Token::isliteralstring() const
138 { return ttype == TokenTSingleQuoted || ttype == TokenTQuoted; }
139
7b6ddf3 @NotFound refactor some common token operations
authored
140 bool Token::isop(const std::string &name) const
141 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
142 return ttype == TokenTOperator &&
143 s == name;
7b6ddf3 @NotFound refactor some common token operations
authored
144 }
145
146 bool Token::isop(char name) const
147 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
148 return ttype == TokenTOperator &&
149 s.length() == 1 && s[0] == name;
7b6ddf3 @NotFound refactor some common token operations
authored
150 }
151
2d6aa37 @NotFound less abuse of Token::str
authored
152 bool Token::iskeyword(const std::string &name) const
153 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
154 return ttype == TokenTIdentifier &&
155 s == name;
2d6aa37 @NotFound less abuse of Token::str
authored
156 }
157
02bdd61 @NotFound First published release
authored
158 bool Token::isspace() const
159 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
160 return
161 ttype == TokenTComment ||
162 ttype == TokenTWhiteSpace ||
163 (ttype == TokenTUnknown &&
164 (s.empty() ||
165 s[0] == ' ' ||
166 s[0] == '\t' ||
167 s[0] == '\n'
168 ));
02bdd61 @NotFound First published release
authored
169 }
170
171 //**********************************************************************
172
// True when 'c' can begin an identifier: an underscore or a character
// classified as alphabetic by isalpha.
bool isidentifierstart(char c)
{
    if (c == '_')
        return true;
    // Go through unsigned char so that negative char values are never
    // passed to isalpha.
    return isalpha(static_cast<unsigned char>(c)) != 0;
}
177
// True when 'c' can appear inside an identifier: an underscore or a
// character classified as alphanumeric by isalnum.
bool isidentifier(char c)
{
    if (c == '_')
        return true;
    // Go through unsigned char so that negative char values are never
    // passed to isalnum.
    return isalnum(static_cast<unsigned char>(c)) != 0;
}
182
// Builds a double quoted literal from the raw text 's'.
// Newline, tab, backslash and double quote are written as backslash
// escapes; every other byte is copied unchanged. When any byte is above
// 127 the literal is prefixed with utf8:unicode: .
std::string unquote (const std::string &s)
{
    std::string escaped;
    bool hashighbyte = false;
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        const unsigned char ch = *it;
        if (ch > 127)
            hashighbyte = true;

        if (ch == '\n')
            escaped += "\\n";
        else if (ch == '\t')
            escaped += "\\t";
        else if (ch == '\\')
            escaped += "\\\\";
        else if (ch == '"')
            escaped += "\\\"";
        else
            escaped += static_cast<char>(ch);
    }

    std::string literal(hashighbyte ? "utf8:unicode:\"" : "\"");
    literal += escaped;
    literal += "\"";
    return literal;
}
208
d19552a @NotFound assume utf8 also in single quote in non ascii string literals
authored
209 static std::string unsinglequote (const std::string &s)
210 {
211 bool nonascii= false;
212 for (size_t i= 0; i < s.size(); ++i)
213 {
214 unsigned char c= s[i];
215 if (c > 127)
216 nonascii= true;
217 }
218 if (nonascii)
219 return unquote(s);
220 else
221 return '\'' + s + '\'';
222 }
223
02bdd61 @NotFound First published release
authored
224 //**********************************************************************
225
226 Tokenizer::Tokenizer (std::istream &is_a, const char *filename) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored
227 is (is_a),
228 name (std::string(filename ? filename : "(unknown)")),
229 ln(1),
230 unc('\0')
02bdd61 @NotFound First published release
authored
231 {
232 }
233
234 char Tokenizer::getchar()
235 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
236 if (unc)
237 {
238 char c= unc;
239 unc = '\0';
240 return c;
241 }
242 else
243 {
244 char c= is.get();
245 if (is.eof())
246 c= '\0';
247 if (c == '\n')
248 ++ln;
249 return c;
250 }
02bdd61 @NotFound First published release
authored
251 }
252
253 void Tokenizer::ungetchar(char c)
254 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
255 unc = c;
02bdd61 @NotFound First published release
authored
256 }
257
258 std::string Tokenizer::quoted()
259 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
260 std::string s;
261 unsigned int line = ln;
262 char c;
263 while ((c= getchar()) && is && c != '"' && c != '\n')
264 {
265 if (c == '\\')
266 {
267 c= getchar();
268 switch(c)
269 {
270 case 'n':
271 s+= '\n';
272 break;
273 case 't':
274 s+= '\t';
275 break;
276 case '\\':
277 s+= '\\';
278 break;
07da7c7 @NotFound escaped double quotes in stage 0
authored
279 case '"':
280 s+= '\"';
281 break;
6fe994f @NotFound add -= operator and reformat token.cpp
authored
282 }
283 }
284 else
285 s+= c;
286 }
287 if ((!is) || c != '"')
aba4cd7 @NotFound a little cleaning of compiler exceptions
authored
288 throw SyntaxError ("Unterminated string ",
289 Token(TokenTQuoted, s, line, name));
6fe994f @NotFound add -= operator and reformat token.cpp
authored
290 return s;
02bdd61 @NotFound First published release
authored
291 }
292
293 void Tokenizer::unget (const Token & t)
294 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
295 untoc.push_back(t);
02bdd61 @NotFound First published release
authored
296 }
297
298 Token Tokenizer::getany ()
299 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
300 if (! untoc.empty () )
301 {
302 Token t(untoc.back());
303 untoc.pop_back();
304 return t;
305 }
306 char c = getchar();
307 while (is && (c == ' ' || c == '\t' || c == '\n'))
308 c= getchar();
309 if (is.eof())
310 return Token(TokenTEOF, name);
311
312 unsigned int linenum = ln;
313 std::string s(1, c);
314 switch (c) {
315 case '#':
316 c= getchar();
317 while (c != '\0' && c != '\n')
318 {
319 s+= c;
320 c= getchar();
321 }
322 return Token(TokenTComment, s, linenum, name);
323 case '/':
324 switch((c = getchar()))
325 {
326 case '/':
327 for (; (!is.eof()) && c != '\n'; c= getchar())
328 s+= c;
329 return Token(TokenTComment, s, linenum, name);
330 case '*':
331 s+= c;
332 c= getchar();
333 do
334 {
335 while (is && c != '*') {
336 s+= c;
337 c= getchar();
338 }
339 c= getchar();
340 } while (is && c != '/');
341 s+= c;
342 return Token(TokenTComment, s, linenum, name);
343 default:
344 ungetchar(c);
345 }
346 break;
347 case ':':
348 switch ((c= getchar()))
349 {
350 case ':':
351 s+= c;
352 break;
353 default:
354 ungetchar(c);
355 }
356 break;
357 case '<':
358 switch ((c= getchar()))
359 {
360 case '<': case '=':
361 s+= c;
362 break;
363 default:
364 ungetchar(c);
365 }
366 break;
7be4ee1 @NotFound add '<=' and '>=' operators and refector '<' and '>'
authored
367 case '>':
368 switch ((c= getchar()))
369 {
370 case '>': case '=':
371 s+= c;
372 break;
373 default:
374 ungetchar(c);
375 }
376 break;
6fe994f @NotFound add -= operator and reformat token.cpp
authored
377 case '=':
378 switch ((c= getchar()))
379 {
380 case ':': case '=':
381 s+= c;
382 break;
383 default:
384 ungetchar(c);
385 }
386 break;
387 case '&':
388 switch ((c= getchar()))
389 {
390 case '&': case '=':
391 s+= c;
392 break;
393 default:
394 ungetchar(c);
395 }
396 break;
397 case '|':
398 switch ((c= getchar()))
399 {
400 case '|': case '=':
401 s+= c;
402 break;
403 default:
404 ungetchar(c);
405 }
406 break;
407 case '\'':
408 s= std::string();
409 while ((c= getchar()) && is && c != '\'' && c != '\n')
410 s+= c;
411 if ((!is) || c != '\'')
aba4cd7 @NotFound a little cleaning of compiler exceptions
authored
412 {
413 throw SyntaxError("Unterminated string",
414 Token(TokenTSingleQuoted, s, linenum, name));
415 }
6fe994f @NotFound add -= operator and reformat token.cpp
authored
416 return Token(TokenTSingleQuoted, s, linenum, name);
417 case '"':
418 s= quoted ();
419 return Token(TokenTQuoted, s, linenum, name);
420 case '0': case '1': case '2': case '3': case '4':
421 case '5': case '6': case '7': case '8': case '9':
422 while ((c= getchar()) && is && c >= '0' && c <= '9')
423 s+= c;
424 if (is)
425 ungetchar(c);
426 return Token(TokenTInteger, s, linenum, name);
427 case '+':
428 switch ((c= getchar()))
d19552a @NotFound assume utf8 also in single quote in non ascii string literals
authored
429 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
430 case '+': case '=':
431 s+= c;
432 break;
433 default:
434 ungetchar(c);
435 }
436 break;
437 case '-':
438 switch ((c= getchar()))
439 {
440 case '-': case '=':
441 s+= c;
442 break;
443 default:
444 ungetchar(c);
445 }
446 break;
447 case '!':
448 c= getchar();
449 if (c == '=')
450 s+= c;
451 else
452 ungetchar(c);
453 break;
d6a4915 @NotFound new operator '%%' (cmod), suggested by PacoLinux++
authored
454 case '%':
455 c= getchar();
456 if (c == '%')
457 s+= c;
458 else
459 ungetchar(c);
460 break;
6fe994f @NotFound add -= operator and reformat token.cpp
authored
461 default:
462 if (isidentifierstart(c))
463 {
464 for (c= getchar(); isidentifier(c); c= getchar())
465 s+= c;
466 ungetchar(c);
467 return Token(TokenTIdentifier, s, linenum, name);
468 }
469 }
470 return Token(TokenTOperator, s, linenum, name);
02bdd61 @NotFound First published release
authored
471 }
472
473 Token Tokenizer::get ()
474 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored
475 Token t;
476 while ((t= getany () ).isspace())
477 continue;
478 return t;
02bdd61 @NotFound First published release
authored
479 }
480
481 // End of token.cpp
Something went wrong with that request. Please try again.