Skip to content
Newer
Older
100644 438 lines (394 sloc) 8.99 KB
02bdd61 @NotFound First published release
authored Oct 22, 2009
1 // token.cpp
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
2 // Revision 4-nov-2009
02bdd61 @NotFound First published release
authored Oct 22, 2009
3
4 #include "token.h"
5 #include "errors.h"
6
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
7 #include <sstream>
8
02bdd61 @NotFound First published release
authored Oct 22, 2009
9 //**********************************************************************
10
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored Oct 30, 2009
// Formats an integer as its decimal text representation.
static std::string tostring(int n)
{
    std::ostringstream formatter;
    formatter << n;
    const std::string text = formatter.str();
    return text;
}
17
18 //**********************************************************************
19
02bdd61 @NotFound First published release
authored Oct 22, 2009
20 Token::Token () :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
21 ttype(TokenTUnknown), ln(0)
02bdd61 @NotFound First published release
authored Oct 22, 2009
22 { }
23
24 Token::Token (TokenType tt) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
25 ttype(tt), ln(0)
02bdd61 @NotFound First published release
authored Oct 22, 2009
26 { }
27
28 Token::Token (TokenType tt, const std::string &file) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
29 ttype(tt), ln(0), filename(file)
02bdd61 @NotFound First published release
authored Oct 22, 2009
30 {
31 }
32
33 Token::Token (const std::string &ss, unsigned int linenum,
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
34 const std::string &file) :
35 ttype(TokenTUnknown), s(ss), ln(linenum), filename(file)
02bdd61 @NotFound First published release
authored Oct 22, 2009
36 { }
37
38 Token::Token (TokenType type, const std::string &ss, unsigned int linenum,
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
39 const std::string &file) :
40 ttype(type), s(ss), ln(linenum), filename(file)
02bdd61 @NotFound First published release
authored Oct 22, 2009
41 { }
42
1b6fdb5 @NotFound more cleaning of optimize functions
authored Oct 31, 2009
43 Token::Token (TokenType type, const std::string &ss, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
44 ttype(type), s(ss),
45 ln(base.linenum()), filename(base.file())
1b6fdb5 @NotFound more cleaning of optimize functions
authored Oct 31, 2009
46 { }
47
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored Oct 30, 2009
48 Token::Token (bool value, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
49 ttype(TokenTInteger), s(value ? "1" : "0"),
50 ln(base.linenum()), filename(base.file())
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored Oct 30, 2009
51 {
52 }
53
54 Token::Token (int value, const Token &base) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
55 ttype(TokenTInteger), s(tostring(value)),
56 ln(base.linenum()), filename(base.file())
c6b8de7 @NotFound some cleanup on subexpression optimizations, and optimize for == and !=
authored Oct 30, 2009
57 {
58 }
59
02bdd61 @NotFound First published release
authored Oct 22, 2009
60 bool Token::empty () const
61 { return s.empty(); }
62
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
63 int Token::getinteger() const
64 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
65 if (ttype == TokenTInteger)
66 {
67 std::istringstream iss(s);
68 int n;
69 iss >> n;
70 return n;
71 }
72 else
73 throw Expected("integer number", *this);
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
74 }
75
76 #if 1
02bdd61 @NotFound First published release
authored Oct 22, 2009
77 std::string Token::str() const
78 { return s; }
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
79 #endif
80
81 std::string Token::identifier() const
82 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
83 if (ttype == TokenTIdentifier)
84 return s;
85 else
86 throw Expected("identifier", *this);
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
87 }
88
89 std::string Token::pirliteralstring() const
90 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
91 switch (ttype) {
92 case TokenTSingleQuoted:
93 return '\'' + s + '\'';
94 case TokenTQuoted:
95 return unquote(s);
96 default:
97 throw Expected("literal string", *this);
98 }
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
99 }
02bdd61 @NotFound First published release
authored Oct 22, 2009
100
101 std::string Token::describe() const
102 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
103 switch (ttype) {
104 case TokenTEOF:
105 return "*End of file*";
106 case TokenTSingleQuoted:
107 return "'" + s + "'";
108 case TokenTQuoted:
109 return "\"" + unquote(s) + "\"";
110 case TokenTOperator:
111 return "'" + s + "'";
112 case TokenTUnknown:
113 return "*Unknown*";
114 default:
115 return s;
116 }
02bdd61 @NotFound First published release
authored Oct 22, 2009
117 }
118
119 unsigned int Token::linenum() const
120 { return ln; }
121
122 std::string Token::file() const
123 { return filename; }
124
125 bool Token::isidentifier() const
126 { return ttype == TokenTIdentifier; }
127
128 bool Token::isinteger() const
129 { return ttype == TokenTInteger; }
130
131 bool Token::issinglequoted() const
132 { return ttype == TokenTSingleQuoted; }
133
134 bool Token::isliteralstring() const
135 { return ttype == TokenTSingleQuoted || ttype == TokenTQuoted; }
136
7b6ddf3 @NotFound refactor some common token operations
authored Oct 26, 2009
137 bool Token::isop(const std::string &name) const
138 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
139 return ttype == TokenTOperator &&
140 s == name;
7b6ddf3 @NotFound refactor some common token operations
authored Oct 26, 2009
141 }
142
143 bool Token::isop(char name) const
144 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
145 return ttype == TokenTOperator &&
146 s.length() == 1 && s[0] == name;
7b6ddf3 @NotFound refactor some common token operations
authored Oct 26, 2009
147 }
148
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
149 bool Token::iskeyword(const std::string &name) const
150 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
151 return ttype == TokenTIdentifier &&
152 s == name;
2d6aa37 @NotFound less abuse of Token::str
authored Oct 28, 2009
153 }
154
02bdd61 @NotFound First published release
authored Oct 22, 2009
155 bool Token::isspace() const
156 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
157 return
158 ttype == TokenTComment ||
159 ttype == TokenTWhiteSpace ||
160 (ttype == TokenTUnknown &&
161 (s.empty() ||
162 s[0] == ' ' ||
163 s[0] == '\t' ||
164 s[0] == '\n'
165 ));
02bdd61 @NotFound First published release
authored Oct 22, 2009
166 }
167
168 //**********************************************************************
169
// True when c can begin an identifier: an underscore or a letter.
bool isidentifierstart(char c)
{
    if (c == '_')
        return true;
    // isalpha requires a value representable as unsigned char.
    return isalpha(static_cast<unsigned char>(c)) != 0;
}
174
// True when c can appear inside an identifier: an underscore, a
// letter or a digit.
bool isidentifier(char c)
{
    if (c == '_')
        return true;
    // isalnum requires a value representable as unsigned char.
    return isalnum(static_cast<unsigned char>(c)) != 0;
}
179
// Converts raw string contents into a double-quoted literal.
// Newline, tab, backslash and double quote are written as backslash
// escapes; every other byte is copied as is. When any byte is above
// 127 the literal gets the prefix utf8:unicode: in front of the
// opening quote.
std::string unquote (const std::string &s)
{
    std::string r;
    r.reserve(s.size());
    bool nonascii = false;
    for (size_t i = 0; i < s.size(); ++i)
    {
        const unsigned char c = s[i];
        if (c > 127)
            nonascii = true;
        switch (c)
        {
        case '\n':
            r += "\\n";
            break;
        case '\t':
            r += "\\t";
            break;
        case '\\':
            r += "\\\\";
            break;
        case '"':
            // An unescaped quote would end the generated literal early.
            r += "\\\"";
            break;
        default:
            r += static_cast<char>(c);
        }
    }
    return (nonascii ? "utf8:unicode:\"" : "\"") + r + "\"";
}
203
204 //**********************************************************************
205
206 Tokenizer::Tokenizer (std::istream &is_a, const char *filename) :
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
207 is (is_a),
208 name (std::string(filename ? filename : "(unknown)")),
209 ln(1),
210 unc('\0')
02bdd61 @NotFound First published release
authored Oct 22, 2009
211 {
212 }
213
214 char Tokenizer::getchar()
215 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
216 if (unc)
217 {
218 char c= unc;
219 unc = '\0';
220 return c;
221 }
222 else
223 {
224 char c= is.get();
225 if (is.eof())
226 c= '\0';
227 if (c == '\n')
228 ++ln;
229 return c;
230 }
02bdd61 @NotFound First published release
authored Oct 22, 2009
231 }
232
233 void Tokenizer::ungetchar(char c)
234 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
235 unc = c;
02bdd61 @NotFound First published release
authored Oct 22, 2009
236 }
237
238 std::string Tokenizer::quoted()
239 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
240 std::string s;
241 unsigned int line = ln;
242 char c;
243 while ((c= getchar()) && is && c != '"' && c != '\n')
244 {
245 if (c == '\\')
246 {
247 c= getchar();
248 switch(c)
249 {
250 case 'n':
251 s+= '\n';
252 break;
253 case 't':
254 s+= '\t';
255 break;
256 case '\\':
257 s+= '\\';
258 break;
259 }
260 }
261 else
262 s+= c;
263 }
264 if ((!is) || c != '"')
265 throw SyntaxError ("Unterminated string ", line);
266 return s;
02bdd61 @NotFound First published release
authored Oct 22, 2009
267 }
268
269 void Tokenizer::unget (const Token & t)
270 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
271 untoc.push_back(t);
02bdd61 @NotFound First published release
authored Oct 22, 2009
272 }
273
274 Token Tokenizer::getany ()
275 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
276 if (! untoc.empty () )
277 {
278 Token t(untoc.back());
279 untoc.pop_back();
280 return t;
281 }
282 char c = getchar();
283 while (is && (c == ' ' || c == '\t' || c == '\n'))
284 c= getchar();
285 if (is.eof())
286 return Token(TokenTEOF, name);
287
288 unsigned int linenum = ln;
289 std::string s(1, c);
290 switch (c) {
291 case '#':
292 c= getchar();
293 while (c != '\0' && c != '\n')
294 {
295 s+= c;
296 c= getchar();
297 }
298 return Token(TokenTComment, s, linenum, name);
299 case '/':
300 switch((c = getchar()))
301 {
302 case '/':
303 for (; (!is.eof()) && c != '\n'; c= getchar())
304 s+= c;
305 return Token(TokenTComment, s, linenum, name);
306 case '*':
307 s+= c;
308 c= getchar();
309 do
310 {
311 while (is && c != '*') {
312 s+= c;
313 c= getchar();
314 }
315 c= getchar();
316 } while (is && c != '/');
317 s+= c;
318 return Token(TokenTComment, s, linenum, name);
319 default:
320 ungetchar(c);
321 }
322 break;
323 case ':':
324 switch ((c= getchar()))
325 {
326 case ':':
327 s+= c;
328 break;
329 default:
330 ungetchar(c);
331 }
332 break;
333 case '<':
334 switch ((c= getchar()))
335 {
336 case '<': case '=':
337 s+= c;
338 break;
339 default:
340 ungetchar(c);
341 }
342 break;
343 case '=':
344 switch ((c= getchar()))
345 {
346 case ':': case '=':
347 s+= c;
348 break;
349 default:
350 ungetchar(c);
351 }
352 break;
353 case '&':
354 switch ((c= getchar()))
355 {
356 case '&': case '=':
357 s+= c;
358 break;
359 default:
360 ungetchar(c);
361 }
362 break;
363 case '|':
364 switch ((c= getchar()))
365 {
366 case '|': case '=':
367 s+= c;
368 break;
369 default:
370 ungetchar(c);
371 }
372 break;
373 case '\'':
374 s= std::string();
375 while ((c= getchar()) && is && c != '\'' && c != '\n')
376 s+= c;
377 if ((!is) || c != '\'')
378 throw SyntaxError("Unterminated string", linenum);
379 return Token(TokenTSingleQuoted, s, linenum, name);
380 case '"':
381 s= quoted ();
382 return Token(TokenTQuoted, s, linenum, name);
383 case '0': case '1': case '2': case '3': case '4':
384 case '5': case '6': case '7': case '8': case '9':
385 while ((c= getchar()) && is && c >= '0' && c <= '9')
386 s+= c;
387 if (is)
388 ungetchar(c);
389 return Token(TokenTInteger, s, linenum, name);
390 case '+':
391 switch ((c= getchar()))
02bdd61 @NotFound First published release
authored Oct 22, 2009
392 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
393 case '+': case '=':
394 s+= c;
395 break;
396 default:
397 ungetchar(c);
398 }
399 break;
400 case '-':
401 switch ((c= getchar()))
402 {
403 case '-': case '=':
404 s+= c;
405 break;
406 default:
407 ungetchar(c);
408 }
409 break;
410 case '!':
411 c= getchar();
412 if (c == '=')
413 s+= c;
414 else
415 ungetchar(c);
416 break;
417 default:
418 if (isidentifierstart(c))
419 {
420 for (c= getchar(); isidentifier(c); c= getchar())
421 s+= c;
422 ungetchar(c);
423 return Token(TokenTIdentifier, s, linenum, name);
424 }
425 }
426 return Token(TokenTOperator, s, linenum, name);
02bdd61 @NotFound First published release
authored Oct 22, 2009
427 }
428
429 Token Tokenizer::get ()
430 {
6fe994f @NotFound add -= operator and reformat token.cpp
authored Nov 4, 2009
431 Token t;
432 while ((t= getany () ).isspace())
433 continue;
434 return t;
02bdd61 @NotFound First published release
authored Oct 22, 2009
435 }
436
437 // End of token.cpp
Something went wrong with that request. Please try again.