From 2ec96202ce9b287724c01e10e9ad7ceebb7bd563 Mon Sep 17 00:00:00 2001 From: Jason Frey Date: Mon, 14 Jan 2013 21:10:55 -0500 Subject: [PATCH] Moved ScalarScanner Ragel state machine from Ruby to C. --- ext/psych/psych.c | 1 + ext/psych/psych.h | 1 + ext/psych/psych_scalar_scanner.c | 490 +++++++++++++++++++++++++++ ext/psych/psych_scalar_scanner.h | 6 + ext/psych/psych_scalar_scanner.rl | 148 +++++++++ lib/psych/scalar_scanner.rb | 531 +----------------------------- lib/psych/scalar_scanner.rl | 275 ---------------- 7 files changed, 647 insertions(+), 805 deletions(-) create mode 100644 ext/psych/psych_scalar_scanner.c create mode 100644 ext/psych/psych_scalar_scanner.h create mode 100644 ext/psych/psych_scalar_scanner.rl delete mode 100644 lib/psych/scalar_scanner.rl diff --git a/ext/psych/psych.c b/ext/psych/psych.c index 69ff1d8d..e7f5a903 100644 --- a/ext/psych/psych.c +++ b/ext/psych/psych.c @@ -28,6 +28,7 @@ void Init_psych() Init_psych_parser(); Init_psych_emitter(); + Init_psych_scalar_scanner(); Init_psych_to_ruby(); Init_psych_yaml_tree(); } diff --git a/ext/psych/psych.h b/ext/psych/psych.h index 1830ca4b..4d82a3d4 100644 --- a/ext/psych/psych.h +++ b/ext/psych/psych.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/ext/psych/psych_scalar_scanner.c b/ext/psych/psych_scalar_scanner.c new file mode 100644 index 00000000..c2393759 --- /dev/null +++ b/ext/psych/psych_scalar_scanner.c @@ -0,0 +1,490 @@ + +#line 1 "ext/psych/psych_scalar_scanner.rl" +/* + * THIS FILE IS AUTOMATICALLY GENERATED. 
EDIT psych_scalar_scanner.rl INSTEAD + * + * To compile: ragel psych_scalar_scanner.rl + * To generate svg: ragel -V -p psych_scalar_scanner.rl | dot -Tsvg -o psych_scalar_scanner.svg + */ + + +#line 12 "ext/psych/psych_scalar_scanner.c" +static const char _psych_scalar_scanner_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 1, 5, 1, 6, 1, + 7, 1, 8, 1, 9, 1, 10, 1, + 11, 1, 12, 1, 13, 1, 14, 1, + 15, 1, 16 +}; + +static const short _psych_scalar_scanner_key_offsets[] = { + 0, 0, 18, 22, 24, 26, 28, 29, + 30, 31, 36, 42, 46, 49, 57, 59, + 60, 61, 62, 66, 69, 74, 78, 81, + 82, 84, 86, 87, 89, 91, 96, 98, + 100, 102, 104, 108, 109, 114, 115, 117, + 123, 126, 133, 139, 145, 147, 149, 150, + 151, 152, 153, 154, 155, 159, 160, 161, + 162, 163, 167, 168, 169, 171, 172, 173, + 174, 175, 177, 178, 179, 180, 182, 184, + 185, 186, 192, 196, 198, 198, 208, 216, + 220, 223, 225, 228, 236, 242, 247, 255, + 255, 265, 273, 281, 290, 296, 302, 305, + 308, 310, 310, 317, 321, 329, 335, 341, + 347, 353, 360, 360, 360, 360 +}; + +static const char _psych_scalar_scanner_trans_keys[] = { + 43, 45, 46, 48, 58, 70, 78, 79, + 84, 89, 102, 110, 111, 116, 121, 126, + 49, 57, 46, 48, 49, 57, 43, 45, + 48, 57, 78, 110, 70, 102, 110, 44, + 46, 95, 48, 57, 44, 46, 58, 95, + 48, 57, 48, 53, 54, 57, 95, 48, + 49, 44, 95, 48, 57, 65, 70, 97, + 102, 65, 97, 78, 97, 110, 48, 49, + 50, 57, 45, 48, 57, 51, 48, 50, + 52, 57, 9, 32, 48, 57, 58, 48, + 57, 58, 48, 57, 48, 57, 58, 48, + 57, 48, 57, 9, 32, 43, 45, 90, + 48, 57, 48, 57, 48, 57, 48, 57, + 9, 32, 84, 116, 45, 45, 48, 50, + 51, 57, 45, 48, 57, 9, 32, 84, + 116, 48, 57, 45, 48, 57, 44, 45, + 46, 58, 95, 48, 57, 44, 46, 58, + 95, 48, 57, 44, 46, 58, 95, 48, + 57, 34, 39, 65, 97, 76, 83, 69, + 108, 115, 101, 79, 85, 111, 117, 76, + 76, 108, 108, 70, 78, 102, 110, 70, + 102, 82, 114, 85, 69, 117, 101, 69, + 101, 83, 115, 97, 111, 117, 102, 110, + 114, 101, 69, 73, 101, 105, 48, 57, + 69, 101, 48, 57, 48, 57, 44, 46, + 58, 95, 98, 120, 48, 55, 
56, 57, + 44, 46, 58, 95, 48, 55, 56, 57, + 46, 58, 48, 57, 95, 48, 57, 46, + 58, 95, 48, 49, 44, 95, 48, 57, + 65, 70, 97, 102, 44, 46, 58, 95, + 48, 57, 44, 46, 95, 48, 57, 69, + 73, 78, 101, 105, 110, 48, 57, 44, + 46, 58, 95, 98, 120, 48, 55, 56, + 57, 44, 46, 58, 95, 48, 55, 56, + 57, 44, 46, 58, 95, 48, 55, 56, + 57, 44, 45, 46, 58, 95, 48, 55, + 56, 57, 9, 32, 84, 116, 48, 57, + 9, 32, 43, 45, 46, 90, 58, 48, + 57, 58, 48, 57, 48, 57, 9, 32, + 43, 45, 90, 48, 57, 9, 32, 84, + 116, 9, 32, 84, 116, 48, 49, 50, + 57, 9, 32, 84, 116, 48, 57, 44, + 46, 58, 95, 48, 57, 44, 46, 58, + 95, 48, 57, 44, 46, 58, 95, 48, + 57, 44, 45, 46, 58, 95, 48, 57, + 0 +}; + +static const char _psych_scalar_scanner_single_lengths[] = { + 0, 16, 2, 2, 0, 2, 1, 1, + 1, 3, 4, 0, 1, 2, 2, 1, + 1, 1, 2, 1, 1, 2, 1, 1, + 0, 0, 1, 0, 0, 5, 0, 0, + 0, 0, 4, 1, 1, 1, 0, 4, + 1, 5, 4, 4, 2, 2, 1, 1, + 1, 1, 1, 1, 4, 1, 1, 1, + 1, 4, 1, 1, 2, 1, 1, 1, + 1, 2, 1, 1, 1, 2, 2, 1, + 1, 4, 2, 0, 0, 6, 4, 2, + 1, 2, 1, 2, 4, 3, 6, 0, + 6, 4, 4, 5, 4, 6, 1, 1, + 0, 0, 5, 4, 4, 4, 4, 4, + 4, 5, 0, 0, 0, 0 +}; + +static const char _psych_scalar_scanner_range_lengths[] = { + 0, 1, 1, 0, 1, 0, 0, 0, + 0, 1, 1, 2, 1, 3, 0, 0, + 0, 0, 1, 1, 2, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, + 1, 1, 0, 0, 2, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 0, 2, 2, 1, + 1, 0, 1, 3, 1, 1, 1, 0, + 2, 2, 2, 2, 1, 0, 1, 1, + 1, 0, 1, 0, 2, 1, 1, 1, + 1, 1, 0, 0, 0, 0 +}; + +static const short _psych_scalar_scanner_index_offsets[] = { + 0, 0, 18, 22, 25, 27, 30, 32, + 34, 36, 41, 47, 50, 53, 59, 62, + 64, 66, 68, 72, 75, 79, 83, 86, + 88, 90, 92, 94, 96, 98, 104, 106, + 108, 110, 112, 117, 119, 123, 125, 127, + 133, 136, 143, 149, 155, 158, 161, 163, + 165, 167, 169, 171, 173, 178, 180, 182, + 184, 186, 191, 193, 195, 198, 200, 202, + 204, 206, 209, 211, 213, 215, 218, 221, + 223, 225, 231, 235, 237, 238, 247, 254, + 258, 261, 264, 267, 273, 279, 
284, 292, + 293, 302, 309, 316, 324, 330, 337, 340, + 343, 345, 346, 353, 358, 365, 371, 377, + 383, 389, 396, 397, 398, 399 +}; + +static const char _psych_scalar_scanner_indicies[] = { + 1, 1, 2, 3, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 17, 18, 19, 0, 20, 20, + 0, 21, 0, 22, 23, 0, 24, 0, + 24, 0, 23, 0, 25, 26, 25, 25, + 0, 25, 26, 28, 27, 27, 0, 29, + 30, 0, 31, 31, 0, 32, 32, 32, + 32, 32, 0, 33, 33, 0, 34, 0, + 35, 0, 34, 0, 36, 37, 38, 0, + 39, 40, 0, 42, 41, 43, 0, 44, + 44, 45, 0, 47, 46, 0, 47, 0, + 48, 0, 49, 0, 50, 0, 51, 0, + 52, 0, 53, 53, 54, 54, 55, 0, + 56, 0, 57, 0, 55, 0, 45, 0, + 44, 44, 58, 58, 0, 39, 0, 39, + 40, 59, 0, 60, 0, 61, 0, 44, + 44, 58, 58, 62, 0, 39, 59, 0, + 25, 63, 26, 28, 27, 27, 0, 25, + 26, 28, 27, 64, 0, 25, 26, 28, + 27, 65, 0, 67, 67, 66, 68, 69, + 0, 70, 0, 71, 0, 72, 0, 73, + 0, 74, 0, 72, 0, 72, 75, 72, + 76, 0, 77, 0, 16, 0, 78, 0, + 16, 0, 79, 80, 81, 80, 0, 72, + 0, 72, 0, 82, 83, 0, 84, 0, + 80, 0, 85, 0, 80, 0, 86, 87, + 0, 80, 0, 80, 0, 69, 0, 72, + 76, 0, 81, 80, 0, 83, 0, 87, + 0, 88, 89, 88, 90, 26, 0, 88, + 88, 26, 0, 21, 0, 0, 25, 26, + 28, 91, 92, 93, 91, 27, 0, 25, + 26, 28, 91, 91, 27, 0, 94, 28, + 30, 0, 94, 94, 0, 94, 28, 0, + 31, 31, 0, 32, 32, 32, 32, 32, + 0, 95, 26, 28, 19, 19, 0, 95, + 26, 95, 95, 0, 88, 89, 96, 88, + 90, 97, 26, 0, 0, 25, 26, 28, + 91, 92, 93, 98, 99, 0, 25, 26, + 28, 91, 100, 65, 0, 25, 26, 28, + 91, 101, 64, 0, 25, 63, 26, 28, + 91, 91, 27, 0, 44, 44, 58, 58, + 102, 0, 53, 53, 54, 54, 103, 55, + 0, 105, 104, 0, 105, 106, 0, 55, + 0, 0, 53, 53, 54, 54, 55, 103, + 0, 44, 44, 58, 58, 0, 44, 44, + 58, 58, 102, 62, 0, 44, 44, 58, + 58, 62, 0, 95, 26, 28, 19, 107, + 0, 95, 26, 28, 19, 108, 0, 95, + 26, 28, 19, 109, 0, 95, 63, 26, + 28, 19, 19, 0, 0, 0, 0, 0, + 0 +}; + +static const char _psych_scalar_scanner_trans_targs[] = { + 0, 2, 86, 88, 102, 44, 45, 52, + 57, 60, 65, 68, 69, 70, 71, 72, + 108, 73, 77, 84, 4, 75, 6, 7, + 76, 9, 74, 10, 11, 79, 81, 82, + 83, 15, 87, 
17, 19, 36, 40, 20, + 35, 92, 100, 101, 21, 22, 23, 24, + 25, 26, 27, 28, 93, 29, 30, 97, + 94, 32, 33, 37, 38, 39, 34, 18, + 41, 42, 106, 106, 46, 49, 47, 48, + 107, 50, 51, 53, 55, 54, 56, 58, + 109, 59, 61, 63, 62, 64, 66, 67, + 3, 5, 8, 78, 12, 13, 80, 85, + 14, 16, 89, 43, 90, 91, 99, 98, + 95, 31, 96, 103, 104, 105 +}; + +static const char _psych_scalar_scanner_trans_actions[] = { + 33, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 31, 29, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; + +static const char _psych_scalar_scanner_eof_actions[] = { + 0, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 17, 17, 17, 21, 11, 9, 15, + 19, 15, 7, 13, 11, 11, 17, 23, + 11, 9, 9, 9, 25, 27, 27, 27, + 27, 27, 27, 25, 25, 25, 11, 11, + 11, 11, 0, 5, 1, 3 +}; + +static const int psych_scalar_scanner_start = 1; +static const int psych_scalar_scanner_first_final = 73; +static const int psych_scalar_scanner_error = 0; + +static const int psych_scalar_scanner_en_main = 1; + + +#line 85 "ext/psych/psych_scalar_scanner.rl" + + + +#include + +VALUE cPsychScalarScanner; +ID id_parse_null_value; +ID id_parse_bool_true; +ID id_parse_bool_false; +ID id_parse_int_base_2; +ID id_parse_int_base_8; +ID id_parse_int_base_10; +ID id_parse_int_base_16; +ID id_parse_int_base_60; +ID id_parse_float_base_10; +ID id_parse_float_base_60; +ID id_parse_float_inf; +ID id_parse_float_nan; +ID id_parse_time_ymd; +ID id_parse_time_full; +ID id_parse_symbol_quoted; +ID id_parse_symbol_unquoted; +ID 
id_parse_string; + +static VALUE ss_raw_tokenize(VALUE self, VALUE string) +{ + int cs; + char* p = StringValuePtr(string); + char* pe = p + RSTRING_LEN(string); + char* eof = pe; + + +#line 283 "ext/psych/psych_scalar_scanner.c" + { + cs = psych_scalar_scanner_start; + } + +#line 117 "ext/psych/psych_scalar_scanner.rl" + +#line 290 "ext/psych/psych_scalar_scanner.c" + { + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _psych_scalar_scanner_trans_keys + _psych_scalar_scanner_key_offsets[cs]; + _trans = _psych_scalar_scanner_index_offsets[cs]; + + _klen = _psych_scalar_scanner_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = _psych_scalar_scanner_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + _trans = _psych_scalar_scanner_indicies[_trans]; + cs = _psych_scalar_scanner_trans_targs[_trans]; + + if ( _psych_scalar_scanner_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _psych_scalar_scanner_actions + _psych_scalar_scanner_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 14: 
+#line 27 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_symbol_quoted, 1, string)); } + break; + case 15: +#line 28 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_symbol_unquoted, 1, string)); } + break; + case 16: +#line 29 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_string, 1, string)); } + break; +#line 376 "ext/psych/psych_scalar_scanner.c" + } + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + const char *__acts = _psych_scalar_scanner_actions + _psych_scalar_scanner_eof_actions[cs]; + unsigned int __nacts = (unsigned int) *__acts++; + while ( __nacts-- > 0 ) { + switch ( *__acts++ ) { + case 0: +#line 13 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_null_value, 1, string)); } + break; + case 1: +#line 14 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_bool_true, 1, string)); } + break; + case 2: +#line 15 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_bool_false, 1, string)); } + break; + case 3: +#line 16 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_int_base_2, 1, string)); } + break; + case 4: +#line 17 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_int_base_8, 1, string)); } + break; + case 5: +#line 18 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_int_base_10, 1, string)); } + break; + case 6: +#line 19 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_int_base_16, 1, string)); } + break; + case 7: +#line 20 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_int_base_60, 1, string)); } + break; + case 8: +#line 21 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_float_base_10, 1, string)); } + break; + case 9: +#line 22 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, 
id_parse_float_base_60, 1, string)); } + break; + case 10: +#line 23 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_float_inf, 1, string)); } + break; + case 11: +#line 24 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_float_nan, 1, string)); } + break; + case 12: +#line 25 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_time_ymd, 1, string)); } + break; + case 13: +#line 26 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_time_full, 1, string)); } + break; + case 16: +#line 29 "ext/psych/psych_scalar_scanner.rl" + { return(rb_funcall(self, id_parse_string, 1, string)); } + break; +#line 452 "ext/psych/psych_scalar_scanner.c" + } + } + } + + _out: {} + } + +#line 118 "ext/psych/psych_scalar_scanner.rl" + + return string; +} + +void Init_psych_scalar_scanner() +{ + VALUE psych = rb_define_module("Psych"); + cPsychScalarScanner = rb_define_class_under(psych, "ScalarScanner", rb_cObject); + + rb_define_private_method(cPsychScalarScanner, "raw_tokenize", ss_raw_tokenize, 1); + + id_parse_null_value = rb_intern("parse_null_value"); + id_parse_bool_true = rb_intern("parse_bool_true"); + id_parse_bool_false = rb_intern("parse_bool_false"); + id_parse_int_base_2 = rb_intern("parse_int_base_2"); + id_parse_int_base_8 = rb_intern("parse_int_base_8"); + id_parse_int_base_10 = rb_intern("parse_int_base_10"); + id_parse_int_base_16 = rb_intern("parse_int_base_16"); + id_parse_int_base_60 = rb_intern("parse_int_base_60"); + id_parse_float_base_10 = rb_intern("parse_float_base_10"); + id_parse_float_base_60 = rb_intern("parse_float_base_60"); + id_parse_float_inf = rb_intern("parse_float_inf"); + id_parse_float_nan = rb_intern("parse_float_nan"); + id_parse_time_ymd = rb_intern("parse_time_ymd"); + id_parse_time_full = rb_intern("parse_time_full"); + id_parse_symbol_quoted = rb_intern("parse_symbol_quoted"); + id_parse_symbol_unquoted = rb_intern("parse_symbol_unquoted"); + 
id_parse_string = rb_intern("parse_string"); +} + +/* vim: set noet sws=4 sw=4: */ diff --git a/ext/psych/psych_scalar_scanner.h b/ext/psych/psych_scalar_scanner.h new file mode 100644 index 00000000..5f4082e9 --- /dev/null +++ b/ext/psych/psych_scalar_scanner.h @@ -0,0 +1,6 @@ +#ifndef PSYCH_SCALAR_SCANNER_H +#define PSYCH_SCALAR_SCANNER_H + +void Init_psych_scalar_scanner(); + +#endif /* PSYCH_SCALAR_SCANNER_H */ diff --git a/ext/psych/psych_scalar_scanner.rl b/ext/psych/psych_scalar_scanner.rl new file mode 100644 index 00000000..feb36bd5 --- /dev/null +++ b/ext/psych/psych_scalar_scanner.rl @@ -0,0 +1,148 @@ +/* + * THIS FILE IS AUTOMATICALLY GENERATED. EDIT psych_scalar_scanner.rl INSTEAD + * + * To compile: ragel -L psych_scalar_scanner.rl + * To generate svg: ragel -V -p psych_scalar_scanner.rl | dot -Tsvg -o psych_scalar_scanner.svg + */ + +%%{ + machine psych_scalar_scanner; + + ### + # Actions + action on_null_value { return(rb_funcall(self, id_parse_null_value, 1, string)); } + action on_bool_true { return(rb_funcall(self, id_parse_bool_true, 1, string)); } + action on_bool_false { return(rb_funcall(self, id_parse_bool_false, 1, string)); } + action on_int_base_2 { return(rb_funcall(self, id_parse_int_base_2, 1, string)); } + action on_int_base_8 { return(rb_funcall(self, id_parse_int_base_8, 1, string)); } + action on_int_base_10 { return(rb_funcall(self, id_parse_int_base_10, 1, string)); } + action on_int_base_16 { return(rb_funcall(self, id_parse_int_base_16, 1, string)); } + action on_int_base_60 { return(rb_funcall(self, id_parse_int_base_60, 1, string)); } + action on_float_base_10 { return(rb_funcall(self, id_parse_float_base_10, 1, string)); } + action on_float_base_60 { return(rb_funcall(self, id_parse_float_base_60, 1, string)); } + action on_float_inf { return(rb_funcall(self, id_parse_float_inf, 1, string)); } + action on_float_nan { return(rb_funcall(self, id_parse_float_nan, 1, string)); } + action on_time_ymd { return(rb_funcall(self, id_parse_time_ymd, 1, string)); } + action 
on_time_full { return(rb_funcall(self, id_parse_time_full, 1, string)); } + action on_symbol_quoted { return(rb_funcall(self, id_parse_symbol_quoted, 1, string)); } + action on_symbol_unquoted { return(rb_funcall(self, id_parse_symbol_unquoted, 1, string)); } + action on_string { return(rb_funcall(self, id_parse_string, 1, string)); } + + ### + # Null - http://yaml.org/type/null.html + null_value = ('~'|'null'|'Null'|'NULL') %on_null_value ; + + ### + # Boolean - http://yaml.org/type/bool.html + bool_true = ('yes'|'Yes'|'YES'|'true'|'True'|'TRUE'|'on'|'On'|'ON') %on_bool_true ; # NOTE: 'y'|'Y' removed from YAML spec + bool_false = ('no'|'No'|'NO'|'false'|'False'|'FALSE'|'off'|'Off'|'OFF') %on_bool_false ; # NOTE: 'n'|'N' removed from YAML spec + + bool = bool_true | bool_false ; + + ### + # Integer - http://yaml.org/type/int.html + int_base_2 = [\-+]?'0b'[0-1_]+ %on_int_base_2 ; + int_base_8 = [\-+]?'0'[0-7_]+ %on_int_base_8 ; + int_base_10 = [\-+]?('0'|[1-9][0-9_,]*) %on_int_base_10 ; # NOTE: comma not in YAML spec + int_base_16 = [\-+]?'0x'[0-9a-fA-F_,]+ %on_int_base_16 ; # NOTE: comma not in YAML spec + int_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+ %on_int_base_60 ; # NOTE: YAML spec has leading digit as [1-9] + + int = int_base_2 | int_base_8 | int_base_10 | int_base_16 | int_base_60 ; + + ### + # Float - http://yaml.org/type/float.html + float_base_10 = [\-+]?([0-9][0-9_,]*)?'.'[0-9]*([eE][\-+][0-9]+)? %on_float_base_10 ; # NOTE: comma not in YAML spec; dot removed from [0-9.] 
after initial decimal point + float_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+'.'[0-9_]* %on_float_base_60 ; + float_inf = [\-+]?'.'('inf'|'Inf'|'INF') %on_float_inf ; + float_nan = '.'('nan'|'NaN'|'NAN') %on_float_nan ; + + float = float_base_10 | float_base_60 | float_inf | float_nan ; + + ### + # Time - http://yaml.org/type/timestamp.html + time_ymd = [0-9][0-9][0-9][0-9]'-'('1'[012]|'0'[0-9]|[0-9])'-'([12][0-9]|'3'[01]|'0'[0-9]|[0-9]) %on_time_ymd ; + # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]'-'[0-9][0-9] + # - Individual date portions were made more explicit + + time_full = [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?([ \t]*('Z'|[\-+][0-9][0-9]?(':'?[0-9][0-9])?))? %on_time_full ; + # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?(([ \t]*)'Z'|sign[0-9][0-9]?(':'[0-9][0-9])?)? %on_time_full ; + # - Spacing before time zone was moved to not only be before Z, but to also + # be before the numeric time zone. 
+ # - Colon in time zone made optional + + time = time_ymd | time_full; + + ### + # Symbol + symbol_quoted = ':'['"] @on_symbol_quoted ; #' + symbol_unquoted = ':'[^'"] @on_symbol_unquoted ; #' + + symbol = symbol_quoted | symbol_unquoted ; + + + main := (null_value | bool | int | float | time | symbol) $!on_string ; + write data; +}%% + + +#include + +VALUE cPsychScalarScanner; +ID id_parse_null_value; +ID id_parse_bool_true; +ID id_parse_bool_false; +ID id_parse_int_base_2; +ID id_parse_int_base_8; +ID id_parse_int_base_10; +ID id_parse_int_base_16; +ID id_parse_int_base_60; +ID id_parse_float_base_10; +ID id_parse_float_base_60; +ID id_parse_float_inf; +ID id_parse_float_nan; +ID id_parse_time_ymd; +ID id_parse_time_full; +ID id_parse_symbol_quoted; +ID id_parse_symbol_unquoted; +ID id_parse_string; + +static VALUE ss_raw_tokenize(VALUE self, VALUE string) +{ + int cs; + char* p = StringValuePtr(string); + char* pe = p + RSTRING_LEN(string); + char* eof = pe; + + %% write init; + %% write exec; + + return string; +} + +void Init_psych_scalar_scanner() +{ + VALUE psych = rb_define_module("Psych"); + cPsychScalarScanner = rb_define_class_under(psych, "ScalarScanner", rb_cObject); + + rb_define_private_method(cPsychScalarScanner, "raw_tokenize", ss_raw_tokenize, 1); + + id_parse_null_value = rb_intern("parse_null_value"); + id_parse_bool_true = rb_intern("parse_bool_true"); + id_parse_bool_false = rb_intern("parse_bool_false"); + id_parse_int_base_2 = rb_intern("parse_int_base_2"); + id_parse_int_base_8 = rb_intern("parse_int_base_8"); + id_parse_int_base_10 = rb_intern("parse_int_base_10"); + id_parse_int_base_16 = rb_intern("parse_int_base_16"); + id_parse_int_base_60 = rb_intern("parse_int_base_60"); + id_parse_float_base_10 = rb_intern("parse_float_base_10"); + id_parse_float_base_60 = rb_intern("parse_float_base_60"); + id_parse_float_inf = rb_intern("parse_float_inf"); + id_parse_float_nan = rb_intern("parse_float_nan"); + id_parse_time_ymd = 
rb_intern("parse_time_ymd"); + id_parse_time_full = rb_intern("parse_time_full"); + id_parse_symbol_quoted = rb_intern("parse_symbol_quoted"); + id_parse_symbol_unquoted = rb_intern("parse_symbol_unquoted"); + id_parse_string = rb_intern("parse_string"); +} + +/* vim: set noet sws=4 sw=4: */ diff --git a/lib/psych/scalar_scanner.rb b/lib/psych/scalar_scanner.rb index 78c8bbd1..6c096a8e 100644 --- a/lib/psych/scalar_scanner.rb +++ b/lib/psych/scalar_scanner.rb @@ -1,316 +1,7 @@ - -# -# THIS FILE IS AUTOMATICALLY GENERATED. EDIT scalar_scanner.rl INSTEAD -# -# To compile: ragel -R -L scalar_scanner.rl -# To generate svg: ragel -R -V -p scalar_scanner.rl | dot -Tsvg -o scalar_scanner.svg -# - -=begin - - -=end - module Psych ### # Scan scalars for built in types class ScalarScanner - ### - # START OF SCANNER DATA STRUCTURES - -class << self - attr_accessor :_yaml_scalar_scanner_actions - private :_yaml_scalar_scanner_actions, :_yaml_scalar_scanner_actions= -end -self._yaml_scalar_scanner_actions = [ - 0, 1, 0, 1, 1, 1, 2, 1, - 3, 1, 4, 1, 5, 1, 6, 1, - 7, 1, 8, 1, 9, 1, 10, 1, - 11, 1, 12, 1, 13, 1, 14, 1, - 15, 1, 16 -] - -class << self - attr_accessor :_yaml_scalar_scanner_key_offsets - private :_yaml_scalar_scanner_key_offsets, :_yaml_scalar_scanner_key_offsets= -end -self._yaml_scalar_scanner_key_offsets = [ - 0, 0, 18, 22, 24, 26, 28, 29, - 30, 31, 36, 42, 46, 49, 57, 59, - 60, 61, 62, 66, 69, 74, 78, 81, - 82, 84, 86, 87, 89, 91, 96, 98, - 100, 102, 104, 108, 109, 114, 115, 117, - 123, 126, 133, 139, 145, 147, 149, 150, - 151, 152, 153, 154, 155, 159, 160, 161, - 162, 163, 167, 168, 169, 171, 172, 173, - 174, 175, 177, 178, 179, 180, 182, 184, - 185, 186, 192, 196, 198, 198, 208, 216, - 220, 223, 225, 228, 236, 242, 247, 255, - 255, 265, 273, 281, 290, 296, 302, 305, - 308, 310, 310, 317, 321, 329, 335, 341, - 347, 353, 360, 360, 360, 360 -] - -class << self - attr_accessor :_yaml_scalar_scanner_trans_keys - private :_yaml_scalar_scanner_trans_keys, 
:_yaml_scalar_scanner_trans_keys= -end -self._yaml_scalar_scanner_trans_keys = [ - 43, 45, 46, 48, 58, 70, 78, 79, - 84, 89, 102, 110, 111, 116, 121, 126, - 49, 57, 46, 48, 49, 57, 43, 45, - 48, 57, 78, 110, 70, 102, 110, 44, - 46, 95, 48, 57, 44, 46, 58, 95, - 48, 57, 48, 53, 54, 57, 95, 48, - 49, 44, 95, 48, 57, 65, 70, 97, - 102, 65, 97, 78, 97, 110, 48, 49, - 50, 57, 45, 48, 57, 51, 48, 50, - 52, 57, 9, 32, 48, 57, 58, 48, - 57, 58, 48, 57, 48, 57, 58, 48, - 57, 48, 57, 9, 32, 43, 45, 90, - 48, 57, 48, 57, 48, 57, 48, 57, - 9, 32, 84, 116, 45, 45, 48, 50, - 51, 57, 45, 48, 57, 9, 32, 84, - 116, 48, 57, 45, 48, 57, 44, 45, - 46, 58, 95, 48, 57, 44, 46, 58, - 95, 48, 57, 44, 46, 58, 95, 48, - 57, 34, 39, 65, 97, 76, 83, 69, - 108, 115, 101, 79, 85, 111, 117, 76, - 76, 108, 108, 70, 78, 102, 110, 70, - 102, 82, 114, 85, 69, 117, 101, 69, - 101, 83, 115, 97, 111, 117, 102, 110, - 114, 101, 69, 73, 101, 105, 48, 57, - 69, 101, 48, 57, 48, 57, 44, 46, - 58, 95, 98, 120, 48, 55, 56, 57, - 44, 46, 58, 95, 48, 55, 56, 57, - 46, 58, 48, 57, 95, 48, 57, 46, - 58, 95, 48, 49, 44, 95, 48, 57, - 65, 70, 97, 102, 44, 46, 58, 95, - 48, 57, 44, 46, 95, 48, 57, 69, - 73, 78, 101, 105, 110, 48, 57, 44, - 46, 58, 95, 98, 120, 48, 55, 56, - 57, 44, 46, 58, 95, 48, 55, 56, - 57, 44, 46, 58, 95, 48, 55, 56, - 57, 44, 45, 46, 58, 95, 48, 55, - 56, 57, 9, 32, 84, 116, 48, 57, - 9, 32, 43, 45, 46, 90, 58, 48, - 57, 58, 48, 57, 48, 57, 9, 32, - 43, 45, 90, 48, 57, 9, 32, 84, - 116, 9, 32, 84, 116, 48, 49, 50, - 57, 9, 32, 84, 116, 48, 57, 44, - 46, 58, 95, 48, 57, 44, 46, 58, - 95, 48, 57, 44, 46, 58, 95, 48, - 57, 44, 45, 46, 58, 95, 48, 57, - 0 -] - -class << self - attr_accessor :_yaml_scalar_scanner_single_lengths - private :_yaml_scalar_scanner_single_lengths, :_yaml_scalar_scanner_single_lengths= -end -self._yaml_scalar_scanner_single_lengths = [ - 0, 16, 2, 2, 0, 2, 1, 1, - 1, 3, 4, 0, 1, 2, 2, 1, - 1, 1, 2, 1, 1, 2, 1, 1, - 0, 0, 1, 0, 0, 5, 0, 0, - 0, 0, 4, 1, 1, 1, 0, 4, - 1, 
5, 4, 4, 2, 2, 1, 1, - 1, 1, 1, 1, 4, 1, 1, 1, - 1, 4, 1, 1, 2, 1, 1, 1, - 1, 2, 1, 1, 1, 2, 2, 1, - 1, 4, 2, 0, 0, 6, 4, 2, - 1, 2, 1, 2, 4, 3, 6, 0, - 6, 4, 4, 5, 4, 6, 1, 1, - 0, 0, 5, 4, 4, 4, 4, 4, - 4, 5, 0, 0, 0, 0 -] - -class << self - attr_accessor :_yaml_scalar_scanner_range_lengths - private :_yaml_scalar_scanner_range_lengths, :_yaml_scalar_scanner_range_lengths= -end -self._yaml_scalar_scanner_range_lengths = [ - 0, 1, 1, 0, 1, 0, 0, 0, - 0, 1, 1, 2, 1, 3, 0, 0, - 0, 0, 1, 1, 2, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, - 1, 1, 0, 0, 2, 0, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 0, 2, 2, 1, - 1, 0, 1, 3, 1, 1, 1, 0, - 2, 2, 2, 2, 1, 0, 1, 1, - 1, 0, 1, 0, 2, 1, 1, 1, - 1, 1, 0, 0, 0, 0 -] - -class << self - attr_accessor :_yaml_scalar_scanner_index_offsets - private :_yaml_scalar_scanner_index_offsets, :_yaml_scalar_scanner_index_offsets= -end -self._yaml_scalar_scanner_index_offsets = [ - 0, 0, 18, 22, 25, 27, 30, 32, - 34, 36, 41, 47, 50, 53, 59, 62, - 64, 66, 68, 72, 75, 79, 83, 86, - 88, 90, 92, 94, 96, 98, 104, 106, - 108, 110, 112, 117, 119, 123, 125, 127, - 133, 136, 143, 149, 155, 158, 161, 163, - 165, 167, 169, 171, 173, 178, 180, 182, - 184, 186, 191, 193, 195, 198, 200, 202, - 204, 206, 209, 211, 213, 215, 218, 221, - 223, 225, 231, 235, 237, 238, 247, 254, - 258, 261, 264, 267, 273, 279, 284, 292, - 293, 302, 309, 316, 324, 330, 337, 340, - 343, 345, 346, 353, 358, 365, 371, 377, - 383, 389, 396, 397, 398, 399 -] - -class << self - attr_accessor :_yaml_scalar_scanner_indicies - private :_yaml_scalar_scanner_indicies, :_yaml_scalar_scanner_indicies= -end -self._yaml_scalar_scanner_indicies = [ - 1, 1, 2, 3, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, - 4, 0, 17, 18, 19, 0, 20, 20, - 0, 21, 0, 22, 23, 0, 24, 0, - 24, 0, 23, 0, 25, 26, 25, 25, - 0, 25, 26, 28, 27, 27, 0, 29, - 30, 0, 31, 31, 0, 32, 32, 32, - 32, 32, 0, 33, 33, 0, 34, 0, - 35, 0, 34, 0, 36, 37, 38, 0, 
- 39, 40, 0, 42, 41, 43, 0, 44, - 44, 45, 0, 47, 46, 0, 47, 0, - 48, 0, 49, 0, 50, 0, 51, 0, - 52, 0, 53, 53, 54, 54, 55, 0, - 56, 0, 57, 0, 55, 0, 45, 0, - 44, 44, 58, 58, 0, 39, 0, 39, - 40, 59, 0, 60, 0, 61, 0, 44, - 44, 58, 58, 62, 0, 39, 59, 0, - 25, 63, 26, 28, 27, 27, 0, 25, - 26, 28, 27, 64, 0, 25, 26, 28, - 27, 65, 0, 67, 67, 66, 68, 69, - 0, 70, 0, 71, 0, 72, 0, 73, - 0, 74, 0, 72, 0, 72, 75, 72, - 76, 0, 77, 0, 16, 0, 78, 0, - 16, 0, 79, 80, 81, 80, 0, 72, - 0, 72, 0, 82, 83, 0, 84, 0, - 80, 0, 85, 0, 80, 0, 86, 87, - 0, 80, 0, 80, 0, 69, 0, 72, - 76, 0, 81, 80, 0, 83, 0, 87, - 0, 88, 89, 88, 90, 26, 0, 88, - 88, 26, 0, 21, 0, 0, 25, 26, - 28, 91, 92, 93, 91, 27, 0, 25, - 26, 28, 91, 91, 27, 0, 94, 28, - 30, 0, 94, 94, 0, 94, 28, 0, - 31, 31, 0, 32, 32, 32, 32, 32, - 0, 95, 26, 28, 19, 19, 0, 95, - 26, 95, 95, 0, 88, 89, 96, 88, - 90, 97, 26, 0, 0, 25, 26, 28, - 91, 92, 93, 98, 99, 0, 25, 26, - 28, 91, 100, 65, 0, 25, 26, 28, - 91, 101, 64, 0, 25, 63, 26, 28, - 91, 91, 27, 0, 44, 44, 58, 58, - 102, 0, 53, 53, 54, 54, 103, 55, - 0, 105, 104, 0, 105, 106, 0, 55, - 0, 0, 53, 53, 54, 54, 55, 103, - 0, 44, 44, 58, 58, 0, 44, 44, - 58, 58, 102, 62, 0, 44, 44, 58, - 58, 62, 0, 95, 26, 28, 19, 107, - 0, 95, 26, 28, 19, 108, 0, 95, - 26, 28, 19, 109, 0, 95, 63, 26, - 28, 19, 19, 0, 0, 0, 0, 0, - 0 -] - -class << self - attr_accessor :_yaml_scalar_scanner_trans_targs - private :_yaml_scalar_scanner_trans_targs, :_yaml_scalar_scanner_trans_targs= -end -self._yaml_scalar_scanner_trans_targs = [ - 0, 2, 86, 88, 102, 44, 45, 52, - 57, 60, 65, 68, 69, 70, 71, 72, - 108, 73, 77, 84, 4, 75, 6, 7, - 76, 9, 74, 10, 11, 79, 81, 82, - 83, 15, 87, 17, 19, 36, 40, 20, - 35, 92, 100, 101, 21, 22, 23, 24, - 25, 26, 27, 28, 93, 29, 30, 97, - 94, 32, 33, 37, 38, 39, 34, 18, - 41, 42, 106, 106, 46, 49, 47, 48, - 107, 50, 51, 53, 55, 54, 56, 58, - 109, 59, 61, 63, 62, 64, 66, 67, - 3, 5, 8, 78, 12, 13, 80, 85, - 14, 16, 89, 43, 90, 91, 99, 98, - 95, 31, 96, 103, 104, 105 -] - -class 
<< self - attr_accessor :_yaml_scalar_scanner_trans_actions - private :_yaml_scalar_scanner_trans_actions, :_yaml_scalar_scanner_trans_actions= -end -self._yaml_scalar_scanner_trans_actions = [ - 33, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 31, 29, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 -] - -class << self - attr_accessor :_yaml_scalar_scanner_eof_actions - private :_yaml_scalar_scanner_eof_actions, :_yaml_scalar_scanner_eof_actions= -end -self._yaml_scalar_scanner_eof_actions = [ - 0, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, - 33, 17, 17, 17, 21, 11, 9, 15, - 19, 15, 7, 13, 11, 11, 17, 23, - 11, 9, 9, 9, 25, 27, 27, 27, - 27, 27, 27, 25, 25, 25, 11, 11, - 11, 11, 0, 5, 1, 3 -] - -class << self - attr_accessor :yaml_scalar_scanner_start -end -self.yaml_scalar_scanner_start = 1; -class << self - attr_accessor :yaml_scalar_scanner_first_final -end -self.yaml_scalar_scanner_first_final = 73; -class << self - attr_accessor :yaml_scalar_scanner_error -end -self.yaml_scalar_scanner_error = 0; - -class << self - attr_accessor :yaml_scalar_scanner_en_main -end -self.yaml_scalar_scanner_en_main = 1; - - - # %% this just fixes our syntax highlighting... - # END OF SCANNER DATA STRUCTURES - ### - def initialize @string_cache = {} @symbol_cache = {} @@ -320,203 +11,7 @@ def tokenize string return nil if string.empty? 
return string if @string_cache.key?(string) return @symbol_cache[string] if @symbol_cache.key?(string) - - data = string_to_data(string) - eof = data.length - - ### - # START OF SCANNER INITIALIZATION - -begin - p ||= 0 - pe ||= data.length - cs = yaml_scalar_scanner_start -end - - # %% this just fixes our syntax highlighting... - # END OF SCANNER INITIALIZATION - ### - - ### - # START OF SCANNER EXECUTION - -begin - _klen, _trans, _keys, _acts, _nacts = nil - _goto_level = 0 - _resume = 10 - _eof_trans = 15 - _again = 20 - _test_eof = 30 - _out = 40 - while true - _trigger_goto = false - if _goto_level <= 0 - if p == pe - _goto_level = _test_eof - next - end - if cs == 0 - _goto_level = _out - next - end - end - if _goto_level <= _resume - _keys = _yaml_scalar_scanner_key_offsets[cs] - _trans = _yaml_scalar_scanner_index_offsets[cs] - _klen = _yaml_scalar_scanner_single_lengths[cs] - _break_match = false - - begin - if _klen > 0 - _lower = _keys - _upper = _keys + _klen - 1 - - loop do - break if _upper < _lower - _mid = _lower + ( (_upper - _lower) >> 1 ) - - if data[p].ord < _yaml_scalar_scanner_trans_keys[_mid] - _upper = _mid - 1 - elsif data[p].ord > _yaml_scalar_scanner_trans_keys[_mid] - _lower = _mid + 1 - else - _trans += (_mid - _keys) - _break_match = true - break - end - end # loop - break if _break_match - _keys += _klen - _trans += _klen - end - _klen = _yaml_scalar_scanner_range_lengths[cs] - if _klen > 0 - _lower = _keys - _upper = _keys + (_klen << 1) - 2 - loop do - break if _upper < _lower - _mid = _lower + (((_upper-_lower) >> 1) & ~1) - if data[p].ord < _yaml_scalar_scanner_trans_keys[_mid] - _upper = _mid - 2 - elsif data[p].ord > _yaml_scalar_scanner_trans_keys[_mid+1] - _lower = _mid + 2 - else - _trans += ((_mid - _keys) >> 1) - _break_match = true - break - end - end # loop - break if _break_match - _trans += _klen - end - end while false - _trans = _yaml_scalar_scanner_indicies[_trans] - cs = _yaml_scalar_scanner_trans_targs[_trans] - if 
_yaml_scalar_scanner_trans_actions[_trans] != 0 - _acts = _yaml_scalar_scanner_trans_actions[_trans] - _nacts = _yaml_scalar_scanner_actions[_acts] - _acts += 1 - while _nacts > 0 - _nacts -= 1 - _acts += 1 - case _yaml_scalar_scanner_actions[_acts - 1] -when 14 then - begin - return parse_symbol_quoted string end -when 15 then - begin - return parse_symbol_unquoted string end -when 16 then - begin - return parse_string string end - end # action switch - end - end - if _trigger_goto - next - end - end - if _goto_level <= _again - if cs == 0 - _goto_level = _out - next - end - p += 1 - if p != pe - _goto_level = _resume - next - end - end - if _goto_level <= _test_eof - if p == eof - __acts = _yaml_scalar_scanner_eof_actions[cs] - __nacts = _yaml_scalar_scanner_actions[__acts] - __acts += 1 - while __nacts > 0 - __nacts -= 1 - __acts += 1 - case _yaml_scalar_scanner_actions[__acts - 1] -when 0 then - begin - return parse_null_value string end -when 1 then - begin - return parse_bool_true string end -when 2 then - begin - return parse_bool_false string end -when 3 then - begin - return parse_int_base_2 string end -when 4 then - begin - return parse_int_base_8 string end -when 5 then - begin - return parse_int_base_10 string end -when 6 then - begin - return parse_int_base_16 string end -when 7 then - begin - return parse_int_base_60 string end -when 8 then - begin - return parse_float_base_10 string end -when 9 then - begin - return parse_float_base_60 string end -when 10 then - begin - return parse_float_inf string end -when 11 then - begin - return parse_float_nan string end -when 12 then - begin - return parse_time_ymd string end -when 13 then - begin - return parse_time_full string end -when 16 then - begin - return parse_string string end - end # eof action switch - end - if _trigger_goto - next - end -end - end - if _goto_level <= _out - break - end - end - end - - # %% this just fixes our syntax highlighting... 
- # END OF SCANNER EXECUTION - ### + raw_tokenize string end def parse_time string @@ -525,30 +20,6 @@ def parse_time string private - # Instance methods to expose scanner methods defined at the class level - def _yaml_scalar_scanner_actions; self.class.send(:_yaml_scalar_scanner_actions); end - def _yaml_scalar_scanner_key_offsets; self.class.send(:_yaml_scalar_scanner_key_offsets); end - def _yaml_scalar_scanner_trans_keys; self.class.send(:_yaml_scalar_scanner_trans_keys); end - def _yaml_scalar_scanner_single_lengths; self.class.send(:_yaml_scalar_scanner_single_lengths); end - def _yaml_scalar_scanner_range_lengths; self.class.send(:_yaml_scalar_scanner_range_lengths); end - def _yaml_scalar_scanner_index_offsets; self.class.send(:_yaml_scalar_scanner_index_offsets); end - def _yaml_scalar_scanner_indicies; self.class.send(:_yaml_scalar_scanner_indicies); end - def _yaml_scalar_scanner_trans_targs; self.class.send(:_yaml_scalar_scanner_trans_targs); end - def _yaml_scalar_scanner_trans_actions; self.class.send(:_yaml_scalar_scanner_trans_actions); end - def _yaml_scalar_scanner_eof_actions; self.class.send(:_yaml_scalar_scanner_eof_actions); end - def yaml_scalar_scanner_start; self.class.yaml_scalar_scanner_start; end - def yaml_scalar_scanner_first_final; self.class.yaml_scalar_scanner_first_final; end - def yaml_scalar_scanner_error; self.class.yaml_scalar_scanner_error; end - def yaml_scalar_scanner_en_main; self.class.yaml_scalar_scanner_en_main; end - - def data_to_string data, ts, te - data[ts..te].pack("c*") - end - - def string_to_data string - string.unpack("c*") - end - def parse_null_value string nil end diff --git a/lib/psych/scalar_scanner.rl b/lib/psych/scalar_scanner.rl deleted file mode 100644 index fbf659cc..00000000 --- a/lib/psych/scalar_scanner.rl +++ /dev/null @@ -1,275 +0,0 @@ -# -# THIS FILE IS AUTOMATICALLY GENERATED. 
EDIT scalar_scanner.rl INSTEAD -# -# To compile: ragel -R -L scalar_scanner.rl -# To generate svg: ragel -R -V -p scalar_scanner.rl | dot -Tsvg -o scalar_scanner.svg -# - -=begin -%%{ - machine yaml_scalar_scanner; - - ### - # Actions - action on_null_value { return parse_null_value string } - action on_bool_true { return parse_bool_true string } - action on_bool_false { return parse_bool_false string } - action on_int_base_2 { return parse_int_base_2 string } - action on_int_base_8 { return parse_int_base_8 string } - action on_int_base_10 { return parse_int_base_10 string } - action on_int_base_16 { return parse_int_base_16 string } - action on_int_base_60 { return parse_int_base_60 string } - action on_float_base_10 { return parse_float_base_10 string } - action on_float_base_60 { return parse_float_base_60 string } - action on_float_inf { return parse_float_inf string } - action on_float_nan { return parse_float_nan string } - action on_time_ymd { return parse_time_ymd string } - action on_time_full { return parse_time_full string } - action on_symbol_quoted { return parse_symbol_quoted string } - action on_symbol_unquoted { return parse_symbol_unquoted string } - action on_string { return parse_string string } - - ### - # Null - http://yaml.org/type/null.html - null_value = ('~'|'null'|'Null'|'NULL') %on_null_value ; - - ### - # Boolean - http://yaml.org/type/bool.html - bool_true = ('yes'|'Yes'|'YES'|'true'|'True'|'TRUE'|'on'|'On'|'ON') %on_bool_true ; # NOTE: 'y'|'Y' removed from YAML spec - bool_false = ('no'|'No'|'NO'|'false'|'False'|'FALSE'|'off'|'Off'|'OFF') %on_bool_false ; # NOTE: 'n'|'N' removed from YAML spec - - bool = bool_true | bool_false ; - - ### - # Integer - http://yaml.org/type/int.html - int_base_2 = [\-+]?'0b'[0-1_]+ %on_int_base_2 ; - int_base_8 = [\-+]?'0'[0-7_]+ %on_int_base_8 ; - int_base_10 = [\-+]?('0'|[1-9][0-9_,]*) %on_int_base_10 ; # NOTE: comma not in YAML spec - int_base_16 = [\-+]?'0x'[0-9a-fA-F_,]+ %on_int_base_16 ; # NOTE: 
comma not in YAML spec - int_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+ %on_int_base_60 ; # NOTE: YAML spec has leading digit as [1-9] - - int = int_base_2 | int_base_8 | int_base_10 | int_base_16 | int_base_60 ; - - ### - # Float - http://yaml.org/type/float.html - float_base_10 = [\-+]?([0-9][0-9_,]*)?'.'[0-9]*([eE][\-+][0-9]+)? %on_float_base_10 ; # NOTE: comma not in YAML spec; dot removed from [0-9.] after initial decimal point - float_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+'.'[0-9_]* %on_float_base_60 ; - float_inf = [\-+]?'.'('inf'|'Inf'|'INF') %on_float_inf ; - float_nan = '.'('nan'|'NaN'|'NAN') %on_float_nan ; - - float = float_base_10 | float_base_60 | float_inf | float_nan ; - - ### - # Time - http://yaml.org/type/timestamp.html - time_ymd = [0-9][0-9][0-9][0-9]'-'('1'[012]|'0'[0-9]|[0-9])'-'([12][0-9]|'3'[01]|'0'[0-9]|[0-9]) %on_time_ymd ; - # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]'-'[0-9][0-9] - # - Individual date portions were made more explicit - - time_full = [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?([ \t]*('Z'|[\-+][0-9][0-9]?(':'?[0-9][0-9])?))? %on_time_full ; - # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?(([ \t]*)'Z'|sign[0-9][0-9]?(':'[0-9][0-9])?)? %on_time_full ; - # - Spacing before time zone was moved to not only be before Z, but to also - # be before the numeric time zone. 
- # - Colon in time zone made optional - - time = time_ymd | time_full; - - ### - # Symbol - symbol_quoted = ':'['"] @on_symbol_quoted ; - symbol_unquoted = ':'[^'"] @on_symbol_unquoted ; - - symbol = symbol_quoted | symbol_unquoted ; - - - main := (null_value | bool | int | float | time | symbol) $!on_string ; -}%% -=end - -module Psych - ### - # Scan scalars for built in types - class ScalarScanner - ### - # START OF SCANNER DATA STRUCTURES - %% write data; - # %% this just fixes our syntax highlighting... - # END OF SCANNER DATA STRUCTURES - ### - - def initialize - @string_cache = {} - @symbol_cache = {} - end - - def tokenize string - return nil if string.empty? - return string if @string_cache.key?(string) - return @symbol_cache[string] if @symbol_cache.key?(string) - - data = string_to_data(string) - eof = data.length - - ### - # START OF SCANNER INITIALIZATION - %% write init; - # %% this just fixes our syntax highlighting... - # END OF SCANNER INITIALIZATION - ### - - ### - # START OF SCANNER EXECUTION - %% write exec; - # %% this just fixes our syntax highlighting... 
- # END OF SCANNER EXECUTION - ### - end - - def parse_time string - parse_time_full string - end - - private - - # Instance methods to expose scanner methods defined at the class level - def _yaml_scalar_scanner_actions; self.class.send(:_yaml_scalar_scanner_actions); end - def _yaml_scalar_scanner_key_offsets; self.class.send(:_yaml_scalar_scanner_key_offsets); end - def _yaml_scalar_scanner_trans_keys; self.class.send(:_yaml_scalar_scanner_trans_keys); end - def _yaml_scalar_scanner_single_lengths; self.class.send(:_yaml_scalar_scanner_single_lengths); end - def _yaml_scalar_scanner_range_lengths; self.class.send(:_yaml_scalar_scanner_range_lengths); end - def _yaml_scalar_scanner_index_offsets; self.class.send(:_yaml_scalar_scanner_index_offsets); end - def _yaml_scalar_scanner_indicies; self.class.send(:_yaml_scalar_scanner_indicies); end - def _yaml_scalar_scanner_trans_targs; self.class.send(:_yaml_scalar_scanner_trans_targs); end - def _yaml_scalar_scanner_trans_actions; self.class.send(:_yaml_scalar_scanner_trans_actions); end - def _yaml_scalar_scanner_eof_actions; self.class.send(:_yaml_scalar_scanner_eof_actions); end - def yaml_scalar_scanner_start; self.class.yaml_scalar_scanner_start; end - def yaml_scalar_scanner_first_final; self.class.yaml_scalar_scanner_first_final; end - def yaml_scalar_scanner_error; self.class.yaml_scalar_scanner_error; end - def yaml_scalar_scanner_en_main; self.class.yaml_scalar_scanner_en_main; end - - def data_to_string data, ts, te - data[ts..te].pack("c*") - end - - def string_to_data string - string.unpack("c*") - end - - def parse_null_value string - nil - end - - def parse_bool_true string - true - end - - def parse_bool_false string - false - end - - def parse_int_base_10 string - Integer(string.gsub(/[,_]/, '')) - rescue - parse_string string - end - alias parse_int_base_2 parse_int_base_10 - alias parse_int_base_8 parse_int_base_10 - alias parse_int_base_16 parse_int_base_10 - - def parse_int_base_60 string - i = 0 
- string.split(':').each_with_index do |n,e| - i += (n.to_i * 60 ** (e - 2).abs) - end - i - end - - def parse_float_base_10 string - Float(string.gsub(/[,_]|\.$/, '')) - rescue - parse_string string - end - - def parse_float_base_60 string - i = 0 - string.split(':').each_with_index do |n,e| - i += (n.to_f * 60 ** (e - 2).abs) - end - i - end - - def parse_float_inf string - if string[0] == '-' - -1 / 0.0 - else - 1 / 0.0 - end - end - - def parse_float_nan string - 0.0 / 0.0 - end - - def parse_time_ymd string - require 'date' - Date.strptime(string, '%Y-%m-%d') - rescue - parse_string string - end - - def parse_time_full string - date, time = *(string.split(/[ tT]/, 2)) - (yy, m, dd) = date.split('-').map { |x| x.to_i } - md = time.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/) - - (hh, mm, ss) = md[1].split(':').map { |x| x.to_i } - us = (md[2] ? Rational("0.#{md[2]}") : 0) * 1000000 - - time = Time.utc(yy, m, dd, hh, mm, ss, us) - - return time if 'Z' == md[3] - return Time.at(time.to_i, us) unless md[3] - - tz = md[3].match(/^([+\-]?\d{1,2})\:?(\d{1,2})?$/)[1..-1].compact.map { |digit| Integer(digit, 10) } - offset = tz.first * 3600 - - if offset < 0 - offset -= ((tz[1] || 0) * 60) - else - offset += ((tz[1] || 0) * 60) - end - - Time.at((time - offset).to_i, us) - rescue - parse_string string - end - - def parse_symbol_quoted string - last = string[-1] - return string unless last == '"' || last == "'" - @symbol_cache[string] = string[2..-2].to_sym - rescue - parse_string string - end - - def parse_symbol_unquoted string - @symbol_cache[string] = string[1..-1].to_sym - rescue - parse_string string - end - - def parse_string string - @string_cache[string] = true - string - end - end -end - -if $0 == __FILE__ - scanner = Psych::ScalarScanner.new - loop do - print "> " - string = gets.strip - break if string == "exit" - puts " => #{scanner.tokenize(string).inspect}" - end -end \ No newline at end of file