From 93b6f3c7b88a66121b3106f98b4a648f8219084d Mon Sep 17 00:00:00 2001 From: Jason Frey Date: Mon, 14 Jan 2013 15:44:41 -0500 Subject: [PATCH] Replaced ScalarScanner with a Ragel state machine (without caching). --- lib/psych/scalar_scanner.rb | 720 +++++++++++++++++++++++++++++++----- lib/psych/scalar_scanner.rl | 267 +++++++++++++ 2 files changed, 886 insertions(+), 101 deletions(-) create mode 100644 lib/psych/scalar_scanner.rl diff --git a/lib/psych/scalar_scanner.rb b/lib/psych/scalar_scanner.rb index 57594599..58c80b85 100644 --- a/lib/psych/scalar_scanner.rb +++ b/lib/psych/scalar_scanner.rb @@ -1,122 +1,610 @@ -require 'strscan' + +# +# THIS FILE IS AUTOMATICALLY GENERATED. EDIT scalar_scanner.rl INSTEAD +# +# To compile: ragel -R -L scalar_scanner.rl +# To generate svg: ragel -R -V -p scalar_scanner.rl | dot -Tsvg -o scalar_scanner.svg +# + +=begin + + +=end module Psych ### # Scan scalars for built in types class ScalarScanner - # Taken from http://yaml.org/type/timestamp.html - TIME = /^\d{4}-\d{1,2}-\d{1,2}([Tt]|\s+)\d{1,2}:\d\d:\d\d(\.\d*)?(\s*Z|[-+]\d{1,2}(:\d\d)?)?/ + ### + # START OF SCANNER DATA STRUCTURES + +class << self + attr_accessor :_yaml_scalar_scanner_actions + private :_yaml_scalar_scanner_actions, :_yaml_scalar_scanner_actions= +end +self._yaml_scalar_scanner_actions = [ + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 1, 5, 1, 6, 1, + 7, 1, 8, 1, 9, 1, 10, 1, + 11, 1, 12, 1, 13, 1, 14, 1, + 15, 1, 16 +] - # Taken from http://yaml.org/type/float.html - FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10) - |[-+]?[0-9][0-9_,]*(:[0-5]?[0-9])+\.[0-9_]*(?# base 60) - |[-+]?\.(inf|Inf|INF)(?# infinity) - |\.(nan|NaN|NAN)(?# not a number))$/x +class << self + attr_accessor :_yaml_scalar_scanner_key_offsets + private :_yaml_scalar_scanner_key_offsets, :_yaml_scalar_scanner_key_offsets= +end +self._yaml_scalar_scanner_key_offsets = [ + 0, 0, 18, 22, 24, 26, 28, 29, + 30, 31, 36, 42, 46, 49, 57, 59, + 60, 61, 62, 66, 69, 74, 78, 81, + 82, 84, 86, 87, 89, 91, 96, 98, + 100, 102, 104, 108, 109, 114, 115, 117, + 123, 126, 133, 139, 145, 147, 149, 150, + 151, 152, 153, 154, 155, 159, 160, 161, + 162, 163, 167, 168, 169, 171, 172, 173, + 174, 175, 177, 178, 179, 180, 182, 184, + 185, 186, 192, 196, 198, 198, 208, 216, + 220, 223, 225, 228, 236, 242, 247, 255, + 255, 265, 273, 281, 290, 296, 302, 305, + 308, 310, 310, 317, 321, 329, 335, 341, + 347, 353, 360, 360, 360, 360 +] - # Taken from http://yaml.org/type/int.html - INTEGER = /^(?:[-+]?0b[0-1_]+ (?# base 2) - |[-+]?0[0-7_]+ (?# base 8) - |[-+]?(?:0|[1-9][0-9_]*) (?# base 10) - |[-+]?0x[0-9a-fA-F_]+ (?# base 16))$/x +class << self + attr_accessor :_yaml_scalar_scanner_trans_keys + private :_yaml_scalar_scanner_trans_keys, :_yaml_scalar_scanner_trans_keys= +end +self._yaml_scalar_scanner_trans_keys = [ + 43, 45, 46, 48, 58, 70, 78, 79, + 84, 89, 102, 110, 111, 116, 121, 126, + 49, 57, 46, 48, 49, 57, 43, 45, + 48, 57, 78, 110, 70, 102, 110, 44, + 46, 95, 48, 57, 44, 46, 58, 95, + 48, 57, 48, 53, 54, 57, 95, 48, + 49, 44, 95, 48, 57, 65, 70, 97, + 102, 65, 97, 78, 97, 110, 48, 49, + 50, 57, 45, 48, 57, 51, 48, 50, + 52, 57, 9, 32, 48, 57, 58, 48, + 57, 58, 48, 57, 48, 57, 58, 48, + 57, 48, 57, 9, 32, 43, 45, 90, + 48, 57, 48, 57, 48, 57, 48, 57, + 9, 32, 84, 116, 45, 45, 48, 50, + 51, 57, 45, 48, 57, 9, 32, 84, + 116, 48, 57, 45, 48, 57, 44, 45, + 46, 58, 95, 48, 57, 44, 46, 58, + 95, 48, 57, 44, 46, 58, 95, 48, + 57, 34, 39, 65, 97, 76, 83, 69, + 108, 115, 101, 79, 85, 111, 117, 76, + 76, 108, 108, 70, 78, 102, 110, 70, + 102, 82, 114, 85, 69, 117, 101, 69, + 101, 83, 115, 97, 111, 117, 102, 110, + 114, 101, 69, 73, 101, 105, 48, 57, + 69, 101, 48, 57, 48, 57, 44, 46, + 58, 95, 98, 120, 48, 55, 56, 57, + 44, 46, 58, 95, 48, 55, 56, 57, + 46, 58, 48, 57, 95, 48, 57, 46, + 58, 95, 48, 49, 44, 95, 48, 57, + 65, 70, 97, 102, 44, 46, 58, 95, + 48, 57, 44, 46, 95, 48, 57, 69, + 73, 78, 101, 105, 110, 48, 57, 44, + 46, 58, 95, 98, 120, 48, 55, 56, + 57, 44, 46, 58, 95, 48, 55, 56, + 57, 44, 46, 58, 95, 48, 55, 56, + 57, 44, 45, 46, 58, 95, 48, 55, + 56, 57, 9, 32, 84, 116, 48, 57, + 9, 32, 43, 45, 46, 90, 58, 48, + 57, 58, 48, 57, 48, 57, 9, 32, + 43, 45, 90, 48, 57, 9, 32, 84, + 116, 9, 32, 84, 116, 48, 49, 50, + 57, 9, 32, 84, 116, 48, 57, 44, + 46, 58, 95, 48, 57, 44, 46, 58, + 95, 48, 57, 44, 46, 58, 95, 48, + 57, 44, 45, 46, 58, 95, 48, 57, + 0 +] - # Create a new scanner - def initialize - @string_cache = {} - @symbol_cache = {} - end +class << self + attr_accessor :_yaml_scalar_scanner_single_lengths + private :_yaml_scalar_scanner_single_lengths, :_yaml_scalar_scanner_single_lengths= +end +self._yaml_scalar_scanner_single_lengths = [ + 0, 16, 2, 2, 0, 2, 1, 1, + 1, 3, 4, 0, 1, 2, 2, 1, + 1, 1, 2, 1, 1, 2, 1, 1, + 0, 0, 1, 0, 0, 5, 0, 0, + 0, 0, 4, 1, 1, 1, 0, 4, + 1, 5, 4, 4, 2, 2, 1, 1, + 1, 1, 1, 1, 4, 1, 1, 1, + 1, 4, 1, 1, 2, 1, 1, 1, + 1, 2, 1, 1, 1, 2, 2, 1, + 1, 4, 2, 0, 0, 6, 4, 2, + 1, 2, 1, 2, 4, 3, 6, 0, + 6, 4, 4, 5, 4, 6, 1, 1, + 0, 0, 5, 4, 4, 4, 4, 4, + 4, 5, 0, 0, 0, 0 +] + +class << self + attr_accessor :_yaml_scalar_scanner_range_lengths + private :_yaml_scalar_scanner_range_lengths, :_yaml_scalar_scanner_range_lengths= +end +self._yaml_scalar_scanner_range_lengths = [ + 0, 1, 1, 0, 1, 0, 0, 0, + 0, 1, 1, 2, 1, 3, 0, 0, + 0, 0, 1, 1, 2, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, + 1, 1, 0, 0, 2, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 0, 2, 2, 1, + 1, 0, 1, 3, 1, 1, 1, 0, + 2, 2, 2, 2, 1, 0, 1, 1, + 1, 0, 1, 0, 2, 1, 1, 1, + 1, 1, 0, 0, 0, 0 +] + +class << self + attr_accessor :_yaml_scalar_scanner_index_offsets + private :_yaml_scalar_scanner_index_offsets, :_yaml_scalar_scanner_index_offsets= +end +self._yaml_scalar_scanner_index_offsets = [ + 0, 0, 18, 22, 25, 27, 30, 32, + 34, 36, 41, 47, 50, 53, 59, 62, + 64, 66, 68, 72, 75, 79, 83, 86, + 88, 90, 92, 94, 96, 98, 104, 106, + 108, 110, 112, 117, 119, 123, 125, 127, + 133, 136, 143, 149, 155, 158, 161, 163, + 165, 167, 169, 171, 173, 178, 180, 182, + 184, 186, 191, 193, 195, 198, 200, 202, + 204, 206, 209, 211, 213, 215, 218, 221, + 223, 225, 231, 235, 237, 238, 247, 254, + 258, 261, 264, 267, 273, 279, 284, 292, + 293, 302, 309, 316, 324, 330, 337, 340, + 343, 345, 346, 353, 358, 365, 371, 377, + 383, 389, 396, 397, 398, 399 +] + +class << self + attr_accessor :_yaml_scalar_scanner_indicies + private :_yaml_scalar_scanner_indicies, :_yaml_scalar_scanner_indicies= +end +self._yaml_scalar_scanner_indicies = [ + 1, 1, 2, 3, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 17, 18, 19, 0, 20, 20, + 0, 21, 0, 22, 23, 0, 24, 0, + 24, 0, 23, 0, 25, 26, 25, 25, + 0, 25, 26, 28, 27, 27, 0, 29, + 30, 0, 31, 31, 0, 32, 32, 32, + 32, 32, 0, 33, 33, 0, 34, 0, + 35, 0, 34, 0, 36, 37, 38, 0, + 39, 40, 0, 42, 41, 43, 0, 44, + 44, 45, 0, 47, 46, 0, 47, 0, + 48, 0, 49, 0, 50, 0, 51, 0, + 52, 0, 53, 53, 54, 54, 55, 0, + 56, 0, 57, 0, 55, 0, 45, 0, + 44, 44, 58, 58, 0, 39, 0, 39, + 40, 59, 0, 60, 0, 61, 0, 44, + 44, 58, 58, 62, 0, 39, 59, 0, + 25, 63, 26, 28, 27, 27, 0, 25, + 26, 28, 27, 64, 0, 25, 26, 28, + 27, 65, 0, 67, 67, 66, 68, 69, + 0, 70, 0, 71, 0, 72, 0, 73, + 0, 74, 0, 72, 0, 72, 75, 72, + 76, 0, 77, 0, 16, 0, 78, 0, + 16, 0, 79, 80, 81, 80, 0, 72, + 0, 72, 0, 82, 83, 0, 84, 0, + 80, 0, 85, 0, 80, 0, 86, 87, + 0, 80, 0, 80, 0, 69, 0, 72, + 76, 0, 81, 80, 0, 83, 0, 87, + 0, 88, 89, 88, 90, 26, 0, 88, + 88, 26, 0, 21, 0, 0, 25, 26, + 28, 91, 92, 93, 91, 27, 0, 25, + 26, 28, 91, 91, 27, 0, 94, 28, + 30, 0, 94, 94, 0, 94, 28, 0, + 31, 31, 0, 32, 32, 32, 32, 32, + 0, 95, 26, 28, 19, 19, 0, 95, + 26, 95, 95, 0, 88, 89, 96, 88, + 90, 97, 26, 0, 0, 25, 26, 28, + 91, 92, 93, 98, 99, 0, 25, 26, + 28, 91, 100, 65, 0, 25, 26, 28, + 91, 101, 64, 0, 25, 63, 26, 28, + 91, 91, 27, 0, 44, 44, 58, 58, + 102, 0, 53, 53, 54, 54, 103, 55, + 0, 105, 104, 0, 105, 106, 0, 55, + 0, 0, 53, 53, 54, 54, 55, 103, + 0, 44, 44, 58, 58, 0, 44, 44, + 58, 58, 102, 62, 0, 44, 44, 58, + 58, 62, 0, 95, 26, 28, 19, 107, + 0, 95, 26, 28, 19, 108, 0, 95, + 26, 28, 19, 109, 0, 95, 63, 26, + 28, 19, 19, 0, 0, 0, 0, 0, + 0 +] + +class << self + attr_accessor :_yaml_scalar_scanner_trans_targs + private :_yaml_scalar_scanner_trans_targs, :_yaml_scalar_scanner_trans_targs= +end +self._yaml_scalar_scanner_trans_targs = [ + 0, 2, 86, 88, 102, 44, 45, 52, + 57, 60, 65, 68, 69, 70, 71, 72, + 108, 73, 77, 84, 4, 75, 6, 7, + 76, 9, 74, 10, 11, 79, 81, 82, + 83, 15, 87, 17, 19, 36, 40, 20, + 35, 92, 100, 101, 21, 22, 23, 24, + 25, 26, 27, 28, 93, 29, 30, 97, + 94, 32, 33, 37, 38, 39, 34, 18, + 41, 42, 106, 106, 46, 49, 47, 48, + 107, 50, 51, 53, 55, 54, 56, 58, + 109, 59, 61, 63, 62, 64, 66, 67, + 3, 5, 8, 78, 12, 13, 80, 85, + 14, 16, 89, 43, 90, 91, 99, 98, + 95, 31, 96, 103, 104, 105 +] + +class << self + attr_accessor :_yaml_scalar_scanner_trans_actions + private :_yaml_scalar_scanner_trans_actions, :_yaml_scalar_scanner_trans_actions= +end +self._yaml_scalar_scanner_trans_actions = [ + 33, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 31, 29, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +] + +class << self + attr_accessor :_yaml_scalar_scanner_eof_actions + private :_yaml_scalar_scanner_eof_actions, :_yaml_scalar_scanner_eof_actions= +end +self._yaml_scalar_scanner_eof_actions = [ + 0, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, + 33, 17, 17, 17, 21, 11, 9, 15, + 19, 15, 7, 13, 11, 11, 17, 23, + 11, 9, 9, 9, 25, 27, 27, 27, + 27, 27, 27, 25, 25, 25, 11, 11, + 11, 11, 0, 5, 1, 3 +] + +class << self + attr_accessor :yaml_scalar_scanner_start +end +self.yaml_scalar_scanner_start = 1; +class << self + attr_accessor :yaml_scalar_scanner_first_final +end +self.yaml_scalar_scanner_first_final = 73; +class << self + attr_accessor :yaml_scalar_scanner_error +end +self.yaml_scalar_scanner_error = 0; + +class << self + attr_accessor :yaml_scalar_scanner_en_main +end +self.yaml_scalar_scanner_en_main = 1; + + + # %% this just fixes our syntax highlighting... + # END OF SCANNER DATA STRUCTURES + ### - # Tokenize +string+ returning the ruby object def tokenize string return nil if string.empty? - return string if @string_cache.key?(string) - return @symbol_cache[string] if @symbol_cache.key?(string) - - case string - # Check for a String type, being careful not to get caught by hash keys, hex values, and - # special floats (e.g., -.inf). - when /^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/ - if string.length > 5 - @string_cache[string] = true - return string - end - - case string - when /^[^ytonf~]/i - @string_cache[string] = true - string - when '~', /^null$/i - nil - when /^(yes|true|on)$/i - true - when /^(no|false|off)$/i - false - else - @string_cache[string] = true - string - end - when TIME - begin - parse_time string - rescue ArgumentError - string - end - when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/ - require 'date' - begin - Date.strptime(string, '%Y-%m-%d') - rescue ArgumentError - string - end - when /^\.inf$/i - 1 / 0.0 - when /^-\.inf$/i + + data = string_to_data(string) + eof = data.length + + ### + # START OF SCANNER INITIALIZATION + +begin + p ||= 0 + pe ||= data.length + cs = yaml_scalar_scanner_start +end + + # %% this just fixes our syntax highlighting... + # END OF SCANNER INITIALIZATION + ### + + ### + # START OF SCANNER EXECUTION + +begin + _klen, _trans, _keys, _acts, _nacts = nil + _goto_level = 0 + _resume = 10 + _eof_trans = 15 + _again = 20 + _test_eof = 30 + _out = 40 + while true + _trigger_goto = false + if _goto_level <= 0 + if p == pe + _goto_level = _test_eof + next + end + if cs == 0 + _goto_level = _out + next + end + end + if _goto_level <= _resume + _keys = _yaml_scalar_scanner_key_offsets[cs] + _trans = _yaml_scalar_scanner_index_offsets[cs] + _klen = _yaml_scalar_scanner_single_lengths[cs] + _break_match = false + + begin + if _klen > 0 + _lower = _keys + _upper = _keys + _klen - 1 + + loop do + break if _upper < _lower + _mid = _lower + ( (_upper - _lower) >> 1 ) + + if data[p].ord < _yaml_scalar_scanner_trans_keys[_mid] + _upper = _mid - 1 + elsif data[p].ord > _yaml_scalar_scanner_trans_keys[_mid] + _lower = _mid + 1 + else + _trans += (_mid - _keys) + _break_match = true + break + end + end # loop + break if _break_match + _keys += _klen + _trans += _klen + end + _klen = _yaml_scalar_scanner_range_lengths[cs] + if _klen > 0 + _lower = _keys + _upper = _keys + (_klen << 1) - 2 + loop do + break if _upper < _lower + _mid = _lower + (((_upper-_lower) >> 1) & ~1) + if data[p].ord < _yaml_scalar_scanner_trans_keys[_mid] + _upper = _mid - 2 + elsif data[p].ord > _yaml_scalar_scanner_trans_keys[_mid+1] + _lower = _mid + 2 + else + _trans += ((_mid - _keys) >> 1) + _break_match = true + break + end + end # loop + break if _break_match + _trans += _klen + end + end while false + _trans = _yaml_scalar_scanner_indicies[_trans] + cs = _yaml_scalar_scanner_trans_targs[_trans] + if _yaml_scalar_scanner_trans_actions[_trans] != 0 + _acts = _yaml_scalar_scanner_trans_actions[_trans] + _nacts = _yaml_scalar_scanner_actions[_acts] + _acts += 1 + while _nacts > 0 + _nacts -= 1 + _acts += 1 + case _yaml_scalar_scanner_actions[_acts - 1] +when 14 then + begin + return parse_symbol_quoted string end +when 15 then + begin + return parse_symbol_unquoted string end +when 16 then + begin + return parse_string string end + end # action switch + end + end + if _trigger_goto + next + end + end + if _goto_level <= _again + if cs == 0 + _goto_level = _out + next + end + p += 1 + if p != pe + _goto_level = _resume + next + end + end + if _goto_level <= _test_eof + if p == eof + __acts = _yaml_scalar_scanner_eof_actions[cs] + __nacts = _yaml_scalar_scanner_actions[__acts] + __acts += 1 + while __nacts > 0 + __nacts -= 1 + __acts += 1 + case _yaml_scalar_scanner_actions[__acts - 1] +when 0 then + begin + return parse_null_value string end +when 1 then + begin + return parse_bool_true string end +when 2 then + begin + return parse_bool_false string end +when 3 then + begin + return parse_int_base_2 string end +when 4 then + begin + return parse_int_base_8 string end +when 5 then + begin + return parse_int_base_10 string end +when 6 then + begin + return parse_int_base_16 string end +when 7 then + begin + return parse_int_base_60 string end +when 8 then + begin + return parse_float_base_10 string end +when 9 then + begin + return parse_float_base_60 string end +when 10 then + begin + return parse_float_inf string end +when 11 then + begin + return parse_float_nan string end +when 12 then + begin + return parse_time_ymd string end +when 13 then + begin + return parse_time_full string end +when 16 then + begin + return parse_string string end + end # eof action switch + end + if _trigger_goto + next + end +end + end + if _goto_level <= _out + break + end + end + end + + # %% this just fixes our syntax highlighting... + # END OF SCANNER EXECUTION + ### + end + + def parse_time string + parse_time_full string + end + + private + + # Instance methods to expose scanner methods defined at the class level + def _yaml_scalar_scanner_actions; self.class.send(:_yaml_scalar_scanner_actions); end + def _yaml_scalar_scanner_key_offsets; self.class.send(:_yaml_scalar_scanner_key_offsets); end + def _yaml_scalar_scanner_trans_keys; self.class.send(:_yaml_scalar_scanner_trans_keys); end + def _yaml_scalar_scanner_single_lengths; self.class.send(:_yaml_scalar_scanner_single_lengths); end + def _yaml_scalar_scanner_range_lengths; self.class.send(:_yaml_scalar_scanner_range_lengths); end + def _yaml_scalar_scanner_index_offsets; self.class.send(:_yaml_scalar_scanner_index_offsets); end + def _yaml_scalar_scanner_indicies; self.class.send(:_yaml_scalar_scanner_indicies); end + def _yaml_scalar_scanner_trans_targs; self.class.send(:_yaml_scalar_scanner_trans_targs); end + def _yaml_scalar_scanner_trans_actions; self.class.send(:_yaml_scalar_scanner_trans_actions); end + def _yaml_scalar_scanner_eof_actions; self.class.send(:_yaml_scalar_scanner_eof_actions); end + def yaml_scalar_scanner_start; self.class.yaml_scalar_scanner_start; end + def yaml_scalar_scanner_first_final; self.class.yaml_scalar_scanner_first_final; end + def yaml_scalar_scanner_error; self.class.yaml_scalar_scanner_error; end + def yaml_scalar_scanner_en_main; self.class.yaml_scalar_scanner_en_main; end + + def data_to_string data, ts, te + data[ts..te].pack("c*") + end + + def string_to_data string + string.unpack("c*") + end + + def parse_null_value string + nil + end + + def parse_bool_true string + true + end + + def parse_bool_false string + false + end + + def parse_int_base_10 string + Integer(string.gsub(/[,_]/, '')) + rescue + string + end + alias parse_int_base_2 parse_int_base_10 + alias parse_int_base_8 parse_int_base_10 + alias parse_int_base_16 parse_int_base_10 + + def parse_int_base_60 string + i = 0 + string.split(':').each_with_index do |n,e| + i += (n.to_i * 60 ** (e - 2).abs) + end + i + end + + def parse_float_base_10 string + Float(string.gsub(/[,_]|\.$/, '')) + rescue + string + end + + def parse_float_base_60 string + i = 0 + string.split(':').each_with_index do |n,e| + i += (n.to_f * 60 ** (e - 2).abs) + end + i + end + + def parse_float_inf string + if string[0] == '-' -1 / 0.0 - when /^\.nan$/i - 0.0 / 0.0 - when /^:./ - if string =~ /^:(["'])(.*)\1/ - @symbol_cache[string] = $2.sub(/^:/, '').to_sym - else - @symbol_cache[string] = string.sub(/^:/, '').to_sym - end - when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+$/ - i = 0 - string.split(':').each_with_index do |n,e| - i += (n.to_i * 60 ** (e - 2).abs) - end - i - when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+\.[0-9_]*$/ - i = 0 - string.split(':').each_with_index do |n,e| - i += (n.to_f * 60 ** (e - 2).abs) - end - i - when FLOAT - if string == '.' - @string_cache[string] = true - string - else - Float(string.gsub(/[,_]|\.$/, '')) - end else - int = parse_int string.gsub(/[,_]/, '') - return int if int - - @string_cache[string] = true - string + 1 / 0.0 end end - ### - # Parse and return an int from +string+ - def parse_int string - return unless INTEGER === string - Integer(string) + def parse_float_nan string + 0.0 / 0.0 end - ### - # Parse and return a Time from +string+ - def parse_time string + def parse_time_ymd string + require 'date' + Date.strptime(string, '%Y-%m-%d') + rescue + string + end + + def parse_time_full string date, time = *(string.split(/[ tT]/, 2)) (yy, m, dd) = date.split('-').map { |x| x.to_i } md = time.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/) @@ -139,6 +627,36 @@ def parse_time string end Time.at((time - offset).to_i, us) + rescue + string + end + + def parse_symbol_quoted string + last = string[-1] + return string unless last == '"' || last == "'" + string[2..-2].to_sym + rescue + string + end + + def parse_symbol_unquoted string + string[1..-1].to_sym + rescue + string + end + + def parse_string string + string end end end + +if $0 == __FILE__ + scanner = Psych::ScalarScanner.new + loop do + print "> " + string = gets.strip + break if string == "exit" + puts " => #{scanner.tokenize(string).inspect}" + end +end \ No newline at end of file diff --git a/lib/psych/scalar_scanner.rl b/lib/psych/scalar_scanner.rl new file mode 100644 index 00000000..8931b1c2 --- /dev/null +++ b/lib/psych/scalar_scanner.rl @@ -0,0 +1,267 @@ +# +# THIS FILE IS AUTOMATICALLY GENERATED. EDIT scalar_scanner.rl INSTEAD +# +# To compile: ragel -R -L scalar_scanner.rl +# To generate svg: ragel -R -V -p scalar_scanner.rl | dot -Tsvg -o scalar_scanner.svg +# + +=begin +%%{ + machine yaml_scalar_scanner; + + ### + # Actions + action on_null_value { return parse_null_value string } + action on_bool_true { return parse_bool_true string } + action on_bool_false { return parse_bool_false string } + action on_int_base_2 { return parse_int_base_2 string } + action on_int_base_8 { return parse_int_base_8 string } + action on_int_base_10 { return parse_int_base_10 string } + action on_int_base_16 { return parse_int_base_16 string } + action on_int_base_60 { return parse_int_base_60 string } + action on_float_base_10 { return parse_float_base_10 string } + action on_float_base_60 { return parse_float_base_60 string } + action on_float_inf { return parse_float_inf string } + action on_float_nan { return parse_float_nan string } + action on_time_ymd { return parse_time_ymd string } + action on_time_full { return parse_time_full string } + action on_symbol_quoted { return parse_symbol_quoted string } + action on_symbol_unquoted { return parse_symbol_unquoted string } + action on_string { return parse_string string } + + ### + # Null - http://yaml.org/type/null.html + null_value = ('~'|'null'|'Null'|'NULL') %on_null_value ; + + ### + # Boolean - http://yaml.org/type/bool.html + bool_true = ('yes'|'Yes'|'YES'|'true'|'True'|'TRUE'|'on'|'On'|'ON') %on_bool_true ; # NOTE: 'y'|'Y' removed from YAML spec + bool_false = ('no'|'No'|'NO'|'false'|'False'|'FALSE'|'off'|'Off'|'OFF') %on_bool_false ; # NOTE: 'n'|'N' removed from YAML spec + + bool = bool_true | bool_false ; + + ### + # Integer - http://yaml.org/type/int.html + int_base_2 = [\-+]?'0b'[0-1_]+ %on_int_base_2 ; + int_base_8 = [\-+]?'0'[0-7_]+ %on_int_base_8 ; + int_base_10 = [\-+]?('0'|[1-9][0-9_,]*) %on_int_base_10 ; # NOTE: comma not in YAML spec + int_base_16 = [\-+]?'0x'[0-9a-fA-F_,]+ %on_int_base_16 ; # NOTE: comma not in YAML spec + int_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+ %on_int_base_60 ; # NOTE: YAML spec has leading digit as [1-9] + + int = int_base_2 | int_base_8 | int_base_10 | int_base_16 | int_base_60 ; + + ### + # Float - http://yaml.org/type/float.html + float_base_10 = [\-+]?([0-9][0-9_,]*)?'.'[0-9]*([eE][\-+][0-9]+)? %on_float_base_10 ; # NOTE: comma not in YAML spec; dot removed from [0-9.] after initial decimal point + float_base_60 = [\-+]?[0-9][0-9_]*(':'[0-5]?[0-9])+'.'[0-9_]* %on_float_base_60 ; + float_inf = [\-+]?'.'('inf'|'Inf'|'INF') %on_float_inf ; + float_nan = '.'('nan'|'NaN'|'NAN') %on_float_nan ; + + float = float_base_10 | float_base_60 | float_inf | float_nan ; + + ### + # Time - http://yaml.org/type/timestamp.html + time_ymd = [0-9][0-9][0-9][0-9]'-'('1'[012]|'0'[0-9]|[0-9])'-'([12][0-9]|'3'[01]|'0'[0-9]|[0-9]) %on_time_ymd ; + # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]'-'[0-9][0-9] + # - Individual date portions were made more explicit + + time_full = [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?([ \t]*('Z'|[\-+][0-9][0-9]?(':'?[0-9][0-9])?))? %on_time_full ; + # NOTE: YAML spec originally [0-9][0-9][0-9][0-9]'-'[0-9][0-9]?'-'[0-9][0-9]?([Tt]|[ \t]+)[0-9][0-9]?':'[0-9][0-9]':'[0-9][0-9]('.'[0-9]*)?(([ \t]*)'Z'|sign[0-9][0-9]?(':'[0-9][0-9])?)? %on_time_full ; + # - Spacing before time zone was moved to not only be before Z, but to also + # be before the numeric time zone. + # - Colon in time zone made optional + + time = time_ymd | time_full; + + ### + # Symbol + symbol_quoted = ':'['"] @on_symbol_quoted ; + symbol_unquoted = ':'[^'"] @on_symbol_unquoted ; + + symbol = symbol_quoted | symbol_unquoted ; + + + main := (null_value | bool | int | float | time | symbol) $!on_string ; +}%% +=end + +module Psych + ### + # Scan scalars for built in types + class ScalarScanner + ### + # START OF SCANNER DATA STRUCTURES + %% write data; + # %% this just fixes our syntax highlighting... + # END OF SCANNER DATA STRUCTURES + ### + + def tokenize string + return nil if string.empty? + + data = string_to_data(string) + eof = data.length + + ### + # START OF SCANNER INITIALIZATION + %% write init; + # %% this just fixes our syntax highlighting... + # END OF SCANNER INITIALIZATION + ### + + ### + # START OF SCANNER EXECUTION + %% write exec; + # %% this just fixes our syntax highlighting... + # END OF SCANNER EXECUTION + ### + end + + def parse_time string + parse_time_full string + end + + private + + # Instance methods to expose scanner methods defined at the class level + def _yaml_scalar_scanner_actions; self.class.send(:_yaml_scalar_scanner_actions); end + def _yaml_scalar_scanner_key_offsets; self.class.send(:_yaml_scalar_scanner_key_offsets); end + def _yaml_scalar_scanner_trans_keys; self.class.send(:_yaml_scalar_scanner_trans_keys); end + def _yaml_scalar_scanner_single_lengths; self.class.send(:_yaml_scalar_scanner_single_lengths); end + def _yaml_scalar_scanner_range_lengths; self.class.send(:_yaml_scalar_scanner_range_lengths); end + def _yaml_scalar_scanner_index_offsets; self.class.send(:_yaml_scalar_scanner_index_offsets); end + def _yaml_scalar_scanner_indicies; self.class.send(:_yaml_scalar_scanner_indicies); end + def _yaml_scalar_scanner_trans_targs; self.class.send(:_yaml_scalar_scanner_trans_targs); end + def _yaml_scalar_scanner_trans_actions; self.class.send(:_yaml_scalar_scanner_trans_actions); end + def _yaml_scalar_scanner_eof_actions; self.class.send(:_yaml_scalar_scanner_eof_actions); end + def yaml_scalar_scanner_start; self.class.yaml_scalar_scanner_start; end + def yaml_scalar_scanner_first_final; self.class.yaml_scalar_scanner_first_final; end + def yaml_scalar_scanner_error; self.class.yaml_scalar_scanner_error; end + def yaml_scalar_scanner_en_main; self.class.yaml_scalar_scanner_en_main; end + + def data_to_string data, ts, te + data[ts..te].pack("c*") + end + + def string_to_data string + string.unpack("c*") + end + + def parse_null_value string + nil + end + + def parse_bool_true string + true + end + + def parse_bool_false string + false + end + + def parse_int_base_10 string + Integer(string.gsub(/[,_]/, '')) + rescue + string + end + alias parse_int_base_2 parse_int_base_10 + alias parse_int_base_8 parse_int_base_10 + alias parse_int_base_16 parse_int_base_10 + + def parse_int_base_60 string + i = 0 + string.split(':').each_with_index do |n,e| + i += (n.to_i * 60 ** (e - 2).abs) + end + i + end + + def parse_float_base_10 string + Float(string.gsub(/[,_]|\.$/, '')) + rescue + string + end + + def parse_float_base_60 string + i = 0 + string.split(':').each_with_index do |n,e| + i += (n.to_f * 60 ** (e - 2).abs) + end + i + end + + def parse_float_inf string + if string[0] == '-' + -1 / 0.0 + else + 1 / 0.0 + end + end + + def parse_float_nan string + 0.0 / 0.0 + end + + def parse_time_ymd string + require 'date' + Date.strptime(string, '%Y-%m-%d') + rescue + string + end + + def parse_time_full string + date, time = *(string.split(/[ tT]/, 2)) + (yy, m, dd) = date.split('-').map { |x| x.to_i } + md = time.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/) + + (hh, mm, ss) = md[1].split(':').map { |x| x.to_i } + us = (md[2] ? Rational("0.#{md[2]}") : 0) * 1000000 + + time = Time.utc(yy, m, dd, hh, mm, ss, us) + + return time if 'Z' == md[3] + return Time.at(time.to_i, us) unless md[3] + + tz = md[3].match(/^([+\-]?\d{1,2})\:?(\d{1,2})?$/)[1..-1].compact.map { |digit| Integer(digit, 10) } + offset = tz.first * 3600 + + if offset < 0 + offset -= ((tz[1] || 0) * 60) + else + offset += ((tz[1] || 0) * 60) + end + + Time.at((time - offset).to_i, us) + rescue + string + end + + def parse_symbol_quoted string + last = string[-1] + return string unless last == '"' || last == "'" + string[2..-2].to_sym + rescue + string + end + + def parse_symbol_unquoted string + string[1..-1].to_sym + rescue + string + end + + def parse_string string + string + end + end +end + +if $0 == __FILE__ + scanner = Psych::ScalarScanner.new + loop do + print "> " + string = gets.strip + break if string == "exit" + puts " => #{scanner.tokenize(string).inspect}" + end +end \ No newline at end of file