Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update and fix OkJson #71

Merged
merged 2 commits into from May 17, 2012
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
124 changes: 57 additions & 67 deletions lib/queue_classic/okjson.rb
@@ -1,4 +1,3 @@
module QC
# encoding: UTF-8 # encoding: UTF-8
# #
# Copyright 2011, 2012 Keith Rarick # Copyright 2011, 2012 Keith Rarick
Expand Down Expand Up @@ -28,6 +27,7 @@ module QC
# Some parts adapted from # Some parts adapted from
# http://golang.org/src/pkg/json/decode.go and # http://golang.org/src/pkg/json/decode.go and
# http://golang.org/src/pkg/utf8/utf8.go # http://golang.org/src/pkg/utf8/utf8.go
module QC
module OkJson module OkJson
extend self extend self


Expand Down Expand Up @@ -220,9 +220,9 @@ def tok(s)
end end




def nulltok(s); s[0,4] == 'null' && [:val, 'null', nil] end def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end
def truetok(s); s[0,4] == 'true' && [:val, 'true', true] end def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end
def falsetok(s); s[0,5] == 'false' && [:val, 'false', false] end def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end




def numtok(s) def numtok(s)
Expand All @@ -235,6 +235,8 @@ def numtok(s)
else else
[:val, m[0], Integer(m[0])] [:val, m[0], Integer(m[0])]
end end
else
[]
end end
end end


Expand Down Expand Up @@ -375,15 +377,6 @@ def subst(u1, u2)
end end




def unsubst(u)
if u < Usurrself || u > Umax || surrogate?(u)
return Ucharerr, Ucharerr
end
u -= Usurrself
[Usurr1 + ((u>>10)&0x3ff), Usurr2 + (u&0x3ff)]
end


def surrogate?(u) def surrogate?(u)
Usurr1 <= u && u < Usurr3 Usurr1 <= u && u < Usurr3
end end
Expand Down Expand Up @@ -473,15 +466,18 @@ def strenc(s)
else else
c = s[r] c = s[r]
case true case true
when rubydoesenc
begin
c.ord # will raise an error if c is invalid UTF-8
t.write(c)
rescue
t.write(Ustrerr)
end
when Spc <= c && c <= ?~ when Spc <= c && c <= ?~
t.putc(c) t.putc(c)
when rubydoesenc
u = c.ord
surrenc(t, u)
else else
u, size = uchardec(s, r) n = ucharcopy(t, s, r) # ensure valid UTF-8 output
r += size - 1 # we add one more at the bottom of the loop r += n - 1 # r is incremented below
surrenc(t, u)
end end
end end
r += 1 r += 1
Expand All @@ -491,28 +487,6 @@ def strenc(s)
end end




def surrenc(t, u)
if u < 0x10000
t.print('\\u')
hexenc4(t, u)
else
u1, u2 = unsubst(u)
t.print('\\u')
hexenc4(t, u1)
t.print('\\u')
hexenc4(t, u2)
end
end


def hexenc4(t, u)
t.putc(Hex[(u>>12)&0xf])
t.putc(Hex[(u>>8)&0xf])
t.putc(Hex[(u>>4)&0xf])
t.putc(Hex[u&0xf])
end


def numenc(x) def numenc(x)
if ((x.nan? || x.infinite?) rescue false) if ((x.nan? || x.infinite?) rescue false)
raise Error, "Numeric cannot be represented: #{x}" raise Error, "Numeric cannot be represented: #{x}"
Expand All @@ -521,60 +495,77 @@ def numenc(x)
end end




# Decodes unicode character u from UTF-8 # Copies the valid UTF-8 bytes of a single character
# bytes in string s at position i. # from string s at position i to I/O object t, and
# Returns u and the number of bytes read. # returns the number of bytes copied.
def uchardec(s, i) # If no valid UTF-8 char exists at position i,
# ucharcopy writes Ustrerr and returns 1.
def ucharcopy(t, s, i)
n = s.length - i n = s.length - i
return [Ucharerr, 1] if n < 1 raise Utf8Error if n < 1


c0 = s[i].ord c0 = s[i].ord


# 1-byte, 7-bit sequence? # 1-byte, 7-bit sequence?
if c0 < Utagx if c0 < Utagx
return [c0, 1] t.putc(c0)
return 1
end end


# unexpected continuation byte? raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
return [Ucharerr, 1] if c0 < Utag2


# need continuation byte raise Utf8Error if n < 2 # need continuation byte
return [Ucharerr, 1] if n < 2
c1 = s[i+1].ord c1 = s[i+1].ord
return [Ucharerr, 1] if c1 < Utagx || Utag2 <= c1 raise Utf8Error if c1 < Utagx || Utag2 <= c1


# 2-byte, 11-bit sequence? # 2-byte, 11-bit sequence?
if c0 < Utag3 if c0 < Utag3
u = (c0&Umask2)<<6 | (c1&Umaskx) raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
return [Ucharerr, 1] if u <= Uchar1max t.putc(c0)
return [u, 2] t.putc(c1)
return 2
end end


# need second continuation byte # need second continuation byte
return [Ucharerr, 1] if n < 3 raise Utf8Error if n < 3

c2 = s[i+2].ord c2 = s[i+2].ord
return [Ucharerr, 1] if c2 < Utagx || Utag2 <= c2 raise Utf8Error if c2 < Utagx || Utag2 <= c2


# 3-byte, 16-bit sequence? # 3-byte, 16-bit sequence?
if c0 < Utag4 if c0 < Utag4
u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx) u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
return [Ucharerr, 1] if u <= Uchar2max raise Utf8Error if u <= Uchar2max
return [u, 3] t.putc(c0)
t.putc(c1)
t.putc(c2)
return 3
end end


# need third continuation byte # need third continuation byte
return [Ucharerr, 1] if n < 4 raise Utf8Error if n < 4
c3 = s[i+3].ord c3 = s[i+3].ord
return [Ucharerr, 1] if c3 < Utagx || Utag2 <= c3 raise Utf8Error if c3 < Utagx || Utag2 <= c3


# 4-byte, 21-bit sequence? # 4-byte, 21-bit sequence?
if c0 < Utag5 if c0 < Utag5
u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx) u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
return [Ucharerr, 1] if u <= Uchar3max raise Utf8Error if u <= Uchar3max
return [u, 4] t.putc(c0)
end t.putc(c1)
t.putc(c2)
t.putc(c3)
return 4
end

raise Utf8Error
rescue Utf8Error
t.write(Ustrerr)
return 1
end



return [Ucharerr, 1] class Utf8Error < ::StandardError
end end




Expand All @@ -595,14 +586,13 @@ class Error < ::StandardError
Uchar2max = (1<<11) - 1 Uchar2max = (1<<11) - 1
Uchar3max = (1<<16) - 1 Uchar3max = (1<<16) - 1
Ucharerr = 0xFFFD # unicode "replacement char" Ucharerr = 0xFFFD # unicode "replacement char"
Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
Usurrself = 0x10000 Usurrself = 0x10000
Usurr1 = 0xd800 Usurr1 = 0xd800
Usurr2 = 0xdc00 Usurr2 = 0xdc00
Usurr3 = 0xe000 Usurr3 = 0xe000
Umax = 0x10ffff


Spc = ' '[0] Spc = ' '[0]
Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t} Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
Hex = '0123456789abcdef'
end end
end end