Permalink
Browse files

faster and more compact serialization of symbols and strings

  • Loading branch information...
JeffBezanson committed May 21, 2014
1 parent a90fd40 commit bb67ff25e2799b27d10877692f74bae66ccc0270
Showing with 18 additions and 21 deletions.
  1. +5 −0 base/io.jl
  2. +12 −17 base/serialize.jl
  3. +1 −4 base/show.jl
View
@@ -94,6 +94,11 @@ function write(s::IO, p::Ptr, n::Integer)
n
end
# Write the name of symbol `s` to `io` as raw bytes, delegating to the
# pointer-and-length `write` method.
function write(io::IO, s::Symbol)
# pointer to the bytes of the symbol's name
pname = convert(Ptr{Uint8}, s)
# `strlen` counts the bytes before the terminating NUL, so the NUL itself is not written
write(io, pname, int(ccall(:strlen, Csize_t, (Ptr{Uint8},), pname)))
end
# Fallback for reading a single byte: raises an error that names the stream's type.
# all subtypes should implement this
read(s::IO, x::Type{Uint8}) = error(typeof(s)," does not support byte I/O")
View
@@ -1,7 +1,6 @@
## serializing values ##
# dummy types to tell number of bytes used to store length (4 or 1)
abstract LongSymbol
abstract LongTuple
abstract LongExpr
abstract UndefRefTag
@@ -14,11 +13,11 @@ let i = 2
for t = {Symbol, Int8, Uint8, Int16, Uint16, Int32, Uint32,
Int64, Uint64, Int128, Uint128, Float32, Float64, Char, Ptr,
DataType, UnionType, Function,
Tuple, Array, Expr, LongSymbol, LongTuple, LongExpr,
Tuple, Array, Expr, :reserved21, LongTuple, LongExpr,
LineNumberNode, SymbolNode, LabelNode, GotoNode,
QuoteNode, TopNode, TypeVar, Box, LambdaStaticData,
Module, UndefRefTag, Task, :reserved4,
:reserved5, :reserved6, :reserved7, :reserved8,
Module, UndefRefTag, Task, ASCIIString, UTF8String,
:reserved6, :reserved7, :reserved8,
:reserved9, :reserved10, :reserved11, :reserved12,
(), Bool, Any, :Any, None, Top, Undef, Type,
@@ -31,7 +30,7 @@ let i = 2
:mul_float, :unbox, :box,
:eq_int, :slt_int, :sle_int, :ne_int,
:arrayset, :arrayref,
:reserved13, :reserved14, :reserved15, :reserved16,
:Core, :Base, :reserved15, :reserved16,
:reserved17, :reserved18, :reserved19, :reserved20,
false, true, nothing, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
@@ -79,16 +78,9 @@ function serialize(s, x::Symbol)
if haskey(ser_tag, x)
return write_as_tag(s, x)
end
name = string(x)
ln = sizeof(name)
if ln <= 255
writetag(s, Symbol)
write(s, uint8(ln))
else
writetag(s, LongSymbol)
write(s, int32(ln))
end
write(s, name)
writetag(s, Symbol)
write(s, x)
write(s, 0x00)
end
function serialize_array_data(s, a)
@@ -323,8 +315,11 @@ end
# Build a tuple of `len` elements, each obtained from one `deserialize(s)` call.
deserialize_tuple(s, len) = ntuple(len, i->deserialize(s))
# Length-prefixed symbol: read a one-byte length, then that many name bytes.
deserialize(s, ::Type{Symbol}) = symbol(read(s, Uint8, int32(read(s, Uint8))))
# Length-prefixed symbol with an Int32 length field, followed by that many name bytes.
deserialize(s, ::Type{LongSymbol}) = symbol(read(s, Uint8, read(s, Int32)))
# Read a symbol stored as its name bytes followed by a 0x00 terminator.
function deserialize(s, ::Type{Symbol})
# read bytes up to the 0x00 delimiter
r = readuntil(s,0x00)
# drop the last byte read, which is expected to be the 0x00 delimiter
# NOTE(review): if the stream ends before a 0x00 is seen, this may drop a real
# name byte (or fail on an empty result) -- confirm readuntil's behavior at EOF
pop!(r)
symbol(r)
end
function deserialize(s, ::Type{Module})
path = deserialize(s)
View
@@ -1,10 +1,7 @@
# Convenience method: show `x` on STDOUT.
show(x) = show(STDOUT::IO, x)
# Print symbol `s` to `io` by writing the raw bytes of its name.
function print(io::IO, s::Symbol)
# pointer to the bytes of the symbol's name
pname = convert(Ptr{Uint8}, s)
# `strlen` gives the name length up to (not including) the terminating NUL
write(io, pname, int(ccall(:strlen, Csize_t, (Ptr{Uint8},), pname)))
end
# Print symbol `s` by delegating to `write(io, s)`; returns `nothing` rather than the result of `write`.
print(io::IO, s::Symbol) = (write(io,s);nothing)
function show(io::IO, x::ANY)
t = typeof(x)::DataType

3 comments on commit bb67ff2

@mbauman

This comment has been minimized.

Show comment
Hide comment
@mbauman

mbauman May 21, 2014

Member

This is incompatible with previous serializations, which is just fine, but should this bump the serialization version number? Actually, ser_version doesn't seem to be used anywhere in Base and isn't ever written to the output stream. Should it even be defined? I'm tempted to write Base.ser_version as the first 8 bytes of a serialized file, but perhaps that gives the wrong impression for the stability of this interface.

Member

mbauman replied May 21, 2014

This is incompatible with previous serializations, which is just fine, but should this bump the serialization version number? Actually, ser_version doesn't seem to be used anywhere in Base and isn't ever written to the output stream. Should it even be defined? I'm tempted to write Base.ser_version as the first 8 bytes of a serialized file, but perhaps that gives the wrong impression for the stability of this interface.

@JeffBezanson

This comment has been minimized.

Show comment
Hide comment
@JeffBezanson

JeffBezanson May 21, 2014

Member

I think I can at least allow the new version to read the old version.

Member

JeffBezanson replied May 21, 2014

I think I can at least allow the new version to read the old version.

@mbauman

This comment has been minimized.

Show comment
Hide comment
@mbauman

mbauman May 21, 2014

Member

That'd be even better! Thanks Jeff.

Member

mbauman replied May 21, 2014

That'd be even better! Thanks Jeff.

Please sign in to comment.