Skip to content

Commit

Permalink
faster and more compact serialization of symbols and strings
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed May 21, 2014
1 parent a90fd40 commit bb67ff2
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 21 deletions.
5 changes: 5 additions & 0 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ function write(s::IO, p::Ptr, n::Integer)
n
end

# Write the name of symbol `s` to `io` as raw bytes, without a terminator.
# The byte count comes from C `strlen` applied to the symbol's name pointer,
# and the bytes are written straight from that pointer.
function write(io::IO, s::Symbol)
    name_ptr = convert(Ptr{Uint8}, s)
    nbytes = int(ccall(:strlen, Csize_t, (Ptr{Uint8},), name_ptr))
    write(io, name_ptr, nbytes)
end

# Fallback single-byte read: all IO subtypes should implement this themselves.
# Reaching this method means the stream type has no byte-level read.
function read(s::IO, x::Type{Uint8})
    error(typeof(s), " does not support byte I/O")
end

Expand Down
29 changes: 12 additions & 17 deletions base/serialize.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
## serializing values ##

# dummy types to tell number of bytes used to store length (4 or 1)
abstract LongSymbol
abstract LongTuple
abstract LongExpr
abstract UndefRefTag
Expand All @@ -14,11 +13,11 @@ let i = 2
for t = {Symbol, Int8, Uint8, Int16, Uint16, Int32, Uint32,
Int64, Uint64, Int128, Uint128, Float32, Float64, Char, Ptr,
DataType, UnionType, Function,
Tuple, Array, Expr, LongSymbol, LongTuple, LongExpr,
Tuple, Array, Expr, :reserved21, LongTuple, LongExpr,
LineNumberNode, SymbolNode, LabelNode, GotoNode,
QuoteNode, TopNode, TypeVar, Box, LambdaStaticData,
Module, UndefRefTag, Task, :reserved4,
:reserved5, :reserved6, :reserved7, :reserved8,
Module, UndefRefTag, Task, ASCIIString, UTF8String,
:reserved6, :reserved7, :reserved8,
:reserved9, :reserved10, :reserved11, :reserved12,

(), Bool, Any, :Any, None, Top, Undef, Type,
Expand All @@ -31,7 +30,7 @@ let i = 2
:mul_float, :unbox, :box,
:eq_int, :slt_int, :sle_int, :ne_int,
:arrayset, :arrayref,
:reserved13, :reserved14, :reserved15, :reserved16,
:Core, :Base, :reserved15, :reserved16,
:reserved17, :reserved18, :reserved19, :reserved20,
false, true, nothing, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
Expand Down Expand Up @@ -79,16 +78,9 @@ function serialize(s, x::Symbol)
if haskey(ser_tag, x)
return write_as_tag(s, x)
end
name = string(x)
ln = sizeof(name)
if ln <= 255
writetag(s, Symbol)
write(s, uint8(ln))
else
writetag(s, LongSymbol)
write(s, int32(ln))
end
write(s, name)
writetag(s, Symbol)
write(s, x)
write(s, 0x00)
end

function serialize_array_data(s, a)
Expand Down Expand Up @@ -323,8 +315,11 @@ end

# Build a tuple of `len` elements, calling `deserialize(s)` once per element.
function deserialize_tuple(s, len)
    ntuple(len, i -> deserialize(s))
end

# Read a symbol stored as a one-byte length followed by that many name bytes.
function deserialize(s, ::Type{Symbol})
    len = int32(read(s, Uint8))
    symbol(read(s, Uint8, len))
end
# Read a symbol stored as an Int32 length followed by that many name bytes.
function deserialize(s, ::Type{LongSymbol})
    len = read(s, Int32)
    symbol(read(s, Uint8, len))
end
# Read a symbol whose name is stored as raw bytes followed by a 0x00 byte.
#
# `readuntil` hands back the bytes up to and including the terminator. If the
# stream ends before a terminator is seen, the result lacks the trailing 0x00;
# stripping the last byte in that case would silently truncate the name, so a
# missing terminator is reported as an EOFError instead.
function deserialize(s, ::Type{Symbol})
    r = readuntil(s, 0x00)
    if isempty(r) || r[end] != 0x00
        throw(EOFError())
    end
    pop!(r)   # drop the 0x00 terminator
    symbol(r)
end

function deserialize(s, ::Type{Module})
path = deserialize(s)
Expand Down
5 changes: 1 addition & 4 deletions base/show.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@

# Show `x` on STDOUT by forwarding to the two-argument `show`.
function show(x)
    show(STDOUT::IO, x)
end

# Print symbol `s` to `io` by writing its name bytes directly from the
# symbol's name pointer; the length is taken from C `strlen`.
function print(io::IO, s::Symbol)
    name_ptr = convert(Ptr{Uint8}, s)
    nbytes = int(ccall(:strlen, Csize_t, (Ptr{Uint8},), name_ptr))
    write(io, name_ptr, nbytes)
end
# Print symbol `s` to `io` via `write(io, s)`; discards the value returned
# by `write` and returns `nothing`.
function print(io::IO, s::Symbol)
    write(io, s)
    nothing
end

function show(io::IO, x::ANY)
t = typeof(x)::DataType
Expand Down

3 comments on commit bb67ff2

@mbauman
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is incompatible with previous serializations, which is just fine, but should this bump the serialization version number? Actually, ser_version doesn't seem to be used anywhere in Base and isn't ever written to the output stream. Should it even be defined? I'm tempted to write Base.ser_version as the first 8 bytes of a serialized file, but perhaps that gives the wrong impression for the stability of this interface.

@JeffBezanson
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I can at least allow the new version to read the old version.

@mbauman
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That'd be even better! Thanks Jeff.

Please sign in to comment.