diff --git a/src/RData.jl b/src/RData.jl
index dc559ce..5645b29 100644
--- a/src/RData.jl
+++ b/src/RData.jl
@@ -9,7 +9,8 @@ import FileIO: load
 export
     sexp2julia,
     DictoVec,
-    load # export FileIO.load()
+    load, # export FileIO.load()
+    readRDS
 
 include("config.jl")
 include("sxtypes.jl")
@@ -87,4 +88,42 @@ end
 
 load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions)
 
+# TODO:
+# * maybe throw error instead of warning on conversion?
+# * load stuff (e.g. FileIO req on detect_rdata)
+# * maybe return tuple of (object, attribute_dict) for
+#   https://github.com/JuliaStats/RData.jl/issues/30
+
+"""
+    readRDS(f::AbstractString; kwoptions...)
+
+Read one item from the RDS file `f` and return it.
+
+The file is treated as gzip-compressed when it starts with the bytes
+`0x1F 0x8B`. An error is thrown unless the format version is 2.
+
+All keyword options are passed to `RDAContext`. If `convert` is `true`
+(the default), the item is passed through `sexp2julia` before being
+returned; otherwise the result of `readitem` is returned unchanged.
+"""
+function readRDS(f::AbstractString; kwoptions...)
+    io = open(f, "r")
+    try
+        # check GZip magic number, then rewind so parsing starts at byte 0
+        gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B
+        seekstart(io)
+        # if compressed, transcode gzipped stream
+        gzipped && (io = GzipDecompressorStream(io))
+        ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions)
+        # this validates file contents, so raise a real error instead of @assert
+        ctx.fmtver == 2 ||
+            error("unsupported RDS format version $(ctx.fmtver) in \"$f\", expected 2")
+        obj = readitem(ctx)
+        return get(ctx.kwdict, :convert, true) ? sexp2julia(obj) : obj
+    finally
+        # close whichever stream `io` is bound to at this point
+        close(io)
+    end
+end
+
 end # module
diff --git a/test/RDS.jl b/test/RDS.jl
new file mode 100644
index 0000000..e7f82fd
--- /dev/null
+++ b/test/RDS.jl
@@ -0,0 +1,65 @@
+module TestRDS
+    using Base.Test
+    using DataFrames
+    using RData
+
+    testdir = dirname(@__FILE__)
+
+    @testset "RDS: Reading minimal rds" begin
+        df = DataFrame(num = [1.1, 2.2])
+        @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df)
+        @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df)
+        @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df)
+    end
+
+    @testset "RDS: Conversion to Julia types" begin
+        df = DataFrame(num = [1.1, 2.2],
+                       int = Int32[1, 2],
+                       logi = [true, false],
+                       chr = ["ab", "c"],
+                       factor = pool(["ab", "c"]),
+                       cplx = Complex128[1.1+0.5im, 1.0im])
+        rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"]
+        @test eltypes(rdf) == eltypes(df)
+        @test isequal(rdf, df)
+        rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"]
+        @test eltypes(rdf_ascii) == eltypes(df)
+        @test isequal(rdf_ascii, df)
+    end
+
+
+    @testset "RDS: NAs conversion" begin
+        df = DataFrame(num = [1.1, 2.2],
+                       int = Int32[1, 2],
+                       logi = [true, false],
+                       chr = ["ab", "c"],
+                       factor = pool(["ab", "c"]),
+                       cplx = Complex128[1.1+0.5im, 1.0im])
+
+        df[2, :] = NA
+        append!(df, df[2, :])
+        df[3, :num] = NaN
+        df[:, :cplx] = @data [NA, Complex128(1,NaN), NaN]
+        @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df)
+        # ASCII format saves NaN as NA
+        df[3, :num] = NA
+        df[:, :cplx] = @data [NA, NA, NA]
+        @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df)
+    end
+
+    @testset "RDS: Column names conversion" begin
+        rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"])
+        expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1]
+        @test rds_names == expected_names
+        rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"])
+        @test rds_names == expected_names
+    end
+
+    @testset "RDS: Reading RDS with complex types (environments, closures etc)" begin
+        rds_envs = readRDS("$testdir/data/envs.rds",convert=false)
+        rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false)
+        rds_closures = readRDS("$testdir/data/closures.rds",convert=false)
+        rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false)
+    end
+end
+
diff --git a/test/data/NAs.rds b/test/data/NAs.rds
new file mode 100644
index 0000000..96cdad1
Binary files /dev/null and b/test/data/NAs.rds differ
diff --git a/test/data/NAs_ascii.rds b/test/data/NAs_ascii.rds
new file mode 100644
index 0000000..f9acd59
Binary files /dev/null and b/test/data/NAs_ascii.rds differ
diff --git a/test/data/closures.rds b/test/data/closures.rds
new file mode 100644
index 0000000..818ca3b
Binary files /dev/null and b/test/data/closures.rds differ
diff --git a/test/data/cmpfun.rds b/test/data/cmpfun.rds
new file mode 100644
index 0000000..bff356c
Binary files /dev/null and b/test/data/cmpfun.rds differ
diff --git a/test/data/envs.rds b/test/data/envs.rds
new file mode 100644
index 0000000..52bfaa5
Binary files /dev/null and b/test/data/envs.rds differ
diff --git a/test/data/envs_ascii.rds b/test/data/envs_ascii.rds
new file mode 100644
index 0000000..38d1195
Binary files /dev/null and b/test/data/envs_ascii.rds differ
diff --git a/test/data/minimal.rds b/test/data/minimal.rds
new file mode 100644
index 0000000..c50c234
Binary files /dev/null and b/test/data/minimal.rds differ
diff --git a/test/data/minimal_ascii.rds b/test/data/minimal_ascii.rds
new file mode 100644
index 0000000..c50c234
Binary files /dev/null and b/test/data/minimal_ascii.rds differ
diff --git a/test/data/names.rds b/test/data/names.rds
new file mode 100644
index 0000000..217a74f
Binary files /dev/null and b/test/data/names.rds differ
diff --git a/test/data/names_ascii.rds b/test/data/names_ascii.rds
new file mode 100644
index 0000000..5148ffb
Binary files /dev/null and b/test/data/names_ascii.rds differ
diff --git a/test/data/pairlists.rds b/test/data/pairlists.rds
new file mode 100644
index 0000000..7502413
Binary files /dev/null and b/test/data/pairlists.rds differ
diff --git a/test/data/types.rds b/test/data/types.rds
new file mode 100644
index 0000000..e6c2ff5
Binary files /dev/null and b/test/data/types.rds differ
diff --git a/test/data/types_ascii.rds b/test/data/types_ascii.rds
new file mode 100644
index 0000000..e6c2ff5
Binary files /dev/null and b/test/data/types_ascii.rds differ
diff --git a/test/generate_rda.R b/test/generate_rda.R
index 8a65996..a1a7bba 100644
--- a/test/generate_rda.R
+++ b/test/generate_rda.R
@@ -1,4 +1,4 @@
-# R script to generate test .rda files
+# R script to generate test .rda and .rds files
 
 df <- data.frame(num = c(1.1, 2.2))
 save(df, file = "data/minimal.rda")
@@ -48,3 +48,17 @@ test.cmpfun0 <- cmpfun( test.fun0 )
 test.cmpfun1 <- cmpfun( test.fun1 )
 test.cmpfun2 <- cmpfun( test.fun2 )
 save(test.cmpfun0, test.cmpfun1, test.cmpfun2, file = "data/cmpfun.rda")
+
+
+# for converting rda files to rds to test with readRDS
+# NOTE(review): saveRDS() is called with its default `ascii` setting, so the
+# *_ascii.rds files are presumably not in ASCII format -- confirm this is intended
+rdafiles <- list.files("data", pattern = "\\.rda$", full.names = TRUE)
+for (rdafile in rdafiles) {
+    en <- new.env()
+    load(rdafile, envir = en)
+    lst <- as.list(en)
+    rdsfile <- sub("\\.rda$", ".rds", rdafile)
+    saveRDS(lst, file = rdsfile)
+}
+
diff --git a/test/runtests.jl b/test/runtests.jl
index 2b8043e..3d43c8e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,3 +2,4 @@ using RData
 using Base.Test
 
 include("RDA.jl")
+include("RDS.jl")