From 500aa93121850d97c1c4ad6a5169f94c63d00338 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 2 Nov 2017 11:04:16 -0700 Subject: [PATCH 1/6] initial support for rds files --- src/RData.jl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/RData.jl b/src/RData.jl index 1dd7a42..57e3e60 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -11,7 +11,8 @@ import FileIO: load export sexp2julia, DictoVec, - load # export FileIO.load() + load, # export FileIO.load() + readRDS include("config.jl") include("sxtypes.jl") @@ -80,4 +81,20 @@ end load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) +# TODO: +# * maybe throw error instead of warning on conversion? +# * tests +# * load stuff (e.g. FileIO req on detect_rdata) +# * maybe return tuple of (object, attribute_dict) for +# https://github.com/JuliaStats/RData.jl/issues/30 +function readRDS(f::AbstractString) + obj = gzopen(f) do io + ctx = RDAContext(rdaio(io, chomp(readline(io)))) #, kwoptions) + @assert ctx.fmtver == 2 # format version + #convert2julia = get(ctx.kwdict,:convert,true) + return readitem(ctx) + end + return sexp2julia(obj) +end + end # module From d9dc2ecfb4049b1a03056c983b9217759347c67b Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 2 Nov 2017 14:09:52 -0700 Subject: [PATCH 2/6] add support for keyword arguments (can at least manually handle list of data frames now) --- src/RData.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 57e3e60..da6f0f8 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -87,14 +87,14 @@ load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) # * load stuff (e.g. FileIO req on detect_rdata) # * maybe return tuple of (object, attribute_dict) for # https://github.com/JuliaStats/RData.jl/issues/30 -function readRDS(f::AbstractString) +function readRDS(f::AbstractString; kwoptions...) obj = gzopen(f) do io - ctx = RDAContext(rdaio(io, chomp(readline(io)))) #, kwoptions) + ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) @assert ctx.fmtver == 2 # format version - #convert2julia = get(ctx.kwdict,:convert,true) - return readitem(ctx) + convert2julia = get(ctx.kwdict,:convert,true) + return convert2julia ? sexp2julia(readitem(ctx)) : readitem(ctx) end - return sexp2julia(obj) + return obj end end # module From 97d47a72761cae1f3f6396fbe014fafadc9209a5 Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:05:14 -0800 Subject: [PATCH 3/6] added tests and test data --- test/RDS.jl | 56 ++++++++++++++++++++++++++++++++++++ test/data/NAs.rds | Bin 0 -> 236 bytes test/data/NAs_ascii.rds | Bin 0 -> 232 bytes test/data/closures.rds | Bin 0 -> 334 bytes test/data/cmpfun.rds | Bin 0 -> 339 bytes test/data/envs.rds | Bin 0 -> 172 bytes test/data/envs_ascii.rds | Bin 0 -> 147 bytes test/data/minimal.rds | Bin 0 -> 132 bytes test/data/minimal_ascii.rds | Bin 0 -> 132 bytes test/data/names.rds | Bin 0 -> 239 bytes test/data/names_ascii.rds | Bin 0 -> 236 bytes test/data/pairlists.rds | Bin 0 -> 119 bytes test/data/types.rds | Bin 0 -> 220 bytes test/data/types_ascii.rds | Bin 0 -> 220 bytes test/generate_rda.R | 12 ++++++++ test/runtests.jl | 1 + 16 files changed, 69 insertions(+) create mode 100644 test/RDS.jl create mode 100644 test/data/NAs.rds create mode 100644 test/data/NAs_ascii.rds create mode 100644 test/data/closures.rds create mode 100644 test/data/cmpfun.rds create mode 100644 test/data/envs.rds create mode 100644 test/data/envs_ascii.rds create mode 100644 test/data/minimal.rds create mode 100644 test/data/minimal_ascii.rds create mode 100644 test/data/names.rds create mode 100644 test/data/names_ascii.rds create mode 100644 test/data/pairlists.rds create mode 100644 test/data/types.rds create mode 100644 test/data/types_ascii.rds diff --git a/test/RDS.jl b/test/RDS.jl new file mode 100644 index 0000000..735175b --- /dev/null +++ b/test/RDS.jl @@ -0,0 +1,56 @@ +module TestRDS + using Base.Test + using DataFrames + using RData + using Compat + + # think this is redundant for rds vs rda + # check for Float64 NA + # @test !RData.isna_float64(reinterpret(UInt64, 1.0)) + # @test !RData.isna_float64(reinterpret(UInt64, NaN)) + # @test !RData.isna_float64(reinterpret(UInt64, Inf)) + # @test !RData.isna_float64(reinterpret(UInt64, -Inf)) + # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) + # # check that alternative NA is also recognized (#10) + # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) + + testdir = dirname(@__FILE__) + + df = DataFrame(num = [1.1, 2.2]) + @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) + @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) + @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) + + df[:int] = Int32[1, 2] + df[:logi] = [true, false] + df[:chr] = ["ab", "c"] + df[:factor] = pool(df[:chr]) + df[:cplx] = Complex128[1.1+0.5im, 1.0im] + @test isequal(sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"], df) + @test isequal(sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"], df) + + df[2, :] = NA + append!(df, df[2, :]) + df[3, :num] = NaN + df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) + # ASCII format saves NaN as NA + df[3, :num] = NA + df[:, :cplx] = @data [NA, NA, NA] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) + + rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) + expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] + @test rds_names == expected_names + rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) + @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] + + rds_envs = readRDS("$testdir/data/envs.rds",convert=false) + + rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) + + rds_closures = readRDS("$testdir/data/closures.rds",convert=false) + + rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) +end + diff --git a/test/data/NAs.rds b/test/data/NAs.rds new file mode 100644 index 0000000000000000000000000000000000000000..96cdad15630a547a37cf310ecee2221269454ef7 GIT binary patch literal 236 zcmVR6u0H?E$K0@cS+nMZgYVihx>*)^8hV)J~4Ti>W zaR)bfb0I`DL@Y2yPqgpJJ38T3w2i6it8AaC%JyPEy$lMZSJ0D6R4)J1B^RYHtqdoGrNbc0h i>|m{TX1o%ONOFxE&8bus`c-U|Q}zv0O0BpB0ssK7{BWfJ literal 0 HcmV?d00001 diff --git a/test/data/closures.rds b/test/data/closures.rds new file mode 100644 index 0000000000000000000000000000000000000000..818ca3b5f8fa16d5c5ed804d8c5ea472134e17d3 GIT binary patch literal 334 zcmV-U0kQrciwFP!000001I?37OT$1Ah9@6QoA`ks-gBumq=@NV`~#&o!D|T{2*gy9 z6q0`}UW=}?&1_629#jw<*v#y^yEC&-*hdclOt5SdOiP8A_`$%zff|4THn`}WvWN?j z;Nlt1X2%tPEAk=5AKe{c=z0l??NdY$x3vv zbRl>n@AP-I-5;xUJ2iMi{tUG#$82rh*9d3S$60qg>oC$dCp-tk*WLXOhz)*^QRCa5Y#LsKYpV4c}_cV}iF**6;ibTCXEbVG!5{9s_=Kn*|x6Q<~`EDcf? z;^H;VCe;eS9eIypkLH?kdW@&TC}tR%P}_&HWfVsV%P_cYi7SUJXIcIb9+OAs$w3$7 z(LPzaPRjCQnk-*lHyhVh5>7TZPGMj5=r$0<+e|i~lXyjq2m3?WzYC?XxKg5l zB@4kPd8@sv?!Kzl?3Ca&ei>?=_t~0iUp<_D8)wn*EIeQ09H}`Je9hhefLQ1EXa#;w zXN+S`;MzzvuNJ3r@kWTx?;!1`lou2)enI(OrM&J?UX`v>c;!FgTah}ctjjKIn|HC! lI1IcW@z&(~F-V+M41(Q3yxjrKAJEn!`8TTg)iRj`007Ocp&tMM literal 0 HcmV?d00001 diff --git a/test/data/envs.rds b/test/data/envs.rds new file mode 100644 index 0000000000000000000000000000000000000000..52bfaa55d2750b7328b97acba4d06f7230c51d4d GIT binary patch literal 172 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?&0-FsS4yY^qV?Ux$7^yLvgU6)5u~9IC zOTEL7MW{xhtvQ?dN=y>3lGI@tpM{4ro7ivg>ilO<+~IKd@Q?X5-xsKDQelfw`kc_f zn8l{FDRoU{=gyT!9oiT=n1y|=c%Rj7F8UE%%oli&yhPcW9VkiWoFpM9diFE69WSPVemD- literal 0 HcmV?d00001 diff --git a/test/data/minimal.rds b/test/data/minimal.rds new file mode 100644 index 0000000000000000000000000000000000000000..c50c234f22aad0d76a7ce7fb032320ea36674c94 GIT binary patch literal 132 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?o%*@79(jG8#-m0I&$+THULeYeoIl$Iq zUTdSU$L43M&z6ZXOEP&hovB>9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h0B|Z`-U|{J`i4!2fCD>5tXDUT%D_|?v zfRPiHjfrA637@Ez?7aB@yzu~l0e2ZNPH>9G0pRrZQAenJ`M8qYsv3VVxK~dwGNd@w zT+m#U&+G6_TAvGohR7XS(V%=^-q8uSqHR=FKaG!ZTIgQvrDmbF^hu>4`G)sZ>HbZoeKaAxXFNVf(tYT04Hyu&QN*yx|2Mv4ZgsBYL8}0ajLnY zAw_XV{=Dwb1c!!*16rp=`FcLm3AdtcWR;aArzERXFZR<*A|o0UOrlIwI=}Cdil{D1 zL~qpj7`w2ME03^M@;AmQi*t~RV^XPeFh!XU_t8mScU$?%lRVZ0mKv-Tuvhzy?`-}4 mN-BPlomo2|=f=WtLRHmF}Ii2 U!kT4GG84iWs*VdeJ_FhZ08|4dHUIzs literal 0 HcmV?d00001 diff --git a/test/data/types.rds b/test/data/types.rds new file mode 100644 index 0000000000000000000000000000000000000000..e6c2ff545ecfc63952c3c0020436bd47cc981ef2 GIT binary patch literal 220 zcmV<203-h&iwFP!000001AS0k4uUWYF54u5L?!VaTmg^a>l~mY1I)mmN3Wr$^9XtY z=6;xgJ=moCzII>Rb-Ne<5MY7;VNEyc2mr9gQ29zHv(H1@a~oJxxuvNyT7&T<)|Qx< zK%|lGM;0*E+AAfNTS3T+OToLp4Q2%k#%bsDzb++ZeJd0jl&_~NWG|Rm_I7O6HWKYM zZD?I|Zr8gyq?Cg{--#0QVK#}IbM0u=^dlv_Jyb5uj>3EFC}JmmvonzN){1l5)3g+7 WqFqg^h_6Cl~mY1I)mmN3Wr$^9XtY z=6;xgJ=moCzII>Rb-Ne<5MY7;VNEyc2mr9gQ29zHv(H1@a~oJxxuvNyT7&T<)|Qx< zK%|lGM;0*E+AAfNTS3T+OToLp4Q2%k#%bsDzb++ZeJd0jl&_~NWG|Rm_I7O6HWKYM zZD?I|Zr8gyq?Cg{--#0QVK#}IbM0u=^dlv_Jyb5uj>3EFC}JmmvonzN){1l5)3g+7 WqFqg^h_6C Date: Mon, 20 Nov 2017 18:11:48 -0800 Subject: [PATCH 4/6] update comments in generate_rda --- test/generate_rda.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/generate_rda.R b/test/generate_rda.R index 788595e..a1a7bba 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -1,4 +1,4 @@ -# R script to generate test .rda files +# R script to generate test .rda and .rds files df <- data.frame(num = c(1.1, 2.2)) save(df, file = "data/minimal.rda") @@ -50,7 +50,7 @@ test.cmpfun2 <- cmpfun( test.fun2 ) save(test.cmpfun0, test.cmpfun1, test.cmpfun2, file = "data/cmpfun.rda") -# for converting rda files to rds +# for converting rda files to rds to test with readRDS rdafiles = list.files("data/", pattern="*.rda", full.names=T) for (rdafile in rdafiles) { en = new.env() From de6e1875ab63441a4f9ed71003bec6759e055e1c Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:57:56 -0800 Subject: [PATCH 5/6] update readRDS to use new CodecZlib library --- src/RData.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 5d60620..5645b29 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -95,13 +95,21 @@ load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) # * maybe return tuple of (object, attribute_dict) for # https://github.com/JuliaStats/RData.jl/issues/30 function readRDS(f::AbstractString; kwoptions...) - obj = gzopen(f) do io + io = open(f, "r") + try + gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B # check GZip magic number + seekstart(io) + # if compressed, transcode gzipped stream + gzipped && (io = GzipDecompressorStream(io)) ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) @assert ctx.fmtver == 2 # format version convert2julia = get(ctx.kwdict,:convert,true) return convert2julia ? sexp2julia(readitem(ctx)) : readitem(ctx) + catch + rethrow() + finally + close(io) end - return obj end end # module From 4c2edd4e788a5e88ea07e0ed96c578aab865fc3d Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:58:13 -0800 Subject: [PATCH 6/6] update RDS tests to use testsets --- test/RDS.jl | 103 ++++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/test/RDS.jl b/test/RDS.jl index 735175b..e7f82fd 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -2,55 +2,64 @@ module TestRDS using Base.Test using DataFrames using RData - using Compat - - # think this is redundant for rds vs rda - # check for Float64 NA - # @test !RData.isna_float64(reinterpret(UInt64, 1.0)) - # @test !RData.isna_float64(reinterpret(UInt64, NaN)) - # @test !RData.isna_float64(reinterpret(UInt64, Inf)) - # @test !RData.isna_float64(reinterpret(UInt64, -Inf)) - # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) - # # check that alternative NA is also recognized (#10) - # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) testdir = dirname(@__FILE__) - df = DataFrame(num = [1.1, 2.2]) - @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) - @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) - @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) - - df[:int] = Int32[1, 2] - df[:logi] = [true, false] - df[:chr] = ["ab", "c"] - df[:factor] = pool(df[:chr]) - df[:cplx] = Complex128[1.1+0.5im, 1.0im] - @test isequal(sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"], df) - @test isequal(sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"], df) - - df[2, :] = NA - append!(df, df[2, :]) - df[3, :num] = NaN - df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN] - @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) - # ASCII format saves NaN as NA - df[3, :num] = NA - df[:, :cplx] = @data [NA, NA, NA] - @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) - - rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) - expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] - @test rds_names == expected_names - rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) - @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] - - rds_envs = readRDS("$testdir/data/envs.rds",convert=false) - - rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) - - rds_closures = readRDS("$testdir/data/closures.rds",convert=false) - - rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) + @testset "RDS: Reading minimal rds" begin + df = DataFrame(num = [1.1, 2.2]) + @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) + @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) + @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) + end + + @testset "RDS: Conversion to Julia types" begin + df = DataFrame(num = [1.1, 2.2], + int = Int32[1, 2], + logi = [true, false], + chr = ["ab", "c"], + factor = pool(["ab", "c"]), + cplx = Complex128[1.1+0.5im, 1.0im]) + rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"] + @test eltypes(rdf) == eltypes(df) + @test isequal(rdf, df) + rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"] + @test eltypes(rdf_ascii) == eltypes(df) + @test isequal(rdf_ascii, df) + end + + + @testset "RDS: NAs conversion" begin + df = DataFrame(num = [1.1, 2.2], + int = Int32[1, 2], + logi = [true, false], + chr = ["ab", "c"], + factor = pool(["ab", "c"]), + cplx = Complex128[1.1+0.5im, 1.0im]) + + df[2, :] = NA + append!(df, df[2, :]) + df[3, :num] = NaN + df[:, :cplx] = @data [NA, Complex128(1,NaN), NaN] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) + # ASCII format saves NaN as NA + df[3, :num] = NA + df[:, :cplx] = @data [NA, NA, NA] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) + end + + @testset "RDS: Column names conversion" begin + rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) + expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] + @test rds_names == expected_names + rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) + @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] + end + + @testset "RDS: Reading RDA with complex types (environments, closures etc)" begin + rds_envs = readRDS("$testdir/data/envs.rds",convert=false) + rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) + rds_closures = readRDS("$testdir/data/closures.rds",convert=false) + rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) + end end