From faddb458d2b58403ce34eb13413352f1b577a7ee Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Mon, 26 Sep 2016 18:39:52 -0400 Subject: [PATCH 1/9] Update to use NullableArrays and CategoricalArrays --- REQUIRE | 11 ++++++----- src/RCall.jl | 6 +++--- src/convert-base.jl | 8 ++++++++ src/convert-data.jl | 37 +++++++++++++++++----------------- src/convert-default.jl | 12 +++++------ src/methods.jl | 10 ++++++++-- test/dataframe.jl | 45 +++++++++++++++++++++++------------------- test/rstr.jl | 2 +- 8 files changed, 76 insertions(+), 55 deletions(-) diff --git a/REQUIRE b/REQUIRE index dbdc2913..4434a8e6 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,7 @@ -julia 0.4 -DataStructures -DataArrays 0.3.8 -DataFrames 0.7.6 +julia 0.5 +DataStructures 0.4.3 +DataFrames 0.8.3+ +NullableArrays 0.0.9 +CategoricalArrays 0.0.6 Compat 0.8 -@windows WinReg +@windows WinReg 0.2.0 diff --git a/src/RCall.jl b/src/RCall.jl index 6f13b7a1..8229a7be 100644 --- a/src/RCall.jl +++ b/src/RCall.jl @@ -1,11 +1,11 @@ __precompile__() module RCall -using Compat, DataFrames, DataArrays +using Compat, DataFrames, NullableArrays, CategoricalArrays import Compat.String import DataStructures: OrderedDict -import Base: eltype, show, convert, isascii, +import Base: eltype, show, convert, isascii, isnull, length, size, getindex, setindex!, start, next, done, names export RObject, @@ -13,7 +13,7 @@ export RObject, ListSxp, VecSxp, EnvSxp, LangSxp, ClosSxp, S4Sxp, getattrib, setattrib!, getnames, setnames!, getclass, setclass!, attributes, globalEnv, - isna, anyna, isnull, + isna, anyna, rcopy, rparse, rprint, reval, rcall, rlang, @rimport, @rlibrary, @rput, @rget, @var_str, @R_str diff --git a/src/convert-base.jl b/src/convert-base.jl index 4e2afc2f..311bc812 100644 --- a/src/convert-base.jl +++ b/src/convert-base.jl @@ -30,6 +30,14 @@ sexp(::Type{Cint},x) = convert(Cint,x) sexp(::Type{Float64},x) = convert(Float64,x) sexp(::Type{Complex128},x) = convert(Complex128,x) +# handle Nullable objects +function sexp{T}(x::Nullable{T}) + if x.isnull + return sexp(natype(T)) + else + return sexp(x.value) + end +end # NilSxp sexp(::Void) = sexp(Const.NilValue) diff --git a/src/convert-data.jl b/src/convert-data.jl index f1e509b1..6676ea19 100644 --- a/src/convert-data.jl +++ b/src/convert-data.jl @@ -1,34 +1,35 @@ -# conversion methods for DataArrays and DataFrames +# conversion methods for NullableArrays, CategoricalArrays and DataFrames -function rcopy{T,S<:VectorSxp}(::Type{DataArray{T}}, s::Ptr{S}) - DataArray(rcopy(Array{T},s), isna(s)) +function rcopy{T,S<:VectorSxp}(::Type{NullableArray{T}}, s::Ptr{S}) + NullableArray(rcopy(Array{T},s), isna(s)) end -function rcopy{S<:VectorSxp}(::Type{DataArray}, s::Ptr{S}) - DataArray(rcopy(Array,s), isna(s)) +function rcopy{S<:VectorSxp}(::Type{NullableArray}, s::Ptr{S}) + NullableArray(rcopy(Array,s), isna(s)) end -function rcopy(::Type{DataArray}, s::Ptr{IntSxp}) +function rcopy(::Type{NullableArray}, s::Ptr{IntSxp}) isFactor(s) && error("$s is a R factor") - DataArray(rcopy(Array,s), isna(s)) + NullableArray(rcopy(Array,s), isna(s)) end -function rcopy(::Type{PooledDataArray}, s::Ptr{IntSxp}) +function rcopy(::Type{NullableCategoricalArray}, s::Ptr{IntSxp}) isFactor(s) || error("$s is not a R factor") - refs = DataArrays.RefArray([isna(x) ? zero(Int32) : x for x in s]) - compact(PooledDataArray(refs,rcopy(getattrib(s,Const.LevelsSymbol)))) + refs = UInt32[isna(x) ? zero(UInt32) : UInt32(x) for x in s] + pool = CategoricalPool(rcopy(getattrib(s,Const.LevelsSymbol))) + NullableCategoricalArray(refs, pool) end function rcopy(::Type{DataFrame}, s::Ptr{VecSxp}) isFrame(s) || error("s is not a R data frame") - DataFrame(Any[isFactor(c)? rcopy(PooledDataArray, c) : rcopy(DataArray, c) for c in s], + DataFrame(Any[isFactor(c)? rcopy(NullableCategoricalArray, c) : rcopy(NullableArray, c) for c in s], rcopy(Array{Symbol},getnames(s))) end -## DataArray to sexp conversion. -function sexp(v::DataArray) - rv = protect(sexp(v.data)) +## NullableArray to sexp conversion. +function sexp(v::NullableArray) + rv = protect(sexp(v.values)) try - for (i,isna) = enumerate(v.na) + for (i,isna) = enumerate(v.isnull) if isna rv[i] = naeltype(eltype(rv)) end @@ -39,10 +40,10 @@ function sexp(v::DataArray) rv end -## PooledDataArray to sexp conversion. -function sexp{T<:Compat.String,R<:Integer}(v::PooledDataArray{T,R}) +## NullableCategoricalArray to sexp conversion. +function sexp{T<:Compat.String,N,R<:Integer}(v::NullableCategoricalArray{T,N,R}) rv = sexp(v.refs) - setattrib!(rv, Const.LevelsSymbol, sexp(v.pool)) + setattrib!(rv, Const.LevelsSymbol, sexp(v.pool.levels)) setattrib!(rv, Const.ClassSymbol, sexp("factor")) rv end diff --git a/src/convert-default.jl b/src/convert-default.jl index 684199a7..81037acd 100644 --- a/src/convert-default.jl +++ b/src/convert-default.jl @@ -8,7 +8,7 @@ rcopy(s::CharSxpPtr) = rcopy(Compat.String,s) function rcopy(s::StrSxpPtr) if anyna(s) - rcopy(DataArray,s) + rcopy(NullableArray,s) elseif length(s) == 1 rcopy(Compat.String,s) else @@ -17,7 +17,7 @@ function rcopy(s::StrSxpPtr) end function rcopy(s::RealSxpPtr) if anyna(s) - rcopy(DataArray{Float64},s) + rcopy(NullableArray{Float64},s) elseif length(s) == 1 rcopy(Float64,s) else @@ -26,7 +26,7 @@ function rcopy(s::RealSxpPtr) end function rcopy(s::CplxSxpPtr) if anyna(s) - rcopy(DataArray{Complex128},s) + rcopy(NullableArray{Complex128},s) elseif length(s) == 1 rcopy(Complex128,s) else @@ -35,7 +35,7 @@ function rcopy(s::CplxSxpPtr) end function rcopy(s::LglSxpPtr) if anyna(s) - rcopy(DataArray{Bool},s) + rcopy(NullableArray{Bool},s) elseif length(s) == 1 rcopy(Bool,s) else @@ -44,9 +44,9 @@ function rcopy(s::LglSxpPtr) end function rcopy(s::IntSxpPtr) if isFactor(s) - rcopy(PooledDataArray,s) + rcopy(NullableCategoricalArray,s) elseif anyna(s) - rcopy(DataArray{Int},s) + rcopy(NullableArray{Int},s) elseif length(s) == 1 rcopy(Cint,s) else diff --git a/src/methods.jl b/src/methods.jl index f0c3952f..fb209b4e 100644 --- a/src/methods.jl +++ b/src/methods.jl @@ -140,7 +140,9 @@ start{S<:VectorSxp}(s::Ptr{S}) = 0 next{S<:VectorSxp}(s::Ptr{S},state) = (state += 1;(s[state],state)) done{S<:VectorSxp}(s::Ptr{S},state) = state ≥ length(s) - +start{S<:VectorSxp}(s::RObject{S}) = start(s.p) +next{S<:VectorSxp}(s::RObject{S},state) = next(s.p, state) +done{S<:VectorSxp}(s::RObject{S},state) = done(s.p, state) # PairListSxps @@ -298,6 +300,10 @@ naeltype(::Type{CplxSxp}) = complex(Const.NaReal,Const.NaReal) naeltype(::Type{StrSxp}) = sexp(Const.NaString) naeltype(::Type{VecSxp}) = sexp(LglSxp,Const.NaInt) # used for setting +natype{S<:Integer}(::Type{S}) = Const.NaInt +natype{S<:Real}(::Type{S}) = Const.NaReal +natype(::Type{Complex}) = complex(Const.NaReal,Const.NaReal) +natype{S<:Compat.String}(::Type{S}) = sexp(Const.NaString) """ Check if values correspond to R's sentinel NA values. @@ -310,7 +316,7 @@ isna(s::CharSxpPtr) = s === sexp(Const.NaString) # this doesn't allow us to check VecSxp s function isna{S<:VectorSxp}(s::Ptr{S}) - b = BitArray(size(s)...) + b = Array{Bool}(size(s)...) for (i,e) in enumerate(s) b[i] = isna(e) end diff --git a/test/dataframe.jl b/test/dataframe.jl index 18fabc08..f4a0dd69 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,31 +1,36 @@ -using DataArrays,DataFrames +using NullableArrays,CategoricalArrays,DataFrames -v110 = rcopy(DataArray,reval("x <- 1:10")) -@test isa(v110,DataVector) -@test eltype(v110) == Cint +v110 = rcopy(NullableArray,reval("c(1L, NA)")) +@test isa(v110,NullableVector) +@test eltype(v110) == Nullable{Int32} +@test rcopy(NullableArray, RObject(v110[2]))[1].isnull attenu = rcopy(DataFrame,:attenu) @test isa(attenu,DataFrame) @test size(attenu) == (182,5) dist = attenu[:dist] -@test isa(dist,DataArray{Float64}) +@test isa(dist,NullableArray{Float64}) -@test rcopy(DataArray,"c(NA,TRUE)").na == @data([NA,true]).na -@test rcopy(DataArray,"c(NA,1)").na == @data([NA,1.0]).na -@test rcopy(DataArray,"c(NA,1+0i)").na == @data([NA,1.0+0.0*im]).na -@test rcopy(DataArray,"c(NA,1L)").na == @data([NA,one(Int32)]).na -@test rcopy(DataArray,"c(NA,'NA')").na == @data([NA,"NA"]).na -@test_throws ErrorException rcopy(DataArray,"as.factor(c('a','a','c'))") -@test rcopy(PooledDataArray,"as.factor(c('a','a','c'))").pool == ["a","c"] +@test rcopy(NullableArray,"c(NA,TRUE)").isnull == NullableArray([Nullable(),true]).isnull +@test rcopy(NullableArray,"c(NA,1)").isnull == NullableArray([Nullable(),1.0]).isnull +@test rcopy(NullableArray,"c(NA,1+0i)").isnull == NullableArray([Nullable(),1.0+0.0*im]).isnull +@test rcopy(NullableArray,"c(NA,1L)").isnull == NullableArray([Nullable(),one(Int32)]).isnull +@test rcopy(NullableArray,"c(NA,'NA')").isnull == NullableArray([Nullable(),"NA"]).isnull +@test_throws ErrorException rcopy(NullableArray,"as.factor(c('a','a','c'))") +@test rcopy(NullableCategoricalArray,"as.factor(c('a','a','c'))").pool.levels == ["a","c"] -@test rcopy(DataArray,RObject(@data([NA,true]))).na == @data([NA,true]).na -@test rcopy(DataArray,RObject(@data([NA,1]))).na == @data([NA,1]).na -@test rcopy(DataArray,RObject(@data([NA,1.]))).na == @data([NA,1.]).na -@test rcopy(DataArray,RObject(@data([NA,1.+0*im]))).na == @data([NA,1.+0*im]).na -@test rcopy(DataArray,RObject(@data([NA,NA,"a","b"]))).na == @data([NA,NA,"a","b"]).na -pda = PooledDataArray(repeat(["a", "b"], inner = [5])) -@test rcopy(PooledDataArray,RObject(pda)).refs == repeat([1,2], inner = [5]) +v = NullableArray([true,true], [true,false]) +@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +v = NullableArray([1,2], [true,false]) +@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +v = NullableArray([1.,2.], [true,false]) +@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +v = NullableArray([0,1.+0*im], [true,false]) +@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +v = NullableArray(["","abc"], [true,false]) +@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +pda = NullableCategoricalArray(repeat(["a", "b"], inner = [5])) +@test rcopy(NullableCategoricalArray,RObject(pda)).refs == repeat([1,2], inner = [5]) @test rcopy(rcall(:dim,RObject(attenu))) == [182,5] - diff --git a/test/rstr.jl b/test/rstr.jl index 4d4ad6d2..e5ec3f43 100644 --- a/test/rstr.jl +++ b/test/rstr.jl @@ -23,4 +23,4 @@ using RCall iris = rcopy(:iris) model = R"lm(Sepal.Length ~ Sepal.Width,data=$iris)" @test rcopy(RCall.getclass(model)) == "lm" -@test isapprox(rcopy(R"sum($iris$Sepal.Length)"), sum(iris[Symbol("Sepal.Length")]), rtol=4*eps()) +@test isapprox(rcopy(R"sum($iris$Sepal.Length)"), sum(iris[Symbol("Sepal.Length")]).value, rtol=4*eps()) From 6f5937da738807b3c7922565afb4b5bb6b9369c3 Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Mon, 26 Sep 2016 21:38:03 -0400 Subject: [PATCH 2/9] julia 0.5 above only --- .travis.yml | 1 - appveyor.yml | 2 -- 2 files changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3acf5913..74ec9ffd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ os: - linux - osx julia: - - 0.4 - 0.5 - nightly notifications: diff --git a/appveyor.yml b/appveyor.yml index 3f5501a2..a41638e6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,5 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" From 7293165c21dfc636b29ad1091133e2e7ff860cca Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Mon, 26 Sep 2016 21:47:37 -0400 Subject: [PATCH 3/9] DataFrames 0.8.3 is okay --- REQUIRE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIRE b/REQUIRE index 4434a8e6..c7fecd31 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,6 @@ julia 0.5 DataStructures 0.4.3 -DataFrames 0.8.3+ +DataFrames 0.8.3 NullableArrays 0.0.9 CategoricalArrays 0.0.6 Compat 0.8 From 51cd71cea85459c6be40cf8dc2a9e62ad8a6e719 Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Mon, 26 Sep 2016 23:06:04 -0400 Subject: [PATCH 4/9] improve CategoricalArray and NullableCategoricalArray --- src/convert-data.jl | 42 ++++++++++++++++++++++++++++++++---------- src/convert-default.jl | 6 +++++- test/dataframe.jl | 29 ++++++++++++++++++++--------- test/rstr.jl | 2 +- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/src/convert-data.jl b/src/convert-data.jl index 6676ea19..c437af82 100644 --- a/src/convert-data.jl +++ b/src/convert-data.jl @@ -11,17 +11,23 @@ function rcopy(::Type{NullableArray}, s::Ptr{IntSxp}) isFactor(s) && error("$s is a R factor") NullableArray(rcopy(Array,s), isna(s)) end +function rcopy(::Type{CategoricalArray}, s::Ptr{IntSxp}) + isFactor(s) || error("$s is not a R factor") + refs = UInt32[x for x in s] + levels = rcopy(getattrib(s,Const.LevelsSymbol)) + pool = CategoricalPool(levels, isOrdered(s)) + CategoricalArray(refs, pool) +end function rcopy(::Type{NullableCategoricalArray}, s::Ptr{IntSxp}) isFactor(s) || error("$s is not a R factor") refs = UInt32[isna(x) ? zero(UInt32) : UInt32(x) for x in s] - pool = CategoricalPool(rcopy(getattrib(s,Const.LevelsSymbol))) + levels = rcopy(getattrib(s,Const.LevelsSymbol)) + pool = CategoricalPool(levels, isOrdered(s)) NullableCategoricalArray(refs, pool) end - function rcopy(::Type{DataFrame}, s::Ptr{VecSxp}) isFrame(s) || error("s is not a R data frame") - DataFrame(Any[isFactor(c)? rcopy(NullableCategoricalArray, c) : rcopy(NullableArray, c) for c in s], - rcopy(Array{Symbol},getnames(s))) + DataFrame(Any[rcopy(c) for c in s], rcopy(Array{Symbol},getnames(s))) end @@ -40,12 +46,28 @@ function sexp(v::NullableArray) rv end -## NullableCategoricalArray to sexp conversion. -function sexp{T<:Compat.String,N,R<:Integer}(v::NullableCategoricalArray{T,N,R}) - rv = sexp(v.refs) - setattrib!(rv, Const.LevelsSymbol, sexp(v.pool.levels)) - setattrib!(rv, Const.ClassSymbol, sexp("factor")) - rv +## CategoricalArray to sexp conversion. +for typ in [:NullableCategoricalArray, :CategoricalArray] + @eval begin + function sexp{T<:Compat.String,N,R<:Integer}(v::$typ{T,N,R}) + rv = protect(sexp(v.refs)) + try + for (i,isna) = enumerate(v.refs .== 0) + if isna + rv[i] = naeltype(eltype(rv)) + end + end + setattrib!(rv, Const.LevelsSymbol, sexp(v.pool.index)) + setattrib!(rv, Const.ClassSymbol, sexp(["factor"])) + if v.pool.ordered + rv = rcall(:ordered, rv, v.pool.levels) + end + finally + unprotect(1) + end + rv + end + end end ## DataFrame to sexp conversion. diff --git a/src/convert-default.jl b/src/convert-default.jl index 81037acd..d3cb3812 100644 --- a/src/convert-default.jl +++ b/src/convert-default.jl @@ -44,7 +44,11 @@ function rcopy(s::LglSxpPtr) end function rcopy(s::IntSxpPtr) if isFactor(s) - rcopy(NullableCategoricalArray,s) + if anyna(s) + rcopy(NullableCategoricalArray,s) + else + rcopy(CategoricalArray,s) + end elseif anyna(s) rcopy(NullableArray{Int},s) elseif length(s) == 1 diff --git a/test/dataframe.jl b/test/dataframe.jl index f4a0dd69..fff1a154 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -10,15 +10,20 @@ attenu = rcopy(DataFrame,:attenu) @test size(attenu) == (182,5) dist = attenu[:dist] -@test isa(dist,NullableArray{Float64}) +@test isa(dist,Vector{Float64}) +station = attenu[:station] +@test isa(station,NullableCategoricalArray) -@test rcopy(NullableArray,"c(NA,TRUE)").isnull == NullableArray([Nullable(),true]).isnull -@test rcopy(NullableArray,"c(NA,1)").isnull == NullableArray([Nullable(),1.0]).isnull -@test rcopy(NullableArray,"c(NA,1+0i)").isnull == NullableArray([Nullable(),1.0+0.0*im]).isnull -@test rcopy(NullableArray,"c(NA,1L)").isnull == NullableArray([Nullable(),one(Int32)]).isnull -@test rcopy(NullableArray,"c(NA,'NA')").isnull == NullableArray([Nullable(),"NA"]).isnull +@test rcopy(NullableArray,"c(NA,TRUE)").isnull == NullableArray([true,true], [true,false]).isnull +@test rcopy(NullableArray,"c(NA,1)").isnull == NullableArray([true,1.], [true,false]).isnull +@test rcopy(NullableArray,"c(NA,1+0i)").isnull == NullableArray([true,1.+0*im], [true,false]).isnull +@test rcopy(NullableArray,"c(NA,1L)").isnull == NullableArray([true,one(Int32)], [true,false]).isnull +@test rcopy(NullableArray,"c(NA,'NA')").isnull == NullableArray(["", "NA"], [true,false]).isnull @test_throws ErrorException rcopy(NullableArray,"as.factor(c('a','a','c'))") -@test rcopy(NullableCategoricalArray,"as.factor(c('a','a','c'))").pool.levels == ["a","c"] +@test rcopy(CategoricalArray,"factor(c('a','a','c'))").pool.levels == ["a","c"] +@test rcopy(NullableCategoricalArray,"factor(c('a',NA,'c'))").pool.levels == ["a","c"] +@test rcopy(CategoricalArray,"ordered(c('a','a','c'))").pool.ordered +@test rcopy(NullableCategoricalArray,"ordered(c('a',NA,'c'))").pool.ordered v = NullableArray([true,true], [true,false]) @test rcopy(NullableArray,RObject(v)).isnull == v.isnull @@ -30,7 +35,13 @@ v = NullableArray([0,1.+0*im], [true,false]) @test rcopy(NullableArray,RObject(v)).isnull == v.isnull v = NullableArray(["","abc"], [true,false]) @test rcopy(NullableArray,RObject(v)).isnull == v.isnull -pda = NullableCategoricalArray(repeat(["a", "b"], inner = [5])) -@test rcopy(NullableCategoricalArray,RObject(pda)).refs == repeat([1,2], inner = [5]) +ca = CategoricalArray(repeat(["a", "b"], inner = 5)) +@test rcopy(CategoricalArray,RObject(ca)).refs == ca.refs +nca = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5)) +@test rcopy(NullableCategoricalArray,RObject(nca)).refs == nca.refs +ca = CategoricalArray(repeat(["a", "b"], inner = 5), ordered=true) +@test rcopy(CategoricalArray,RObject(ca)).pool.ordered +nca = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5), ordered=true) +@test rcopy(NullableCategoricalArray,RObject(ca)).pool.ordered @test rcopy(rcall(:dim,RObject(attenu))) == [182,5] diff --git a/test/rstr.jl b/test/rstr.jl index e5ec3f43..4d4ad6d2 100644 --- a/test/rstr.jl +++ b/test/rstr.jl @@ -23,4 +23,4 @@ using RCall iris = rcopy(:iris) model = R"lm(Sepal.Length ~ Sepal.Width,data=$iris)" @test rcopy(RCall.getclass(model)) == "lm" -@test isapprox(rcopy(R"sum($iris$Sepal.Length)"), sum(iris[Symbol("Sepal.Length")]).value, rtol=4*eps()) +@test isapprox(rcopy(R"sum($iris$Sepal.Length)"), sum(iris[Symbol("Sepal.Length")]), rtol=4*eps()) From 7cfcb07ccc0d0d33115020d9de02e6ff20226c32 Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Tue, 27 Sep 2016 00:02:42 -0400 Subject: [PATCH 5/9] handle Nullable objects --- src/convert-base.jl | 9 --------- src/convert-data.jl | 22 ++++++++++++++++++++++ test/dataframe.jl | 5 +++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/convert-base.jl b/src/convert-base.jl index 311bc812..cbed6a70 100644 --- a/src/convert-base.jl +++ b/src/convert-base.jl @@ -30,15 +30,6 @@ sexp(::Type{Cint},x) = convert(Cint,x) sexp(::Type{Float64},x) = convert(Float64,x) sexp(::Type{Complex128},x) = convert(Complex128,x) -# handle Nullable objects -function sexp{T}(x::Nullable{T}) - if x.isnull - return sexp(natype(T)) - else - return sexp(x.value) - end -end - # NilSxp sexp(::Void) = sexp(Const.NilValue) rcopy(::Ptr{NilSxp}) = nothing diff --git a/src/convert-data.jl b/src/convert-data.jl index c437af82..bf2443b0 100644 --- a/src/convert-data.jl +++ b/src/convert-data.jl @@ -1,5 +1,18 @@ # conversion methods for NullableArrays, CategoricalArrays and DataFrames +function rcopy{T,S<:Sxp}(::Type{Nullable{T}}, s::Ptr{S}) + length(s) == 1 || error("length of $s must be 1.") + rcopy(NullableArray{T}, s)[1] +end + +function rcopy{S<:VectorSxp}(::Type{Nullable}, s::Ptr{S}) + rcopy(Nullable{eltype(S)}, s) +end + +function rcopy{S<:StrSxp}(::Type{Nullable}, s::Ptr{S}) + rcopy(Nullable{Compat.String}, s) +end + function rcopy{T,S<:VectorSxp}(::Type{NullableArray{T}}, s::Ptr{S}) NullableArray(rcopy(Array{T},s), isna(s)) end @@ -31,6 +44,15 @@ function rcopy(::Type{DataFrame}, s::Ptr{VecSxp}) end +# Nullable to sexp conversion. +function sexp{T}(x::Nullable{T}) + if x.isnull + return sexp(natype(T)) + else + return sexp(x.value) + end +end + ## NullableArray to sexp conversion. function sexp(v::NullableArray) rv = protect(sexp(v.values)) diff --git a/test/dataframe.jl b/test/dataframe.jl index fff1a154..d2903245 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,5 +1,10 @@ using NullableArrays,CategoricalArrays,DataFrames +@test rcopy(Nullable, RObject(1)).value == 1 +@test rcopy(Nullable, RObject("abc")).value == "abc" +@test rcopy(RObject(Nullable(1))) == 1 +@test rcopy(Nullable, RObject(Nullable(1, true))).isnull + v110 = rcopy(NullableArray,reval("c(1L, NA)")) @test isa(v110,NullableVector) @test eltype(v110) == Nullable{Int32} From c98e28d5ef9f8909bf7a5f79bc4577bfbabb056b Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Tue, 27 Sep 2016 11:20:18 -0400 Subject: [PATCH 6/9] improve field access and tests --- src/convert-data.jl | 15 ++++++++------- test/dataframe.jl | 40 ++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/convert-data.jl b/src/convert-data.jl index bf2443b0..61add59d 100644 --- a/src/convert-data.jl +++ b/src/convert-data.jl @@ -46,7 +46,7 @@ end # Nullable to sexp conversion. function sexp{T}(x::Nullable{T}) - if x.isnull + if isnull(x) return sexp(natype(T)) else return sexp(x.value) @@ -57,7 +57,7 @@ end function sexp(v::NullableArray) rv = protect(sexp(v.values)) try - for (i,isna) = enumerate(v.isnull) + for (i,isna) = enumerate(isnull(v)) if isna rv[i] = naeltype(eltype(rv)) end @@ -74,15 +74,16 @@ for typ in [:NullableCategoricalArray, :CategoricalArray] function sexp{T<:Compat.String,N,R<:Integer}(v::$typ{T,N,R}) rv = protect(sexp(v.refs)) try - for (i,isna) = enumerate(v.refs .== 0) - if isna + for (i,ref) = enumerate(v.refs) + if ref == 0 rv[i] = naeltype(eltype(rv)) end end - setattrib!(rv, Const.LevelsSymbol, sexp(v.pool.index)) + # due to a bug of CategoricalArrays, we use index(v.pool) instead of index(v) + setattrib!(rv, Const.LevelsSymbol, sexp(CategoricalArrays.index(v.pool))) setattrib!(rv, Const.ClassSymbol, sexp(["factor"])) - if v.pool.ordered - rv = rcall(:ordered, rv, v.pool.levels) + if CategoricalArrays.ordered(v) + rv = rcall(:ordered, rv, CategoricalArrays.levels(v)) end finally unprotect(1) diff --git a/test/dataframe.jl b/test/dataframe.jl index d2903245..beab301f 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -3,12 +3,12 @@ using NullableArrays,CategoricalArrays,DataFrames @test rcopy(Nullable, RObject(1)).value == 1 @test rcopy(Nullable, RObject("abc")).value == "abc" @test rcopy(RObject(Nullable(1))) == 1 -@test rcopy(Nullable, RObject(Nullable(1, true))).isnull +@test isnull(rcopy(Nullable, RObject(Nullable(1, true)))) v110 = rcopy(NullableArray,reval("c(1L, NA)")) @test isa(v110,NullableVector) @test eltype(v110) == Nullable{Int32} -@test rcopy(NullableArray, RObject(v110[2]))[1].isnull +@test isnull(rcopy(NullableArray, RObject(v110[2]))[1]) attenu = rcopy(DataFrame,:attenu) @test isa(attenu,DataFrame) @@ -19,11 +19,11 @@ dist = attenu[:dist] station = attenu[:station] @test isa(station,NullableCategoricalArray) -@test rcopy(NullableArray,"c(NA,TRUE)").isnull == NullableArray([true,true], [true,false]).isnull -@test rcopy(NullableArray,"c(NA,1)").isnull == NullableArray([true,1.], [true,false]).isnull -@test rcopy(NullableArray,"c(NA,1+0i)").isnull == NullableArray([true,1.+0*im], [true,false]).isnull -@test rcopy(NullableArray,"c(NA,1L)").isnull == NullableArray([true,one(Int32)], [true,false]).isnull -@test rcopy(NullableArray,"c(NA,'NA')").isnull == NullableArray(["", "NA"], [true,false]).isnull +@test isequal(rcopy(NullableArray,"c(NA,TRUE)"), NullableArray([true,true], [true,false])) +@test isequal(rcopy(NullableArray,"c(NA,1)"), NullableArray([true,1.], [true,false])) +@test isequal(rcopy(NullableArray,"c(NA,1+0i)"), NullableArray([true,1.+0*im], [true,false])) +@test isequal(rcopy(NullableArray,"c(NA,1L)"), NullableArray([true,one(Int32)], [true,false])) +@test isequal(rcopy(NullableArray,"c(NA,'NA')"), NullableArray(["", "NA"], [true,false])) @test_throws ErrorException rcopy(NullableArray,"as.factor(c('a','a','c'))") @test rcopy(CategoricalArray,"factor(c('a','a','c'))").pool.levels == ["a","c"] @test rcopy(NullableCategoricalArray,"factor(c('a',NA,'c'))").pool.levels == ["a","c"] @@ -31,22 +31,22 @@ station = attenu[:station] @test rcopy(NullableCategoricalArray,"ordered(c('a',NA,'c'))").pool.ordered v = NullableArray([true,true], [true,false]) -@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +@test isequal(rcopy(NullableArray,RObject(v)), v) v = NullableArray([1,2], [true,false]) -@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +@test isequal(rcopy(NullableArray,RObject(v)), v) v = NullableArray([1.,2.], [true,false]) -@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +@test isequal(rcopy(NullableArray,RObject(v)), v) v = NullableArray([0,1.+0*im], [true,false]) -@test rcopy(NullableArray,RObject(v)).isnull == v.isnull +@test isequal(rcopy(NullableArray,RObject(v)), v) v = NullableArray(["","abc"], [true,false]) -@test rcopy(NullableArray,RObject(v)).isnull == v.isnull -ca = CategoricalArray(repeat(["a", "b"], inner = 5)) -@test rcopy(CategoricalArray,RObject(ca)).refs == ca.refs -nca = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5)) -@test rcopy(NullableCategoricalArray,RObject(nca)).refs == nca.refs -ca = CategoricalArray(repeat(["a", "b"], inner = 5), ordered=true) -@test rcopy(CategoricalArray,RObject(ca)).pool.ordered -nca = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5), ordered=true) -@test rcopy(NullableCategoricalArray,RObject(ca)).pool.ordered +@test isequal(rcopy(NullableArray,RObject(v)), v) +v = CategoricalArray(repeat(["a", "b"], inner = 5)) +@test isequal(rcopy(CategoricalArray,RObject(v)), v) +v = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5)) +@test isequal(rcopy(NullableCategoricalArray,RObject(v)), v) +v = CategoricalArray(repeat(["a", "b"], inner = 5), ordered=true) +@test isequal(rcopy(CategoricalArray,RObject(v)), v) +v = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5), ordered=true) +@test isequal(rcopy(NullableCategoricalArray,RObject(v)), v) @test rcopy(rcall(:dim,RObject(attenu))) == [182,5] From c5eec816049efd6d2d8f6ac05a3f79b9d96bbad8 Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Tue, 27 Sep 2016 11:28:28 -0400 Subject: [PATCH 7/9] more test improvement --- test/dataframe.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/dataframe.jl b/test/dataframe.jl index beab301f..7e9c6bda 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -25,10 +25,10 @@ station = attenu[:station] @test isequal(rcopy(NullableArray,"c(NA,1L)"), NullableArray([true,one(Int32)], [true,false])) @test isequal(rcopy(NullableArray,"c(NA,'NA')"), NullableArray(["", "NA"], [true,false])) @test_throws ErrorException rcopy(NullableArray,"as.factor(c('a','a','c'))") -@test rcopy(CategoricalArray,"factor(c('a','a','c'))").pool.levels == ["a","c"] -@test rcopy(NullableCategoricalArray,"factor(c('a',NA,'c'))").pool.levels == ["a","c"] -@test rcopy(CategoricalArray,"ordered(c('a','a','c'))").pool.ordered -@test rcopy(NullableCategoricalArray,"ordered(c('a',NA,'c'))").pool.ordered +@test CategoricalArrays.levels(rcopy(CategoricalArray,"factor(c('a','a','c'))")) == ["a","c"] +@test CategoricalArrays.levels(rcopy(NullableCategoricalArray,"factor(c('a',NA,'c'))")) == ["a","c"] +@test CategoricalArrays.ordered(rcopy(CategoricalArray,"ordered(c('a','a','c'))")) +@test CategoricalArrays.ordered(rcopy(NullableCategoricalArray,"ordered(c('a',NA,'c'))")) v = NullableArray([true,true], [true,false]) @test isequal(rcopy(NullableArray,RObject(v)), v) From 2f762d7726632ddf68a22f216f4d2474533832ff Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Tue, 27 Sep 2016 13:55:15 -0400 Subject: [PATCH 8/9] scalar is okay --- src/convert-data.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/convert-data.jl b/src/convert-data.jl index 61add59d..9cac9b8d 100644 --- a/src/convert-data.jl +++ b/src/convert-data.jl @@ -81,7 +81,7 @@ for typ in [:NullableCategoricalArray, :CategoricalArray] end # due to a bug of CategoricalArrays, we use index(v.pool) instead of index(v) setattrib!(rv, Const.LevelsSymbol, sexp(CategoricalArrays.index(v.pool))) - setattrib!(rv, Const.ClassSymbol, sexp(["factor"])) + setattrib!(rv, Const.ClassSymbol, sexp("factor")) if CategoricalArrays.ordered(v) rv = rcall(:ordered, rv, CategoricalArrays.levels(v)) end From 371f8cf2c6e60425c9eb69b7a65b80edabfcec05 Mon Sep 17 00:00:00 2001 From: Randy Lai Date: Tue, 27 Sep 2016 15:14:48 -0400 Subject: [PATCH 9/9] improve tests for Nullable --- test/dataframe.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/dataframe.jl b/test/dataframe.jl index 7e9c6bda..b4a48e60 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,7 +1,7 @@ using NullableArrays,CategoricalArrays,DataFrames -@test rcopy(Nullable, RObject(1)).value == 1 -@test rcopy(Nullable, RObject("abc")).value == "abc" +@test isequal(rcopy(Nullable, RObject(1)), Nullable(1)) +@test isequal(rcopy(Nullable, RObject("abc")), Nullable("abc")) @test rcopy(RObject(Nullable(1))) == 1 @test isnull(rcopy(Nullable, RObject(Nullable(1, true))))