Skip to content

Commit

Permalink
Merge 6882872 into 1d40335
Browse files Browse the repository at this point in the history
  • Loading branch information
alyst committed Oct 1, 2016
2 parents 1d40335 + 6882872 commit 23aa933
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 49 deletions.
7 changes: 3 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ os:
- linux
- osx
julia:
- 0.4
- 0.5
- nightly
notifications:
email: false
# uncomment the following lines to override the default test script
#script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("RData"); Pkg.test("RData"; coverage=true)'
script:
- if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
- julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("RData"); Pkg.checkout("DataFrames", "master"); Pkg.test("RData"; coverage=true)'
after_success:
- julia -e 'cd(Pkg.dir("RData")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## RData v0.1.0 Release Notes

Updated to DataFrames 0.9, dropped Julia v0.4 support

##### Changes
* using NullableArrays.jl and CategoricalArrays.jl
instead of DataArrays.jl ([#19], see [JuliaStats/DataFrames.jl#1008])
* Julia v0.4 is no longer supported (required by DataFrames.jl v0.9)

## RData v0.0.4 Release Notes

Now the recommended way to load `.RData`/`.rda` files is by `FileIO.load()`.
Expand All @@ -15,5 +24,7 @@ Initial release based on `DataFrames.read_rda()` ([JuliaStats/DataFrames.jl#1031
[#9]: https://github.com/JuliaStats/RData.jl/issues/9
[#10]: https://github.com/JuliaStats/RData.jl/issues/10
[#15]: https://github.com/JuliaStats/RData.jl/issues/15
[#19]: https://github.com/JuliaStats/RData.jl/issues/19

[JuliaStats/DataFrames.jl#1008]: https://github.com/JuliaStats/DataFrames.jl/pull/1008
[JuliaStats/DataFrames.jl#1031]: https://github.com/JuliaStats/DataFrames.jl/pull/1031
5 changes: 2 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
julia 0.4
DataFrames 0.7
DataArrays 0.3
julia 0.5
DataFrames 0.8+
FileIO 0.1.2
GZip 0.2
Compat 0.8
4 changes: 1 addition & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
Expand Down Expand Up @@ -35,7 +33,7 @@ build_script:
# Need to convert from shallow to complete for Pkg.clone to work
- IF EXIST .git\shallow (git fetch --unshallow)
- C:\projects\julia\bin\julia -F -e "versioninfo();
Pkg.clone(pwd(), \"RData\"); Pkg.build(\"RData\")"
Pkg.clone(pwd(), \"RData\"); Pkg.build(\"RData\"); Pkg.checkout(\"DataFrames\", \"master\")";

test_script:
- C:\projects\julia\bin\julia -e "Pkg.test(\"RData\")"
1 change: 0 additions & 1 deletion src/RData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ __precompile__()
module RData

using Compat, DataFrames, GZip, FileIO
import DataArrays: data
import DataFrames: identifier
import Compat: UTF8String, unsafe_string
import FileIO: load
Expand Down
74 changes: 45 additions & 29 deletions src/convert.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,53 @@ end

##############################################################################
##
## Conversion of intermediate R objects into DataArray and DataFrame objects
## Conversion of intermediate R objects into NullableArray and DataFrame objects
##
##############################################################################

namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32)
namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32)
namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data)))
namask(ri::RVector{Int32}) = [i == R_NA_INT32 for i in ri.data]
namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data))
# if re or im is NA, the whole complex number is NA
# FIXME avoid temporary Vector{Bool}
namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)])
namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]
namask(rv::RNullableVector) = rv.na

DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv))
# Build the Julia representation of the R vector `rv`.
#
# `T` is the element type used for the values when they are wrapped.
# When `force_nullable` is false and `namask(rv)` flags no element,
# `rv.data` is returned as is; otherwise the values are converted to
# `Vector{T}` and wrapped, together with the NA mask, in a NullableArray.
function _julia_vector{T}(::Type{T}, rv::RVEC, force_nullable::Bool)
    isna = namask(rv)
    if !force_nullable && !any(isna)
        # nothing is missing and the caller did not insist on a nullable container
        return rv.data
    end
    return NullableArray(convert(Vector{T}, rv.data), isna)
end

# Convert an R vector into a Julia vector, using the element type of
# `rv.data` for the values. The result is a NullableArray when
# `force_nullable` is true or the vector contains NAs; otherwise the
# underlying data array is returned.
function julia_vector(rv::RVEC, force_nullable::Bool)
    return _julia_vector(eltype(rv.data), rv, force_nullable)
end

# Convert an R logical vector into a vector of `Bool`.
#
# Every stored value different from 0 becomes `true`. The result is wrapped
# in a NullableArray (with the mask from `namask`) when `force_nullable` is
# true or at least one element is flagged as NA; otherwise the plain
# `Vector{Bool}` is returned.
function julia_vector(rl::RLogicalVector, force_nullable::Bool)
    flags = Bool[x != 0 for x in rl.data]
    isna = namask(rl)
    if force_nullable || any(isna)
        return NullableArray(flags, isna)
    else
        return flags
    end
end

# Convert every element of `v` to type `R`; elements equal to R_NA_INT32
# are replaced by `zero(R)` instead of being converted.
function na2zero{R}(::Type{R}, v::Vector{Int32})
    return [x == R_NA_INT32 ? zero(R) : R(x) for x in v]
end

# convert to [Nullable]CategoricalArray{String} if `ri` is a factor,
# or to [Nullable]Array{Int32} otherwise
function julia_vector(ri::RIntegerVector, force_nullable::Bool)
!isfactor(ri) && return _julia_vector(eltype(ri.data), ri, force_nullable) # not a factor

function DataArrays.data(ri::RIntegerVector)
if !isfactor(ri) return DataArray(ri.data, namask(ri)) end
# convert factor into PooledDataArray
pool = getattr(ri, "levels", emptystrvec)
sz = length(pool)
# convert factor into [Nullable]CategoricalArray
rlevels = getattr(ri, "levels", emptystrvec)
sz = length(rlevels)
REFTYPE = sz <= typemax(UInt8) ? UInt8 :
sz <= typemax(UInt16) ? UInt16 :
sz <= typemax(UInt32) ? UInt32 :
UInt64
dd = ri.data
dd[namask(ri)] = 0
refs = convert(Vector{REFTYPE}, dd)
return PooledDataArray(DataArrays.RefArray(refs), pool)
# FIXME set ordered flag
refs = na2zero(REFTYPE, ri.data)
pool = CategoricalPool{String, REFTYPE}(rlevels)
(force_nullable || (findfirst(refs, zero(REFTYPE)) > 0)) ?
NullableCategoricalArray{String, 1, REFTYPE}(refs, pool) :
CategoricalArray{String, 1, REFTYPE}(refs, pool)
end

function sexp2julia(rex::RSEXPREC)
Expand All @@ -46,36 +66,32 @@ function sexp2julia(rex::RSEXPREC)
end

function sexp2julia(rv::RVEC)
# FIXME dimnames
# FIXME forceDataArrays option to always convert to DataArray
nas = namask(rv)
hasna = any(nas)
# TODO dimnames?
# FIXME forceNullable option to always convert to NullableArray
jv = julia_vector(rv, false)
if hasnames(rv)
# if data has no NA, convert to simple Vector
return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv))
return DictoVec(jv, names(rv))
else
hasdims = hasdim(rv)
if !hasdims && length(rv.data)==1
# scalar
# FIXME handle NAs
# if hasna
return rv.data[1]
return jv[1]
elseif !hasdims
# vectors
return hasna ? DataArray(rv.data, nas) : rv.data
return jv
else
# matrices and so on
dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...)
return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) :
reshape(rv.data, dims)
dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...)
return reshape(jv, dims)
end
end
end

function sexp2julia(rl::RList)
if isdataframe(rl)
# FIXME remove Any type assertion workaround
DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl)))
# FIXME forceNullable option to always convert to NullableArray
DataFrame(Any[julia_vector(col, true) for col in rl.data], map(identifier, names(rl)))
elseif hasnames(rl)
DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
else
Expand Down
2 changes: 1 addition & 1 deletion src/sxtypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ typealias RComplexVector RVector{Complex128, CPLXSXP}
"""
immutable RNullableVector{T, S} <: RVEC{T, S}
data::Vector{T}
na::BitVector # mask of NA elements
na::Vector{Bool} # mask of NA elements
attr::Hash # collection of R object attributes
end

Expand Down
20 changes: 12 additions & 8 deletions test/RDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,23 @@ module TestRDA
df[:int] = Int32[1, 2]
df[:logi] = [true, false]
df[:chr] = ["ab", "c"]
df[:factor] = pool(df[:chr])
df[:factor] = categorical(df[:chr], true)
df[:cplx] = Complex128[1.1+0.5im, 1.0im]
@test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df)

df[2, :] = NA
rdf = sexp2julia(load("$testdir/data/types.rda",convert=false)["df"])
@test eltypes(rdf) == eltypes(df)
@test isequal(rdf, df)
rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"])
@test eltypes(rdf_ascii) == eltypes(df)
@test isequal(rdf_ascii, df)

df[2, :] = Nullable()
append!(df, df[2, :])
df[3, :num] = NaN
df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN]
df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN])
@test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df)
# ASCII format saves NaN as NA
df[3, :num] = NA
df[:, :cplx] = @data [NA, NA, NA]
df[3, :num] = Nullable()
df[:, :cplx] = NullableArray{Complex128}(3)
@test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)

rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"]))
Expand Down

0 comments on commit 23aa933

Please sign in to comment.