Skip to content

Commit

Permalink
Merge pull request #6 from JuliaStats/ast/fileio_integration
Browse files Browse the repository at this point in the history
Replace read_rda() by FileIO integration
  • Loading branch information
alyst committed Aug 19, 2016
2 parents b9de124 + a70946d commit 2ff1091
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 27 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,27 @@
[![Build Status](https://travis-ci.org/JuliaStats/RData.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/RData.jl)
[![Build status](https://ci.appveyor.com/api/projects/status/github/JuliaStats/RData.jl?svg=true&branch=master)](https://ci.appveyor.com/project/alyst/rdata-jl/branch/master)

Read R data files (.rda, .RData) to native Julia objects.
Read R data files (.rda, .RData) and optionally convert the contents into Julia equivalents.

Can read any R data archive, although not all R types can be converted into Julia equivalents.

Usage
-----

To read R objects from the "example.rda" file:
```julia
using RData

objs = load("path_to/example.rda")
```

The result is a dictionary of all R objects that are stored in "example.rda".

If the `convert=true` keyword option is specified, `load()` will try to automatically
convert R objects into their Julia equivalents:
* data frames into `DataFrames.DataFrame`
* named vectors into `DictoVec` objects that allow indexing both by element indices and by names
* ...

If conversion to a Julia type is not supported (e.g. for an R closure or a language expression),
the internal RData representation of the object is returned instead.
1 change: 1 addition & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
julia 0.4
DataFrames
DataArrays
FileIO
GZip
Compat
43 changes: 31 additions & 12 deletions src/RData.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
module RData

using Compat, DataFrames, GZip
using Compat, DataFrames, GZip, FileIO
import DataArrays: data
import DataFrames: identifier
import Compat: UTF8String, unsafe_string
import FileIO: load

export
# read_rda,
sexp2julia,
DictoVec
DictoVec,
load # export FileIO.load()

include("config.jl")
include("sxtypes.jl")
Expand Down Expand Up @@ -37,11 +38,32 @@ include("convert.jl")
include("context.jl")
include("readers.jl")

function read_rda(io::IO, kwoptions::Vector{Any})
header = chomp(readline(io))
@assert header[1] == 'R' # readable header (or RDX2)
@assert header[2] == 'D'
@assert header[4] == '2'
##############################################################################
##
## FileIO integration
##
##############################################################################

# Test for the RD?2 magic sequence at the current position of the R data
# input stream `io`: 'R', 'D', a format byte ('A', 'B' or 'X'), '2' and a
# newline (0x0A).
#
# Returns `true` if all five bytes match and `false` otherwise. Reading stops
# at the first mismatching byte, so on failure fewer than five bytes may have
# been consumed. Input that ends before the sequence is complete yields
# `false` instead of raising an `EOFError`, so the function is safe to call
# on arbitrarily short (or empty) streams.
function detect_rdata(io)
    # next byte of `io`, or `nothing` when the stream is exhausted;
    # `nothing` never compares equal to a `UInt8`, so the checks below fail
    nextbyte() = eof(io) ? nothing : read(io, UInt8)

    nextbyte() == UInt8('R') || return false
    nextbyte() == UInt8('D') || return false
    fmt = nextbyte()
    (fmt == UInt8('A') || fmt == UInt8('B') || fmt == UInt8('X')) || return false
    nextbyte() == UInt8('2') || return false
    return nextbyte() == 0x0A
end

add_format(format"RData", detect_rdata, [".rdata", ".rda"], [:RData])

# Load the contents of the RData file referenced by `f`.
# The file is opened with `gzopen`; the resulting stream is wrapped into a
# FileIO `Stream` of the same format and handed, together with the collected
# keyword options, to the stream-based `load` method.
function load(f::File{format"RData"}; kwoptions...)
    path = filename(f)
    return gzopen(io -> load(Stream(f, io), kwoptions), path)
end

function load(s::Stream{format"RData"}, kwoptions::Vector{Any})
io = stream(s)
@assert detect_rdata(io)
ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions)
@assert ctx.fmtver == 2 # format version
# println("Written by R version $(ctx.Rver)")
Expand Down Expand Up @@ -69,9 +91,6 @@ function read_rda(io::IO, kwoptions::Vector{Any})
return res
end

read_rda(io::IO; kwoptions...) = read_rda(io, kwoptions)

read_rda(fnm::AbstractString; kwoptions...) = gzopen(fnm) do io read_rda(io, kwoptions) end

# Keyword-argument entry point for reading from an RData stream: collects the
# keyword options and forwards them positionally to `load(s, kwoptions)`.
function load(s::Stream{format"RData"}; kwoptions...)
    return load(s, kwoptions)
end

end # module
28 changes: 14 additions & 14 deletions test/RDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module TestRDA
# df['chr'] = c('ab', 'c')
# df['factor'] = factor(df$chr)
# df['cplx'] = complex( real=c(1.1,0.0), imaginary=c(0.5,1.0) )
# #utf=c('Ж', '∰')) R handles it, RData.read_rda doesn't.
# #utf=c('Ж', '∰')) R handles it, RData doesn't.
# save(df, file='types.rda')
# save(df, file='types_ascii.rda', ascii=TRUE)

Expand Down Expand Up @@ -57,39 +57,39 @@ module TestRDA
testdir = dirname(@__FILE__)

df = DataFrame(num = [1.1, 2.2])
@test isequal(sexp2julia(RData.read_rda("$testdir/data/minimal.rda",convert=false)["df"]), df)
@test isequal(RData.read_rda("$testdir/data/minimal.rda",convert=true)["df"], df)
@test isequal(open(RData.read_rda,"$testdir/data/minimal_ascii.rda")["df"], df)
@test isequal(sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]), df)
@test isequal(load("$testdir/data/minimal.rda",convert=true)["df"], df)
@test isequal(load("$testdir/data/minimal_ascii.rda")["df"], df)

df[:int] = Int32[1, 2]
df[:logi] = [true, false]
df[:chr] = ["ab", "c"]
df[:factor] = pool(df[:chr])
df[:cplx] = Complex128[1.1+0.5im, 1.0im]
@test isequal(sexp2julia(RData.read_rda("$testdir/data/types.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(RData.read_rda("$testdir/data/types_ascii.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df)

df[2, :] = NA
append!(df, df[2, :])
df[3, :num] = NaN
df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN]
@test isequal(sexp2julia(RData.read_rda("$testdir/data/NAs.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df)
# ASCII format saves NaN as NA
df[3, :num] = NA
df[:, :cplx] = @data [NA, NA, NA]
@test isequal(sexp2julia(RData.read_rda("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)
@test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)

rda_names = names(sexp2julia(RData.read_rda("$testdir/data/names.rda",convert=false)["df"]))
rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"]))
expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1]
@test rda_names == expected_names
rda_names = names(sexp2julia(RData.read_rda("$testdir/data/names_ascii.rda",convert=false)["df"]))
rda_names = names(sexp2julia(load("$testdir/data/names_ascii.rda",convert=false)["df"]))
@test rda_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1]

rda_envs = RData.read_rda("$testdir/data/envs.rda",convert=false)
rda_envs = load("$testdir/data/envs.rda",convert=false)

rda_pairlists = RData.read_rda("$testdir/data/pairlists.rda",convert=false)
rda_pairlists = load("$testdir/data/pairlists.rda",convert=false)

rda_closures = RData.read_rda("$testdir/data/closures.rda",convert=false)
rda_closures = load("$testdir/data/closures.rda",convert=false)

rda_cmpfuns = RData.read_rda("$testdir/data/cmpfun.rda",convert=false)
rda_cmpfuns = load("$testdir/data/cmpfun.rda",convert=false)
end

0 comments on commit 2ff1091

Please sign in to comment.