Skip to content

Commit

Permalink
Merge pull request #79 from JuliaStats/nl/dataframes0.11
Browse files Browse the repository at this point in the history
DataFrames 0.11 support
  • Loading branch information
tshort committed Nov 29, 2017
2 parents 024f498 + b404f19 commit 4f727e1
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 98 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.5
- 0.6
- nightly
os:
Expand Down
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# DataFramesMeta.jl

[![DataFramesMeta](http://pkg.julialang.org/badges/DataFramesMeta_0.5.svg)](http://pkg.julialang.org/?pkg=DataFramesMeta&ver=0.5)
[![DataFramesMeta](http://pkg.julialang.org/badges/DataFramesMeta_0.6.svg)](http://pkg.julialang.org/?pkg=DataFramesMeta&ver=0.6)
[![Coveralls](https://coveralls.io/repos/github/JuliaStats/DataFramesMeta.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaStats/DataFramesMeta.jl?branch=master)
[![Travis](https://travis-ci.org/JuliaStats/DataFramesMeta.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/DataFramesMeta.jl)
Expand Down Expand Up @@ -290,19 +289,15 @@ To create a CompositeDataFrame, use `CompositeDataFrame`:

```julia
n = 10
d = CompositeDataFrame(a = 1:n, b = rand(10), c = DataArray(rand(1:3, n)))
d = CompositeDataFrame(a = 1:n, b = rand(10), c = rand(1:3, n))
```

Note that `CompositeDataFrame()` does not coerce its columns to `DataArray`s. Ranges and
other `AbstractVector`s are left as is, so convert them to a `DataArray` or `NullableArray`
yourself where appropriate.

You can also name the type of the `CompositeDataFrame` by including that as the
first symbol:

```julia
n = 10
d = CompositeDataFrame(:MyDF, a = 1:n, b = rand(n), c = DataArray(rand(1:3, n)))
d = CompositeDataFrame(:MyDF, a = 1:n, b = rand(n), c = rand(1:3, n))
```

You can also define a `CompositeDataFrame` manually as follows. If you do this,
Expand All @@ -315,7 +310,7 @@ immutable MyDF <: AbstractCompositeDataFrame
c::DataVector{Float64}
end

MyDF(n::Integer) = MyDF(zeros(Int, n), zeros(n), DataArray(zeros(n)))
MyDF(n::Integer) = MyDF(zeros(Int, n), zeros(n), zeros(n))
d = MyDF(10)
```

Expand Down
5 changes: 2 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
julia 0.5
DataFrames
Compat 0.17
julia 0.6
DataFrames 0.11
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
environment:
matrix:
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
- JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe"
Expand Down
19 changes: 0 additions & 19 deletions src/DataFramesMeta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -746,23 +746,4 @@ Base.getindex(gd::GroupedDataFrame, I::AbstractArray{Int}) = GroupedDataFrame(gd
gd.starts[I],
gd.ends[I])


##############################################################################
##
## Extras for easier handling of Arrays
##
##############################################################################

export P, PassThrough

type PassThrough{T} <: AbstractVector{T}
x::AbstractVector{T}
end
const P = PassThrough

Base.size(x::PassThrough) = size(x.x)
Base.getindex(x::PassThrough, i) = getindex(x.x, i)

DataFrames.upgrade_vector(v::PassThrough) = v.x

end # module
12 changes: 5 additions & 7 deletions src/compositedataframe.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using Compat

export AbstractCompositeDataFrame, AbstractCompositeDataFrameRow,
CompositeDataFrame, row

Expand All @@ -9,9 +7,9 @@ export AbstractCompositeDataFrame, AbstractCompositeDataFrameRow,
An abstract type that is an `AbstractDataFrame`. Each type that inherits from
this is expected to be a type-stable data frame.
"""
@compat abstract type AbstractCompositeDataFrame <: AbstractDataFrame end
abstract type AbstractCompositeDataFrame <: AbstractDataFrame end

@compat abstract type AbstractCompositeDataFrameRow end
abstract type AbstractCompositeDataFrameRow end


"""
Expand Down Expand Up @@ -76,9 +74,9 @@ julia> df = CompositeDataFrame(:MyDF, x = 1:3, y = [2, 1, 2]);
"""
function CompositeDataFrame(columns::Vector{Any},
cnames::Vector{Symbol} = gennames(length(columns)),
typename::Symbol = @compat(Symbol("CompositeDF", gensym()));
typename::Symbol = Symbol("CompositeDF", gensym());
inmodule = DataFramesMeta)
rowtypename = @compat Symbol(typename, "Row")
rowtypename = Symbol(typename, "Row")
# TODO: length checks
type_definition = :(type $typename <: AbstractCompositeDataFrame end)
type_definition.args[3].args = Any[:($(cnames[i]) :: $(typeof(columns[i]))) for i in 1:length(columns)]
Expand Down Expand Up @@ -117,7 +115,7 @@ DataFrames.DataFrame(cdf::AbstractCompositeDataFrame) = DataFrame(DataFrames.col
## basic stuff
#########################################

Base.names{T <: AbstractCompositeDataFrame}(cdf::T) = @compat fieldnames(T)
Base.names{T <: AbstractCompositeDataFrame}(cdf::T) = fieldnames(T)

DataFrames.ncol(cdf::AbstractCompositeDataFrame) = length(names(cdf))
DataFrames.nrow(cdf::AbstractCompositeDataFrame) = ncol(cdf) > 0 ? length(getfield(cdf, 1))::Int : 0
Expand Down
1 change: 1 addition & 0 deletions test/REQUIRE
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Lazy
DataArrays 0.7.0
5 changes: 0 additions & 5 deletions test/compositedataframes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,4 @@ df3 = CompositeDataFrame(:DF3, C = [1, 2, 3], D = [2, 1, 2])
@test names(df[[1, 2]]) == [:A, :B]
@test names(df[1:1]) == [:A]

p = PassThrough([1, 2, 3])
@test size(p) == (3,)
@test getindex(p, 1) == 1
@test DataFrames.upgrade_vector(p) == [1, 2, 3]

end # module
94 changes: 47 additions & 47 deletions test/data.table.timings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,65 +2,65 @@
# https://github.com/Rdatatable/data.table/wiki/Benchmarks-:-Grouping

using DataFrames, DataFramesMeta
using NullableArrays
using CategoricalArrays, DataArrays

N=10_000_000; K=100
N=10_0000; K=100
srand(1)

# Array version

DA = DataFrame(
id1 = P(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = P(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = P(rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = P(rand(1:K, N)), # large groups (int)
id5 = P(rand(1:K, N)), # large groups (int)
id6 = P(rand(1:N÷K, N)), # small groups (int)
v1 = P(rand(1:5, N)), # int in range [1,5]
v2 = P(rand(1:5, N)), # int in range [1,5]
v3 = P(rand(N)) # numeric e.g. 23.5749
id1 = rand([Symbol("id", i) for i=1:K], N), # large groups (char)
id2 = rand([Symbol("id", i) for i=1:K], N), # large groups (char)
id3 = rand([Symbol("id", i) for i=1:N÷K], N), # small groups (char)
id4 = rand(1:K, N), # large groups (int)
id5 = rand(1:K, N), # large groups (int)
id6 = rand(1:N÷K, N), # small groups (int)
v1 = rand(1:5, N), # int in range [1,5]
v2 = rand(1:5, N), # int in range [1,5]
v3 = rand(N) # numeric e.g. 23.5749
);

# PooledDataArray version
# CategoricalArray version

DPDA = DataFrame(
id1 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = PooledDataArray(rand(1:K, N)), # large groups (int)
id5 = PooledDataArray(rand(1:K, N)), # large groups (int)
id6 = PooledDataArray(rand(1:N÷K, N)), # small groups (int)
v1 = P(rand(1:5, N)), # int in range [1,5]
v2 = P(rand(1:5, N)), # int in range [1,5]
v3 = P(rand(N)) # numeric e.g. 23.5749
DCA = DataFrame(
id1 = CategoricalArray(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id2 = CategoricalArray(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id3 = CategoricalArray(rand([Symbol("id", i) for i=1:N÷K], N)), # small groups (char)
id4 = CategoricalArray(rand(1:K, N)), # large groups (int)
id5 = CategoricalArray(rand(1:K, N)), # large groups (int)
id6 = CategoricalArray(rand(1:N÷K, N)), # small groups (int)
v1 = rand(1:5, N), # int in range [1,5]
v2 = rand(1:5, N), # int in range [1,5]
v3 = rand(N) # numeric e.g. 23.5749
);

# DataArray version
# Array{Union{T, Missing}} version

DDA = DataFrame(
id1 = (rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = (rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = (rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = (rand(1:K, N)), # large groups (int)
id5 = (rand(1:K, N)), # large groups (int)
id6 = (rand(1:N÷K, N)), # small groups (int)
v1 = (rand(1:5, N)), # int in range [1,5]
v2 = (rand(1:5, N)), # int in range [1,5]
v3 = (rand(N)) # numeric e.g. 23.5749
DMA = DataFrame(
id1 = Array{Union{Symbol, Missing}}(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id2 = Array{Union{Symbol, Missing}}(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id3 = Array{Union{Symbol, Missing}}(rand([Symbol("id", i) for i=1:N÷K], N)), # small groups (char)
id4 = Array{Union{Int, Missing}}(rand(1:K, N)), # large groups (int)
id5 = Array{Union{Int, Missing}}(rand(1:K, N)), # large groups (int)
id6 = Array{Union{Int, Missing}}(rand(1:N÷K, N)), # small groups (int)
v1 = Array{Union{Int, Missing}}(rand(1:5, N)), # int in range [1,5]
v2 = Array{Union{Int, Missing}}(rand(1:5, N)), # int in range [1,5]
v3 = Array{Union{Float64, Missing}}(rand(N)) # numeric e.g. 23.5749
);

# NullableArray version
# DataArray version

DNA = DataFrame(
id1 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:K], N))), # large groups (char)
id2 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:K], N))), # large groups (char)
id3 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:N÷K], N))), # small groups (char)
id4 = P(NullableArray(rand(1:K, N))), # large groups (int)
id5 = P(NullableArray(rand(1:K, N))), # large groups (int)
id6 = P(NullableArray(rand(1:N÷K, N))), # small groups (int)
v1 = P(NullableArray(rand(1:5, N))), # int in range [1,5]
v2 = P(NullableArray(rand(1:5, N))), # int in range [1,5]
v3 = P(NullableArray(rand(N))) # numeric e.g. 23.5749
DDA = DataFrame(
id1 = DataArray(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id2 = DataArray(rand([Symbol("id", i) for i=1:K], N)), # large groups (char)
id3 = DataArray(rand([Symbol("id", i) for i=1:N÷K], N)), # small groups (char)
id4 = DataArray(rand(1:K, N)), # large groups (int)
id5 = DataArray(rand(1:K, N)), # large groups (int)
id6 = DataArray(rand(1:N÷K, N)), # small groups (int)
v1 = DataArray(rand(1:5, N)), # int in range [1,5]
v2 = DataArray(rand(1:5, N)), # int in range [1,5]
v3 = DataArray(rand(N)) # numeric e.g. 23.5749
);


Expand All @@ -79,8 +79,8 @@ function dt_timings(D)
end

dt_timings(DA)
dt_timings(DPDA)
dt_timings(DNA)
dt_timings(DCA)
dt_timings(DMA)
dt_timings(DDA)

@profile @by(D, :id1, sv =sum(:v1));
@profile @by(DA, :id1, sv =sum(:v1));
1 change: 0 additions & 1 deletion test/dataframes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ module TestDataFrames
using Base.Test
using DataArrays, DataFrames
using DataFramesMeta
using Compat

df = DataFrame(A = 1:3, B = [2, 1, 2])

Expand Down
5 changes: 2 additions & 3 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@ module TestDicts

using Base.Test
using DataFramesMeta
using Compat

y = 3
@compat d = Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b))
d = Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b))
@test @with(d, :s + :y) == d[:s] + d[:y]
@test @with(d, :s + y) == d[:s] + y
@test @with(d, d) == d
@test @with(d, :s + d[^(:y)]) == d[:s] + d[:y]
@test @with(d, :e.head) == d[:e].head
@test @compat @with(Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b)), :e.head) == d[:e].head
@test @with(Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b)), :e.head) == d[:e].head

x = @with d begin
z = y + :y - 1
Expand Down
4 changes: 2 additions & 2 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using DataArrays, DataFrames
using DataFramesMeta

d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 3, 1, 1, 2])
g = groupby(d, :x)
g = groupby(d, :x, sort=true)

@test @where(d, :x .== 3) == where(d, x -> x[:x] .== 3)
@test DataFrame(@where(g, length(:x) > 5)) == DataFrame(where(g, x -> length(x[:x]) > 5))
Expand All @@ -18,7 +18,7 @@ g = groupby(d, :x)
@test (@transform(g, y = :n - median(:n)))[1,:y] == -5.0

d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 3, 1, 1, 2])
g = groupby(d, :x)
g = groupby(d, :x, sort=true)
@test @based_on(g, nsum = sum(:n))[:nsum] == [99, 84, 27]

end # module

0 comments on commit 4f727e1

Please sign in to comment.