Skip to content

Commit

Permalink
Merge dd767d3 into 13f1ae0
Browse files — browse the repository at this point in the history
  • Loading branch information
ararslan committed Jul 28, 2016
2 parents 13f1ae0 + dd767d3 commit f15561d
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 44 deletions.
7 changes: 4 additions & 3 deletions .travis.yml
@@ -1,12 +1,13 @@
language: julia
julia:
- 0.4
- nightly
os:
- linux
notifications:
email: false
script:
- if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
- julia -e 'Pkg.clone(pwd()); Pkg.build("DataFramesMeta"); Pkg.test("DataFramesMeta", coverage=true)'
# script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("DataFramesMeta"); Pkg.test("DataFramesMeta", coverage=true)'
after_success:
- julia -e 'cd(Pkg.dir("DataFramesMeta")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
3 changes: 2 additions & 1 deletion README.md
@@ -1,7 +1,8 @@
# DataFramesMeta.jl

[![DataFramesMeta](http://pkg.julialang.org/badges/DataFramesMeta_0.4.svg)](http://pkg.julialang.org/?pkg=DataFramesMeta&ver=0.4)
[![Coverage Status](http://img.shields.io/coveralls/JuliaStats/DataFramesMeta.jl.svg)](https://coveralls.io/r/JuliaStats/DataFramesMeta.jl)
[![DataFramesMeta](http://pkg.julialang.org/badges/DataFramesMeta_0.5.svg)](http://pkg.julialang.org/?pkg=DataFramesMeta)
[![Coverage Status](https://coveralls.io/repos/github/JuliaStats/DataFramesMeta.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaStats/DataFramesMeta.jl?branch=master)
[![Build Status](https://travis-ci.org/JuliaStats/DataFramesMeta.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/DataFramesMeta.jl)

Metaprogramming tools for DataFrames and Associative objects.
Expand Down
42 changes: 42 additions & 0 deletions appveyor.yml
@@ -0,0 +1,42 @@
# AppVeyor (Windows CI) configuration for DataFramesMeta.
# Builds and tests against Julia 0.4 and the nightly build, 32- and 64-bit.
environment:
  matrix:
  - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
  - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
  - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
  - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"

branches:
  only:
    - master
    - /release-.*/

skip_commits:
  message: /\[av skip\]/

notifications:
  - provider: Email
    on_build_success: false
    on_build_failure: false
    on_build_status_changed: false

install:
# If there's a newer build queued for the same PR, cancel this one
  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
        throw "There are newer queued builds for this pull request, failing early." }
# Download most recent Julia Windows binary
  - ps: (new-object net.webclient).DownloadFile(
        $("http://s3.amazonaws.com/"+$env:JULIAVERSION),
        "C:\projects\julia-binary.exe")
# Run installer silently, output to C:\projects\julia
  - C:\projects\julia-binary.exe /S /D=C:\projects\julia

build_script:
# Need to convert from shallow to complete for Pkg.clone to work
  - IF EXIST .git\shallow (git fetch --unshallow)
  - C:\projects\julia\bin\julia -e "versioninfo();
      Pkg.clone(pwd(), \"DataFramesMeta\"); Pkg.build(\"DataFramesMeta\")"

test_script:
  - C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"DataFramesMeta\")"
2 changes: 1 addition & 1 deletion src/byrow.jl
Expand Up @@ -27,7 +27,7 @@ end
# Generic method: hand the argument back unchanged.
function byrow_replace(x)
    return x
end

function byrow_find_newcols(e::Expr, newcol_decl)
if e.head == :macrocall && e.args[1] == symbol("@newcol")
if e.head == :macrocall && e.args[1] == Symbol("@newcol")
ea = e.args[2]
# expression to assign a new column to df
return (nothing, Any[Expr(:kw, ea.args[1], Expr(:call, ea.args[2].args[1], ea.args[2].args[2], :_N))])
Expand Down
46 changes: 23 additions & 23 deletions src/compositedataframe.jl
@@ -1,13 +1,13 @@
using Compat

export AbstractCompositeDataFrame, AbstractCompositeDataFrameRow,
export AbstractCompositeDataFrame, AbstractCompositeDataFrameRow,
CompositeDataFrame, row

"""
AbstractCompositeDataFrame
An abstract type that is an `AbstractDataFrame`. Each type that inherits from
this is expected to be a type-stable data frame.
this is expected to be a type-stable data frame.
"""
abstract AbstractCompositeDataFrame <: AbstractDataFrame

Expand All @@ -16,10 +16,10 @@ abstract AbstractCompositeDataFrameRow

"""
row(cdf::AbstractCompositeDataFrame, i)
Return row `i` of `cdf` as a `CompositeDataFrameRow`. This object has
the same fields as `cdf` where the type of each field is taken from the `eltype`
of the field in `cdf`.
of the field in `cdf`.
See also `eachrow(cdf)`.
Expand All @@ -43,7 +43,7 @@ A constructor of an `AbstractCompositeDataFrame` that mimics the `DataFrame`
constructor. This returns a composite type (not immutable) that is an
`AbstractCompositeDataFrame`.
This uses `eval` to create a new type within the current module.
This uses `eval` to create a new type within the current module.
### Arguments
Expand All @@ -62,8 +62,8 @@ df = CompositeDataFrame(:MyDF, x = 1:3, y = [2, 1, 2])
"""
function CompositeDataFrame(columns::Vector{Any},
cnames::Vector{Symbol} = gennames(length(columns)),
typename::Symbol = symbol("CompositeDF" * string(gensym())))
rowtypename = symbol(string(typename, "Row"))
typename::Symbol = @compat(Symbol("CompositeDF", gensym())))
rowtypename = @compat Symbol(typename, "Row")
# TODO: length checks
e = :(type $(typename) <: AbstractCompositeDataFrame end)
e.args[3].args = Any[:($(cnames[i]) :: $(typeof(columns[i]))) for i in 1:length(columns)]
Expand All @@ -90,7 +90,7 @@ CompositeDataFrame(typename::Symbol; kwargs...) =

# Convert any `AbstractDataFrame` into a composite data frame, keeping the
# column names reported by `names(adf)`.
function CompositeDataFrame(adf::AbstractDataFrame)
    cols = DataFrames.columns(adf)
    return CompositeDataFrame(cols, names(adf))
end

# Convert any `AbstractDataFrame` into a composite data frame whose columns
# are named by the caller-supplied `nms` instead of the source names.
function CompositeDataFrame(adf::AbstractDataFrame, nms::Vector{Symbol})
    cols = DataFrames.columns(adf)
    return CompositeDataFrame(cols, nms)
end

Expand All @@ -105,11 +105,11 @@ DataFrames.DataFrame(cdf::AbstractCompositeDataFrame) = DataFrame(DataFrames.col
# Column names are the field names of the concrete composite type `T`.
function Base.names{T <: AbstractCompositeDataFrame}(cdf::T)
    return @compat fieldnames(T)
end

# The column count is the number of names reported by `names(cdf)`.
function DataFrames.ncol(cdf::AbstractCompositeDataFrame)
    return length(names(cdf))
end
DataFrames.nrow(cdf::AbstractCompositeDataFrame) = ncol(cdf) > 0 ? length(cdf.(1))::Int : 0
DataFrames.nrow(cdf::AbstractCompositeDataFrame) = length(cdf.(1))
# Row count is the length of the first column; a frame with no columns has
# zero rows. This is deliberately a single definition: a second, unguarded
# method with the same signature would replace this one and make
# `getfield(cdf, 1)` throw on a column-less frame.
DataFrames.nrow(cdf::AbstractCompositeDataFrame) =
    ncol(cdf) > 0 ? length(getfield(cdf, 1))::Int : 0

DataFrames.columns(cdf::AbstractCompositeDataFrame) = Any[ getfield(cdf, i) for i in 1:length(cdf) ]

DataFrames.columns(cdf::AbstractCompositeDataFrame) = Any[ cdf.(i) for i in 1:length(cdf) ]

function Base.hcat(df1::AbstractCompositeDataFrame, df2::AbstractCompositeDataFrame)
nms = DataFrames.make_unique([names(df1); names(df2)])
columns = Any[DataFrames.columns(df1)..., DataFrames.columns(df2)...]
Expand All @@ -125,19 +125,19 @@ DataFrames.index(cdf::AbstractCompositeDataFrame) = DataFrames.Index(names(cdf))
## getindex
#########################################

Base.getindex(cdf::AbstractCompositeDataFrame, col_inds::DataFrames.ColumnIndex) = cdf.(col_inds)
Base.getindex{T <: DataFrames.ColumnIndex}(cdf::AbstractCompositeDataFrame, col_inds::AbstractVector{T}) = CompositeDataFrame(Any[ cdf.(col_inds[i]) for i = 1:length(col_inds) ], names(cdf)[col_inds])
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, col_inds::DataFrames.ColumnIndex) = cdf.(col_inds)[row_inds]
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, col_inds) =
CompositeDataFrame(Any[ cdf.(col_inds[i])[row_inds] for i = 1:length(col_inds) ],
Base.getindex(cdf::AbstractCompositeDataFrame, col_inds::DataFrames.ColumnIndex) = getfield(cdf, col_inds)
Base.getindex{T <: DataFrames.ColumnIndex}(cdf::AbstractCompositeDataFrame, col_inds::AbstractVector{T}) = CompositeDataFrame(Any[ getfield(cdf, col_inds[i]) for i = 1:length(col_inds) ], names(cdf)[col_inds])
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, col_inds::DataFrames.ColumnIndex) = getfield(cdf, col_inds)[row_inds]
# Select rows `row_inds` from the columns listed in `col_inds` and return a new
# composite data frame. Each output column is named after the column it was
# taken from: a `Symbol` selector is its own name, and any other selector is
# looked up in `names(cdf)`. (Indexing `names(cdf)` by the loop counter instead
# would label the result with the first `length(col_inds)` names regardless of
# which columns were actually selected.)
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, col_inds) =
    CompositeDataFrame(Any[ getfield(cdf, col_inds[i])[row_inds] for i = 1:length(col_inds) ],
                       Symbol[ isa(col_inds[i], Symbol) ? col_inds[i] : names(cdf)[col_inds[i]]
                               for i = 1:length(col_inds) ])
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, ::Colon) = typeof(cdf)([cdf.(i)[row_inds] for i in 1:length(cdf)]...)
Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, ::Colon) = typeof(cdf)([getfield(cdf, i)[row_inds] for i in 1:length(cdf)]...)

function Base.getindex(cdf::AbstractCompositeDataFrame, row_inds, col_inds::UnitRange)
if col_inds.start == 1 && col_inds.stop == length(cdf)
return typeof(cdf)([ cdf.(i)[row_inds] for i in 1:length(cdf) ]...)
return typeof(cdf)([ getfield(cdf, i)[row_inds] for i in 1:length(cdf) ]...)
else
return CompositeDataFrame(Any[ cdf.(col_inds[i])[row_inds] for i = 1:length(col_inds) ], names(cdf)[col_inds])
return CompositeDataFrame(Any[ getfield(cdf, col_inds[i])[row_inds] for i = 1:length(col_inds) ], names(cdf)[col_inds])
end
end

Expand All @@ -147,7 +147,7 @@ end

"""
CDFRowIterator
An iterator over the rows of an `AbstractCompositeDataFrame`. Each row
is an immutable type with the same names as the parent composite data frame.
This iterator is created by calling `eachrow(df)` where `df` is an
Expand Down Expand Up @@ -176,7 +176,7 @@ Base.map(f::Function, dfri::CDFRowIterator) = [f(row) for row in dfri]

# Reorder the rows of `d` by the sort permutation of a `DataFrame` built from
# the supplied keyword arguments; all columns are kept.
function order(d::AbstractCompositeDataFrame; args...)
    perm = sortperm(DataFrame(args...))
    return d[perm, :]
end

# Return a new composite data frame holding every column of `d` followed by one
# extra column per keyword argument. A keyword value that is a `Function` is
# called with `d` to produce the column; any other value is used as given.
function transform(d::AbstractCompositeDataFrame; kwargs...)
    added_cols = [ isa(v, Function) ? v(d) : v for (k,v) in kwargs ]
    added_names = [ k for (k,v) in kwargs ]
    return CompositeDataFrame(Any[DataFrames.columns(d)..., added_cols...],
                              Symbol[names(d)..., added_names...])
end
Expand Down
32 changes: 16 additions & 16 deletions test/data.table.timings.jl
Expand Up @@ -10,12 +10,12 @@ srand(1)
# Array version

DA = DataFrame(
id1 = P(rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id2 = P(rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id3 = P(rand([symbol(string("id", i)) for i=1:Int(N/K)], N)), # small groups (char)
id1 = P(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = P(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = P(rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = P(rand(1:K, N)), # large groups (int)
id5 = P(rand(1:K, N)), # large groups (int)
id6 = P(rand(1:Int(N/K), N)), # small groups (int)
id6 = P(rand(1:N÷K, N)), # small groups (int)
v1 = P(rand(1:5, N)), # int in range [1,5]
v2 = P(rand(1:5, N)), # int in range [1,5]
v3 = P(rand(N)) # numeric e.g. 23.5749
Expand All @@ -24,12 +24,12 @@ DA = DataFrame(
# PooledDataArray version

DPDA = DataFrame(
id1 = PooledDataArray(rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id2 = PooledDataArray(rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id3 = PooledDataArray(rand([symbol(string("id", i)) for i=1:Int(N/K)], N)), # small groups (char)
id1 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = PooledDataArray(rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = PooledDataArray(rand(1:K, N)), # large groups (int)
id5 = PooledDataArray(rand(1:K, N)), # large groups (int)
id6 = PooledDataArray(rand(1:Int(N/K), N)), # small groups (int)
id6 = PooledDataArray(rand(1:N÷K, N)), # small groups (int)
v1 = P(rand(1:5, N)), # int in range [1,5]
v2 = P(rand(1:5, N)), # int in range [1,5]
v3 = P(rand(N)) # numeric e.g. 23.5749
Expand All @@ -38,12 +38,12 @@ DPDA = DataFrame(
# DataArray version

DDA = DataFrame(
id1 = (rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id2 = (rand([symbol(string("id", i)) for i=1:K], N)), # large groups (char)
id3 = (rand([symbol(string("id", i)) for i=1:Int(N/K)], N)), # small groups (char)
id1 = (rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id2 = (rand([@compat(Symbol("id", i)) for i=1:K], N)), # large groups (char)
id3 = (rand([@compat(Symbol("id", i)) for i=1:N÷K], N)), # small groups (char)
id4 = (rand(1:K, N)), # large groups (int)
id5 = (rand(1:K, N)), # large groups (int)
id6 = (rand(1:Int(N/K), N)), # small groups (int)
id6 = (rand(1:N÷K, N)), # small groups (int)
v1 = (rand(1:5, N)), # int in range [1,5]
v2 = (rand(1:5, N)), # int in range [1,5]
v3 = (rand(N)) # numeric e.g. 23.5749
Expand All @@ -52,12 +52,12 @@ DDA = DataFrame(
# NullableArray version

DNA = DataFrame(
id1 = P(NullableArray(rand([symbol(string("id", i)) for i=1:K], N))), # large groups (char)
id2 = P(NullableArray(rand([symbol(string("id", i)) for i=1:K], N))), # large groups (char)
id3 = P(NullableArray(rand([symbol(string("id", i)) for i=1:Int(N/K)], N))), # small groups (char)
id1 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:K], N))), # large groups (char)
id2 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:K], N))), # large groups (char)
id3 = P(NullableArray(rand([@compat(Symbol("id", i)) for i=1:N÷K], N))), # small groups (char)
id4 = P(NullableArray(rand(1:K, N))), # large groups (int)
id5 = P(NullableArray(rand(1:K, N))), # large groups (int)
id6 = P(NullableArray(rand(1:Int(N/K), N))), # small groups (int)
id6 = P(NullableArray(rand(1:N÷K, N))), # small groups (int)
v1 = P(NullableArray(rand(1:5, N))), # int in range [1,5]
v2 = P(NullableArray(rand(1:5, N))), # int in range [1,5]
v3 = P(NullableArray(rand(N))) # numeric e.g. 23.5749
Expand Down

0 comments on commit f15561d

Please sign in to comment.