Skip to content

Commit

Permalink
Merge 194f8f2 into f8406ce
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Dec 29, 2017
2 parents f8406ce + 194f8f2 commit 20ba041
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 33 deletions.
12 changes: 6 additions & 6 deletions src/abstractdataframe/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ d1s_name = melt(d1, [:a, :b, :e], variable_name=:somemeasure)
```
"""
function stack(df::AbstractDataFrame, measure_vars::Vector{Int},
id_vars::Vector{Int}; variable_name::Symbol=:variable,
function stack(df::AbstractDataFrame, measure_vars::AbstractVector{<:Integer},
id_vars::AbstractVector{<:Integer}; variable_name::Symbol=:variable,
value_name::Symbol=:value)
N = length(measure_vars)
cnames = names(df)[id_vars]
Expand All @@ -91,12 +91,12 @@ function stack(df::AbstractDataFrame, measure_var::Int, id_var::Int;
stack(df, [measure_var], [id_var];
variable_name=variable_name, value_name=value_name)
end
function stack(df::AbstractDataFrame, measure_vars::Vector{Int}, id_var::Int;
# Convenience method: several measure columns (integer positions) and a single
# id column given as one integer. The id is wrapped in a one-element vector and
# the call is forwarded to `stack` with both selectors as vectors.
function stack(df::AbstractDataFrame,
               measure_vars::AbstractVector{<:Integer},
               id_var::Int;
               variable_name::Symbol=:variable,
               value_name::Symbol=:value)
    id_vars = [id_var]
    return stack(df, measure_vars, id_vars;
                 variable_name=variable_name, value_name=value_name)
end
function stack(df::AbstractDataFrame, measure_var::Int, id_vars::Vector{Int};
function stack(df::AbstractDataFrame, measure_var::Int, id_vars::AbstractVector{<:Integer};
variable_name::Symbol=:variable, value_name::Symbol=:value)
stack(df, [measure_var], id_vars;
variable_name=variable_name, value_name=value_name)
Expand Down Expand Up @@ -516,8 +516,8 @@ d1m = meltdf(d1, [:a, :b, :e])
```
"""
function stackdf(df::AbstractDataFrame, measure_vars::Vector{Int},
id_vars::Vector{Int}; variable_name::Symbol=:variable,
function stackdf(df::AbstractDataFrame, measure_vars::AbstractVector{<:Integer},
id_vars::AbstractVector{<:Integer}; variable_name::Symbol=:variable,
value_name::Symbol=:value)
N = length(measure_vars)
cnames = names(df)[id_vars]
Expand Down
23 changes: 7 additions & 16 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ ncol(df::DataFrame) = length(index(df))
# Let getindex(df.columns[j], row_inds) from AbstractVector() handle
# the resolution of row indices

# Type accepted wherever a single column is selected: either a number or a
# Symbol.
# TODO: change Real to Integer in this union after deprecation period
const ColumnIndex = Union{Real, Symbol}

# df[SingleColumnIndex] => AbstractDataVector
Expand All @@ -228,8 +229,7 @@ function Base.getindex(df::DataFrame, col_ind::ColumnIndex)
end

# df[MultiColumnIndex] => DataFrame
function Base.getindex(df::DataFrame,
col_inds::AbstractVector{<:Union{ColumnIndex, Missing}})
function Base.getindex(df::DataFrame, col_inds::AbstractVector)
selected_columns = index(df)[col_inds]
new_columns = df.columns[selected_columns]
return DataFrame(new_columns, Index(_names(df)[selected_columns]))
Expand All @@ -245,43 +245,34 @@ function Base.getindex(df::DataFrame, row_ind::Real, col_ind::ColumnIndex)
end

# df[SingleRowIndex, MultiColumnIndex] => DataFrame
function Base.getindex(df::DataFrame,
row_ind::Real,
col_inds::AbstractVector{<:Union{ColumnIndex, Missing}})
# One row, several columns: returns a DataFrame.
# `col_inds` is resolved through `index(df)`; each selected column is indexed
# with the one-element vector `[row_ind]`, so every resulting column is still a
# vector. Column names are taken from `_names(df)` at the resolved positions.
function Base.getindex(df::DataFrame, row_ind::Real, col_inds::AbstractVector)
    colpos = index(df)[col_inds]
    picked = Any[]
    for col in df.columns[colpos]
        push!(picked, col[[row_ind]])
    end
    return DataFrame(picked, Index(_names(df)[colpos]))
end

# df[MultiRowIndex, SingleColumnIndex] => AbstractVector
function Base.getindex(df::DataFrame,
row_inds::AbstractVector{<:Union{Real, Missing}},
col_ind::ColumnIndex)
# Several rows, one column: returns the selected entries of that column.
# `col_ind` is resolved through `index(df)`; interpreting `row_inds` is left to
# the column's own `getindex`.
function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_ind::ColumnIndex)
    colpos = index(df)[col_ind]
    column = df.columns[colpos]
    return column[row_inds]
end

# df[MultiRowIndex, MultiColumnIndex] => DataFrame
function Base.getindex(df::DataFrame,
row_inds::AbstractVector{<:Union{Real, Missing}},
col_inds::AbstractVector{<:Union{ColumnIndex, Missing}})
# Several rows, several columns: returns a DataFrame.
# `col_inds` is resolved through `index(df)`; every selected column is indexed
# with `row_inds`, and the new frame gets the names found in `_names(df)` at
# the resolved positions.
function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_inds::AbstractVector)
    colpos = index(df)[col_inds]
    picked = Any[]
    for col in df.columns[colpos]
        push!(picked, col[row_inds])
    end
    return DataFrame(picked, Index(_names(df)[colpos]))
end

# df[:, SingleColumnIndex] => AbstractVector
# df[:, MultiColumnIndex] => DataFrame
Base.getindex(df::DataFrame, row_ind::Colon, col_inds::Union{T, AbstractVector{T}}) where
T <: Union{ColumnIndex, Missing} = df[col_inds]
# Selecting all rows with `:` is the same as column-only indexing, so this
# simply delegates to `df[col_inds]`. `col_inds` is deliberately untyped: any
# selector accepted by the one-argument form is accepted here.
Base.getindex(df::DataFrame, row_ind::Colon, col_inds) = df[col_inds]

# df[SingleRowIndex, :] => DataFrame
# The row index is wrapped in a one-element vector and the call is re-issued as
# df[[row_ind], :], i.e. handled as a multi-row selection.
Base.getindex(df::DataFrame, row_ind::Real, col_inds::Colon) = df[[row_ind], col_inds]

# df[MultiRowIndex, :] => DataFrame
function Base.getindex(df::DataFrame,
row_inds::AbstractVector{<:Union{Real, Missing}},
col_inds::Colon)
# Several rows, all columns: returns a DataFrame.
# Every column of `df` is indexed with `row_inds`; the result gets a copy of
# the index of `df`.
function Base.getindex(df::DataFrame, row_inds::AbstractVector, col_inds::Colon)
    picked = Any[]
    for col in df.columns
        push!(picked, col[row_inds])
    end
    return DataFrame(picked, copy(index(df)))
end
Expand Down
22 changes: 22 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1285,3 +1285,25 @@ import Base: |>
@deprecate colwise(f) x -> colwise(f, x)
@deprecate groupby(cols::Vector{T}; sort::Bool = false, skipmissing::Bool = false) where {T} x -> groupby(x, cols, sort = sort, skipmissing = skipmissing)
@deprecate groupby(cols; sort::Bool = false, skipmissing::Bool = false) x -> groupby(x, cols, sort = sort, skipmissing = skipmissing)

# Deprecated: indexing with a single Bool.
# Emits a deprecation warning and converts the Bool the way any other Integer
# is converted (`true` -> 1, `false` -> 0). Returning a constant 1 would make
# `false` silently select the first column; converting keeps the result that
# the generic numeric conversion `Int(idx)` gives for both values.
function Base.getindex(x::AbstractIndex, idx::Bool)
    Base.depwarn("Indexing with Bool values is deprecated except for Vector{Bool}", :getindex)
    return Int(idx)
end

# Deprecated: indexing with a Real that is not handled by a more specific
# method. Warns, then converts the value with `Int`.
function Base.getindex(x::AbstractIndex, idx::Real)
    Base.depwarn("Indexing with values that are not Integer is deprecated", :getindex)
    position = Int(idx)
    return position
end

# Deprecated: indexing with a range not handled by a more specific method.
# Warns, materializes the range into a vector and re-dispatches on that
# vector.
function Base.getindex(x::AbstractIndex, idx::AbstractRange)
    Base.depwarn("Indexing with range of values that are not Integer is deprecated", :getindex)
    materialized = collect(idx)
    return getindex(x, materialized)
end


# Deprecated: indexing with a range of Bool.
# Warns and returns the range elements converted to a Vector{Int}.
function Base.getindex(x::AbstractIndex, idx::AbstractRange{Bool})
    Base.depwarn("Indexing with range of Bool is deprecated", :getindex)
    positions = collect(Int, idx)
    return positions
end

53 changes: 44 additions & 9 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,54 @@ function Base.insert!(x::Index, idx::Integer, nm::Symbol)
x
end

Base.getindex(x::Index, idx::Symbol) = x.lookup[idx]
Base.getindex(x::AbstractIndex, idx::Real) = Int(idx)
Base.getindex(x::AbstractIndex, idx::AbstractVector{Union{Bool, Missing}}) =
getindex(x, collect(Missings.replace(idx, false)))
# Resolution of column selectors against an index.
# A Symbol is resolved through the `lookup` field.
# NOTE(review): relies on `x.lookup` existing for every AbstractIndex subtype — confirm.
Base.getindex(x::AbstractIndex, idx::Symbol) = x.lookup[idx]
# A vector of Symbols is resolved element by element through `lookup`.
Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for i in idx]
# Any Integer is converted to Int; no bounds check is done here.
Base.getindex(x::AbstractIndex, idx::Integer) = Int(idx)
# Int vectors and Int ranges are handed back as passed (same object, no copy).
Base.getindex(x::AbstractIndex, idx::AbstractVector{Int}) = idx
Base.getindex(x::AbstractIndex, idx::AbstractRange{Int}) = idx
# Ranges of other Integer types are materialized as a Vector{Int}.
Base.getindex(x::AbstractIndex, idx::AbstractRange{<:Integer}) = collect(Int, idx)

# Boolean-mask selection: `idx` must have exactly one entry per element of `x`.
# Returns the positions at which the mask is `true`.
# Throws BoundsError when the lengths differ.
function Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool})
    if length(x) != length(idx)
        throw(BoundsError(x, idx))
    end
    return find(idx)
end
Base.getindex(x::AbstractIndex, idx::AbstractVector{T}) where {T >: Missing} =
getindex(x, collect(skipmissing(idx)))
Base.getindex(x::AbstractIndex, idx::AbstractRange) = [idx;]
Base.getindex(x::AbstractIndex, idx::AbstractVector{T}) where {T <: Real} = convert(Vector{Int}, idx)
Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for i in idx]

# Boolean mask that may contain `missing`.
# Missing entries are treated as `false` (with a deprecation warning if any are
# present); the resulting mask is then re-dispatched through `getindex`.
function Base.getindex(x::AbstractIndex, idx::AbstractVector{Union{Bool, Missing}})
    # TODO: this line should be changed to throw an error after deprecation
    any(ismissing, idx) &&
        Base.depwarn("using missing in column indexing is deprecated", :getindex)
    mask = collect(Missings.replace(idx, false))
    return getindex(x, mask)
end

# Vector of integers whose element type is not exactly Int or Bool (for
# example Vector{Integer} or Vector{UInt8}). The values are converted to a
# Vector{Int}. Bool values hidden in such a vector are still accepted, but
# trigger a deprecation warning.
function Base.getindex(x::AbstractIndex, idx::AbstractVector{<:Integer})
    if any(v -> v isa Bool, idx)
        # TODO: this line should be changed to throw an error after deprecation
        # `Base.depwarn` needs the name of the deprecated function as its second
        # argument; calling it with the message alone raises a MethodError.
        Base.depwarn("Indexing with Bool values is deprecated except for Vector{Bool}", :getindex)
    end
    return Vector{Int}(idx)
end

# Catch-all method handling cases when the element type of idx is not the narrowest
# possible (Any in particular); it also handles missing values passed in idx.
#
# Missing entries are dropped (with a deprecation warning). The kind of selector is
# then decided by the first remaining element: numbers are converted to Vector{Int},
# Symbols are re-dispatched as a Vector{Symbol}. An empty selection yields Int[].
# Throws ArgumentError when the first non-missing element is neither a Real nor a
# Symbol.
function Base.getindex(x::AbstractIndex, idx::AbstractVector)
    # TODO: passing missing will throw an error after deprecation
    idxs = filter(!ismissing, idx)
    if length(idxs) != length(idx)
        Base.depwarn("using missing in column indexing is deprecated", :getindex)
    end
    isempty(idxs) && return Int[] # special case of empty idxs
    # Decide on the first *non-missing* element; idx[1] itself may be missing.
    head = first(idxs)
    if head isa Real
        if !all(v -> v isa Integer && !(v isa Bool), idxs)
            # TODO: this line should be changed to throw an error after deprecation
            Base.depwarn("indexing by vector of numbers other than Integer is deprecated", :getindex)
        end
        return Vector{Int}(idxs)
    end
    head isa Symbol && return getindex(x, Vector{Symbol}(idxs))
    # Report the type of the element that was actually inspected, not idx[1].
    throw(ArgumentError("first non-missing element of idx has type $(typeof(head)); " *
                        "DataFrame only supports indexing columns with integers, symbols or boolean vectors"))
end

# Helpers

Expand Down
14 changes: 14 additions & 0 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,20 @@ module TestData
d1m_named = melt(d1[[1,3,4]], :a, variable_name=:letter, value_name=:someval)
@test names(d1m_named) == [:letter, :someval, :a]

# test empty measures or ids
dx = stack(d1, [], [:a])
@test size(dx) == (0, 3)
@test names(dx) == [:variable, :value, :a]
dx = stack(d1, :a, [])
@test size(dx) == (12, 2)
@test names(dx) == [:variable, :value]
dx = melt(d1, [], [:a])
@test size(dx) == (12, 2)
@test names(dx) == [:variable, :value]
dx = melt(d1, :a, [])
@test size(dx) == (0, 3)
@test names(dx) == [:variable, :value, :a]

stackdf(d1, :a)
d1s = stackdf(d1, [:a, :b])
d1s2 = stackdf(d1, [:c, :d])
Expand Down
18 changes: 16 additions & 2 deletions test/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@ inds = Any[1,
[true, false],
[1],
[1.0],
1:1,
1.0:1.0,
[:A],
Union{Bool, Missing}[true, false],
Union{Int, Missing}[1],
Union{Float64, Missing}[1.0],
Union{Symbol, Missing}[:A]]
Union{Symbol, Missing}[:A],
Any[1],
Any[1, missing],
Any[true, missing],
Any[:A],
Any[:A, missing],
[true, missing]]

for ind in inds
if ind == :A || ndims(ind) == 0
Expand All @@ -27,9 +32,18 @@ for ind in inds
end
end

@test i[1:1] == 1:1

@test_throws BoundsError i[[true]]
@test_throws BoundsError i[[true, false, true]]

@test_throws ArgumentError i[["a"]]
@test_throws ArgumentError i[Any["a"]]

@test i[[]] == Int[]
@test i[Int[]] == Int[]
@test i[Symbol[]] == Int[]

@test names(i) == [:A,:B]
@test names!(i, [:a,:a], allow_duplicates=true) == Index([:a,:a_1])
@test_throws ArgumentError names!(i, [:a,:a])
Expand Down

0 comments on commit 20ba041

Please sign in to comment.