-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
123 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
|
||
export CategoricalVector | ||
|
||
""" | ||
A CategoricalVector is an AbstractVector which is treated as a categorical axis regardless | ||
of the element type. Duplicate values are not allowed but are not filtered out. | ||
A CategoricalVector axis can be indexed with a ClosedInterval, with a value, or with a | ||
vector of values. Use of a CategoricalVector{Tuple} axis allows indexing similar to the | ||
hierarchical index of the Python Pandas package or the R data.table package. | ||
In general, indexing into a CategoricalVector will be much slower than the corresponding | ||
SortedVector or another sorted axis type, as linear search is required. | ||
### Constructors | ||
```julia | ||
CategoricalVector(x::AbstractVector) | ||
``` | ||
### Arguments | ||
* `x::AbstractVector` : the wrapped vector | ||
### Examples | ||
```julia | ||
v = CategoricalVector(collect([1; 8; 10:15])) | ||
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b]) | ||
A[Axis{:row}(1), :] | ||
A[Axis{:row}(10), :] | ||
A[Axis{:row}([1, 10]), :] | ||
## Hierarchical index example with three key levels | ||
data = reshape(1.:40., 20, 2) | ||
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)])) | ||
A = AxisArray(data, CategoricalVector(v), [:a, :b]) | ||
A[:b, :] | ||
A[[:a,:c], :] | ||
A[(:a,:x), :] | ||
A[(:a,:x,:x), :] | ||
``` | ||
""" | ||
immutable CategoricalVector{T} <: AbstractVector{T} | ||
# The wrapped vector of axis values. | ||
# NOTE(review): the field type is abstract, so accesses to `data` are not concretely typed; | ||
# making it a type parameter would change `CategoricalVector{T}` for existing dispatch - confirm before changing. | ||
data::AbstractVector{T} | ||
end | ||
|
||
# Scalar lookup: forward the integer position straight to the wrapped vector.
function Base.getindex(v::CategoricalVector, idx::Int)
    return v.data[idx]
end

# Vector lookup: index the wrapped vector, then re-wrap the selection so the
# result is again a CategoricalVector.
function Base.getindex(v::CategoricalVector, idx::AbstractVector)
    selected = v.data[idx]
    return CategoricalVector(selected)
end
|
||
# Shape queries are all delegated to the wrapped vector.
function Base.length(v::CategoricalVector)
    return length(v.data)
end

function Base.size(v::CategoricalVector)
    return size(v.data)
end

function Base.size(v::CategoricalVector, i)
    return size(v.data, i)
end

function Base.indices(v::CategoricalVector)
    return indices(v.data)
end
|
||
# A CategoricalVector reports the Categorical axis trait for every element type `T`.
function axistrait{T}(::Type{CategoricalVector{T}})
    return Categorical
end

# No validation is performed on a CategoricalVector axis.
function checkaxis(::CategoricalVector)
    return nothing
end
|
||
|
||
## Add some special indexing for CategoricalVector{Tuple} axes to achieve something like | ||
## pandas' hierarchical indexing | ||
|
||
# A bare key is wrapped in a 1-tuple and handed to the Tuple method, so it is
# matched against the first level of each tuple key.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx)
    return axisindexes(ax, (idx,))
end
|
||
# Return the positions along the axis whose tuple key starts with `idx`.
# Every position is tested in turn with `_tuple_matches`.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx::Tuple)
    keys = ax.val
    positions = indices(keys)[1]
    matches(pos) = _tuple_matches(keys[pos], idx)
    return collect(filter(matches, positions))
end
|
||
# Return `true` when `idx` is a prefix of `element`: `idx` is no longer than
# `element` and each of its entries equals (`==`) the entry at the same position.
function _tuple_matches(element::Tuple, idx::Tuple)
    nlevels = length(idx)

    # A query with more levels than the key can never match.
    nlevels > length(element) && return false

    for level in 1:nlevels
        element[level] == idx[level] || return false
    end

    return true
end
|
||
# Look up several keys at once: each entry of `idx` is resolved with
# `axisindexes(ax, i)` and the resulting position vectors are concatenated in
# the order the keys were given.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx::AbstractArray)
    per_key = [axisindexes(ax, i) for i in idx]
    # Seed the concatenation with `Int[]`: when `idx` is empty the splat yields
    # no arguments, and a bare `vcat()` returns `Any[]` instead of an integer
    # index vector.
    return vcat(Int[], per_key...)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Test CategoricalVector with a hierarchical index (indexed using Tuples) | ||
# Seed the RNG so the random keys below, and the hard-coded row ranges in the tests, are reproducible | ||
srand(1234) | ||
data = reshape(1.:40., 20, 2) | ||
# 20 random three-level keys, one per data row | ||
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)])) | ||
# Sort the keys (and reorder the data rows to match) so rows with equal keys are contiguous | ||
idx = sortperm(v) | ||
A = AxisArray(data[idx,:], CategoricalVector(v[idx]), [:a, :b]) | ||
# Index by a single first-level key, a vector of keys, a two-level tuple and a full three-level tuple | ||
@test A[:b, :] == A[5:12, :] | ||
@test A[[:a,:c], :] == A[[1:4;13:end], :] | ||
@test A[(:a,:y), :] == A[2:4, :] | ||
@test A[(:c,:y,:y), :] == A[16:end, :] | ||
@test AxisArrays.axistrait(axes(A)[1]) <: AxisArrays.Categorical | ||
|
||
# CategoricalVector wrapping plain integer keys (no tuples) | ||
v = CategoricalVector(collect([1; 8; 10:15])) | ||
# NOTE(review): A has not been rebuilt yet at this point, so this re-checks the previous array's axis - confirm whether `v` was intended | ||
@test AxisArrays.axistrait(axes(A)[1]) <: AxisArrays.Categorical | ||
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b]) | ||
# Indexing with a CategoricalVector of keys is expected to keep a CategoricalVector axis in the result | ||
@test A[Axis{:row}(CategoricalVector([15]))] == AxisArray(reshape(A.data[8, :], 1, 2), CategoricalVector([15]), [:a, :b]) | ||
@test A[Axis{:row}(CategoricalVector([15])), 1] == AxisArray([A.data[8, 1]], CategoricalVector([15])) | ||
@test AxisArrays.axistrait(axes(A)[1]) <: AxisArrays.Categorical | ||
|
||
# TODO: maybe make this work? Would require removing or modifying Base.getindex(A::AxisArray, idxs::Idx...) | ||
# @test A[CategoricalVector([15]), 1] == AxisArray([A.data[8, 1]], CategoricalVector([15])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters