#  This is a demo of how automatic dimension permuting could be done. It could be deployed via traits, where the ML model specifies what form it wants its inputs in.

In [1]:
using LearnBase
using MLDataUtils

In [2]:
using RDatasets
iris = dataset("datasets", "iris");

In [3]:
# ScikitLearnBase Setup
using ScikitLearnBase
# Trait giving the observation dimension a model expects for its input data.
#
# ScikitLearn-style classifiers and regressors map to `ObsDim.First()`; every
# other type maps to `ObsDim.Last()`.
#
# The selectors use `Type{<:X}` rather than `Type{X}`: `Type{X}` matches only
# the type `X` itself, so a concrete estimator type `T <: BaseClassifier` would
# otherwise fall through to the generic `Type{<:Any}` method and be reported as
# `ObsDim.Last()`.
obsdim_input(::Type{<:BaseClassifier}) = ObsDim.First()
obsdim_input(::Type{<:BaseRegressor}) = ObsDim.First()
obsdim_input(::Type{<:Any}) = ObsDim.Last()

# Passing a model instance forwards to the method for the instance's type.
obsdim_input(model::T) where {T} = obsdim_input(T)

obsdim_input (generic function with 4 methods)

In [4]:
# LearnBase enhancements
using LearnBase: ObsDimension, default_obsdim
import LearnBase: nobs
#==

Base.size(data, ::ObsDim.Constant{N}) where N = size(data, N)
Base.size(data, ::ObsDim.Last) = size(data)[end]

function LearnBase.nobs(data, obsdim::ObsDimension)
    size(data, obsdim)
end
==#


In [28]:
# Normalise the observation dimension that `data` currently uses.
#
# * `ObsDim.Constant{N}` is returned as a fresh `ObsDim.Constant{N}()`.
# * `ObsDim.Last` is resolved against `data` to `ObsDim.Constant{ndims(data)}()`.
# * `ObsDim.Undefined` stays `ObsDim.Undefined()`.
# * Anything else (including the default, `default_obsdim(data)`) is first
#   converted to an `ObsDimension` and then dispatched to one of the above.
function determine_original_obsdim(data, ::ObsDim.Constant{N}) where {N}
    return ObsDim.Constant{N}()
end

function determine_original_obsdim(data, ::ObsDim.Last)
    return ObsDim.Constant{ndims(data)}()
end

function determine_original_obsdim(data, ::ObsDim.Undefined)
    return ObsDim.Undefined()
end

function determine_original_obsdim(data, obsdim=default_obsdim(data))
    as_obsdim = convert(ObsDimension, obsdim)
    return determine_original_obsdim(data, as_obsdim)
end

# Normalise the observation dimension requested for the output.
#
# `ObsDim.Last` becomes `ObsDim.Constant{2}()` (presumably because the result is
# meant to be a two-dimensional feature matrix). Any other value is converted
# to an `ObsDimension` and returned as-is.
function determine_target_obsdim(::ObsDim.Last)
    return ObsDim.Constant{2}()
end

function determine_target_obsdim(obsdim)
    return convert(ObsDimension, obsdim)
end


# initial

# Convenience method: target `ObsDim.Last()`, original observation dimension
# taken from `default_obsdim(data)`.
featurematrix(data) = featurematrix(ObsDim.Last(), data, default_obsdim(data))


# Convenience method: the caller states the original observation dimension of
# `data`; the target defaults to `ObsDim.Last()`.
featurematrix(data, original_obsdim::ObsDimension) =
    featurematrix(ObsDim.Last(), data, original_obsdim)


# Convenience method: the caller states the target observation dimension; the
# original one is taken from `default_obsdim(data)`.
featurematrix(target_obsdim::ObsDimension, data) =
    featurematrix(target_obsdim, data, default_obsdim(data))

# Main entrance function
# Three-argument form that all the shorter `featurematrix` methods forward to.
#
# Both observation dimensions are normalised (`determine_target_obsdim`,
# `determine_original_obsdim`) before dispatching to the `_featurematrix`
# method for this kind of `data`.
function featurematrix(target_obsdim, data, original_obsdim)
    target = determine_target_obsdim(target_obsdim)
    original = determine_original_obsdim(data, original_obsdim)
    return _featurematrix(target, data, original)
end

##############
# Matrices

# Matrix whose observation dimension already equals the target (same `N` in
# both `ObsDim.Constant`s): return `data` itself, no copy.
_featurematrix(
    target_obsdim::ObsDim.Constant{N},
    data::AbstractMatrix,
    original_obsdim::ObsDim.Constant{N},
) where {N} = data

# Matrix whose observation dimension differs from the target: swap the two
# dimensions. When both constants carry the same parameter, the more specific
# same-`N` method is selected instead of this one.
_featurematrix(
    target_obsdim::ObsDim.Constant{N},
    data::AbstractMatrix,
    original_obsdim::ObsDim.Constant{M},
) where {N, M} = permutedims(data, (2, 1))

##############
# Vector

# Vector with the target observation dimension 1: the vector is handed back
# untouched (note that the result is the vector itself, not an N×1 matrix).
_featurematrix(
    target_obsdim::ObsDim.Constant{1},
    data::AbstractVector,
    original_obsdim::ObsDimension,
) = data

# Vector with the target observation dimension 2.
#
# The result is a 1×N matrix, so that the observations run along dimension 2.
# This agrees with the generic fallback for the same target, which builds an
# `nfeatures × nobs` matrix. The previous `reshape(data, (:, 1))` produced an
# N×1 matrix, i.e. it placed the observations on dimension 1.
# NOTE(review): assumes each element of the vector is one observation — confirm.
function _featurematrix(
            target_obsdim::ObsDim.Constant{2},
            data::AbstractVector,
            original_obsdim::ObsDimension)
    # `reshape` shares memory with `data`; no copy is made.
    reshape(data, (1, :))
end

###########################
# DataFrames

# Data frame whose observation dimension is `ObsDim.Undefined`: convert it to a
# `Matrix` and re-enter `featurematrix`, declaring that matrix as having its
# observations along the first dimension.
function _featurematrix(
    target_obsdim::ObsDimension,
    data::AbstractDataFrame,
    original_obsdim::ObsDim.Undefined,
)
    as_matrix = convert(Matrix, data)
    return featurematrix(target_obsdim, as_matrix, ObsDim.First())
end




###############################
# Fallback
# It would be nice to optimize this to be almost nonallocating when `get_obs!` is defined

# Catch-all for data with no specialised `_featurematrix` method: delegate to
# `__featurematrix`, which assembles the matrix one observation at a time.
_featurematrix(target_obsdim::ObsDimension, data, original_obsdim::ObsDimension) =
    __featurematrix(target_obsdim, data, original_obsdim)

# Generic builder for the target observation dimension 1.
#
# Allocates an `nobs × nfeatures` matrix and writes the flattened form of each
# observation (`obvervation_vec`) into successive rows.
function __featurematrix(target_obsdim::ObsDim.Constant{1}, data, original_obsdim::ObsDimension)
    nrows = nobs(data, original_obsdim)
    ncols = nfeatures(data, original_obsdim)
    # Untyped constructor; the notebook output for this path shows `Array{Any,2}`.
    out = Matrix(nrows, ncols)
    row = 0
    for obs in eachobs(data, original_obsdim)
        row += 1
        out[row, :] .= obvervation_vec(obs)
    end
    return out
end

# Generic builder for the target observation dimension 2.
#
# Allocates an `nfeatures × nobs` matrix and writes the flattened form of each
# observation (`obvervation_vec`) into successive columns.
function __featurematrix(target_obsdim::ObsDim.Constant{2}, data, original_obsdim::ObsDimension)
    nrows = nfeatures(data, original_obsdim)
    ncols = nobs(data, original_obsdim)
    # Untyped constructor; the notebook output for this path shows `Array{Any,2}`.
    out = Matrix(nrows, ncols)
    col = 0
    for obs in eachobs(data, original_obsdim)
        col += 1
        out[:, col] .= obvervation_vec(obs)
    end
    return out
end

# Number of features per observation, measured as the length of the flattened
# first observation. Only the first observation is inspected.
function nfeatures(data, obsdim)
    first_obs = first(eachobs(data, obsdim))
    return length(obvervation_vec(first_obs))
end

# Flatten a single observation into a vector.

# A vector is already in the required form.
function obvervation_vec(data::AbstractVector)
    return data
end

# Any other array is reshaped to one dimension (column-major element order).
function obvervation_vec(data::AbstractArray)
    return reshape(data, :)
end

# A tuple is flattened element by element and the pieces are concatenated.
function obvervation_vec(data::Tuple)
    pieces = map(obvervation_vec, data)
    return vcat(pieces...)
end

# Anything else is collected into an array first, then flattened as above.
function obvervation_vec(data)
    collected = collect(data)
    return obvervation_vec(collected)
end

obvervation_vec (generic function with 4 methods)

In [29]:
featurematrix(ObsDim.First(), iris)

150×5 Array{Any,2}:
 5.1  3.5  1.4  0.2  "setosa"   
 4.9  3.0  1.4  0.2  "setosa"   
 4.7  3.2  1.3  0.2  "setosa"   
 4.6  3.1  1.5  0.2  "setosa"   
 5.0  3.6  1.4  0.2  "setosa"   
 5.4  3.9  1.7  0.4  "setosa"   
 4.6  3.4  1.4  0.3  "setosa"   
 5.0  3.4  1.5  0.2  "setosa"   
 4.4  2.9  1.4  0.2  "setosa"   
 4.9  3.1  1.5  0.1  "setosa"   
 5.4  3.7  1.5  0.2  "setosa"   
 4.8  3.4  1.6  0.2  "setosa"   
 4.8  3.0  1.4  0.1  "setosa"   
 ⋮                              
 6.0  3.0  4.8  1.8  "virginica"
 6.9  3.1  5.4  2.1  "virginica"
 6.7  3.1  5.6  2.4  "virginica"
 6.9  3.1  5.1  2.3  "virginica"
 5.8  2.7  5.1  1.9  "virginica"
 6.8  3.2  5.9  2.3  "virginica"
 6.7  3.3  5.7  2.5  "virginica"
 6.7  3.0  5.2  2.3  "virginica"
 6.3  2.5  5.0  1.9  "virginica"
 6.5  3.0  5.2  2.0  "virginica"
 6.2  3.4  5.4  2.3  "virginica"
 5.9  3.0  5.1  1.8  "virginica"

In [30]:
featurematrix(ObsDim.Last(), iris)

5×150 Array{Any,2}:
 5.1        4.9        4.7        4.6        …  6.2           5.9         
 3.5        3.0        3.2        3.1           3.4           3.0         
 1.4        1.4        1.3        1.5           5.4           5.1         
 0.2        0.2        0.2        0.2           2.3           1.8         
  "setosa"   "setosa"   "setosa"   "setosa"      "virginica"   "virginica"

In [31]:
eg = [1  2 3  4; 10 20 30 40; 100 200 300 400]

3×4 Array{Int64,2}:
   1    2    3    4
  10   20   30   40
 100  200  300  400

In [32]:
featurematrix(eg)

3×4 Array{Int64,2}:
   1    2    3    4
  10   20   30   40
 100  200  300  400

In [33]:
featurematrix(ObsDim.Last(), eg, ObsDim.First())

4×3 Array{Int64,2}:
 1  10  100
 2  20  200
 3  30  300
 4  40  400

In [34]:
featurematrix(ObsDim.Last(), eg, ObsDim.Last())

3×4 Array{Int64,2}:
   1    2    3    4
  10   20   30   40
 100  200  300  400

In [35]:
featurematrix(ObsDim.First(), eg, ObsDim.First())

3×4 Array{Int64,2}:
   1    2    3    4
  10   20   30   40
 100  200  300  400

In [36]:
featurematrix(ObsDim.First(), eg, ObsDim.Last())

4×3 Array{Int64,2}:
 1  10  100
 2  20  200
 3  30  300
 4  40  400

In [37]:
featurematrix(rand(4,5,6))

20×6 Array{Any,2}:
 0.227989   0.505699   0.424723   0.327435   0.612894   0.356281  
 0.390235   0.415833   0.897752   0.910352   0.965416   0.779816  
 0.0860939  0.866595   0.485977   0.281331   0.879453   0.810118  
 0.127175   0.774614   0.128959   0.754678   0.59757    0.67359   
 0.0672949  0.89673    0.99192    0.133552   0.617958   0.0156535 
 0.304675   0.0934629  0.872009   0.882632   0.299851   0.867351  
 0.717214   0.99343    0.941644   0.437871   0.811396   0.00838708
 0.631362   0.130609   0.791254   0.999651   0.941787   0.530463  
 0.345969   0.345719   0.0184813  0.841615   0.210566   0.112985  
 0.600528   0.417667   0.680704   0.491169   0.0477976  0.181161  
 0.248144   0.215063   0.327152   0.909832   0.960823   0.287004  
 0.861411   0.631855   0.731211   0.0812935  0.452861   0.603714  
 0.851714   0.861341   0.71142    0.629378   0.65642    0.342981  
 0.972597   0.712864   0.177017   0.404287   0.0408084  0.382202  
 0.183036   0.415246   0.6244     0.692272 

In [39]:
permutedims(["asd", "bsd"])

LoadError: MethodError: no method matching permutedims(::Array{String,1})
Closest candidates are:
  permutedims(::Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:Union{DenseArray, SubArray{T,N,P,I,true} where I<:Tuple{Union{Base.Slice, UnitRange},Vararg{Any,N} where N} where P where N where T}, DenseArray{T,N}, SubArray{T,N,A,I,L} where L} where I<:Tuple{Vararg{Union{Base.AbstractCartesianIndex, Int64, Range{Int64}},N} where N} where A<:Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:Union{DenseArray, SubArray{T,N,P,I,true} where I<:Tuple{Union{Base.Slice, UnitRange},Vararg{Any,N} where N} where P where N where T} where N where T, DenseArray} where N where T, ::Any) at multidimensional.jl:1292
  permutedims(::AbstractArray, ::Any) at permuteddimsarray.jl:116