Skip to content

Commit

Permalink
poking a bit at apply_data and ModelCols idea...
Browse files Browse the repository at this point in the history
  • Loading branch information
kleinschmidt committed Oct 6, 2019
1 parent 04a0ccf commit 956ce44
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/StatsModels.jl
Expand Up @@ -63,5 +63,6 @@ include("temporal_terms.jl")
include("formula.jl")
include("modelframe.jl")
include("statsmodel.jl")
include("data.jl")

end # module StatsModels
29 changes: 29 additions & 0 deletions src/data.jl
@@ -0,0 +1,29 @@
# Notes:
#
# Add an apply_data step which puts a link to a table on each term, wrapping it
# in a ModelCols struct. Propagate the missing mask up through the terms to allow
# e.g. lead/lag to say that they're going to generate missings.

"""
    ModelCols{T<:TermOrTerms,D}

Pair a term with the data object it is linked to.

# Fields
- `term::T`: the wrapped term (or terms).
- `data::D`: the data attached to `term`. Intended to be a table; the accepted
  table types are not constrained here (TODO confirm).
"""
mutable struct ModelCols{T<:TermOrTerms,D}
    term::T
    data::D
end

"""
    apply_data(t, data)

Link the term `t` to `data`.

The fallback method for any `AbstractTerm` returns `ModelCols(t, data)`.
"""
function apply_data end

# Fallback: a term with no more specific method is wrapped as-is.
function apply_data(t::AbstractTerm, data)
    return ModelCols(t, data)
end
# Interaction terms: link every component term to `data`, rebuild the
# interaction from the linked components, then wrap the rebuilt term.
function apply_data(t::InteractionTerm, data)
    # `Ref` keeps `data` from being broadcast over element-wise.
    linked = apply_data.(t.terms, Ref(data))
    return ModelCols(InteractionTerm(linked), data)
end
# Formula terms: link both sides to `data`, rebuild the formula from the linked
# sides, then wrap the rebuilt formula.
function apply_data(t::FormulaTerm, data)
    lhs = apply_data(t.lhs, data)
    rhs = apply_data(t.rhs, data)
    return ModelCols(FormulaTerm(lhs, rhs), data)
end
# Matrix terms: link each contained term to `data`. The broadcast result is
# returned directly; it is not wrapped in a `ModelCols`.
function apply_data(t::MatrixTerm, data)
    # `Ref` keeps `data` from being broadcast over element-wise.
    return apply_data.(t.terms, Ref(data))
end


# The width of a data-linked term is the width of the term it wraps.
function width(mc::ModelCols)
    wrapped = mc.term
    return width(wrapped)
end

import Base.getindex

"""
    getindex(mc::ModelCols{<:CategoricalTerm}, i, j)

Index into the columns generated by a data-linked categorical term.

The data column named `mc.term.sym` is looked up on `mc.data`, rows `i` are
selected from it, and the selected values are used together with `j` to index
`mc.term.contrasts`.

# Arguments
- `i`: row index (or indices) into the data column.
- `j`: column index (or indices) passed through to `mc.term.contrasts`.
"""
function getindex(mc::ModelCols{<:CategoricalTerm}, i, j)
    # Look the column up by name first and only then select rows: column tables
    # such as a `NamedTuple` of vectors do not support two-index `data[sym, i]`
    # lookups, but they do expose each column as a property.
    column = getproperty(mc.data, mc.term.sym)
    return mc.term.contrasts[column[i], j]
end

17 changes: 17 additions & 0 deletions test/data.jl
@@ -0,0 +1,17 @@
using StatsModels, StatsBase

using StatsModels: apply_data, ModelCols

# Exploratory script: link a schema-applied formula to a column table and index
# into the categorical term's generated columns.

f = @formula(y ~ 1 + a*b)

# Every column must have the same number of rows for `d` to be a valid table.
nrows = 20

d = (y = rand(nrows),
     a = rand(nrows),
     b = sample('a':'d', nrows))

ff = apply_schema(f, schema(d))

fff = apply_data(ff, d)

# Second-to-last element of the data-linked right-hand side.
# NOTE(review): presumably the categorical `b` term (before `a & b`) — confirm.
b = fff.term.rhs[end-1]

b[:, :]

0 comments on commit 956ce44

Please sign in to comment.