Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LearnDataFrontEnds"
uuid = "5cca22a3-9356-470e-ba1b-8268d0135a4b"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.1.2"
version = "0.2.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand All @@ -11,7 +11,7 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "0.10"
CategoricalArrays = "1"
LearnAPI = "0.2, 1, 2"
MLCore = "1.0.0"
StatsModels = "0.7.4"
Expand Down
72 changes: 36 additions & 36 deletions docs/src/quick_start.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
- [Supervised classifiers](@ref)
- [Transformers](@ref)

Refer to the front end [docstrings](@ref front_ends) for options ignored below.
Refer to the front end [docstrings](@ref front_ends) for options ignored below.

## Supervised regressors

Expand All @@ -31,35 +31,35 @@ Your [`LearnAPI.fit`](@ref) implementation will then look like this:

```julia
function LearnAPI.fit(
learner::MyLearner,
observations::Obs;
verbosity=1,
)
X = observations.features # p x n matrix
y = observations.target # n-vector (use `Saffron(multitarget=true)` for matrix)
feature_names = observations.names
learner::MyLearner,
observations::Obs;
verbosity=1,
)
X = observations.features # p x n matrix
y = observations.target # n-vector (use `Saffron(multitarget=true)` for matrix)
feature_names = observations.names

# do stuff with `X`, `y` and `feature_names`:
...
# do stuff with `X`, `y` and `feature_names`:
...

end
LearnAPI.fit(learner::MyLearner, data; kwargs...) =
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
```

For each [`KindOfProxy`](@ref) subtype `K` to be supported (e.g., `Point`), your
[`LearnAPI.predict`](@ref) implementation(s) will look like this:

```julia
function LearnAPI.predict(model::MyModel, ::K, observations::Obs)
X = observations.features # p x n matrix
names = observations.names # if really needed
X = observations.features # p x n matrix
names = observations.names # if really needed

# do stuff with `X`:
...
# do stuff with `X`:
...
end
LearnAPI.predict(model::MyModel, kind_of_proxy, X) =
LearnAPI.predict(model, kind_of_proxy, obs(model, X))
LearnAPI.predict(model, kind_of_proxy, obs(model, X))
```

## Supervised classifiers
Expand Down Expand Up @@ -94,13 +94,13 @@ function LearnAPI.fit(
X = observations.features # p x n matrix
y = observations.target # n-vector
decoder = observations.decoder
classes_seen = observatioins.classes_seen
levels_seen = observations.levels_seen
feature_names = observations.names

# do stuff with `X`, `y` and `feature_names`:
# return a `model` object which also stores the `decoder` and/or `classes_seen`
# to make them available to `predict`.
...
# return a `model` object which also stores the `decoder` and/or `levels_seen`
# to make them available to `predict`.
...
end
LearnAPI.fit(learner::MyLearner, data; kwargs...) =
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
Expand All @@ -116,10 +116,10 @@ function LearnAPI.predict(model::MyModel, :K, observations::Obs)

# Do stuff with `X` and `model` to obtain raw `predictions` (a vector of integer
# codes for `K = Point`, or an `n x c` matrix of probabilities for `K = Distribution`).
# Extract `decoder` or `classes_seen` from `model`.
# Extract `decoder` or `levels_seen` from `model`.
# For `K = Point`, return `decoder.(predictions)`.
# For `K = Distribution`, return, say,
# `CategoricalDistributions.Univariate(classes_seen, predictions)`.
# `CategoricalDistributions.UnivariateFinite(levels_seen, predictions)`.
...
end
LearnAPI.predict(model::MyModel, kind_of_proxy, X) = LearnAPI.predict(model,
Expand Down Expand Up @@ -152,29 +152,29 @@ Your [`LearnAPI.fit`](@ref) implementation will then look like this:

```julia
function LearnAPI.fit(
learner::MyLearner,
observations::Obs;
verbosity=1,
)
x = observations.features # p x n matrix
feature_names = observations.names

# do stuff with `x` and `feature_names`:
...
learner::MyLearner,
observations::Obs;
verbosity=1,
)
x = observations.features # p x n matrix
feature_names = observations.names

# do stuff with `x` and `feature_names`:
...
end
LearnAPI.fit(learner::MyLearner, data; kwargs...) =
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
```

Your [`LearnAPI.transform`](@ref) implementation will look like this:

```julia
function LearnAPI.transform(model::MyModel, observations::Obs)
x = observations.features # p x n matrix
feature_names = observations.names # if really needed
x = observations.features # p x n matrix
feature_names = observations.names # if really needed

# do stuff with `x`:
...
# do stuff with `x`:
...
end
LearnAPI.transform(model::MyModel, X) = LearnAPI.transform(model, obs(model, X))
```
Expand Down
1 change: 0 additions & 1 deletion docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ LearnDataFrontEnds.feature_names
LearnDataFrontEnds.swapdims
LearnDataFrontEnds.decoder
LearnDataFrontEnds.decompose
LearnDataFrontEnds.classes
LearnDataFrontEnds.canonify
```
85 changes: 77 additions & 8 deletions src/backends.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,86 @@ If [`Sage`](@ref)`(multitarget=..., code_type=...)` has been implemented, then
`observations.target` has an integer element type controlled by `code_type`, and we
additionally have:

- `observations.classes`: A categorical vector of the ordered target classes, as actually
seen in the user-supplied target, with the full pool of classes available by applying
`Categorical.levels` to the result. The corresponding integer codes will be
`sort(unique(observations.target))`.
- `observations.levels_seen`: A categorical vector of the ordered target levels, as actually
seen in the user-supplied target. The corresponding integer codes will be
`sort(unique(observations.target))`. To get the full pool of levels, apply
`CategoricalArrays.levels` to `observations.levels_seen`; see the example below.

- `observations.decoder`: A callable function that converts an integer code back to the
original `CategoricalValue` it represents.

Pass `observations.levels_seen` on to `predict` for making probabilistic predictions, and
`observations.decoder` for point predictions; see [`Sage`](@ref) for details.

# Extended help

In the example below, `observations` implements the full `Obs` interface described above,
for a learner implementing the `Sage` front end:

```julia-repl
using LearnAPI, LearnDataFrontEnds, LearnTestAPI
using CategoricalDistributions, CategoricalArrays, DataFrames
X = DataFrame(rand(10, 3), :auto)
y = categorical(collect("ababababac"))
learner = LearnTestAPI.ConstantClassifier()
observations = obs(learner, (X[1:9,:], y[1:9]))

julia> observations.features
3×9 Matrix{Float64}:
0.234043 0.526468 0.227417 0.956471 … 0.00587146 0.169291 0.353518 0.402631
0.631083 0.151317 0.781049 0.00320728 0.756519 0.15317 0.452169 0.127005
0.285315 0.347433 0.69174 0.516915 0.900343 0.404006 0.448986 0.962649

julia> yint = observations.target
9-element Vector{UInt32}:
0x00000001
0x00000002
0x00000001
0x00000002
0x00000001
0x00000002
0x00000001
0x00000002
0x00000001

julia> observations.levels_seen
2-element CategoricalArray{Char,1,UInt32}:
'a'
'b'

julia> sort(unique(observations.target))
2-element Vector{UInt32}:
0x00000001
0x00000002

julia> observations.levels_seen |> levels
3-element CategoricalArray{Char,1,UInt32}:
'a'
'b'
'c'

julia> observations.decoder.(yint)
9-element CategoricalArray{Char,1,UInt32}:
'a'
'b'
'a'
'b'
'a'
'b'
'a'
'b'
'a'

julia> d = UnivariateFinite(observations.levels_seen, [0.4, 0.6])
UnivariateFinite{Multiclass{3}}(a=>0.4, b=>0.6)

julia> levels(d)
3-element CategoricalArray{Char,1,UInt32}:
'a'
'b'
'c'
```

"""
abstract type Obs end

Expand Down Expand Up @@ -111,7 +180,7 @@ struct SageObs{F,T,E,D} <: Obs
features::F # p x n
names::Vector{Symbol}
target::T
classes_seen::CategoricalArrays.CategoricalVector{E}
levels_seen::CategoricalArrays.CategoricalVector{E}
decoder::D
end

Expand All @@ -122,8 +191,8 @@ function Base.show(io::IO, ::MIME"text/plain", observations::SageObs)
println(io, " features :: $(typeof(A))($(size(A)))")
println(io, " names: $(observations.names)")
println(io, " target :: $(typeof(y))($(size(y)))")
println(io, " classes_seen: "*
"$(CategoricalArrays.unwrap.(observations.classes_seen)) "*
println(io, " levels_seen: "*
"$(CategoricalArrays.unwrap.(observations.levels_seen)) "*
"(categorical vector with complete pool)")
print(io, " decoder: <callable>")
end
Expand All @@ -133,7 +202,7 @@ Base.getindex(observations::SageObs, idx) =
MLCore.getobs(observations.features, idx),
observations.names,
MLCore.getobs(observations.target, idx),
observations.classes_seen,
observations.levels_seen,
observations.decoder,
)

Expand Down
6 changes: 3 additions & 3 deletions src/saffron.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ function finalize(x, names, y, int) # here `int` is `levelcode` or `refcode` fu
CategoricalArrays.CategoricalArray,
SubArray{<:Any, <:Any, <:CategoricalArrays.CategoricalArray},
} || throw(ERR_EXPECTED_CATEGORICAL)
l = LearnDataFrontEnds.classes(y)
l = CategoricalArrays.levels(y)
u = unique(y)
mask = map(in(u), l)
_classes_seen = l[mask]
_levels_seen = l[mask]
_decoder = LearnDataFrontEnds.decoder(l)

return SageObs(x, names, int.(y), _classes_seen, _decoder)
return SageObs(x, names, int.(y), _levels_seen, _decoder)
end

# for input `(x::AbstractMatrix, y::MatrixOrVector)`:
Expand Down
8 changes: 4 additions & 4 deletions src/sage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,12 @@ function LearnAPI.fit(
X = observations.features # p x n matrix
y = observations.target # n-vector or q x n matrix
decoder = observations.decoder
classes_seen = observations.classes_seen
levels_seen = observations.levels_seen
feature_names = observations.names
# do stuff with `X`, `y` and `feature_names`:
# return a `model` object which also stores the `decoder` and/or
# `classes_seen` to make them available to `predict`.
# `levels_seen` to make them available to `predict`.
...
end
Expand All @@ -127,10 +127,10 @@ function LearnAPI.predict(model::MyModel, :K, observations::Obs)
# Do stuff with `X` and `model` to obtain raw `predictions` (a vector of integer
# codes for `K = Point`, or an `n x c` matrix of probabilities for `K = Distribution`).
# Extract `decoder` or `classes_seen` from `model`.
# Extract `decoder` or `levels_seen` from `model`.
# For `K = Point`, return `decoder.(predictions)`.
# For `K = Distribution`, return, say,
# `CategoricalDistributions.Univariate(classes_seen, predictions)`.
# `CategoricalDistributions.UnivariateFinite(levels_seen, predictions)`.
...
end
LearnAPI.predict(model::MyModel, kind_of_proxy, X) = LearnAPI.predict(model,
Expand Down
56 changes: 3 additions & 53 deletions src/tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,58 +112,8 @@ function decompose(X, v, _targets::NTuple)
return swapdims(A, v), collect(names), swapdims(B, v)
end

"""
classes(x)

*Private method.*

Return, as a `CategoricalVector`, all the categorical elements with
the same pool as `CategoricalValue` `x` (including `x`), with an
ordering consistent with the pool. Note that `x in classes(x)` is
always true.

Not to be confused with `levels(x.pool)`. See the example below.

Also, overloaded for `x` a `CategoricalArray`, `CategoricalPool`, and for views of
`CategoricalArray`.

julia> v = categorical(['c', 'b', 'c', 'a'])
4-element CategoricalArrays.CategoricalArray{Char,1,UInt32}:
'c'
'b'
'c'
'a'

julia> levels(v)
3-element Array{Char,1}:
'a'
'b'
'c'

julia> x = v[4]
CategoricalArrays.CategoricalValue{Char,UInt32} 'a'

julia> classes(x)
3-element CategoricalArrays.CategoricalArray{Char,1,UInt32}:
'a'
'b'
'c'

julia> levels(x.pool)
3-element Array{Char,1}:
'a'
'b'
'c'

"""
classes(p::CategoricalArrays.CategoricalPool) = [p[i] for i in 1:length(p)]
classes(x::CategoricalArrays.CategoricalValue) = classes(CategoricalArrays.pool(x))
classes(v::CategoricalArrays.CategoricalArray) = classes(CategoricalArrays.pool(v))
classes(v::SubArray{<:Any, <:Any, <:CategoricalArrays.CategoricalArray}) = classes(parent(v))


struct CategoricalDecoder{V,R}
classes::CategoricalArrays.CategoricalVector{
levels::CategoricalArrays.CategoricalVector{
V,
R,
V,
Expand Down Expand Up @@ -193,7 +143,7 @@ pool as `x`.
*Warning:* There is no guarantee that `levelcode.(d.(u)) == u` will always hold.

"""
decoder(x) = CategoricalDecoder(classes(x))
decoder(x) = CategoricalDecoder(CategoricalArrays.levels(x))

(d::CategoricalDecoder{V,R})(i::Integer) where {V,R} =
CategoricalArrays.CategoricalValue{V,R}(d.classes[i])
CategoricalArrays.CategoricalValue{V,R}(d.levels[i])
Loading
Loading