# User Item Biases With Regularization
* Prediction for user $i$ and item $j$ is $\tilde r_{ij} = u_i + a_j$
* Loss function is $L = \sum_{\Omega}w_{ij}\text{loss}(r_{ij}, \tilde r_{ij}) + \lambda_u \sum_i (u_i - \bar u) ^2 + \lambda_a \sum_j (a_j - \bar a)^2$
* $\bar u$ is the mean of $u_i$ and $\bar a$ is the mean of $a_j$ 
* $\Omega$ is the set of observed pairs $(i, j)$
* $r_{ij}$ is the rating for user $i$ and item $j$
* $w_{ij}$ is the weight for the rating $r_{ij}$ and is modeled as a power-law in the number of items seen by $i$ and the number of users that have seen $j$: $w_{ij} = |\{j' : (i, j') \in \Omega\}| ^ {\lambda_{wu}} |\{i' : (i', j) \in \Omega\}| ^ {\lambda_{wa}}$
* $\text{loss}$ is the squared error, $\text{loss}(r_{ij}, \tilde r_{ij}) = (r_{ij} - \tilde r_{ij})^2$

In [1]:
# Identifier for this model.
# NOTE(review): not referenced in the cells visible here; presumably consumed by
# helpers pulled in from Alpha.ipynb — confirm.
const name = "UserItemBiases"
# Forwarded to `residualized_loss` when scoring validation predictions; empty here.
const residual_alphas = []
# Forwarded to `get_split` and `residualized_loss`.
const implicit = false;

In [2]:
import LineSearches: BackTracking
import NBInclude: @nbinclude
import SparseArrays: sparse
import Statistics: mean
@nbinclude("Alpha.ipynb");

In [3]:
# TODO support residualization
# Load the data splits with `get_split` (defined outside this cell, presumably by
# Alpha.ipynb — confirm). Later cells read `training.user`, `training.item`,
# `training.rating`, `validation.user` and `validation.item`.
const training = get_split("training", implicit = implicit)
const validation = get_split("validation", implicit = implicit);

## Alternating Least Squares
* Given some hyperparameters $\lambda$, we can solve for $u$ and $a$ via Alternating Least Squares
* This is an iterative algorithm where we fix $a$, then solve for the $u$ that minimizes the loss function
* Then we fix $u$ and solve for the best $a$
* These two steps are repeated until the bias vectors $u$ and $a$ converge

## ALS for Explicit data
* If we fix $a$, then for each user $i$, $u_i$ is optimized when
* $u_i = \dfrac{\sum_{j \in \Omega_i}(r_{ij} - a_j) w_{ij} + \bar u \lambda_u}{ \sum_{j \in \Omega_i} w_{ij} + \lambda_u}$
* $\Omega$ is the set of (user, item) pairs that we have ratings for
* $\Omega_i$ is the subset of $\Omega$ for which the user is the $i$-th user

In [4]:
"""
    make_prediction(users, items, u, a)

Return a vector with one entry per element of `users`, where entry `k` is
`u[users[k]] + a[items[k]]`. The result has element type `eltype(u)`.

The entries are filled in parallel with `Threads.@threads`. Indexing is done under
`@inbounds`, so `items` must be at least as long as `users` and every index stored
in `users` / `items` must be valid for `u` / `a`.
"""
function make_prediction(users, items, u, a)
    predictions = Vector{eltype(u)}(undef, length(users))
    Threads.@threads for k in eachindex(predictions)
        @inbounds begin
            user_bias = u[users[k]]
            item_bias = a[items[k]]
            predictions[k] = user_bias + item_bias
        end
    end
    return predictions
end;

In [5]:
"""
    get_residuals!(users, items, ratings, weights, a, ρ, Ω)

Accumulate weighted residuals into `ρ` and total weights into `Ω`, both indexed by
the entries of `users`. For every row `k` of the parallel arrays:

    ρ[users[k]] += (ratings[k] - a[items[k]]) * weights[k]
    Ω[users[k]] += weights[k]

`ρ` and `Ω` are added to, not reset; the caller is responsible for zeroing them.
All indexing is done under `@inbounds`.

Returns the tuple `(ρ, Ω)`.
"""
function get_residuals!(users, items, ratings, weights, a, ρ, Ω)
    @inbounds for k in eachindex(users)
        target = users[k]
        weight = weights[k]
        residual = ratings[k] - a[items[k]]
        ρ[target] += residual * weight
        Ω[target] += weight
    end
    return ρ, Ω
end

"""
    update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)

Perform one alternating-least-squares half step: with `a` held fixed, overwrite
every entry of `u` in place with

    u[i] = (ρᵢ + mean(u) * λ_u) / (Ωᵢ + λ_u)

where `ρᵢ` is the sum of `(ratings - a[items]) * weights` and `Ωᵢ` the sum of
`weights` over the rows whose `users` entry is `i`. `mean(u)` is evaluated once,
before any entry of `u` is overwritten.

# Arguments
- `users`, `items`, `ratings`, `weights`: parallel per-observation arrays.
- `u`: values to update (mutated).
- `a`: values held fixed during this step.
- `λ_u`: regularization strength pulling each `u[i]` toward `mean(u)`.
- `ρ`, `Ω`: scratch matrices indexed as `[:, Threads.threadid()]`; their contents
  are overwritten. The caller's bindings are not replaced.

Returns `nothing`.
"""
function update_users!(users, items, ratings, weights, u, a, λ_u, ρ, Ω)
    # Each iteration writes the scratch column of the thread it runs on, and
    # `thread_range` receives no iteration index, so it presumably derives its chunk
    # from the running thread as well (confirm against its definition). That is only
    # correct when every iteration runs on a distinct thread, so request the static
    # schedule explicitly; under the dynamic default of Julia ≥ 1.8 two iterations
    # may share a thread, processing one chunk twice and leaving another column stale.
    Threads.@threads :static for t = 1:Threads.nthreads()
        tid = Threads.threadid()
        range = thread_range(length(ratings))
        ρ[:, tid] .= 0
        Ω[:, tid] .= 0
        @views get_residuals!(
            users[range],
            items[range],
            ratings[range],
            weights[range],
            a,
            ρ[:, tid],
            Ω[:, tid],
        )
    end

    # Reduce the per-thread partial sums into fresh locals. Rebinding the `ρ` / `Ω`
    # arguments here would box them inside the closures created by `@threads`.
    ρ_total = sum(ρ, dims = 2)
    Ω_total = sum(Ω, dims = 2)
    μ = mean(u)
    Threads.@threads for i in eachindex(u)
        @inbounds u[i] = (ρ_total[i] + μ * λ_u) / (Ω_total[i] + λ_u)
    end
    return nothing
end;

In [6]:
"""
    train_model(training, stop_criteria, λ)

Fit the bias vectors `u` and `a` by alternating calls to `update_users!` until
`stop!(stop_criteria, [u, a])` returns `true`.

# Arguments
- `training`: object whose `user`, `item` and `rating` fields are read.
- `stop_criteria`: state passed to `stop!` before every sweep.
- `λ`: four values unpacked as `(λ_u, λ_a, λ_wu, λ_wa)`.

Per-observation weights are the elementwise product of two `expdecay` calls, one on
`get_counts("training")` with `log(λ_wu)` and one on
`get_counts("training"; by_item = true)` with `log(λ_wa)`.
NOTE(review): `expdecay`, `get_counts`, `num_users`, `num_items` and `stop!` are
defined outside this cell; the counts are looked up by the literal split name
"training", not through the `training` argument — confirm that is intended.

Returns the tuple `(u, a)`.
"""
function train_model(training, stop_criteria, λ)
    @info "training model with parameters $λ"
    λ_u, λ_a, λ_wu, λ_wa = λ
    users = training.user
    items = training.item
    ratings = training.rating

    user_factor = expdecay(get_counts("training"), log(λ_wu))
    item_factor = expdecay(get_counts("training"; by_item = true), log(λ_wa))
    weights = user_factor .* item_factor

    # Both bias vectors start at zero, typed after their regularization parameter.
    u = zeros(eltype(λ_u), num_users())
    a = zeros(eltype(λ_a), num_items())

    # Scratch space for update_users!: one column per thread, one row per entry.
    scratch(v) = zeros(eltype(v), length(v), Threads.nthreads())
    ρ_u, Ω_u = scratch(u), scratch(u)
    ρ_a, Ω_a = scratch(a), scratch(a)

    while true
        stop!(stop_criteria, [u, a]) && break
        # First solve for u with a fixed, then swap roles and solve for a.
        update_users!(users, items, ratings, weights, u, a, λ_u, ρ_u, Ω_u)
        update_users!(items, users, ratings, weights, a, u, λ_a, ρ_a, Ω_a)
    end
    return u, a
end;

## Training

In [7]:
"""
    validation_mse(λ)

Objective for the hyperparameter search. Exponentiates `λ` elementwise, trains a
model on `training` with those hyperparameters (convergence tolerance `1e-6`, at
most 16 iterations), predicts the `validation` pairs and returns
`residualized_loss(predictions, residual_alphas, implicit)`.
NOTE(review): `residualized_loss` and `convergence_stopper` are defined outside
this cell; the exact loss they compute is not visible here.
"""
function validation_mse(λ)
    # exp maps the unconstrained search variables to strictly positive values
    hyperparams = exp.(λ)
    stopper = convergence_stopper(1e-6, max_iters = 16)
    u, a = train_model(training, stopper, hyperparams)
    predictions = make_prediction(validation.user, validation.item, u, a)
    return residualized_loss(predictions, residual_alphas, implicit)
end;

In [8]:
# Find the best regularization hyperparameters.
# The search runs over the unconstrained inputs of validation_mse, starting from
# all zeros (i.e. every hyperparameter equal to exp(0) = 1), with derivatives
# obtained by forward-mode automatic differentiation.
res = Optim.optimize(
    validation_mse,
    zeros(Float32, 4),
    Optim.NewtonTrustRegion(),
    Optim.Options(show_trace = true, extended_trace = true);
    autodiff = :forward,
);
# Map the minimizer back to the hyperparameter scale used by train_model.
λ = map(exp, Optim.minimizer(res));

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:06:29 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,1.0,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,1.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,1.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0,0.0,0.0,0.0,1.0)]
[32mProgress: 100%|███████████████████████████| Time: 0:00:01 ( 0.34 μs/it)[39m39m
[32mProgress: 100%|███████████████████████████| Time: 0:00:00 (37.44 ns/it)[39m
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:06:56 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0001966,0.001265263,4.9162406e-7,-0.0022688252,-0.00932882)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m

Iter     Function value   Gradient norm 
     0     1.820268e+00     5.538846e-02
 * time: 0.025742053985595703
 * g(x): Float32[-0.005109214, -1.1346601f-6, 0.019632269, 0.05538846]
 * reached_subproblem_solution: true
 * h(x): Float32[0.0042683696 7.7101596f-8 -0.0009896054 -0.045240797; 7.7101575f-8 -8.909593f-7 2.1172305f-7 1.124478f-5; -0.0009896058 2.117212f-7 0.0024552576 0.0019379249; -0.045240793 1.1244779f-5 0.0019380794 0.74176663]
 * x: Float32[0.0, 0.0, 0.0, 0.0]
 * lambda: NaN
 * interior: true
 * hard case: false
 * delta: 1.0


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:08:16 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0515077,1.0515077,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.000123,0.0,1.000123,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.3691843,0.0,0.0,0.3691843,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9347721,0.0,0.0,0.0,0.9347721)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:08:31 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9958476,-0.0024471707,0.00012416836,0.041969813,0.025591727)]


     1     1.820268e+00     5.538846e-02
 * time: 17.585376024246216
 * g(x): Float32[-0.005109214, -1.1346601f-6, 0.019632269, 0.05538846]
 * reached_subproblem_solution: false
 * h(x): Float32[0.0042683696 7.7101596f-8 -0.0009896054 -0.045240797; 7.7101575f-8 -8.909593f-7 2.1172305f-7 1.124478f-5; -0.0009896058 2.117212f-7 0.0024552576 0.0019379249; -0.045240793 1.1244779f-5 0.0019380794 0.74176663]
 * x: Float32[0.0, 0.0, 0.0, 0.0]
 * lambda: 0.017065741
 * interior: false
 * hard case: false
 * delta: 0.25000045


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:08:32 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0231731,1.0231731,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0000244,0.0,1.0000244,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.78652567,0.0,0.0,0.78652567,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9364305,0.0,0.0,0.0,0.9364305)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:08:47 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0022187,0.002624183,5.3669214e-6,-0.005589499,-0.025572104)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:08:47 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float3

     2     1.817539e+00     1.138831e-01
 * time: 100.06993508338928
 * g(x): Float32[0.008961319, 3.0197784f-6, -0.00998044, -0.11388312]
 * reached_subproblem_solution: false
 * h(x): Float32[0.025343893 -6.363346f-7 -0.06186454 -0.2802833; -6.3633473f-7 8.081011f-6 -7.96245f-5 -2.4741928f-5; -0.061864533 -7.962449f-5 0.20222215 0.6736025; -0.28028324 -2.4741925f-5 0.6736026 3.3538468]
 * x: Float32[0.022908619, 2.4432935f-5, -0.24012995, -0.065679945]
 * lambda: 0.0786777
 * interior: false
 * hard case: false
 * delta: 0.25000045


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:09:54 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.8855004,0.8855004,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9912029,0.0,0.9912029,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64492124,0.0,0.0,0.64492124,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9960381,0.0,0.0,0.0,0.9960381)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:10:10 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0013659,0.0020703424,6.8743857e-6,-0.0036667152,-0.020527115)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:10:10 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Floa

     3     1.814355e+00     9.281909e-03
 * time: 182.1829731464386
 * g(x): Float32[0.0010545779, 3.196487f-5, -0.0030731447, -0.009281909]
 * reached_subproblem_solution: false
 * h(x): Float32[0.01984555 -9.510403f-7 -0.05069196 -0.21984765; -9.510404f-7 4.6734098f-5 -0.0003187643 -0.00025180282; -0.05069197 -0.0003187643 0.19387342 0.55316645; -0.21984765 -0.0002518028 0.55316657 2.6651337]
 * x: Float32[-0.12160242, -0.008836024, -0.43862706, -0.003969785]
 * lambda: 0.0020634928
 * interior: false
 * hard case: false
 * delta: 0.5000009


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:11:16 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7503633,0.7503633,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.61847913,0.0,0.61847913,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64641345,0.0,0.0,0.64641345,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9854359,0.0,0.0,0.0,0.9854359)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:11:32 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012323,0.002017829,5.1326447e-6,-0.0036288053,-0.019806774)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:11:32 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Flo

     4     1.814302e+00     4.987349e-04
 * time: 264.4176380634308
 * g(x): Float32[4.078251f-5, 1.6511425f-5, -0.00015084585, -0.00049873494]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018319873 -8.862713f-7 -0.045912076 -0.2019998; -8.862712f-7 2.7302502f-5 -0.00018548746 -0.00014776865; -0.045912072 -0.00018548744 0.1747061 0.49682415; -0.2019998 -0.00014776862 0.49682415 2.4646728]
 * x: Float32[-0.28719783, -0.48049188, -0.43631598, -0.014671221]
 * lambda: 2.552265e-5
 * interior: false
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:12:38 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74447626,0.74447626,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.33464122,0.0,0.33464122,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64566636,0.0,0.0,0.64566636,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851926,0.0,0.0,0.0,0.9851926)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:12:54 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012219,0.0020140137,2.9655369e-6,-0.003607213,-0.019734604)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:12:54 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), F

     5     1.814296e+00     3.891026e-05
 * time: 346.710440158844
 * g(x): Float32[5.060992f-8, 5.3457097f-6, -3.891026f-5, -3.057891f-5]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018271238 -7.326681f-7 -0.04584378 -0.20142907; -7.326684f-7 1.1379835f-5 -7.045927f-5 -6.400983f-5; -0.04584379 -7.045927f-5 0.1739366 0.49529612; -0.20142907 -6.400983f-5 0.4952963 2.4576008]
 * x: Float32[-0.2950743, -1.0946963, -0.43747243, -0.014918117]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:14:01 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7445059,0.7445059,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.20928267,0.0,0.20928267,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6457181,0.0,0.0,0.6457181,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98518014,0.0,0.0,0.0,0.98518014)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:14:16 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012214,0.0020140007,1.9161937e-6,-0.0036009587,-0.019719845)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:14:16 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Fl

     6     1.814294e+00     1.211001e-05
 * time: 429.12921118736267
 * g(x): Float32[1.2928587f-8, 1.6971549f-6, -1.2110014f-5, -8.156211f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018270303 -5.65846f-7 -0.045838594 -0.20142171; -5.658461f-7 4.792957f-6 -2.2743667f-5 -3.0704203f-5; -0.04583859 -2.2743672f-5 0.17363124 0.49494302; -0.2014217 -3.070421f-5 0.49494302 2.4572816]
 * x: Float32[-0.29503456, -1.5640695, -0.4373923, -0.014930794]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:15:23 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7444482,0.7444482,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.1469047,0.0,0.1469047,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64575475,0.0,0.0,0.64575475,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851615,0.0,0.0,0.0,0.9851615)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:15:39 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012212,0.0020140028,1.3694498e-6,-0.0035977925,-0.019712046)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:15:39 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Floa

     7     1.814294e+00     4.059876e-06
 * time: 511.6463520526886
 * g(x): Float32[-4.1922817f-8, 5.442883f-7, -4.059876f-6, -2.7679807f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018269695 -4.4347826f-7 -0.045835014 -0.20141673; -4.4347786f-7 1.9730621f-6 -3.1418983f-6 -1.6274098f-5; -0.04583502 -3.1418926f-6 0.17351504 0.49477392; -0.20141673 -1.6274096f-5 0.49477392 2.4571342]
 * x: Float32[-0.29511204, -1.9179711, -0.4373355, -0.014949688]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:16:46 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74446505,0.74446505,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.11148736,0.0,0.11148736,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64578676,0.0,0.0,0.64578676,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98515284,0.0,0.0,0.0,0.98515284)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:17:01 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012212,0.0020140286,1.0490612e-6,-0.0035961764,-0.019707423)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:17:01 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse)

     8     1.814294e+00     1.079623e-06
 * time: 594.1754310131073
 * g(x): Float32[-1.7768519f-8, 1.9239168f-7, -1.0331145f-6, -1.0796225f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018269181 -3.582952f-7 -0.04583139 -0.20141235; -3.58295f-7 6.938755f-7 4.932676f-6 -9.224475f-6; -0.04583139 4.932678f-6 0.17346752 0.4946663; -0.20141234 -9.224473f-6 0.4946662 2.4570348]
 * x: Float32[-0.29508936, -2.193844, -0.43728596, -0.014958504]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:18:08 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74446,0.74446,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.08448604,0.0,0.08448604,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64581,0.0,0.0,0.64581,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98514456,0.0,0.0,0.0,0.98514456)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:18:24 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.001221,0.002014041,7.9930635e-7,-0.0035946944,-0.019703656)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:18:24 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Fo

     9     1.814294e+00     1.243322e-06
 * time: 676.541424036026
 * g(x): Float32[2.0588034f-9, 1.0722058f-7, -5.795566f-7, -1.2433218f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018268863 -2.8463444f-7 -0.04582893 -0.2014097; -2.8463447f-7 7.741149f-9 8.359847f-6 -4.745257f-6; -0.04582893 8.359849f-6 0.17345826 0.494596; -0.2014097 -4.7452545f-6 0.49459606 2.456984]
 * x: Float32[-0.29509616, -2.471169, -0.43724993, -0.014966918]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.0000018


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:19:31 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7445226,0.7445226,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.031080617,0.0,0.031080617,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64589816,0.0,0.0,0.64589816,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98512286,0.0,0.0,0.0,0.98512286)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:19:46 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012203,0.0020141497,2.93411e-7,-0.0035925074,-0.019698294)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:19:46 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), 

    10     1.814294e+00     2.294079e-06
 * time: 759.1934850215912
 * g(x): Float32[-1.2572309f-7, 3.047291f-7, 1.6433996f-6, -2.2940792f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267184 -1.15481456f-7 -0.045817047 -0.20139275; -1.1548153f-7 -6.6580064f-8 3.3471558f-6 2.0099885f-7; -0.045817044 3.3471513f-6 0.17350867 0.49440774; -0.20139275 2.0099667f-7 0.49440768 2.4567385]
 * x: Float32[-0.29501215, -3.471171, -0.4371134, -0.014988939]
 * lambda: 1.0070112e-7
 * interior: false
 * hard case: false
 * delta: 2.0000036


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:20:53 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7447588,0.7447588,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.004206288,0.0,0.004206288,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64595217,0.0,0.0,0.64595217,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.985133,0.0,0.0,0.0,0.985133)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:09 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012202,0.0020142188,4.1931862e-8,-0.0035910818,-0.019695794)]


    11     1.814294e+00     2.294079e-06
 * time: 775.0819511413574
 * g(x): Float32[-1.2572309f-7, 3.047291f-7, 1.6433996f-6, -2.2940792f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267184 -1.15481456f-7 -0.045817047 -0.20139275; -1.1548153f-7 -6.6580064f-8 3.3471558f-6 2.0099885f-7; -0.045817044 3.3471513f-6 0.17350867 0.49440774; -0.20139275 2.0099667f-7 0.49440768 2.4567385]
 * x: Float32[-0.29501215, -3.471171, -0.4371134, -0.014988939]
 * lambda: 2.1906693e-7
 * interior: false
 * hard case: false
 * delta: 0.50000095


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:09 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7446476,0.7446476,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.018851329,0.0,0.018851329,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.645902,0.0,0.0,0.645902,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851362,0.0,0.0,0.0,0.9851362)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:25 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012205,0.002014137,1.7817533e-7,-0.0035913845,-0.01969667)]


    12     1.814294e+00     2.294079e-06
 * time: 791.048397064209
 * g(x): Float32[-1.2572309f-7, 3.047291f-7, 1.6433996f-6, -2.2940792f-6]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267184 -1.15481456f-7 -0.045817047 -0.20139275; -1.1548153f-7 -6.6580064f-8 3.3471558f-6 2.0099885f-7; -0.045817044 3.3471513f-6 0.17350867 0.49440774; -0.20139275 2.0099667f-7 0.49440768 2.4567385]
 * x: Float32[-0.29501215, -3.471171, -0.4371134, -0.014988939]
 * lambda: 6.7604327e-7
 * interior: false
 * hard case: false
 * delta: 0.12500024


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:25 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7446198,0.7446198,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.027428541,0.0,0.027428541,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64588946,0.0,0.0,0.64588946,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.985137,0.0,0.0,0.0,0.985137)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:41 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012203,0.002014134,2.5880652e-7,-0.0035923824,-0.019698622)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:21:41 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Flo

    13     1.814294e+00     9.415919e-07
 * time: 873.287180185318
 * g(x): Float32[-1.2201027f-8, 3.0917812f-7, 1.02116644f-7, 9.415919f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267391 -1.02824146f-7 -0.045818787 -0.20139556; -1.02824245f-7 -6.0775998f-9 2.4274773f-6 2.7710428f-7; -0.04581879 2.4274748f-6 0.17352986 0.4944242; -0.20139556 2.771045f-7 0.4944242 2.456737]
 * x: Float32[-0.29488155, -3.5961711, -0.43712693, -0.014974568]
 * lambda: 2.5039496e-6
 * interior: false
 * hard case: false
 * delta: 0.25000048


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:22:47 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74460965,0.74460965,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.017539736,0.0,0.017539736,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458988,0.0,0.0,0.6458988,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851327,0.0,0.0,0.0,0.9851327)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:23:03 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012205,0.0020141422,1.6595192e-7,-0.0035917275,-0.019697124)]


    14     1.814294e+00     9.415919e-07
 * time: 888.9959580898285
 * g(x): Float32[-1.2201027f-8, 3.0917812f-7, 1.02116644f-7, 9.415919f-7]
 * reached_subproblem_solution: false
 * h(x): Float32[0.018267391 -1.02824146f-7 -0.045818787 -0.20139556; -1.02824245f-7 -6.0775998f-9 2.4274773f-6 2.7710428f-7; -0.04581879 2.4274748f-6 0.17352986 0.4944242; -0.20139556 2.771045f-7 0.4944242 2.456737]
 * x: Float32[-0.29488155, -3.5961711, -0.43712693, -0.014974568]
 * lambda: 1.2428651e-6
 * interior: false
 * hard case: false
 * delta: 0.111778796


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:23:03 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7445935,0.7445935,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.024527756,0.0,0.024527756,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458909,0.0,0.0,0.6458909,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98513335,0.0,0.0,0.0,0.98513335)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:23:18 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012205,0.0020141418,2.314462e-7,-0.00359227,-0.019698188)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:23:19 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Flo

    15     1.814294e+00     3.073828e-07
 * time: 971.0391821861267
 * g(x): Float32[5.1365396f-9, 3.0707287f-7, 6.306917f-8, -3.073828f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267423 -9.267179f-8 -0.04581879 -0.201396; -9.2671776f-8 4.279728f-8 1.6944659f-6 3.1180542f-7; -0.04581879 1.6944668f-6 0.17353916 0.4944239; -0.201396 3.118074f-7 0.4944239 2.456746]
 * x: Float32[-0.2949168, -3.7079499, -0.4371247, -0.014978281]
 * lambda: 2.7721317e-6
 * interior: false
 * hard case: false
 * delta: 0.22355759


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:24:25 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7446055,0.7446055,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.01961408,0.0,0.01961408,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458937,0.0,0.0,0.6458937,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851339,0.0,0.0,0.0,0.9851339)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:24:40 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012205,0.0020141476,1.8532127e-7,-0.003592046,-0.019697556)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:24:41 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float

    16     1.814294e+00     3.410076e-07
 * time: 1053.217609167099
 * g(x): Float32[1.2996632f-8, 2.8837513f-7, 1.7684073f-7, 3.4100762f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.01826736 -7.523616f-8 -0.045818504 -0.20139554; -7.523619f-8 1.1942971f-7 5.224767f-7 3.2324917f-7; -0.045818504 5.2247844f-7 0.17355369 0.49441957; -0.20139556 3.232486f-7 0.49441966 2.4567268]
 * x: Float32[-0.29490075, -3.9315076, -0.43712032, -0.014977694]
 * lambda: 1.3308039e-6
 * interior: false
 * hard case: false
 * delta: 0.44711518


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:25:47 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7445754,0.7445754,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.012542624,0.0,0.012542624,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458927,0.0,0.0,0.6458927,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851309,0.0,0.0,0.0,0.9851309)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:26:03 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012205,0.0020141178,1.1956595e-7,-0.003591307,-0.01969589)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:26:03 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Floa

    17     1.814294e+00     4.317066e-07
 * time: 1135.1349802017212
 * g(x): Float32[-1.471172f-8, 2.166688f-7, 2.7649378f-7, -4.317066f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267374 -4.9533945f-8 -0.045818932 -0.20139612; -4.9533938f-8 1.8264393f-7 -6.956369f-7 2.5433914f-7; -0.045818932 -6.956352f-7 0.17357427 0.49442282; -0.20139614 2.5434062f-7 0.49442294 2.4567351]
 * x: Float32[-0.29494122, -4.3786225, -0.4371219, -0.014980772]
 * lambda: 5.255414e-7
 * interior: false
 * hard case: false
 * delta: 0.89423037


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:27:09 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74454886,0.74454886,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0051289573,0.0,0.0051289573,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.64588207,0.0,0.0,0.64588207,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.98513156,0.0,0.0,0.0,0.98513156)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:27:24 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.001221,0.0020141003,5.070526e-8,-0.0035909053,-0.01969515)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:27:25 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse

    18     1.814293e+00     1.328661e-07
 * time: 1217.122218132019
 * g(x): Float32[-1.9621442f-8, 7.709684f-8, 1.3286609f-7, -3.7881982f-8]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267551 -2.1297124f-8 -0.04582085 -0.20139855; -2.1297136f-8 1.0946719f-7 -6.284979f-7 1.2168638f-7; -0.04582085 -6.284969f-7 0.17358987 0.49444163; -0.20139855 1.2168607f-7 0.4944417 2.4567702]
 * x: Float32[-0.29497683, -5.272853, -0.43713835, -0.014980109]
 * lambda: 5.9667435e-8
 * interior: false
 * hard case: false
 * delta: 1.7884607


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:28:31 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.7445419,0.7445419,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0025594467,0.0,0.0025594467,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458765,0.0,0.0,0.6458765,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851325,0.0,0.0,0.0,0.9851325)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:28:47 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012202,0.002014087,2.606174e-8,-0.0035906197,-0.019694373)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:28:47 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Fl

    19     1.814293e+00     5.850910e-07
 * time: 1299.9149861335754
 * g(x): Float32[1.973485f-8, 2.5867724f-8, -3.2472562f-7, -5.8509096f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.01826771 -1.0921008f-8 -0.04582205 -0.20140047; -1.0921013f-8 4.406097f-8 -2.1893514f-7 6.77815f-8; -0.04582205 -2.189353f-7 0.1735949 0.4944557; -0.20140047 6.7781144f-8 0.49445552 2.4567964]
 * x: Float32[-0.29498613, -5.967964, -0.43714696, -0.014979091]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.7884607


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:29:54 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.744561,0.744561,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.0014392852,0.0,0.0014392852,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458784,0.0,0.0,0.6458784,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851343,0.0,0.0,0.0,0.9851343)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:30:09 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012208,0.0020140908,1.4924186e-8,-0.0035908192,-0.019694353)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:30:10 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Fl

    20     1.814293e+00     1.566365e-07
 * time: 1382.7439241409302
 * g(x): Float32[3.489497f-12, 9.23058f-9, 9.725228f-8, 1.5663655f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267618 -6.2314927f-9 -0.04582158 -0.20139964; -6.231494f-9 1.7257607f-8 -5.3962985f-8 4.0998497f-8; -0.04582158 -5.3963f-8 0.17359123 0.49444947; -0.20139964 4.0998497f-8 0.49444947 2.456784]
 * x: Float32[-0.29496047, -6.5436087, -0.4371441, -0.014977308]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.7884607


[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:31:17 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.74455047,0.74455047,0.0,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.00085658807,0.0,0.00085658807,0.0,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.6458768,0.0,0.0,0.6458768,0.0), Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(0.9851336,0.0,0.0,0.0,0.9851336)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:31:32 regression coefficients: ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}, Float32, 4}[Dual{ForwardDiff.Tag{typeof(validation_mse), Float32}}(1.0012202,0.0020140829,8.9824255e-9,-0.0035905798,-0.019694276)]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:31:33 training model with parameters ForwardDiff.Dual{ForwardDiff.Tag{typeof(validation_ms

    21     1.814293e+00     1.828915e-07
 * time: 1465.510311126709
 * g(x): Float32[8.4637755f-9, 3.3822876f-9, -1.4869445f-7, 1.8289153f-7]
 * reached_subproblem_solution: true
 * h(x): Float32[0.018267673 -3.7404218f-9 -0.045821976 -0.2014; -3.740422f-9 6.7599504f-9 3.444311f-10 2.5551211f-8; -0.04582198 3.446117f-10 0.17359361 0.49445462; -0.20140001 2.5551222f-8 0.4944547 2.4567866]
 * x: Float32[-0.29497463, -7.0625534, -0.43714643, -0.014978052]
 * lambda: 0.0
 * interior: true
 * hard case: false
 * delta: 1.7884607


In [9]:
# Report the selected hyperparameters and how many objective evaluations the optimizer used.
@info "The optimal λ is $λ, found in $(repr(Optim.f_calls(res))) function calls"

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:32:40 The optimal λ is Float32[0.74455047, 0.00085658807, 0.6458768, 0.9851336], found in 22 function calls


In [15]:
# Fit the final biases on the training split using the selected λ.
# NOTE(review): 1e-6 is presumably the convergence tolerance and `max_iters` an
# iteration cap; confirm against the definition of `convergence_stopper`.
stop_criteria = convergence_stopper(1e-6; max_iters = 16)
# `u` and `a` are the fitted user and item bias vectors consumed by the cells below.
u, a = train_model(training, stop_criteria, λ);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:39:18 training model with parameters Float32[0.74455047, 0.00085658807, 0.6458768, 0.9851336]


In [16]:
#[ Info: 20220522 21:58:57 The optimal λ is Float32[0.74455047, 0.00085658807, 0.6458768, 0.9851336], found in 22 function calls

In [17]:
# Evaluate the validation objective at the minimizer returned by the optimizer;
# the result is displayed as the cell's value.
res |> Optim.minimizer |> validation_mse

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:39:22 training model with parameters Float32[0.74455047, 0.00085658807, 0.6458768, 0.9851336]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:39:26 regression coefficients: Float32[0.99968237]


1.8145334f0

## Inference

In [None]:
# Prediction function for inference: for each (user, item) pair it returns the sum of
# the fitted user bias and item bias, delegating to `make_prediction`.
function model(users, items)
    return make_prediction(users, items, u, a)
end

# Hand the fitted model to `write_alpha` together with this notebook's
# `residual_alphas` and `implicit` settings.
write_alpha(model, residual_alphas, implicit);

[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:40:06 regression coefficients: Float32[0.99968237]
[38;5;6m[1m[ [22m[39m[38;5;6m[1mInfo: [22m[39m20220524 23:40:06 validation loss: 1.8145334


In [None]:
# Pass the fitted user biases, item biases, and selected hyperparameters to `write_params`.
write_params(
    Dict(
        "u" => u,
        "a" => a,
        "λ" => λ,
    ),
);