# Notebook to compute the state evolution for ridge regression under different resampling schemes

**Pour la régression ridge, le lambda optimal est constant et égal à $\sigma^2$**

In [1]:
# Download and install the dependencies recorded for the active project
# environment before any package is loaded.
using Pkg
Pkg.instantiate()

In [3]:
# using RidgeBootstrapStateEvolution
using BootstrapAsymptotics
using Plots
using JSON
using Revise

In [4]:
# Settings shared by the state-evolution sweeps in this notebook.

# 50 logarithmically spaced values between 10^-1 and 10^3, passed as `α` to `Ridge`.
alpha_range = exp10.(range(-1.0, 3.0; length = 50))

lambda = 1.0          # passed as `λ` to `Ridge`
noise_variance = 1.0  # passed as `Δ` to `Ridge`
max_weight = 20       # passed as `p_max` to `PairBootstrap`

# NOTE(review): 1e-20 is far below eps(Float64) ≈ 2.2e-16, so a relative-tolerance
# stopping test may never trigger and the solver may always run `max_iteration`
# steps — confirm against the convergence criterion of `state_evolution`.
rtol = 1e-20
max_iteration = 10_000

10000

In [5]:
# State evolution with a `PairBootstrap` scheme on both sides, solved once per
# value in `alpha_range`. The first element of each solver result is stored in
# `bootstrap_bootstrap_overlaps`, the second in `bootstrap_bootstrap_hat_overlaps`.
bootstrap_bootstrap_overlaps = []
bootstrap_bootstrap_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    first_scheme = PairBootstrap(; p_max = max_weight)
    second_scheme = PairBootstrap(; p_max = max_weight)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, first_scheme, second_scheme; rtol, max_iteration
    )
    push!(bootstrap_bootstrap_overlaps, fixed_point[1])
    push!(bootstrap_bootstrap_hat_overlaps, fixed_point[2])
end

In [7]:
# Serialise the pair-bootstrap / pair-bootstrap results to a JSON file whose name
# embeds the current `lambda`.
bootstrap_bootstrap_filename::String = "data/ridge/ridge_bootstrap_bootstrap_overlaps_lambda=$lambda.json"

# One entry per value of `alpha_range`. The "*hat" keys read the same fields
# (`m`, `Q`, `V`) from the hat-overlap results.
dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "m" => [o.m[1] for o in bootstrap_bootstrap_overlaps],
    "q_diag" => [o.Q[1, 1] for o in bootstrap_bootstrap_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in bootstrap_bootstrap_overlaps],
    "v" => [o.V[1] for o in bootstrap_bootstrap_overlaps],
    "mhat" => [o.m[1] for o in bootstrap_bootstrap_hat_overlaps],
    "qhat_diag" => [o.Q[1, 1] for o in bootstrap_bootstrap_hat_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in bootstrap_bootstrap_hat_overlaps],
    "vhat" => [o.V[1] for o in bootstrap_bootstrap_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(bootstrap_bootstrap_filename))
open(bootstrap_bootstrap_filename, "w") do f
    write(f, JSON.json(dict_to_save))
end
## 


8632

In [8]:
# State evolution pairing a `PairBootstrap` scheme with `FullResampling`, solved
# once per value in `alpha_range`. The first element of each solver result is
# stored in `bootstrap_full_overlaps`, the second in `bootstrap_full_hat_overlaps`.
bootstrap_full_overlaps = []
bootstrap_full_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, PairBootstrap(; p_max = max_weight), FullResampling();
        rtol, max_iteration
    )
    push!(bootstrap_full_overlaps, fixed_point[1])
    push!(bootstrap_full_hat_overlaps, fixed_point[2])
end

In [9]:
# Serialise the pair-bootstrap / full-resampling results to a JSON file whose
# name embeds the current `lambda`. Only the off-diagonal entry `Q[1, 2]` of the
# overlaps and of the hat overlaps is stored.
bootstrap_full_filename::String = "data/ridge/ridge_bootstrap_full_overlaps_lambda=$lambda.json"

dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "q_off_diag" => [o.Q[1, 2] for o in bootstrap_full_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in bootstrap_full_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(bootstrap_full_filename))
open(bootstrap_full_filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

2843

---

# Do full-full correlation 

In [10]:
# State evolution with `FullResampling` on both sides, solved once per value in
# `alpha_range`. The first element of each solver result is stored in
# `full_full_overlaps`, the second in `full_full_hat_overlaps`.
full_full_overlaps = []
full_full_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, FullResampling(), FullResampling(); rtol, max_iteration
    )
    push!(full_full_overlaps, fixed_point[1])
    push!(full_full_hat_overlaps, fixed_point[2])
end

In [11]:
# Serialise the full / full resampling results to a JSON file whose name embeds
# the current `lambda`.
filename::String = "data/ridge/ridge_full_full_overlaps_lambda=$lambda.json"

# One entry per value of `alpha_range`. The "*hat" keys read the same fields
# (`m`, `Q`, `V`) from the hat-overlap results.
dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "m" => [o.m[1] for o in full_full_overlaps],
    "q_diag" => [o.Q[1, 1] for o in full_full_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in full_full_overlaps],
    "v" => [o.V[1] for o in full_full_overlaps],
    "mhat" => [o.m[1] for o in full_full_hat_overlaps],
    "qhat_diag" => [o.Q[1, 1] for o in full_full_hat_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in full_full_hat_overlaps],
    "vhat" => [o.V[1] for o in full_full_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

7898

# y-resampling correlation

In [12]:
# State evolution with `LabelResampling` on both sides, solved once per value in
# `alpha_range`. The first element of each solver result is stored in
# `label_label_overlaps`, the second in `label_label_hat_overlaps`.
label_label_overlaps = []
label_label_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, LabelResampling(), LabelResampling(); rtol, max_iteration
    )
    push!(label_label_overlaps, fixed_point[1])
    push!(label_label_hat_overlaps, fixed_point[2])
end

In [13]:
# Serialise the label / label resampling results to a JSON file whose name embeds
# the current `lambda`.
filename::String = "data/ridge/ridge_label_label_overlaps_lambda=$lambda.json"

# One entry per value of `alpha_range`. The "*hat" keys read the same fields
# (`m`, `Q`, `V`) from the hat-overlap results.
dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "m" => [o.m[1] for o in label_label_overlaps],
    "q_diag" => [o.Q[1, 1] for o in label_label_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in label_label_overlaps],
    "v" => [o.V[1] for o in label_label_overlaps],
    "mhat" => [o.m[1] for o in label_label_hat_overlaps],
    "qhat_diag" => [o.Q[1, 1] for o in label_label_hat_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in label_label_hat_overlaps],
    "vhat" => [o.V[1] for o in label_label_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

8703

---

# Parametric residual bootstrap

We compute the variance of the residuals; for ridge regression, this is just the training error.

1) On calcule l'overlap de ridge regression 
2) On calcule le bootstrap resampling en utilisant $\rho = q_{\rm erm}$ et $\Delta = \varepsilon_{\rm train}$

In [14]:
# Evaluate (rho + noise_variance + q - 2m) / (1 + v)^2 from the scalars `m`, `q`, `v`
# and `noise_variance`; the keyword `rho` defaults to 1.0.
function get_train_error(m, q, v, noise_variance; rho = 1.0)
    numerator = rho + noise_variance + q - 2.0 * m
    denominator = (1 + v)^2
    return numerator / denominator
end

get_train_error (generic function with 1 method)

In [15]:
# Two-stage sweep over `alpha_range`:
#   1. solve the state evolution with `FullResampling` on both sides and keep the
#      first element of the result in `erm_overlaps`;
#   2. compute the training error from that result with `get_train_error`, then
#      solve a second `Ridge` problem with `Δ` set to that training error and `ρ`
#      set to `Q[1, 1]` of stage 1, using `LabelResampling` on both sides.
erm_overlaps = []
residual_residual_overlaps = []
residual_residual_hat_overlaps = []
train_errors = []

for alpha in alpha_range
    # Stage 1
    problem = Ridge(α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    result = BootstrapAsymptotics.state_evolution(
        problem, FullResampling(), FullResampling();
        rtol = rtol, max_iteration = max_iteration
    )
    erm = result[1]
    push!(erm_overlaps, erm)

    train_error = get_train_error(erm.m[1], erm.Q[1, 1], erm.V[1], noise_variance, rho = 1.0)
    # Record the value: `train_errors` was previously created but never filled.
    push!(train_errors, train_error)

    # Stage 2
    problem_erm = Ridge(α = alpha, Δ = train_error, ρ = erm.Q[1, 1], λ = lambda)
    result_2 = BootstrapAsymptotics.state_evolution(
        problem_erm, LabelResampling(), LabelResampling();
        rtol = rtol, max_iteration = max_iteration
    )
    push!(residual_residual_overlaps, result_2[1])
    push!(residual_residual_hat_overlaps, result_2[2])
end

In [16]:
# Serialise the residual-bootstrap results to a JSON file whose name embeds the
# current `lambda`. Only `Q[1, 1]`, `Q[1, 2]` and `m[1]` of
# `residual_residual_overlaps` are stored, one entry per value of `alpha_range`.
to_save = Dict([
    "alpha" => alpha_range,
    "q_diag" => [o.Q[1, 1] for o in residual_residual_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in residual_residual_overlaps],
    "m" => [o.m[1] for o in residual_residual_overlaps],
])

filename::String = "data/ridge/ridge_residual_residual_overlaps_lambda=$lambda.json"

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(to_save))
end

3841

--- 

### Subsampling

In [12]:
# Value handed to `Subsampling` as `r`. Author's note: pick 0.99 for the jackknife.
proba = 0.8

subsampling_subsampling_overlaps = []
subsampling_subsampling_hat_overlaps = []

# State evolution with `Subsampling(r = proba)` on both sides, solved once per
# value in `alpha_range`. This sweep does not use the notebook-wide `rtol` /
# `max_iteration`: it calls the solver with rtol = 0.0 and a cap of 100_000
# iterations.
for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, Subsampling(; r = proba), Subsampling(; r = proba);
        rtol = 0.0, max_iteration = 100_000
    )
    push!(subsampling_subsampling_overlaps, fixed_point[1])
    push!(subsampling_subsampling_hat_overlaps, fixed_point[2])
end

In [13]:
# Serialise the subsampling / subsampling results to a JSON file whose name
# embeds the current `lambda` and `proba`.
filename::String = "data/ridge/ridge_subsampling_subsampling_overlaps_lambda=$(lambda)_p=$(proba).json"

# One entry per value of `alpha_range`, plus the scalar `proba`. The "*hat" keys
# read the same fields (`m`, `Q`, `V`) from the hat-overlap results.
dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "proba" => proba,
    "m" => [o.m[1] for o in subsampling_subsampling_overlaps],
    "q_diag" => [o.Q[1, 1] for o in subsampling_subsampling_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in subsampling_subsampling_overlaps],
    "v" => [o.V[1] for o in subsampling_subsampling_overlaps],
    "mhat" => [o.m[1] for o in subsampling_subsampling_hat_overlaps],
    "qhat_diag" => [o.Q[1, 1] for o in subsampling_subsampling_hat_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in subsampling_subsampling_hat_overlaps],
    "vhat" => [o.V[1] for o in subsampling_subsampling_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

8647

In [14]:
# State evolution pairing `Subsampling(r = proba)` with `FullResampling`, solved
# once per value in `alpha_range`. The solver is called with rtol = 0.0 and a cap
# of 100_000 iterations rather than the notebook-wide `rtol` / `max_iteration`.
subsampling_full_overlaps = []
subsampling_full_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = lambda)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, Subsampling(; r = proba), FullResampling();
        rtol = 0.0, max_iteration = 100_000
    )
    push!(subsampling_full_overlaps, fixed_point[1])
    push!(subsampling_full_hat_overlaps, fixed_point[2])
end

In [15]:
# Serialise the subsampling / full-resampling results to a JSON file whose name
# embeds the current `lambda` and `proba`.
filename::String = "data/ridge/ridge_subsampling_full_overlaps_lambda=$(lambda)_p=$(proba).json"

# One entry per value of `alpha_range`, plus the scalar `proba`. The "*hat" keys
# read the same fields (`m`, `Q`, `V`) from the hat-overlap results.
dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "proba" => proba,
    "m" => [o.m[1] for o in subsampling_full_overlaps],
    "q_diag" => [o.Q[1, 1] for o in subsampling_full_overlaps],
    "q_off_diag" => [o.Q[1, 2] for o in subsampling_full_overlaps],
    "v" => [o.V[1] for o in subsampling_full_overlaps],
    "mhat" => [o.m[1] for o in subsampling_full_hat_overlaps],
    "qhat_diag" => [o.Q[1, 1] for o in subsampling_full_hat_overlaps],
    "qhat_off_diag" => [o.Q[1, 2] for o in subsampling_full_hat_overlaps],
    "vhat" => [o.V[1] for o in subsampling_full_hat_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

8640

--- 

# Bayes-optimal

In [10]:
# State evolution with `FullResampling` on both sides, solved once per value in
# `alpha_range`, with the regularisation `λ` set to `noise_variance` instead of
# `lambda`. Author's note: for ridge, the Bayes-optimal point is at the optimal
# lambda, which equals the noise variance here.
bayes_optimal_overlaps = []
bayes_optimal_hat_overlaps = []

for alpha in alpha_range
    ridge = Ridge(; α = alpha, Δ = noise_variance, ρ = 1.0, λ = noise_variance)
    fixed_point = BootstrapAsymptotics.state_evolution(
        ridge, FullResampling(), FullResampling(); rtol, max_iteration
    )
    push!(bayes_optimal_overlaps, fixed_point[1])
    push!(bayes_optimal_hat_overlaps, fixed_point[2])
end

In [11]:
# Serialise the Bayes-optimal results to JSON. Only `m[1]`, `Q[1, 1]` and `V[1]`
# of `bayes_optimal_overlaps` are stored, one entry per value of `alpha_range`.
filename::String = "data/ridge/ridge_bayes_optimal_overlaps.json"

dict_to_save::Dict = Dict(
    "alpha" => alpha_range,
    "m" => [o.m[1] for o in bayes_optimal_overlaps],
    "q" => [o.Q[1, 1] for o in bayes_optimal_overlaps],
    "v" => [o.V[1] for o in bayes_optimal_overlaps],
)

# `open(path, "w")` does not create missing parent directories, so make sure the
# output directory exists first (a no-op when it is already there).
mkpath(dirname(filename))
open(filename, "w") do f
    write(f, JSON.json(dict_to_save))
end

3849