diff --git a/.gitignore b/.gitignore index a960a8b..8524870 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ docs/src/**/*.html docs/src/**/*.ipynb docs/src/**/*Manifest.toml docs/src_stash/*.ipynb +docs/src/tutorials/test.* +docs/src/tutorials/tmp* + diff --git a/Project.toml b/Project.toml index e4a390d..c788bd0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HybridVariationalInference" uuid = "a108c475-a4e2-4021-9a84-cfa7df242f64" authors = ["Thomas Wutzler and contributors"] -version = "1.0.0-DEV" +version = "0.2" [deps] Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" @@ -10,15 +10,20 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DistributionFits = "45214091-1ed4-4409-9bcf-fdb48a05e921" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" @@ -26,6 +31,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [weakdeps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" @@ -47,17 +53,22 @@ ChainRulesCore = "1.25" Combinatorics = "1.0.2" CommonSolve = "0.2.4" ComponentArrays = "0.15.19" +DifferentiationInterface = "0.6.54, 0.7" DistributionFits = "0.3.9" Distributions = "0.25.117" FillArrays = "1.13.0" Flux = "0.14, 0.15, 0.16" Functors = "0.4, 0.5" GPUArraysCore = "0.1, 0.2" +KernelAbstractions = "0.9.34" LinearAlgebra = "1.10" +LogExpFunctions = "0.3.29" Lux = "1.4.2" MLDataDevices = "1.5, 1.6" MLUtils = "0.4.5" Missings = "1.2.0" +NaNMath = "1.1.3" +Optimisers = "0.4.6" Optimization = "3.19.3, 4" Random = "1.10.0" SimpleChains = "0.4" @@ -66,6 +77,7 @@ StaticArrays = "1.9.13" StatsBase = "0.34.4" StatsFuns = "1.3.2" Test = "1.10" +Zygote = "0.7.10" julia = "1.10" [workspace] diff --git a/dev/doubleMM.jl b/dev/doubleMM.jl index 021b936..428a7eb 100644 --- a/dev/doubleMM.jl +++ b/dev/doubleMM.jl @@ -42,7 +42,7 @@ train_dataloader = MLUtils.DataLoader( (xM, xP, y_o, y_unc, 1:n_site); batchsize = n_batch, partial = false) σ_o = exp.(y_unc[:, 1] / 2) -# assign the train_loader, otherwise it eatch time creates another version of synthetic data +# assign the train_loader, otherwise it each time creates another version of synthetic data prob0 = HybridProblem(prob0_; train_dataloader) #tmp = HVI.get_hybridproblem_ϕunc(prob0; scenario) #prob0.covar @@ -248,7 +248,7 @@ end (y2_K1global, θsP2_K1global, θsMs2_K1global) = (y, θsP, θsMs); end -() -> begin # otpimize using LUX +() -> begin # optimize using 
LUX #using Lux g_lux = Lux.Chain( # dense layer with bias that maps to 8 outputs and applies `tanh` activation @@ -560,7 +560,8 @@ end end #ζi = first(eachrow(Array(chain))) -f_allsites = get_hybridproblem_PBmodel(prob0; scenario, use_all_sites = true) +f = get_hybridproblem_PBmodel(probc; scenario) +f_allsites = create_nsite_applicator(f, n_site) #ζs = mapreduce(ζi -> transposeMs(ζi, intm_PMs_gen, true), hcat, eachrow(Array(chain))); ζsP = Array(chain)[:,1:n_θP]' ζsMst = reshape(Array(chain)[:,(n_θP+1) : end], n_sample_NUTS, n_site, n_θM) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 4fff7ac..6d3e418 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -3,6 +3,10 @@ project: render: - src/tutorials/basic_cpu.qmd - src/tutorials/*.qmd +# julia: +# # workaround for quarto not picking up the correct environment variable +# # https://github.com/quarto-dev/quarto-cli/issues/13416#issuecomment-3333700286 +# env: ["JULIA_DEPOT_PATH=/User/homes/twutz/scratch/twutz/julia_gpu_depots"] diff --git a/docs/src/explanation/theory_hvi.md b/docs/src/explanation/theory_hvi.md index 18053a7..60834a0 100644 --- a/docs/src/explanation/theory_hvi.md +++ b/docs/src/explanation/theory_hvi.md @@ -28,7 +28,7 @@ In order to learn $\phi_g$, the user needs to provide a batch of $i \in \{1 \ldo ## Estimation using the ELBO In order to find the parameters of the approximation of the posterior, HVI -minizes the KL divergence between the approximation and the true posterior. +minimizes the KL divergence between the approximation and the true posterior. This is achieved by maximizing the evidence lower bound (ELBO). $$\mathcal{L}(\phi) = \mathbb{E}_{q(\theta)} \left[\log p(y,\theta) \right] - \mathbb{E}_{q(\theta)} \left[\log q(\theta) \right]$$ @@ -128,7 +128,7 @@ $\phi = (\phi_P, \phi_g, \phi_u)$, comprises - $\phi_P = \mu_{\zeta_P}$: the means of the distributions of the transformed global parameters, - $\phi_g$: the parameters of the machine learning model, and -- $\phi_u$: paramerization of $\Sigma_\zeta$ that is additional to the means. +- $\phi_u$: parameterization of $\Sigma_\zeta$ that is additional to the means. ### Details Specifically, $\phi_u= (log\sigma^2_P, log\sigma^2_{M0}, log\sigma^2_{M\eta}, a_P, a_M)$, diff --git a/docs/src/tutorials/basic_cpu.md b/docs/src/tutorials/basic_cpu.md index e1a80c9..c3475ec 100644 --- a/docs/src/tutorials/basic_cpu.md +++ b/docs/src/tutorials/basic_cpu.md @@ -27,7 +27,8 @@ The example process based model (PBM) predicts a double-Monod constrained rate for different substrate concentrations, `S1`, and `S2`. $$ -y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}$$ +y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2} +$$ ``` julia function f_doubleMM(θc::CA.ComponentVector{ET}, x) where ET @@ -49,7 +50,7 @@ access the components by their symbolic names in the provided `ComponentArray`. HVI requires the evaluation of the likelihood of the predictions. It corresponds to the cost of predictions given some observations. -The user specifies a function of the negative log-Likehood +The user specifies a function of the negative log-Likelihood `neg_logden(obs, pred, uncertainty_parameters)`, where all of the parameters are arrays with columns for sites. @@ -229,10 +230,10 @@ given a vector of global parameters, and a matrix of site parameters to invocation of the process based model (PBM), defined at the beginning.
``` julia -f_batch = f_allsites = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1]) +f_batch = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1]) prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0, - f_batch, f_allsites, priors_dict, py, + f_batch, priors_dict, py, transM, transP, train_dataloader, n_covar, n_site, n_batch) ``` @@ -241,6 +242,11 @@ prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0, Eventually, having assembled all the moving parts of the HVI, we can perform the inversion. +``` julia +# silence warning of no GPU backend found (because we did not import CUDA here) +ENV["MLDATADEVICES_SILENCE_WARN_NO_GPU"] = 1 +``` + ``` julia using OptimizationOptimisers import Zygote @@ -313,8 +319,7 @@ The HVI Problem needs to be updated with this new applicator. ``` julia f_batch = PBMPopulationApplicator(f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1]) -f_allsites = PBMPopulationApplicator(f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1]) -probo_sites = HybridProblem(probo; f_batch, f_allsites) +probo_sites = HybridProblem(probo; f_batch) ``` For numerical efficiency, the number of sites within one batch is part of the @@ -345,8 +350,7 @@ module `Main` to allow for easier reloading with JLD2. ``` julia f_batch = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1]) -f_allsites = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1]) -probo2 = HybridProblem(probo; f_batch, f_allsites) +probo2 = HybridProblem(probo; f_batch) ``` ``` julia diff --git a/docs/src/tutorials/basic_cpu.qmd b/docs/src/tutorials/basic_cpu.qmd index 37d8449..39cf0a5 100644 --- a/docs/src/tutorials/basic_cpu.qmd +++ b/docs/src/tutorials/basic_cpu.qmd @@ -36,7 +36,8 @@ The example process based model (PBM) predicts a double-Monod constrained rate for different substrate concentrations, `S1`, and `S2`. $$ -y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}$$ +y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2} +$$ ```{julia} function f_doubleMM(θc::CA.ComponentVector{ET}, x) where ET @@ -58,7 +59,7 @@ access the components by their symbolic names in the provided `ComponentArray`. HVI requires the evaluation of the likelihood of the predictions. It corresponds to the cost of predictions given some observations. -The user specifies a function of the negative log-Likehood +The user specifies a function of the negative log-Likelihood `neg_logden(obs, pred, uncertainty_parameters)`, where all of the parameters are arrays with columns for sites. @@ -239,10 +240,10 @@ given a vector of global parameters, and a matrix of site parameters to invocation of the process based model (PBM), defined at the beginning.
```{julia} -f_batch = f_allsites = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1]) +f_batch = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1]) prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0, - f_batch, f_allsites, priors_dict, py, + f_batch, priors_dict, py, transM, transP, train_dataloader, n_covar, n_site, n_batch) ``` @@ -265,7 +266,7 @@ y1 = f_batch(CA.getdata(θP), CA.getdata(θMs), CA.getdata(x_batch))[2] #using Cthulhu #@descend_code_warntype f_batch(CA.getdata(θP), CA.getdata(θMs), CA.getdata(x_batch)) prob0 = HVI.DoubleMM.DoubleMMCase() - f_batch0 = get_hybridproblem_PBmodel(prob0; use_all_sites = false) + f_batch0 = get_hybridproblem_PBmodel(prob0) y1f = f_batch0(θP, θMs, x_batch)[2] y1 .- y1f # equal end @@ -276,6 +277,11 @@ end Eventually, having assembled all the moving parts of the HVI, we can perform the inversion. +```{julia} +# silence warning of no GPU backend found (because we did not import CUDA here) +ENV["MLDATADEVICES_SILENCE_WARN_NO_GPU"] = 1 +``` + ```{julia} using OptimizationOptimisers import Zygote @@ -349,8 +355,7 @@ The HVI Problem needs to be updated with this new applicator. ```{julia} f_batch = PBMPopulationApplicator(f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1]) -f_allsites = PBMPopulationApplicator(f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1]) -probo_sites = HybridProblem(probo; f_batch, f_allsites) +probo_sites = HybridProblem(probo; f_batch) ``` For numerical efficiency, the number of sites within one batch is part of the @@ -380,8 +385,7 @@ module `Main` to allow for easier reloading with JLD2. ```{julia} f_batch = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1]) -f_allsites = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1]) -probo2 = HybridProblem(probo; f_batch, f_allsites) +probo2 = HybridProblem(probo; f_batch) ``` ```{julia} @@ -397,4 +401,4 @@ end #| eval: false #| echo: false probo = load(fname, "probo"; iotype = IOStream); -``` \ No newline at end of file +``` diff --git a/docs/src/tutorials/blocks_corr.md b/docs/src/tutorials/blocks_corr.md index 54da1fe..27a8629 100644 --- a/docs/src/tutorials/blocks_corr.md +++ b/docs/src/tutorials/blocks_corr.md @@ -1,4 +1,4 @@ -# How to model indenpendent parameter-blocks in the posterior +# How to model independent parameter-blocks in the posterior ``` @meta diff --git a/docs/src/tutorials/blocks_corr.qmd b/docs/src/tutorials/blocks_corr.qmd index 380903a..16a0323 100644 --- a/docs/src/tutorials/blocks_corr.qmd +++ b/docs/src/tutorials/blocks_corr.qmd @@ -1,5 +1,5 @@ --- -title: "How to model indenpendent parameter-blocks in the posterior" +title: "How to model independent parameter-blocks in the posterior" engine: julia execute: echo: true diff --git a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-11-output-1.png b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-11-output-1.png index b5f5d20..67efac2 100644 Binary files a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-11-output-1.png and b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-11-output-1.png differ diff --git a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-12-output-1.png b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-12-output-1.png index 80db561..7214bb1 100644 Binary files a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-12-output-1.png and b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-12-output-1.png differ diff 
--git a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-13-output-1.png b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-13-output-1.png index d1f156b..b8f9977 100644 Binary files a/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-13-output-1.png and b/docs/src/tutorials/blocks_corr_files/figure-commonmark/cell-13-output-1.png differ diff --git a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-10-output-1.png b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-10-output-1.png index 8ee22fb..d56fea8 100644 Binary files a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-10-output-1.png and b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-10-output-1.png differ diff --git a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-11-output-1.png b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-11-output-1.png index b8d8e64..fd0d188 100644 Binary files a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-11-output-1.png and b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-11-output-1.png differ diff --git a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-12-output-1.png b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-12-output-1.png index 7aa7d4d..606f068 100644 Binary files a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-12-output-1.png and b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-12-output-1.png differ diff --git a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-9-output-1.png b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-9-output-1.png index 1b863b6..6c3e05f 100644 Binary files a/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-9-output-1.png and b/docs/src/tutorials/corr_site_global_files/figure-commonmark/cell-9-output-1.png differ diff --git a/docs/src/tutorials/inspect_results.md b/docs/src/tutorials/inspect_results.md index 0c41fe8..7849c63 100644 --- a/docs/src/tutorials/inspect_results.md +++ b/docs/src/tutorials/inspect_results.md @@ -39,7 +39,7 @@ using function [`sample_posterior`](@ref). using StableRNGs rng = StableRNG(112) n_sample_pred = 400 -(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred) +(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred, is_testmode = true) ``` Let's look at the results. diff --git a/docs/src/tutorials/inspect_results.qmd b/docs/src/tutorials/inspect_results.qmd index c10bc6c..d22d91e 100644 --- a/docs/src/tutorials/inspect_results.qmd +++ b/docs/src/tutorials/inspect_results.qmd @@ -53,9 +53,7 @@ probo = load(fname, "probo"); _xP_batch = first(probo.train_dataloader)[2] f_batch = PBMPopulationApplicator( f_doubleMM_sites, probo.n_batch; probo.θP, probo.θM, θFix, xPvec=_xP_batch[:,1]) -f_allsites = PBMPopulationApplicator( - f_doubleMM_sites, probo.n_site; probo.θP, probo.θM, θFix, xPvec=_xP_batch[:,1]) -probo = HybridProblem(probo; f_batch, f_allsites) +probo = HybridProblem(probo; f_batch) ``` ## Sample the posterior @@ -66,7 +64,7 @@ using function [`sample_posterior`](@ref). using StableRNGs rng = StableRNG(112) n_sample_pred = 400 -(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred) +(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred, is_testmode = true) ``` Let's look at the results.
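The hunks above consistently remove the separate `f_allsites` applicator; a problem now stores only `f_batch`, and an applicator for a different number of sites is derived on demand. A minimal sketch of that replacement pattern (assuming a `probo::HybridProblem` as constructed in the tutorials above; both functions are exported by the package in this PR):

``` julia
using HybridVariationalInference

# applicator sized for probo.n_batch sites, as stored in the problem
f_batch = get_hybridproblem_PBmodel(probo)
# derive an applicator for all probo.n_site sites (replaces the old f_allsites)
f_allsites = create_nsite_applicator(f_batch, probo.n_site)
```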
diff --git a/docs/src/tutorials/intermediate/basic_cpu_results.jld2 b/docs/src/tutorials/intermediate/basic_cpu_results.jld2 index 4046a37..4e7d91f 100644 Binary files a/docs/src/tutorials/intermediate/basic_cpu_results.jld2 and b/docs/src/tutorials/intermediate/basic_cpu_results.jld2 differ diff --git a/docs/src/tutorials/logden_user.md b/docs/src/tutorials/logden_user.md index 240f4a1..3ccd55d 100644 --- a/docs/src/tutorials/logden_user.md +++ b/docs/src/tutorials/logden_user.md @@ -5,7 +5,7 @@ CurrentModule = HybridVariationalInference ``` -This guide shows how the user can specify a customized log-density function. +This guide shows how the user can specify a customized log-Likelihood function. ## Motivation diff --git a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-10-output-1.png b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-10-output-1.png index dd132d4..dff53ac 100644 Binary files a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-10-output-1.png and b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-10-output-1.png differ diff --git a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-11-output-1.png b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-11-output-1.png index 64b3b19..d49bedb 100644 Binary files a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-11-output-1.png and b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-11-output-1.png differ diff --git a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-12-output-1.png b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-12-output-1.png index 26aae9f..ee42111 100644 Binary files a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-12-output-1.png and b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-12-output-1.png differ diff --git a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-8-output-1.png b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-8-output-1.png index 0ab0e99..38e4dbf 100644 Binary files a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-8-output-1.png and b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-8-output-1.png differ diff --git a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-9-output-1.png b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-9-output-1.png index 8142630..d6b0d9e 100644 Binary files a/docs/src/tutorials/logden_user_files/figure-commonmark/cell-9-output-1.png and b/docs/src/tutorials/logden_user_files/figure-commonmark/cell-9-output-1.png differ diff --git a/docs/src_stash/Manifest.toml b/docs/src_stash/Manifest.toml index 37f2a35..6b46cfc 100644 --- a/docs/src_stash/Manifest.toml +++ b/docs/src_stash/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.11.4" manifest_format = "2.0" -project_hash = "39d0d5866236472d6bc1a58c4e663ea8a2a2e057" +project_hash = "e0bb7b9f384e479f88276cca8506811903b895c6" [[deps.AliasTables]] deps = ["PtrArrays", "Random"] @@ -39,6 +39,23 @@ git-tree-sha1 = "fde3bf89aead2e723284a8ff9cdf5b551ed700e8" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.18.5+0" +[[deps.ChunkCodecCore]] +git-tree-sha1 = "51f4c10ee01bda57371e977931de39ee0f0cdb3e" +uuid = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1" +version = "1.0.0" + +[[deps.ChunkCodecLibZlib]] +deps = ["ChunkCodecCore", "Zlib_jll"] +git-tree-sha1 = "cee8104904c53d39eb94fd06cbe60cb5acde7177" +uuid = "4c0bbee4-addc-4d73-81a0-b6caacae83c8" +version = "1.0.0" + +[[deps.ChunkCodecLibZstd]] +deps = ["ChunkCodecCore", 
"Zstd_jll"] +git-tree-sha1 = "34d9873079e4cb3d0c62926a225136824677073f" +uuid = "55437552-ac27-4d47-9aa3-63184e8fd398" +version = "1.0.0" + [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] git-tree-sha1 = "962834c22b66e32aa10f7611c08c8ca4e20749a9" @@ -163,6 +180,16 @@ git-tree-sha1 = "3a948313e7a41eb1db7a1e733e6335f17b4ab3c4" uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" version = "7.1.1+0" +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "b66970a70db13f45b7e57fbda1736e1cf72174ea" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.17.0" +weakdeps = ["HTTP"] + + [deps.FileIO.extensions] + HTTPExt = "HTTP" + [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" version = "1.11.0" @@ -249,6 +276,11 @@ git-tree-sha1 = "f923f9a774fcf3f5cb761bfa43aeadd689714813" uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" version = "8.5.1+0" +[[deps.HashArrayMappedTries]] +git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae" +uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74" +version = "0.2.0" + [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -259,6 +291,18 @@ git-tree-sha1 = "e2222959fbc6c19554dc15174c81bf7bf3aa691c" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" version = "0.2.4" +[[deps.JLD2]] +deps = ["ChunkCodecLibZlib", "ChunkCodecLibZstd", "FileIO", "MacroTools", "Mmap", "OrderedCollections", "PrecompileTools", "ScopedValues"] +git-tree-sha1 = "da2e9b4d1abbebdcca0aa68afa0aa272102baad7" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.6.2" + + [deps.JLD2.extensions] + UnPackExt = "UnPack" + + [deps.JLD2.weakdeps] + UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" + [[deps.JLFzf]] deps = ["REPL", "Random", "fzf_jll"] git-tree-sha1 = "82f7acdc599b65e0f8ccd270ffa1467c21cb647b" @@ -668,6 +712,12 @@ version = "1.3.1" uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" +[[deps.ScopedValues]] +deps = ["HashArrayMappedTries", "Logging"] +git-tree-sha1 = "c3b2323466378a2ba15bea4b2f73b081e022f473" +uuid = "7e506255-f358-4e82-b7e4-beb19740aa63" +version = "1.5.0" + [[deps.Scratch]] deps = ["Dates"] git-tree-sha1 = "9b81b8393e50b7d4e6d0a9f14e192294d3b7c109" diff --git a/docs/src_stash/Project.toml b/docs/src_stash/Project.toml index 43aec5b..f36a540 100644 --- a/docs/src_stash/Project.toml +++ b/docs/src_stash/Project.toml @@ -1,2 +1,3 @@ [deps] +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs/src_stash/test.md b/docs/src_stash/test.md new file mode 100644 index 0000000..4501b05 --- /dev/null +++ b/docs/src_stash/test.md @@ -0,0 +1,21 @@ +# Basic workflow without GPU + + +``` julia +ENV["JULIA_DEPOT_PATH"] +``` + + "/User/homes/twutz/scratch/twutz/julia_gpu_depots:" + +``` julia +DEPOT_PATH +``` + + 3-element Vector{String}: + "/User/homes/twutz/scratch/twutz/julia_gpu_depots" + "/Net/Groups/Services/HPC_22/apps/julia/julia-1.11.4/local/share/julia" + "/Net/Groups/Services/HPC_22/apps/julia/julia-1.11.4/share/julia" + +``` julia +#println(no_existing_var) +``` diff --git a/docs/src_stash/test.qmd b/docs/src_stash/test.qmd new file mode 100644 index 0000000..6ac17e4 --- /dev/null +++ b/docs/src_stash/test.qmd @@ -0,0 +1,26 @@ +--- +title: "Basic workflow without GPU" +engine: julia +# julia: +# env: ["JULIA_DEPOT_PATH=/User/homes/twutz/scratch/twutz/julia_gpu_depots"] +execute: + echo: true +format: + commonmark: + variant: -raw_html+tex_math_dollars + wrap: preserve +# params: +# JULIA_DEPOT_PATH: 
/User/homes/twutz/scratch/twutz/julia_cluster_depots +--- +```{julia} +ENV["JULIA_DEPOT_PATH"] +``` + +```{julia} +DEPOT_PATH +``` + +```{julia} +#println(no_existing_var) +``` + diff --git a/ext/HybridVariationalInferenceCUDAExt.jl b/ext/HybridVariationalInferenceCUDAExt.jl index 180944c..eb73007 100644 --- a/ext/HybridVariationalInferenceCUDAExt.jl +++ b/ext/HybridVariationalInferenceCUDAExt.jl @@ -12,20 +12,21 @@ function HVI.vec2utri(v::CUDA.CuVector{T}; n=invsumn(length(v)) ) where {T} m = CUDA.allowscalar() do CUDA.CuArray([j >= i ? (k += 1; v[k]) : z for i in 1:n, j in 1:n]) end + # TODO test whether it is faster without allowscalar # m = CUDA.zeros(T,n,n) # @cuda threads = 256 vec2utri_gpu!(m, v) # planned to put v into positions of m return (m) end -function vec2utri_gpu!(m, v::AbstractVector) - index = threadIdx().x # this example only requires linear indexing, so just use `x` - stride = blockDim().x - for i in index:stride:length(v) - row, col = vec2utri_pos(i) - @inbounds m[row, col] = v[i] - end - return nothing # important -end +# function vec2utri_gpu!(m, v::AbstractVector) +# index = threadIdx().x # this example only requires linear indexing, so just use `x` +# stride = blockDim().x +# for i in index:stride:length(v) +# row, col = vec2utri_pos(i) +# @inbounds m[row, col] = v[i] +# end +# return nothing # important +# end #function vec2uutri(v::GPUArraysCore.AbstractGPUVector{T}; n=invsumn(length(v)) + one(T)) where {T} @@ -85,6 +86,7 @@ function HVI._create_randn(rng, v::CUDA.CuVector{T,M}, dims...) where {T,M} res::CUDA.CuArray{T, length(dims),M} end +# TODO replace by KernelAbstractions after type stability is fixed function HVI.ones_similar_x(x::CuArray, size_ret = size(x)) # call CUDA.ones rather than ones for x::CuArray ChainRulesCore.@ignore_derivatives CUDA.ones(eltype(x), size_ret) diff --git a/ext/HybridVariationalInferenceFluxExt.jl b/ext/HybridVariationalInferenceFluxExt.jl index 1d8f462..47136d8 100644 --- a/ext/HybridVariationalInferenceFluxExt.jl +++ b/ext/HybridVariationalInferenceFluxExt.jl @@ -23,7 +23,7 @@ function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type:: FluxApplicator(rebuild), ϕ end -function HVI.apply_model(app::FluxApplicator, x::T, ϕ) where T +function HVI.apply_model(app::FluxApplicator, x::T, ϕ; is_testmode=false) where T # assume no size information in x -> can hint the type of the result # to be the same as the type of the input m = app.rebuild(ϕ) @@ -38,7 +38,7 @@ function HVI.construct_partric(app::FluxApplicator{RT}, x, ϕ) where RT PartricFluxApplicator{RT, typeof(m), typeof(y)}(app.rebuild) end -function HVI.apply_model(app::PartricFluxApplicator{RT, MT, YT}, x, ϕ) where {RT, MT, YT} +function HVI.apply_model(app::PartricFluxApplicator{RT, MT, YT}, x, ϕ; is_testmode=false) where {RT, MT, YT} m = app.rebuild(ϕ)::MT res = m(x)::YT res diff --git a/ext/HybridVariationalInferenceLuxExt.jl b/ext/HybridVariationalInferenceLuxExt.jl index 4c66175..c8995c9 100644 --- a/ext/HybridVariationalInferenceLuxExt.jl +++ b/ext/HybridVariationalInferenceLuxExt.jl @@ -5,32 +5,85 @@ using HybridVariationalInference: HybridVariationalInference as HVI using ComponentArrays: ComponentArrays as CA using Random using StatsFuns: logistic - +import Functors +import GPUArraysCore # AbstractModelApplicator that stores a Lux.StatefulLuxLayer, so that # it can be applied with given inputs and parameters # The `int_ϕ` ComponentArrayInterpreter, attaches the correct axes to the # supplied parameters, that do not need to keep the Axis information
-struct LuxApplicator{MT, IT} <: AbstractModelApplicator - stateful_layer::MT +struct LuxApplicator{MT1, MT2, IT} <: AbstractModelApplicator + stateful_layer_test::MT1 + stateful_layer_train::MT2 int_ϕ::IT + is_testmode::Bool end +Functors.@functor LuxApplicator () # prevent warning for putting stateful_layers to GPU + function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type=Float32; device = gpu_device()) ps, st = Lux.setup(rng, m) ps_ca = float_type.(CA.ComponentArray(ps)) st = st |> device - stateful_layer = StatefulLuxLayer{true}(m, nothing, st) - #stateful_layer(x_o_gpu[:, 1:n_site_batch], ps_ca) + stateful_layer_train = StatefulLuxLayer{true}(m, nothing, st) + stateful_layer_test = Lux.testmode(stateful_layer_train) int_ϕ = get_concrete(ComponentArrayInterpreter(ps_ca)) - LuxApplicator(stateful_layer, int_ϕ), ps_ca + LuxApplicator(stateful_layer_test, stateful_layer_train, int_ϕ, false), ps_ca end -function HVI.apply_model(app::LuxApplicator, x, ϕ) - ϕc = app.int_ϕ(CA.getdata(ϕ)) - app.stateful_layer(x, ϕc) +function HVI.apply_model(app::LuxApplicator, x, ϕ; is_testmode=false) + ϕd = CA.getdata(ϕ) + if (ϕ isa SubArray) && (ϕ.parent isa GPUArraysCore.AbstractGPUArray) + # Lux has problems with SubArrays of GPUArrays, need to convert to plain Array + ϕc = app.int_ϕ(ϕd[:]) + else + ϕc = app.int_ϕ(ϕd) + end + # tmp(x, ϕc) + if is_testmode + app.stateful_layer_test(x, ϕc) + else + app.stateful_layer_train(x, ϕc) + end end + + +function HVI.construct_3layer_MLApplicator( + rng::AbstractRNG, prob::HVI.AbstractHybridProblem, ::Val{:Lux}; + scenario::Val{scen}, + p_dropout = 0.2) where scen + (;θM) = get_hybridproblem_par_templates(prob; scenario) + n_out = length(θM) + n_covar = get_hybridproblem_n_covar(prob; scenario) + n_pbm_covars = length(get_hybridproblem_pbmpar_covars(prob; scenario)) + n_input = n_covar + n_pbm_covars + #(; n_covar, n_θM) = get_hybridproblem_sizes(prob; scenario) + float_type = get_hybridproblem_float_type(prob; scenario) + is_using_dropout = :use_dropout ∈ scen + g_chain = if is_using_dropout + Lux.Chain( + # dense layer with bias that maps to n_input * 4 outputs and applies `tanh` activation + Lux.Dense(n_input => n_input * 4, tanh), + Lux.Dropout(p_dropout), + Lux.Dense(n_input * 4 => n_input * 4, tanh), + Lux.Dropout(p_dropout), + # dense layer without bias that maps to n outputs and `logistic` activation + Lux.Dense(n_input * 4 => n_out, logistic, use_bias = false) + ) + else + Lux.Chain( + # dense layer with bias that maps to n_input * 4 outputs and applies `tanh` activation + Lux.Dense(n_input => n_input * 4, tanh), + Lux.Dense(n_input * 4 => n_input * 4, tanh), + # dense layer without bias that maps to n outputs and `logistic` activation + Lux.Dense(n_input * 4 => n_out, logistic, use_bias = false) + ) + end + construct_ChainsApplicator(rng, g_chain, float_type) +end + + # function HVI.HybridProblem(rng::AbstractRNG, # θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Chain, # args...; device = gpu_device(), kwargs...)
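The rewritten Lux extension keeps both a train-mode and a test-mode `StatefulLuxLayer` and selects between them via the new `is_testmode` keyword, so that `Dropout` layers are only active during training. A minimal, self-contained sketch of the underlying Lux mechanism (a hypothetical toy chain, not taken from this patch):

``` julia
using Lux, Random

rng = Random.default_rng()
model = Lux.Chain(Lux.Dense(4 => 8, tanh), Lux.Dropout(0.2), Lux.Dense(8 => 2))
ps, st = Lux.setup(rng, model)
x = randn(Float32, 4, 3)

y_train, _ = model(x, ps, st)               # Dropout active: stochastic output
y_test, _ = model(x, ps, Lux.testmode(st))  # Dropout disabled: deterministic output
```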
diff --git a/ext/HybridVariationalInferenceSimpleChainsExt.jl b/ext/HybridVariationalInferenceSimpleChainsExt.jl index b53262d..a572777 100644 --- a/ext/HybridVariationalInferenceSimpleChainsExt.jl +++ b/ext/HybridVariationalInferenceSimpleChainsExt.jl @@ -15,7 +15,7 @@ function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::SimpleChain, FloatT SimpleChainsApplicator(m), ϕ end -HVI.apply_model(app::SimpleChainsApplicator, x, ϕ) = app.m(x, ϕ) +HVI.apply_model(app::SimpleChainsApplicator, x, ϕ; is_testmode=false) = app.m(x, ϕ) function HVI.construct_3layer_MLApplicator( rng::AbstractRNG, prob::HVI.AbstractHybridProblem, ::Val{:SimpleChains}; diff --git a/src/AbstractHybridProblem.jl b/src/AbstractHybridProblem.jl index ad0404a..6c3b499 100644 --- a/src/AbstractHybridProblem.jl +++ b/src/AbstractHybridProblem.jl @@ -91,7 +91,7 @@ end """ get_hybridproblem_transforms(::AbstractHybridProblem; scenario) -Return a NamedTupe of +Return a NamedTuple of - `transP`: Bijectors.Transform for the global PBM parameters, θP - `transM`: Bijectors.Transform for the single-site PBM parameters, θM """ @@ -197,7 +197,7 @@ end Put relevant parts of the DataLoader to gpu, depending on scenario. """ -function gdev_hybridproblem_dataloader(dataloader::MLUtils.DataLoader; gdevs, +function gdev_hybridproblem_dataloader(dataloader::MLUtils.DataLoader; gdevs = nothing, gdev_M = gdevs.gdev_M, gdev_P = gdevs.gdev_P, # scenario::Val{scen} = Val(()), @@ -207,12 +207,26 @@ function gdev_hybridproblem_dataloader(dataloader::MLUtils.DataLoader; gdevs, batchsize = dataloader.batchsize, partial = dataloader.partial ) - xM, xP, y_o, y_unc, i_sites = dataloader.data + # xM, xP, y_o, y_unc, i_sites = dataloader.data + # xM_dev = gdev_M(xM) + # xP_dev, y_o_dev, y_unc_dev = (gdev_P(xP), gdev_P(y_o), gdev_P(y_unc)) + data_dev = gdev_hybridproblem_data(dataloader.data; gdev_M, gdev_P) + train_loader_dev = MLUtils.DataLoader(data_dev; batchsize, partial) + return(train_loader_dev) +end + +function gdev_hybridproblem_data(data::Tuple; gdevs = nothing, + gdev_M = gdevs.gdev_M, + gdev_P = gdevs.gdev_P, + # scenario::Val{scen} = Val(()), + # gdev = gpu_device(), + # gdev_M = :use_gpu ∈ _val_value(scenario) ? gdev : identity, + # gdev_P = :f_on_gpu ∈ _val_value(scenario) ? 
gdev : identity, + ) + xM, xP, y_o, y_unc, i_sites = data xM_dev = gdev_M(xM) xP_dev, y_o_dev, y_unc_dev = (gdev_P(xP), gdev_P(y_o), gdev_P(y_unc)) - train_loader_dev = MLUtils.DataLoader((xM_dev, xP_dev, y_o_dev, y_unc_dev, i_sites); - batchsize, partial) - return(train_loader_dev) + (xM_dev, xP_dev, y_o_dev, y_unc_dev, i_sites) end """ diff --git a/src/ComponentArrayInterpreter.jl b/src/ComponentArrayInterpreter.jl index 960d5ee..f2c481a 100644 --- a/src/ComponentArrayInterpreter.jl +++ b/src/ComponentArrayInterpreter.jl @@ -171,6 +171,7 @@ function ComponentArrayInterpreter( ComponentArrayInterpreter(n_dims, CA.getaxes(ca), m_dims) end + function ComponentArrayInterpreter( n_dims::NTuple{N,<:Integer}, axes::NTuple{A,<:CA.AbstractAxis}, m_dims::NTuple{M,<:Integer}) where {N,A,M} diff --git a/src/DoubleMM/DoubleMM.jl b/src/DoubleMM/DoubleMM.jl index 6b39306..95a0757 100644 --- a/src/DoubleMM/DoubleMM.jl +++ b/src/DoubleMM/DoubleMM.jl @@ -13,6 +13,8 @@ using Distributions, DistributionFits using MLDataDevices import GPUArraysCore # used in conditional breakpoints import StableRNGs +import MLUtils +import ChainRulesCore export f_doubleMM, f_doubleMM_sites, xP_S1, xP_S2 include("f_doubleMM.jl") diff --git a/src/DoubleMM/f_doubleMM.jl b/src/DoubleMM/f_doubleMM.jl index 7c8928a..22edc82 100644 --- a/src/DoubleMM/f_doubleMM.jl +++ b/src/DoubleMM/f_doubleMM.jl @@ -105,17 +105,23 @@ function f_doubleMM_sites(θc::CA.ComponentMatrix, xPc::CA.ComponentMatrix) # # extract the parameters as vectors that are row-repeated into a matrix VT = typeof(CA.getdata(θc)[:,1]) # workaround for non-type-stable Symbol-indexing - n_obs = size(S1, 1) - rep_fac = HVI.ones_similar_x(xPc, n_obs) # to reshape into matrix, avoiding repeat + #n_obs = size(S1, 1) + #rep_fac = HVI.ones_similar_x(xPc, n_obs) # to reshape into matrix, avoiding repeat + #is_dummy = isnan.(S1) .|| isnan.(S2) + is_valid = isfinite.(S1) .&& isfinite.(S2) (r0, r1, K1, K2) = map((:r0, :r1, :K1, :K2)) do par p1 = CA.getdata(θc[:, par]) ::VT + #Main.@infiltrate_main + # tmp = Zygote.gradient(p1 -> sum(repeat_rowvector_dummy(p1', is_dummy)), p1)[1] + #p1_mat = repeat_rowvector_dummy(p1', is_dummy) + p1_mat = is_valid .* p1' # places zeros in dummy positions, prevents gradients there #repeat(p1', n_obs) # matrix: same for each concentration row in S1 #(rep_fac .* p1') # move to computation below to save allocation end # # each variable is a matrix (n_obs x n_site) - #r0 .+ r1 .* S1 ./ (K1 .+ S1) .* S2 ./ (K2 .+ S2) - (rep_fac .* r0') .+ (rep_fac .* r1') .* S1 ./ ((rep_fac .* K1') .+ S1) .* S2 ./ ((rep_fac .* K2') .+ S2) + r0 .+ r1 .* S1 ./ (K1 .+ S1) .* S2 ./ (K2 .+ S2) + #(rep_fac .* r0') .+ (rep_fac .* r1') .* S1 ./ ((rep_fac .* K1') .+ S1) .* S2 ./ ((rep_fac .* K2') .+ S2) end # function f_doubleMM_sites(θc::CA.ComponentMatrix, xPc::CA.ComponentMatrix) @@ -221,7 +227,6 @@ end function HVI.get_hybridproblem_MLapplicator( rng::AbstractRNG, prob::HVI.DoubleMM.DoubleMMCase; scenario::Val{scen}, - use_all_sites = false ) where {scen} ml_engine = select_ml_engine(; scenario) g_nomag, ϕ_g0 = construct_3layer_MLApplicator(rng, prob, ml_engine; scenario) @@ -232,10 +237,12 @@ function HVI.get_hybridproblem_MLapplicator( (; transM) = get_hybridproblem_transforms(prob; scenario) lowers, uppers = HVI.get_quantile_transformed( priors::AbstractVector{<:Distribution}, transM) - n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) - n_site_batch = use_all_sites ? 
n_site : n_batch - g = NormalScalingModelApplicator( - g_nomag, lowers, uppers, eltype(ϕ_g0)) + #n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) + g = if (:use_rangescaling ∈ scen) + RangeScalingModelApplicator(g_nomag, lowers, uppers, eltype(ϕ_g0)) + else + NormalScalingModelApplicator(g_nomag, lowers, uppers, eltype(ϕ_g0)) + end return g, ϕ_g0 end @@ -283,7 +290,7 @@ end # cl::CLT # end -function HVI.get_hybridproblem_PBmodel(prob::DoubleMMCase; use_all_sites=false, scenario::Val{scen}) where {scen} +function HVI.get_hybridproblem_PBmodel(prob::DoubleMMCase; scenario::Val{scen}) where {scen} # θall defined in this module above # TODO check and test for population or sites, currently return only site specific pt = get_hybridproblem_par_templates(prob; scenario) @@ -294,8 +301,7 @@ function HVI.get_hybridproblem_PBmodel(prob::DoubleMMCase; use_all_sites=false, PBMSiteApplicator(f_doubleMM; pt.θP, pt.θM, θFix, xPvec) else n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) - n_site_batch = use_all_sites ? n_site : n_batch - PBMPopulationApplicator(f_doubleMM_sites, n_site_batch; pt.θP, pt.θM, θFix, xPvec) + PBMPopulationApplicator(f_doubleMM_sites, n_batch; pt.θP, pt.θM, θFix, xPvec) end end @@ -350,11 +356,22 @@ function HVI.get_hybridproblem_n_site_and_batch(prob::DoubleMMCase; (n_site, n_batch) end -function HVI.get_hybridproblem_train_dataloader(prob::DoubleMMCase; scenario::Val, +function HVI.get_hybridproblem_train_dataloader(prob::DoubleMMCase; scenario::Val{scen}, rng::AbstractRNG = StableRNG(111), kwargs... -) +) where {scen} n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) - construct_dataloader_from_synthetic(rng, prob; scenario, n_batch, kwargs...) + if (:driverNAN ∈ scen) + (; xM, xP, y_o, y_unc) = gen_hybridproblem_synthetic(rng, prob; scenario) + n_site = size(xM,2) + i_sites = 1:n_site + # set the last two entries of the S1 drivers and observations of the second site to NaN + view(@view(xP[:S1,2]), 7:8) .= NaN + y_o[7:8,2] .= NaN + train_loader = MLUtils.DataLoader((xM, xP, y_o, y_unc, i_sites); + batchsize = n_batch, partial = false) + else + construct_dataloader_from_synthetic(rng, prob; scenario, n_batch, kwargs...)
+ end end function HVI.gen_hybridproblem_synthetic(rng::AbstractRNG, prob::DoubleMMCase; @@ -371,7 +388,8 @@ function HVI.gen_hybridproblem_synthetic(rng::AbstractRNG, prob::DoubleMMCase; int_θMs_sites = ComponentArrayInterpreter(θM, (n_site,)) # normalize to be distributed around the prescribed true values θMs_true = int_θMs_sites(scale_centered_at(θMs_true0, θM, FloatType(0.1))) - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = true) + f_batch = get_hybridproblem_PBmodel(prob; scenario) + f = create_nsite_applicator(f_batch, n_site) #xP = fill((; S1 = xP_S1, S2 = xP_S2), n_site) int_xP_sites = ComponentArrayInterpreter(int_xP1, (n_site,)) xP = int_xP_sites(vcat(repeat(xP_S1, 1, n_site), repeat(xP_S2, 1, n_site))) diff --git a/src/HybridProblem.jl b/src/HybridProblem.jl index c87fdbc..97cc62d 100644 --- a/src/HybridProblem.jl +++ b/src/HybridProblem.jl @@ -6,8 +6,7 @@ Fields: - `θP::ComponentVector`, `θM::ComponentVector`: parameter templates - `g::AbstractModelApplicator`, `ϕg::AbstractVector`: ML model and its parameters - `ϕunc::ComponentVector`: parameters for the Covariance matrix of the approximate posterior -- `f_batch`: Process-based model predicing for `n_batch` sites -- `f_allsites`: Process-based model predicing for `n_site` sites +- `f_batch`: Process-based model predicting for n_batch sites - `priors`: AbstractDict: Prior distributions for all PBM parameters on constrained scale - `py`: Likelihood function - `transM::Stacked`, `transP::Stacked`: bijectors transforming from unconstrained to @@ -27,7 +26,6 @@ struct HybridProblem <: AbstractHybridProblem θP::CA.ComponentVector θM::CA.ComponentVector f_batch::Any - f_allsites::Any g::AbstractModelApplicator ϕg::Any # depends on framework ϕunc::CA.ComponentVector @@ -46,7 +44,6 @@ struct HybridProblem <: AbstractHybridProblem θP::CA.ComponentVector, θM::CA.ComponentVector, g::AbstractModelApplicator, ϕg::AbstractVector, f_batch, - f_allsites, priors::AbstractDict, py, transM::Stacked, @@ -61,7 +58,7 @@ struct HybridProblem <: AbstractHybridProblem ϕunc::CA.ComponentVector = init_hybrid_ϕunc(cor_ends, zero(eltype(θM))), ) where N new( - θP, θM, f_batch, f_allsites, g, ϕg, ϕunc, priors, py, transM, transP, cor_ends, + θP, θM, f_batch, g, ϕg, ϕunc, priors, py, transM, transP, cor_ends, train_dataloader, n_covar, n_site, n_batch, pbm_covars) end end @@ -86,8 +83,7 @@ function HybridProblem(prob::AbstractHybridProblem; scenario = (), θM = get_hybridproblem_par_templates(prob; scenario).θM, g = get_hybridproblem_MLapplicator(prob; scenario)[1], ϕg = get_hybridproblem_MLapplicator(prob; scenario)[2], - f_batch = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = false), - f_allsites = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = true), + f_batch = get_hybridproblem_PBmodel(prob; scenario), priors = get_hybridproblem_priors(prob; scenario), py = get_hybridproblem_neg_logden_obs(prob; scenario), transP = get_hybridproblem_transforms(prob; scenario).transP, @@ -100,7 +96,7 @@ function HybridProblem(prob::AbstractHybridProblem; scenario = (), pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario), ϕunc = get_hybridproblem_ϕunc(prob; scenario), ) - HybridProblem(θP, θM, g, ϕg, f_batch, f_allsites, priors, py, transM, transP, train_dataloader, + HybridProblem(θP, θM, g, ϕg, f_batch, priors, py, transM, transP, train_dataloader, n_covar, n_site, n_batch, cor_ends, pbm_covars, ϕunc) end @@ -156,8 +152,8 @@ end # (; n_covar=prob.n_covar, n_batch=prob.n_batch, n_θM, n_θP) # end -function 
get_hybridproblem_PBmodel(prob::HybridProblem; scenario = (), use_all_sites=false) - use_all_sites ? prob.f_allsites : prob.f_batch +function get_hybridproblem_PBmodel(prob::HybridProblem; scenario = ()) + prob.f_batch end function get_hybridproblem_MLapplicator(prob::HybridProblem; scenario = ()) diff --git a/src/HybridSolver.jl b/src/HybridSolver.jl index a7c9cb0..4410549 100644 --- a/src/HybridSolver.jl +++ b/src/HybridSolver.jl @@ -7,11 +7,16 @@ end HybridPointSolver(; alg) = HybridPointSolver(alg) function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPointSolver; - scenario, rng=Random.default_rng(), - gdevs = get_gdev_MP(scenario), + scenario=Val(()), rng=Random.default_rng(), + gdevs = nothing, # get_gdev_MP(scenario) is_inferred::Val{is_infer} = Val(false), + ad_backend_loss = AutoZygote(), + epochs, + is_omitting_NaNbatches = false, + is_omit_priors = false, kwargs... ) where is_infer + gdevs = isnothing(gdevs) ? get_gdev_MP(scenario) : gdevs par_templates = get_hybridproblem_par_templates(prob; scenario) g, ϕg0 = get_hybridproblem_MLapplicator(prob; scenario) FT = get_hybridproblem_float_type(prob; scenario) @@ -20,46 +25,110 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPointSolve ϕg=1:length(ϕg0), ϕP=par_templates.θP)) #ϕ0_cpu = vcat(ϕg0, par_templates.θP .* FT(0.9)) # slightly disturb θP_true ϕ0_cpu = vcat(ϕg0, apply_preserve_axes(inverse(transP), par_templates.θP)) + n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) train_loader = get_hybridproblem_train_dataloader(prob; scenario) + #TODO provide different test data + # TODO use 1/10 of the training data + # currently HybridProblem returns only applicators of size n_batch and n_site + # i_test = rand(1:n_site, Integer(floor(n_site/10))) + # test_data = map(train_loader.data) do data_comp + # ndims(data_comp) == 2 ? 
data_comp[:, i_test] : data_comp[i_test] + # end + test_data = train_loader.data gdev = gdevs.gdev_M if gdev isa MLDataDevices.AbstractGPUDevice ϕ0_dev = gdev(ϕ0_cpu) g_dev = gdev(g) - train_loader_dev = gdev_hybridproblem_dataloader(train_loader; scenario, gdev) + train_loader_dev = gdev_hybridproblem_dataloader(train_loader; gdevs) + test_data_dev = gdev_hybridproblem_data(test_data; gdevs) else ϕ0_dev = ϕ0_cpu g_dev = g train_loader_dev = train_loader + test_data_dev = test_data + end + f = get_hybridproblem_PBmodel(prob; scenario) + ftest = create_nsite_applicator(f, size(test_data[1],2)) + if gdevs.gdev_P isa MLDataDevices.AbstractGPUDevice + f_dev = gdevs.gdev_P(f) + ftest_dev = gdevs.gdev_P(ftest) + else + f_dev = f + ftest_dev = ftest end - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites=false) py = get_hybridproblem_neg_logden_obs(prob; scenario) pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario) - n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) + n_site_test = size(test_data[1],2) priors = get_hybridproblem_priors(prob; scenario) priorsP = [priors[k] for k in keys(par_templates.θP)] priorsM = [priors[k] for k in keys(par_templates.θM)] #intP = ComponentArrayInterpreter(par_templates.θP) - loss_gf = get_loss_gf(g_dev, transM, transP, f, py, intϕ; - cdev=infer_cdev(gdevs), pbm_covars, n_site_batch=n_batch, priorsP, priorsM,) + loss_gf = get_loss_gf(g_dev, transM, transP, f_dev, py, intϕ; + n_site_batch=n_batch, + cdev=infer_cdev(gdevs), pbm_covars, priorsP, priorsM, is_omit_priors,) + loss_gf_test = get_loss_gf(g_dev, transM, transP, ftest_dev, py, intϕ; + n_site_batch=n_site_test, + cdev=infer_cdev(gdevs), pbm_covars, priorsP, priorsM, is_omit_priors,) # call loss function once l1 = is_infer ? - Test.@inferred(loss_gf(ϕ0_dev, first(train_loader_dev)...))[1] : + Test.@inferred(loss_gf(ϕ0_dev, first(train_loader_dev)...; is_testmode=true))[1] : # using ShareAdd; @usingany Cthulhu # @descend_code_warntype loss_gf(ϕ0_dev, first(train_loader_dev)...) - loss_gf(ϕ0_dev, first(train_loader_dev)...)[1] + loss_gf(ϕ0_dev, first(train_loader_dev)...; is_testmode=true)[1] # and gradient # xMg, xP, y_o, y_unc = first(train_loader_dev) # gr1 = Zygote.gradient( # p -> loss_gf(p, xMg, xP, y_o, y_unc)[1], # ϕ0_dev) # Zygote.gradient(ϕ0_dev -> loss_gf(ϕ0_dev, data1...)[1], ϕ0_dev) - optf = Optimization.OptimizationFunction((ϕ, data) -> loss_gf(ϕ, data...)[1], - Optimization.AutoZygote()) - # use CA.getdata(ϕ0_dev), i.e. the plain vector to avoid recompiling for specific CA - # loss_gf re-attaches the axes - optprob = OptimizationProblem(optf, CA.getdata(ϕ0_dev), train_loader_dev) - res = Optimization.solve(optprob, solver.alg; kwargs...) 
- ϕ = intϕ(res.u) + if is_omitting_NaNbatches + # implement training loop by hand to skip minibatches with NaN gradients + ps = CA.getdata(ϕ0_dev) + opt_st_new = Optimisers.setup(solver.alg, ps) + n_skips = 0 + # prepare DI.gradient; data_batch must be accessed and updated in the enclosing scope + # because fopt_loss_gf cannot be redefined + data_batch = first(train_loader_dev) + is_testmode = false + function fopt_loss_gf(ϕ) + #@show first(data_batch[5], 2) + loss_gf(ϕ, data_batch...; is_testmode)[1] + end + ad_prep = DI.prepare_gradient(fopt_loss_gf, ad_backend_loss, zero(ps)) + grad = similar(ps) + stime = time() + for epoch in 1:epochs + is_testmode = false + #i,data_batch = first(enumerate(loader)) + for (i, data_batch_) in enumerate(train_loader_dev) + data_batch = data_batch_ # propagate outside the for loop into the scope of fopt_loss_gf + DI.gradient!(fopt_loss_gf, grad, ad_prep, ad_backend_loss, ps) + if any(isnan.(grad)) + n_skips += 1 + #println("Skipped NaN : Batch $i") + print(",$i") + else + Optimisers.update!(opt_st_new, ps, grad) + end + end + ttime = time() - stime + # compute loss for test data + l = loss_gf_test(ps, test_data_dev...; is_testmode = true) + println() + @show round(ttime, digits=1), epoch, l.nLy, l.neg_log_prior, l.loss_penalty + # TODO log + end + res = nothing + ϕ = intϕ(ps) + else + optf = Optimization.OptimizationFunction((ϕ, data) -> loss_gf(ϕ, data...; is_testmode=false)[1], + ad_backend_loss) + # use CA.getdata(ϕ0_dev), i.e. the plain vector to avoid recompiling for specific CA + # loss_gf re-attaches the axes + optprob = OptimizationProblem(optf, CA.getdata(ϕ0_dev), train_loader_dev) + res = Optimization.solve(optprob, solver.alg; epochs, kwargs...) + ϕ = intϕ(res.u) + end θP = cpu_ca(apply_preserve_axes(transP, cpu_ca(ϕ).ϕP)) probo = HybridProblem(prob; ϕg=cpu_ca(ϕ).ϕg, θP) (; ϕ, resopt=res, probo) @@ -141,9 +210,9 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPosteriorS g_dev = g train_loader_dev = train_loader end - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites=false) + f = get_hybridproblem_PBmodel(prob; scenario) if gdevs.gdev_P isa MLDataDevices.AbstractGPUDevice - f_dev = fmap(gdevs.gdev_P, f) + f_dev = gdevs.gdev_P(f) #fmap(gdevs.gdev_P, f) else f_dev = f end @@ -164,9 +233,10 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPosteriorS # @usingany Cthulhu # @descend_code_warntype loss_elbo(ϕ0_dev, rng, first(train_loader_dev)...) l0 = is_infer ? - (Test.@inferred loss_elbo(ϕ0_dev, rng, first(train_loader_dev)...)) : - loss_elbo(ϕ0_dev, rng, first(train_loader_dev)...) - optf = Optimization.OptimizationFunction((ϕ, data) -> first(loss_elbo(ϕ, rng, data...)), + (Test.@inferred loss_elbo(ϕ0_dev, rng, first(train_loader_dev)...; is_testmode=true)) : + loss_elbo(ϕ0_dev, rng, first(train_loader_dev)...; is_testmode=false) + optf = Optimization.OptimizationFunction( + (ϕ, data) -> first(loss_elbo(ϕ, rng, data...; is_testmode=false)), Optimization.AutoZygote()) optprob = OptimizationProblem(optf, CA.getdata(ϕ0_dev), train_loader_dev) res = Optimization.solve(optprob, solver.alg; kwargs...)
@@ -216,7 +286,7 @@ function get_loss_elbo(g, transP, transMs, f, py; trans_mMs=StackedArray(transMs.stacked, n_MC_mean), priorsP=priorsP, priorsM=priorsM, floss_penalty=floss_penalty - function loss_elbo(ϕ, rng, xM, xP, y_o, y_unc, i_sites) + function loss_elbo(ϕ, rng, xM, xP, y_o, y_unc, i_sites; is_testmode) #ϕc = int_μP_ϕg_unc(ϕ) neg_elbo_gtf( rng, ϕ, g, f, py, xM, xP, y_o, y_unc, i_sites; @@ -224,20 +294,33 @@ function get_loss_elbo(g, transP, transMs, f, py; n_MC, n_MC_cap, n_MC_mean, cor_ends, priors_θP_mean, priors_θMs_mean, cdev, pbm_covar_indices, transP, transMs, trans_mP, trans_mMs, priorsP, priorsM, floss_penalty, #ϕg = ϕc.ϕg, ϕunc = ϕc.unc, + is_testmode, ) end end + +function compute_elbo_components( + prob::AbstractHybridProblem, solver::HybridPosteriorSolver; + scenario, kwargs... + ) + train_loader = get_hybridproblem_train_dataloader(prob; scenario) + data = train_loader.data + compute_elbo_components( + prob, solver, data; + scenario, kwargs... + ) +end + """ Compute the components of the elbo for given initial conditions of the problems for the first batch of the trainloader. """ function compute_elbo_components( - prob::AbstractHybridProblem, solver::HybridPosteriorSolver; + prob::AbstractHybridProblem, solver::HybridPosteriorSolver, data::Tuple; scenario, rng=Random.default_rng(), gdev=gpu_device(), θmean_quant=0.0, - use_all_sites=false, kwargs...) n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) par_templates = get_hybridproblem_par_templates(prob; scenario) @@ -248,23 +331,27 @@ function compute_elbo_components( (; transP, transM) = get_hybridproblem_transforms(prob; scenario) (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params( θP, θM, cor_ends, ϕg0, n_batch; transP, transM, ϕunc0) - train_loader = get_hybridproblem_train_dataloader(prob; scenario) if gdev isa MLDataDevices.AbstractGPUDevice ϕ0_dev = gdev(ϕ) g_dev = gdev(g) # zygote fails if gdev is a CPUDevice, although should be non-op - train_loader_dev = gdev_hybridproblem_dataloader(train_loader; scenario, gdev) + data_dev = gdev_hybridproblem_data(data; gdev_M = gdev, gdev_P = gdev) else ϕ0_dev = ϕ g_dev = g - train_loader_dev = train_loader + data_dev = data end - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites) + (xM, xP, y_o, y_unc, i_sites) = data_dev + n_site_pred = size(xP,2) + @assert size(xM, 2) == n_site_pred + @assert size(y_o, 2) == n_site_pred + @assert size(y_unc, 2) == n_site_pred + @assert length(i_sites) == n_site_pred + f_batch = get_hybridproblem_PBmodel(prob; scenario) + f = (n_site_pred == n_batch) ? f_batch : create_nsite_applicator(f_batch, n_site_pred) py = get_hybridproblem_neg_logden_obs(prob; scenario) priors_θ_mean = construct_priors_θ_mean( prob, ϕ0_dev.ϕg, keys(θM), θP, θmean_quant, g_dev, transM; scenario, get_ca_int_PMs, gdev, cdev, pbm_covars) - # TODO replace train_loader.data by proper function that pulls all the data - xM, xP, y_o, y_unc, i_sites = use_all_sites ? train_loader_dev.data : first(train_loader_dev) neg_elbo_gtf_components( rng, ϕ0_dev, g_dev, transPMs_batch, f, py, xM, xP, y_o, y_unc, i_sites, interpreters; solver.n_MC, solver.n_MC_cap, cor_ends, priors_θ_mean) @@ -275,7 +362,8 @@ In order to let mean of θ stay close to initial point parameter estimates construct a prior on mean θ to a Normal around initial prediction. 
""" function construct_priors_θ_mean(prob, ϕg, keysθM, θP, θmean_quant, g_dev, transM, transP; - scenario::Val{scen}, get_ca_int_PMs, gdevs, pbm_covars) where {scen} + scenario::Val{scen}, get_ca_int_PMs, gdevs, pbm_covars, + ) where {scen} iszero(θmean_quant) ? ([],[]) : begin gdev=gdevs.gdev_M @@ -294,7 +382,8 @@ function construct_priors_θ_mean(prob, ϕg, keysθM, θP, θmean_quant, g_dev, # ζMs = g_dev(xMP_all, CA.getdata(ϕg))' # transpose to par-last for StackedArray # ζMs_cpu = cdev(ζMs) # θMs = transMs(ζMs_cpu) - θMs = gtrans(g_dev, transMs, xMP_all, CA.getdata(ϕg); cdev=cpu_device()) + θMs = gtrans( + g_dev, transMs, xMP_all, CA.getdata(ϕg); cdev=cpu_device(), is_testmode = true) priors_dict = get_hybridproblem_priors(prob; scenario) priorsP = [priors_dict[k] for k in keys(θP)] priors_θP_mean = map(priorsP, θP) do priorsP, θPi diff --git a/src/HybridVariationalInference.jl b/src/HybridVariationalInference.jl index 0292776..37dc8e8 100644 --- a/src/HybridVariationalInference.jl +++ b/src/HybridVariationalInference.jl @@ -4,6 +4,7 @@ using ComponentArrays: ComponentArrays as CA using Random using StatsBase # fit ZScoreTransform using StatsFuns # norminvcdf +using LogExpFunctions # logistic, loglogistic using Combinatorics # gen_hybridproblem_synthetic/combinations using GPUArraysCore using LinearAlgebra @@ -16,12 +17,18 @@ using MLUtils # dataloader using CommonSolve #using OptimizationOptimisers # default alg=Adam(0.02) using Optimization +import Optimisers # for hand-coded optimization loop using Distributions, DistributionFits using StaticArrays: StaticArrays as SA using Functors using Test: Test # @inferred using Missings using FillArrays +using KernelAbstractions +import NaNMath # ignore missing observations in logDensity +using DifferentiationInterface: DifferentiationInterface as DI +import Zygote + export DoubleMM @@ -31,6 +38,7 @@ export extend_stacked_nrow, StackedArray #public Exp #julia 1.10 public: https://github.com/JuliaLang/julia/pull/55097 VERSION >= v"1.11.0-DEV.469" && eval(Meta.parse("public Exp")) +VERSION >= v"1.11.0-DEV.469" && eval(Meta.parse("public Logistic")) include("bijectors_utils.jl") export AbstractComponentArrayInterpreter, ComponentArrayInterpreter, @@ -42,10 +50,12 @@ include("ComponentArrayInterpreter.jl") export AbstractModelApplicator, construct_ChainsApplicator export construct_3layer_MLApplicator, select_ml_engine export NullModelApplicator, MagnitudeModelApplicator, NormalScalingModelApplicator +export RangeScalingModelApplicator include("ModelApplicator.jl") export AbstractPBMApplicator, NullPBMApplicator, PBMSiteApplicator, PBMPopulationApplicator -export DirectPBMApplicator +export DirectPBMApplicator, PBMPopulationGlobalApplicator +export create_nsite_applicator include("PBMApplicator.jl") # export AbstractGPUDataHandler, NullGPUDataHandler, get_default_GPUHandler @@ -64,7 +74,7 @@ export AbstractHybridProblem, get_hybridproblem_MLapplicator, get_hybridproblem_ get_hybridproblem_pbmpar_covars, gen_cov_pred, construct_dataloader_from_synthetic, - gdev_hybridproblem_dataloader, + gdev_hybridproblem_dataloader, gdev_hybridproblem_data, setup_PBMpar_interpreter, get_gdev_MP include("AbstractHybridProblem.jl") @@ -79,7 +89,7 @@ export HybridProblem export get_quantile_transformed include("HybridProblem.jl") -export gf, get_loss_gf +export gf, get_loss_gf, predict_point_hvi #export map_f_each_site include("gf.jl") @@ -92,6 +102,7 @@ include("util_opt.jl") export cpu_ca, apply_preserve_axes include("util_ca.jl") +export ones_similar_x 
include("util_gpu.jl") export neg_logden_indep_normal, entropy_MvNormal @@ -112,4 +123,7 @@ include("HybridSolver.jl") export DoubleMM include("DoubleMM/DoubleMM.jl") +export RRuleMonitor +include("RRuleMonitor.jl") + end diff --git a/src/ModelApplicator.jl b/src/ModelApplicator.jl index 4d08fc0..d0077cc 100644 --- a/src/ModelApplicator.jl +++ b/src/ModelApplicator.jl @@ -1,5 +1,5 @@ """ - AbstractModelApplicator(x, ϕ) + AbstractModelApplicator(x, ϕ; is_testmode = false) Abstraction of applying a machine learning model at covariate matrix, `x`, using parameters, `ϕ`. It returns a matrix of predictions with the same @@ -20,7 +20,7 @@ abstract type AbstractModelApplicator end function apply_model end -(app::AbstractModelApplicator)(x, ϕ) = apply_model(app, x, ϕ) +(app::AbstractModelApplicator)(x, ϕ; kwargs...) = apply_model(app, x, ϕ; kwargs...) """ @@ -30,7 +30,7 @@ Model applicator that returns its inputs. Used for testing. """ struct NullModelApplicator <: AbstractModelApplicator end -function apply_model(app::NullModelApplicator, x, ϕ) +function apply_model(app::NullModelApplicator, x, ϕ; kwargs...) return x end @@ -102,13 +102,15 @@ struct MagnitudeModelApplicator{M,A} <: AbstractModelApplicator multiplier::M end -function apply_model(app::MagnitudeModelApplicator, x, ϕ) +function apply_model(app::MagnitudeModelApplicator, x, ϕ; kwargs...) #@show size(x), size(ϕ), app.multiplier @assert eltype(app.multiplier) == eltype(ϕ) - apply_model(app.app, x, ϕ) .* app.multiplier + apply_model(app.app, x, ϕ; kwargs...) .* app.multiplier end + + """ NormalScalingModelApplicator(app, μ, σ) NormalScalingModelApplicator(app, priors, transM) @@ -158,14 +160,54 @@ function NormalScalingModelApplicator( NormalScalingModelApplicator(app, μ, σ) end -function apply_model(app::NormalScalingModelApplicator, x, ϕ) - y_perc = apply_model(app.app, x, ϕ) +function apply_model(app::NormalScalingModelApplicator, x, ϕ; kwargs...) + y_perc = apply_model(app.app, x, ϕ; kwargs...) # @show typeof(app.μ) # @show typeof(ϕ) @assert eltype(app.μ) == eltype(ϕ) - norminvcdf.(app.μ, app.σ, y_perc) # from StatsFuns + ans = norminvcdf.(app.μ, app.σ, y_perc) # from StatsFuns + # if !all(isfinite.(ans)) + # @info "NormalScalingModelApplicator.apply_model: encountered non-finite results" + # #@show ans, y_perc, app.μ, app.σ + # #@show app.app, x, ϕ + # #error("error to print stacktrace") + # end + ans +end + +""" + RangeScalingModelApplicator(app, y0) + +Wrapper around AbstractModelApplicator assumed to predict on (0,1) with +a linear mappting to prededfined range. +""" +struct RangeScalingModelApplicator{VF,A} <: AbstractModelApplicator + offset::VF + width::VF + app::A end +function apply_model(app::RangeScalingModelApplicator, x, ϕ; kwargs...) + res0 = apply_model(app.app, x, ϕ; kwargs...) + res = res0 .* app.width .+ app.offset +end + +""" + RangeScalingModelApplicator(app, lowers, uppers, FT::Type; repeat_inner::Integer = 1) + +Provide the target ragen by vectors `lower` and `upper`. The size of both +outputs must correspond to the size of the output of `app`. 
+""" +function RangeScalingModelApplicator( + app::AbstractModelApplicator, + lowers::VT, uppers::VT, + FT::Type) where VT<:AbstractVector + width = collect(FT, uppers .- lowers) + lowersFT = collect(FT, lowers) # convert eltype + RangeScalingModelApplicator(lowersFT, width, app) +end + + diff --git a/src/PBMApplicator.jl b/src/PBMApplicator.jl index c354d02..9543aee 100644 --- a/src/PBMApplicator.jl +++ b/src/PBMApplicator.jl @@ -28,6 +28,12 @@ function (app::AbstractPBMApplicator)(θP::AbstractArray, θMs::AbstractArray, x apply_model(app, θP, θMs, xP) end +""" +create_nsite_applicator(app::AbstractPBMApplicator, n_site) +""" +function create_nsite_applicator end + + """ apply_model(app::AbstractPBMApplicator, θsP::AbstractVector, θsMs::AbstractMatrix, xP::AbstractMatrix) apply_model(app::AbstractPBMApplicator, θsP::AbstractMatrix, θsMs::AbstractArray{ET,3}, xP) @@ -90,6 +96,8 @@ function apply_model(app::NullPBMApplicator, θP::AbstractVector, θMs::Abstract return CA.getdata(θMs) end +create_nsite_applicator(app::NullPBMApplicator, n_site) = app + """ DirectPBMApplicator() @@ -103,6 +111,8 @@ end function apply_model(app::DirectPBMApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP) return app.f(θP, θMs, xP) end +create_nsite_applicator(app::DirectPBMApplicator, n_site) = app + @@ -114,7 +124,7 @@ struct PBMSiteApplicator{F, IT, IXT, VFT} <: AbstractPBMApplicator end """ - PBMSiteApplicator(fθ, n_batch; θP, θM, θFix, xPvec) + PBMSiteApplicator(fθ; θP, θM, θFix, xPvec) Construct AbstractPBMApplicator from process-based model `fθ` that computes predictions for a single site. @@ -137,19 +147,24 @@ function PBMSiteApplicator(fθ; ) intθ1 = get_concrete(ComponentArrayInterpreter(flatten1(CA.ComponentVector(; θP, θM, θFix)))) int_xPsite = get_concrete(ComponentArrayInterpreter(xPvec)) - PBMSiteApplicator(fθ, intθ1, int_xPsite, CA.getdata(θFix)) + PBMSiteApplicator(fθ, intθ1, int_xPsite, θFix) +end + +function create_nsite_applicator(app::PBMSiteApplicator, n_site) + PBMSiteApplicator(app.fθ, app.intθ1, app.int_xPsite, app.θFix) end + function apply_model(app::PBMSiteApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP) function apply_PBMsite(θM, xP1) if (CA.getdata(θP) isa GPUArraysCore.AbstractGPUArray) && - (!(app.θFix isa GPUArraysCore.AbstractGPUArray) || + (!(CA.getdata(app.θFix) isa GPUArraysCore.AbstractGPUArray) || !(CA.getdata(θMs) isa GPUArraysCore.AbstractGPUArray)) error("concatenating GPUarrays with non-gpu arrays θFix or θMs. " * "May fmap PBMModelapplicators to gdev, " * "or compute PBMmodel on CPU") end - θ = vcat(CA.getdata(θP), CA.getdata(θM), app.θFix) + θ = vcat(CA.getdata(θP), CA.getdata(θM), CA.getdata(app.θFix)) θc = app.intθ1(θ); # show errors without ";" xPc = app.int_xPsite(xP1); ans = CA.getdata(app.fθ(θc, xPc)) @@ -189,10 +204,10 @@ end @functor PBMPopulationApplicator (θFixm, rep_fac) """ - PBMPopulationApplicator(fθpop, n_batch; θP, θM, θFix, xPvec) + PBMPopulationApplicator(fθpop, n_site; θP, θM, θFix, xPvec) Construct AbstractPBMApplicator from process-based model `fθ` that computes predictions -across sites for a population of size `n_batch`. +across sites for a population of size `n_site`. The applicator combines enclosed `θFix`, with provided `θMs` and `θP` to a `ComponentMatrix` with parameters with one row for each site, that can be column-indexed by Symbols. @@ -203,32 +218,48 @@ can be column-indexed by Symbols. 
diff --git a/src/PBMApplicator.jl b/src/PBMApplicator.jl
index c354d02..9543aee 100644
--- a/src/PBMApplicator.jl
+++ b/src/PBMApplicator.jl
@@ -28,6 +28,12 @@ function (app::AbstractPBMApplicator)(θP::AbstractArray, θMs::AbstractArray, x
    apply_model(app, θP, θMs, xP)
end

+"""
+    create_nsite_applicator(app::AbstractPBMApplicator, n_site)
+
+Construct a variant of `app` that predicts for `n_site` sites rather than for
+the batch size that `app` was constructed with.
+"""
+function create_nsite_applicator end
+
+
"""
    apply_model(app::AbstractPBMApplicator, θsP::AbstractVector, θsMs::AbstractMatrix, xP::AbstractMatrix)
    apply_model(app::AbstractPBMApplicator, θsP::AbstractMatrix, θsMs::AbstractArray{ET,3}, xP)
@@ -90,6 +96,8 @@ function apply_model(app::NullPBMApplicator, θP::AbstractVector, θMs::Abstract
    return CA.getdata(θMs)
end

+create_nsite_applicator(app::NullPBMApplicator, n_site) = app
+
"""
    DirectPBMApplicator()
@@ -103,6 +111,8 @@ end
function apply_model(app::DirectPBMApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP)
    return app.f(θP, θMs, xP)
end
+create_nsite_applicator(app::DirectPBMApplicator, n_site) = app
+

@@ -114,7 +124,7 @@ struct PBMSiteApplicator{F, IT, IXT, VFT} <: AbstractPBMApplicator
end

"""
-    PBMSiteApplicator(fθ, n_batch; θP, θM, θFix, xPvec)
+    PBMSiteApplicator(fθ; θP, θM, θFix, xPvec)

Construct AbstractPBMApplicator from process-based model `fθ` that computes
predictions for a single site.
@@ -137,19 +147,24 @@ function PBMSiteApplicator(fθ;
    )
    intθ1 = get_concrete(ComponentArrayInterpreter(flatten1(CA.ComponentVector(; θP, θM, θFix))))
    int_xPsite = get_concrete(ComponentArrayInterpreter(xPvec))
-    PBMSiteApplicator(fθ, intθ1, int_xPsite, CA.getdata(θFix))
+    PBMSiteApplicator(fθ, intθ1, int_xPsite, θFix)
+end
+
+function create_nsite_applicator(app::PBMSiteApplicator, n_site)
+    PBMSiteApplicator(app.fθ, app.intθ1, app.int_xPsite, app.θFix)
end
+
function apply_model(app::PBMSiteApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP)
    function apply_PBMsite(θM, xP1)
        if (CA.getdata(θP) isa GPUArraysCore.AbstractGPUArray) &&
-            (!(app.θFix isa GPUArraysCore.AbstractGPUArray) ||
+            (!(CA.getdata(app.θFix) isa GPUArraysCore.AbstractGPUArray) ||
            !(CA.getdata(θMs) isa GPUArraysCore.AbstractGPUArray))
            error("concatenating GPUarrays with non-gpu arrays θFix or θMs. " *
                  "May fmap PBMModelapplicators to gdev, " *
                  "or compute PBMmodel on CPU")
        end
-        θ = vcat(CA.getdata(θP), CA.getdata(θM), app.θFix)
+        θ = vcat(CA.getdata(θP), CA.getdata(θM), CA.getdata(app.θFix))
        θc = app.intθ1(θ); # show errors without ";"
        xPc = app.int_xPsite(xP1);
        ans = CA.getdata(app.fθ(θc, xPc))
@@ -189,10 +204,10 @@ end
@functor PBMPopulationApplicator (θFixm, rep_fac)

"""
-    PBMPopulationApplicator(fθpop, n_batch; θP, θM, θFix, xPvec)
+    PBMPopulationApplicator(fθpop, n_site; θP, θM, θFix, xPvec)

Construct AbstractPBMApplicator from process-based model `fθ` that computes predictions
-across sites for a population of size `n_batch`.
+across sites for a population of size `n_site`.
The applicator combines enclosed `θFix`, with provided `θMs` and `θP`
to a `ComponentMatrix` with parameters with one row for each site, that
can be column-indexed by Symbols.
@@ -203,32 +218,48 @@
    - `θc`: parameters: `ComponentMatrix` (n_site x n_par) with each row a parameter vector
    - `xPc`: observations: `ComponentMatrix` (n_obs x n_site) with each column observations for one site
-- `n_batch`: number of indiduals, i.e. rows in `θMs`
+- `n_site`: number of individuals, i.e. rows in `θMs`
- `θP`: `ComponentVector` template of global process model parameters
- `θM`: `ComponentVector` template of individual process model parameters
- `θFix`: `ComponentVector` of actual fixed process model parameters
- `xPvec`: `ComponentVector` template of model drivers for a single site
"""
-function PBMPopulationApplicator(fθpop, n_batch;
+function PBMPopulationApplicator(fθpop, n_site;
    θP::CA.ComponentVector, θM::CA.ComponentVector, θFix::CA.ComponentVector,
    xPvec::CA.ComponentVector
    )
    intθvec = ComponentArrayInterpreter(flatten1(CA.ComponentVector(; θP, θM, θFix)))
    int_xP_vec = ComponentArrayInterpreter(xPvec)
-    isFix = repeat(axes(θFix, 1)', n_batch)
+    isFix = repeat(axes(θFix, 1)', n_site)
    #
-    intθ = get_concrete(ComponentArrayInterpreter((n_batch,), intθvec))
-    int_xP = get_concrete(ComponentArrayInterpreter(int_xP_vec, (n_batch,)))
-    #isP = repeat(axes(θP, 1)', n_batch)
+    intθ = get_concrete(ComponentArrayInterpreter((n_site,), intθvec))
+    int_xP = get_concrete(ComponentArrayInterpreter(int_xP_vec, (n_site,)))
+    #isP = repeat(axes(θP, 1)', n_site)
    # n_site = size(θMs, 1)
-    rep_fac = ones_similar_x(θP, n_batch) # to reshape into matrix, avoiding repeat
-    θFixm = CA.getdata(θFix[isFix])
+    rep_fac = ones_similar_x(θP, n_site) # to reshape into matrix, avoiding repeat
+    θFixm = CA.ComponentMatrix(θFix[isFix], (CA.FlatAxis(), CA.getaxes(θFix)[1]))
    PBMPopulationApplicator(fθpop, θFixm, rep_fac, intθ, int_xP)
end

+function create_nsite_applicator(app::PBMPopulationApplicator, n_site)
+    θFix = app.θFixm[1,:]
+    isFix = repeat(axes(θFix, 1)', n_site)
+    θFixm = if length(θFix) == 0
+        CA.ComponentMatrix(θFix[isFix], (CA.FlatAxis(), CA.FlatAxis()))
+    else
+        CA.ComponentMatrix(θFix[isFix], (CA.FlatAxis(), CA.getaxes(θFix)[1]))
+    end
+    #
+    intθ = get_concrete(ComponentArrayInterpreter((n_site,), (CA.getaxes(app.intθ)[2],),()))
+    int_xP = get_concrete(ComponentArrayInterpreter(
+        (), (CA.getaxes(app.int_xP)[1],), (n_site,)))
+    rep_fac = ones_similar_x(θFix, n_site) # to reshape into matrix, avoiding repeat
+    PBMPopulationApplicator(app.fθpop, θFixm, rep_fac, intθ, int_xP)
+end
+
function apply_model(app::PBMPopulationApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP)
    if (CA.getdata(θP) isa GPUArraysCore.AbstractGPUArray) &&
-        (!(app.θFixm isa GPUArraysCore.AbstractGPUArray) ||
+        (!(CA.getdata(app.θFixm) isa GPUArraysCore.AbstractGPUArray) ||
        !(CA.getdata(θMs) isa GPUArraysCore.AbstractGPUArray))
        error("concatenating GPUarrays with non-gpu arrays θFixm or θMs. " *
" * "May transfer PBMPopulationApplicator to gdev, " * @@ -241,7 +272,7 @@ function apply_model(app::PBMPopulationApplicator, θP::AbstractVector, θMs::Ab #@benchmark CA.getdata(θP[app.isP]) #@benchmark CA.getdata(repeat(θP', size(θMs,1))) #@benchmark rep_fac .* CA.getdata(θP)' # - local θ = hcat(app.rep_fac .* CA.getdata(θP)', CA.getdata(θMs), app.θFixm) + local θ = hcat(app.rep_fac .* CA.getdata(θP)', CA.getdata(θMs), CA.getdata(app.θFixm)) #local θ = hcat(CA.getdata(θP[app.isP]), CA.getdata(θMs), app.θFixm) #local θ = hcat(CA.getdata(repeat(θP', size(θMs,1))), CA.getdata(θMs), app.θFixm) local θc = app.intθ(CA.getdata(θ)) @@ -250,3 +281,77 @@ function apply_model(app::PBMPopulationApplicator, θP::AbstractVector, θMs::Ab return pred_sites end +struct PBMPopulationGlobalApplicator{MFT, IsT, IgT, IXT, F} <: AbstractPBMApplicator + fθpop::F + θFix::MFT # may be CuVector rather than Vector + intθs::IsT + intθg::IgT + int_xP::IXT +end + +@functor PBMPopulationGlobalApplicator + + +""" + PBMPopulationGlobalApplicator(fθpop, n_site; θP, θM, θFix, xPvec) + +Construct AbstractPBMApplicator from process-based model `fθ` that computes predictions +across sites for a population of size `n_site`. +The applicator combines enclosed `θFix`, with provided `θMs` and `θP` +to a `ComponentMatrix` with parameters with one row for each site, that +can be column-indexed by Symbols. + +## Arguments +- `fθpop`: process model, process model `f(θsc, θgc, xPc)`, which is not + agnostic of the partitioning of parameters into fixed, global, and individual + to increase performance + - `θc`: parameters: `ComponentMatrix` (n_site x n_par_site) with each row a parameter vector + - `θgc`: parameters: `ComponentVector` (n_par_global) + - `xPc`: observations: `ComponentMatrix` (n_obs x n_site) with each column + observationsfor one site +- `n_site`: number of indiduals, i.e. rows in `θMs` +- `θP`: `ComponentVector` template of global process model parameters +- `θM`: `ComponentVector` template of individual process model parameters +- `θFix`: `ComponentVector` of actual fixed process model parameters +- `xPvec`: `ComponentVector` template of model drivers for a single site +""" +function PBMPopulationGlobalApplicator(fθpop, n_site; + θP::CA.ComponentVector, θM::CA.ComponentVector, θFix::CA.ComponentVector, + xPvec::CA.ComponentVector + ) + #intθvec = ComponentArrayInterpreter(flatten1(CA.ComponentVector(; θP, θM, θFix))) + int_xP_vec = ComponentArrayInterpreter(xPvec) + intθs = get_concrete(ComponentArrayInterpreter((n_site,), θM)) + intθg = get_concrete(ComponentArrayInterpreter(vcat(θP, θFix))) + int_xP = get_concrete(ComponentArrayInterpreter(int_xP_vec, (n_site,))) + PBMPopulationGlobalApplicator(fθpop, θFix, intθs, intθg, int_xP) +end + +function create_nsite_applicator(app::PBMPopulationGlobalApplicator, n_site) + @info("called PBMPopulationGlobalApplicator.create_nsite_applicator") + intθs = get_concrete(ComponentArrayInterpreter((n_site,), (CA.getaxes(app.intθs)[2],),())) + int_xP = get_concrete(ComponentArrayInterpreter( + (), (CA.getaxes(app.int_xP)[1],), (n_site,))) + PBMPopulationGlobalApplicator(app.fθpop, app.θFix, intθs, app.intθg, int_xP) +end + + +function apply_model(app::PBMPopulationGlobalApplicator, θP::AbstractVector, θMs::AbstractMatrix, xP) + if (CA.getdata(θP) isa GPUArraysCore.AbstractGPUArray) && + (!(CA.getdata(app.θFix) isa GPUArraysCore.AbstractGPUArray) || + !(CA.getdata(θMs) isa GPUArraysCore.AbstractGPUArray)) + error("concatenating GPUarrays with non-gpu arrays θFixm or θMs. 
" * + "May transfer PBMPopulationGlobalApplicator to gdev, " * + "or compute PBM on CPU.") + end + local θs = CA.getdata(θMs) + local θg = vcat(CA.getdata(θP), CA.getdata(app.θFix)) + local θsc = app.intθs(CA.getdata(θs)) + local θgc = app.intθg(CA.getdata(θg)) + local xPc = app.int_xP(CA.getdata(xP)) + local pred_sites = app.fθpop(θsc, θgc, xPc) + return pred_sites +end + + + diff --git a/src/RRuleMonitor.jl b/src/RRuleMonitor.jl new file mode 100644 index 0000000..88a73ea --- /dev/null +++ b/src/RRuleMonitor.jl @@ -0,0 +1,118 @@ +""" + RRuleMonitor(label, f, [ad_backend::ADTypes.AbstractADType]) + +Identity mapping of Callable or function `f` that intervenes the the pullback +and raises an error if the supplied cotangent or the jacobian +contains non-finitie entries. + +Arguments +- label: id (String, or symbol) used in the error message. +- `ad_backend`: the AD backend used in `DifferentiationInterface.jacobian`. + Defaults to `AutoZygote().` +""" +struct RRuleMonitor{F,L,A} <: AbstractModelApplicator + label::L + f::F + ad_backend::A + #TODO think of preparing: https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/tutorials/basic/#Preparing-for-multiple-gradients +end + +RRuleMonitor(label, f) = RRuleMonitor(label, f, DI.AutoZygote()) +#RRuleMonitor(label, f) = RRuleMonitor{typeof(label), typeof(f), DI.AutoZygote}(label, f, DI.AutoZygote()) + +@functor RRuleMonitor + +function (m::RRuleMonitor)(args...; kwargs...) + apply_RRuleMonitor(m, args...; kwargs...) +end + +function apply_RRuleMonitor(m::RRuleMonitor, args...; kwargs...) + m.f(args...; kwargs...) +end + +# AbstractModelApplicator +apply_model(m::RRuleMonitor, x, ϕ; kwargs...) = m.f(x, ϕ; kwargs...) + +function ChainRulesCore.rrule(::typeof(apply_RRuleMonitor), m::RRuleMonitor, args...; kwargs...) + function apply_RRuleMonitor_pullback(Δy) + # if m.label == "transP" + # @show Δy[:] + # end + if !all(isfinite.(Δy[:])) + @info "apply_RRuleMonitor_pullback: encountered non-finite co-gradients Δy " * + "for RRuleMonitor $(m.label)" + #@show Δy[:] + #Main.@infiltrate_main + error("traceback") + end + # do not call apply_RRuleMonitor because that results in infinite recursion + # for backends other than AutoZygote need to call jacobian for single argument function + jacs = if m.ad_backend isa DI.AutoZygote + function ftmp(args_...) + m.f(args_...; kwargs...) + end + jacs = Zygote.jacobian(ftmp, args...) + else + Tuple(begin + fxi = (x) -> m.f(args[1:(i-1)]..., x, args[i+1:end]...; kwargs...) + DI.jacobian(fxi, m.ad_backend, args[i]) + end for (i,x) in enumerate(args)) + end + for (i,jac) in enumerate(jacs) + if !all(isfinite.(jac)) + #@show jac + @info("apply_RRuleMonitor_pullback: encountered non-finite Jacobian " * + "for argument $(i) in RRuleMonitor $(m.label)") + #Main.@infiltrate_main + error("traceback") + end + end + projectors = (ProjectTo(arg) for arg in args) + # if m.label == "f in gf" && Main.cnt_SteadySOCPools >= 1118 + # Main.@infiltrate_main + # end + #(pr,jac,x) = first(zip(projectors,jacs,args)) + Δx = (@thunk(pr(reshape(jac' * vec(Δy), size(x)))) for (pr,jac,x) in zip( + projectors,jacs,args)) + # Δx = Tuple(begin + # if size(jac',2) != size(Δy,1) + # Main.@infiltrate_main + # end + # Δxi = jac' * Δy[:] + # Δxip = pr(reshape(Δxi, size(x))) + # end for (pr,jac,x) in zip(projectors,jacs,args)) + (NoTangent(), NoTangent(), Δx...) 
diff --git a/src/RRuleMonitor.jl b/src/RRuleMonitor.jl
new file mode 100644
index 0000000..88a73ea
--- /dev/null
+++ b/src/RRuleMonitor.jl
@@ -0,0 +1,118 @@
+"""
+    RRuleMonitor(label, f, [ad_backend::ADTypes.AbstractADType])
+
+Identity mapping of a callable or function `f` that intercepts the pullback
+and raises an error if the supplied cotangent or the Jacobian
+contains non-finite entries.
+
+Arguments
+- label: id (String, or symbol) used in the error message.
+- `ad_backend`: the AD backend used in `DifferentiationInterface.jacobian`.
+    Defaults to `AutoZygote()`.
+"""
+struct RRuleMonitor{F,L,A} <: AbstractModelApplicator
+    label::L
+    f::F
+    ad_backend::A
+    #TODO think of preparing: https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/tutorials/basic/#Preparing-for-multiple-gradients
+end
+
+RRuleMonitor(label, f) = RRuleMonitor(label, f, DI.AutoZygote())
+#RRuleMonitor(label, f) = RRuleMonitor{typeof(label), typeof(f), DI.AutoZygote}(label, f, DI.AutoZygote())
+
+@functor RRuleMonitor
+
+function (m::RRuleMonitor)(args...; kwargs...)
+    apply_RRuleMonitor(m, args...; kwargs...)
+end
+
+function apply_RRuleMonitor(m::RRuleMonitor, args...; kwargs...)
+    m.f(args...; kwargs...)
+end
+
+# AbstractModelApplicator
+apply_model(m::RRuleMonitor, x, ϕ; kwargs...) = m.f(x, ϕ; kwargs...)
+
+function ChainRulesCore.rrule(::typeof(apply_RRuleMonitor), m::RRuleMonitor, args...; kwargs...)
+    function apply_RRuleMonitor_pullback(Δy)
+        # if m.label == "transP"
+        #     @show Δy[:]
+        # end
+        if !all(isfinite.(Δy[:]))
+            @info "apply_RRuleMonitor_pullback: encountered non-finite co-gradients Δy " *
+                  "for RRuleMonitor $(m.label)"
+            #@show Δy[:]
+            #Main.@infiltrate_main
+            error("traceback")
+        end
+        # do not call apply_RRuleMonitor because that results in infinite recursion
+        # for backends other than AutoZygote need to call jacobian for single argument function
+        jacs = if m.ad_backend isa DI.AutoZygote
+            function ftmp(args_...)
+                m.f(args_...; kwargs...)
+            end
+            jacs = Zygote.jacobian(ftmp, args...)
+        else
+            Tuple(begin
+                fxi = (x) -> m.f(args[1:(i-1)]..., x, args[i+1:end]...; kwargs...)
+                DI.jacobian(fxi, m.ad_backend, args[i])
+            end for (i,x) in enumerate(args))
+        end
+        for (i,jac) in enumerate(jacs)
+            if !all(isfinite.(jac))
+                #@show jac
+                @info("apply_RRuleMonitor_pullback: encountered non-finite Jacobian " *
+                      "for argument $(i) in RRuleMonitor $(m.label)")
+                #Main.@infiltrate_main
+                error("traceback")
+            end
+        end
+        projectors = (ProjectTo(arg) for arg in args)
+        # if m.label == "f in gf" && Main.cnt_SteadySOCPools >= 1118
+        #     Main.@infiltrate_main
+        # end
+        #(pr,jac,x) = first(zip(projectors,jacs,args))
+        Δx = (@thunk(pr(reshape(jac' * vec(Δy), size(x)))) for (pr,jac,x) in zip(
+            projectors,jacs,args))
+        # Δx = Tuple(begin
+        #     if size(jac',2) != size(Δy,1)
+        #         Main.@infiltrate_main
+        #     end
+        #     Δxi = jac' * Δy[:]
+        #     Δxip = pr(reshape(Δxi, size(x)))
+        # end for (pr,jac,x) in zip(projectors,jacs,args))
+        (NoTangent(), NoTangent(), Δx...)
+    end
+    return apply_RRuleMonitor(m, args...; kwargs...), apply_RRuleMonitor_pullback
+end
+
+# # with DI support only functions of one argument
+# function ChainRulesCore.rrule(::typeof(apply_RRuleMonitor), m::RRuleMonitor, x; kwargs...)
+#     function apply_RRuleMonitor_pullback(Δy)
+#         if !all(isfinite.(Δy))
+#             @info "apply_RRuleMonitor_pullback: encountered non-finite co-gradients Δy " *
+#                   "for RRuleMonitor $(m.label)"
+#             #@show Δy[:]
+#             #Main.@infiltrate_main
+#             error("traceback")
+#         end
+#         # do not call apply_RRuleMonitor because that results in infinite recursion
+#         ftmp = (x_) -> m.f(x_; kwargs...)
+#         jac = DI.jacobian(ftmp, m.ad_backend, x)
+#         if !all(isfinite.(jac))
+#             #@show jac
+#             @info("apply_RRuleMonitor_pullback: encountered non-finite Jacobian " *
+#                   "in RRuleMonitor $(m.label)")
+#             #Main.@infiltrate_main
+#             error("traceback")
+#         end
+#         pr = ProjectTo(x)
+#         if m.label == "f in gf"
+#             #Main.@infiltrate_main
+#         end
+#         Δx = @thunk(pr(reshape(jac' * vec(Δy), size(x))))
+#         (NoTangent(), NoTangent(), Δx)
+#     end
+#     return apply_RRuleMonitor(m, args...; kwargs...), apply_RRuleMonitor_pullback
+# end
+
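A hedged usage sketch of `RRuleMonitor` (condensed from `test/test_RRuleMonitor.jl` added below): the forward pass is unchanged, but a pullback that produces non-finite values fails loudly instead of silently propagating NaNs:

``` julia
import Zygote
using HybridVariationalInference

# returns NaN for negative entries; its Jacobian is non-finite there
function ftestsqrt(x)
    is_pos = x .>= zero(x)
    sqrt.(is_pos .* x) .+ .!is_pos .* convert(eltype(x), NaN)
end

m = RRuleMonitor("ftestsqrt", ftestsqrt)
m([4.0, -1.0])                                # forward unchanged: [2.0, NaN]
Zygote.gradient(x -> sum(m(x)), [4.0, -1.0])  # throws ErrorException
```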
diff --git a/src/bijectors_utils.jl b/src/bijectors_utils.jl
index 9061ebb..20d437b 100644
--- a/src/bijectors_utils.jl
+++ b/src/bijectors_utils.jl
@@ -27,6 +27,35 @@ end
# end
Bijectors.is_monotonically_increasing(::Exp) = true

+#----------------------- Logistic
+"""
+    Logistic()
+
+A bijector that applies the broadcasted logistic function, i.e. `logistic.(x)`.
+It is equivalent to `elementwise(logistic)` but works better with automatic
+differentiation on GPU.
+"""
+struct Logistic <: Bijector
+end
+
+#Functors.@functor Logistic
+Bijectors.transform(b::Logistic, x) = logistic.(x) # note the broadcast
+Bijectors.transform(ib::Inverse{<:Logistic}, y) = logit.(y)
+
+# `logabsdetjac`
+# https://en.wikipedia.org/wiki/Logistic_function#Derivative
+Bijectors.logabsdetjac(b::Logistic, x) = sum(loglogistic.(x) + log1mlogistic.(x))
+
+# `with_logabsdet_jacobian`
+function Bijectors.with_logabsdet_jacobian(b::Logistic, x)
+    return transform(b,x), logabsdetjac(b,x)
+end
+# function Bijectors.with_logabsdet_jacobian(ib::Inverse{<:Logistic}, y)
+#     x = transform(ib, y)
+#     return x, -logabsdetjac(inverse(ib), x)
+# end
+Bijectors.is_monotonically_increasing(::Logistic) = true
+
"""
    StackedArray(stacked, nrow)
diff --git a/src/cholesky.jl b/src/cholesky.jl
index 24fd444..7784ad2 100644
--- a/src/cholesky.jl
+++ b/src/cholesky.jl
@@ -147,7 +147,7 @@
This can be used to fit parameters that yield an upper Cholesky-Factor of a
Covariance matrix.
It uses the upper triangular matrix rather than the lower because it
-involes a sum across columns, whereas the alternative of a lower triangular
+involves a sum across columns, whereas the alternative of a lower triangular
uses sum across rows. Sum across columns is often faster, because
entries of columns are contiguous.
"""
diff --git a/src/elbo.jl b/src/elbo.jl
index cc26bec..9f6a461 100644
--- a/src/elbo.jl
+++ b/src/elbo.jl
@@ -51,10 +51,11 @@ function neg_elbo_gtf_components(rng, ϕ::AbstractVector{FT}, g, f, py,
    trans_mMs =StackedArray(transMs.stacked, n_MC),
    priorsP, priorsM,
    floss_penalty = zero_penalty_loss,
+    is_testmode,
    ) where {FT}
    n_MCr = isempty(priors_θP_mean) ?
        n_MC : max(n_MC, n_MC_mean)
    ζsP, ζsMs, σ = generate_ζ(rng, g, ϕ, xM; n_MC=n_MCr, cor_ends, pbm_covar_indices,
-        int_unc, int_μP_ϕg_unc)
+        int_unc, int_μP_ϕg_unc, is_testmode)
    ζsP_cpu = cdev(ζsP) # fetch to CPU, because for <1000 sites (n_batch) this is faster
    ζsMs_cpu = cdev(ζsMs) # fetch to CPU, because for <1000 sites (n_batch) this is faster
    #
@@ -194,22 +195,21 @@ function zero_penalty_loss(
end

-
"""
-    predict_hvi([rng], predict_hvi(rng, prob::AbstractHybridProblem)
+    predict_hvi([rng], prob::AbstractHybridProblem)

Prediction function for hybrid variational inference parameter model.

## Arguments
-- The problem for which to predict
-- xM: covariates for the machine-learning model (ML): Matrix (n_θM x n_site_pred).
-- xP: model drivers for process based model (PBM): Matrix with (n_site_pred) rows.
-    If provided a ComponentArray with a Tuple-Axis in rows, the PBM model can
-    access parts of it, e.g. `xP[:S1,...]`.
+- `prob`: The problem for which to predict

## Keyword arguments
- `scenario`
- `n_sample_pred`
+- `xM`: covariates for the machine-learning model (ML): Matrix (n_θM x n_site_pred).
+    Possibility to override the default from `get_hybridproblem_train_dataloader`.
+- `xP`: model drivers for process based model (PBM): Matrix with (n_site_pred) rows.
+    Possibility to override the default from `get_hybridproblem_train_dataloader`.

Returns a NamedTuple `(; y, θsP, θsMs, entropy_ζ)` with entries
- `y`: Array `(n_obs, n_site, n_sample_pred)` of model predictions.
@@ -222,20 +222,27 @@
"""
function predict_hvi(rng, prob::AbstractHybridProblem; scenario=Val(()),
    gdevs = get_gdev_MP(scenario),
+    xM = nothing, xP = nothing,
+    is_testmode = true,
    kwargs...
    )
-    dl = get_hybridproblem_train_dataloader(prob; scenario)
-    dl_dev = gdev_hybridproblem_dataloader(dl; gdevs)
-    xM, xP = dl_dev.data[1:2]
-    (; θsP, θsMs, entropy_ζ) = sample_posterior(rng, prob, xM; scenario, gdevs, kwargs...)
+    if isnothing(xM) || isnothing(xP)
+        dl = get_hybridproblem_train_dataloader(prob; scenario)
+        dl_dev = gdev_hybridproblem_dataloader(dl; gdevs)
+        xM_dl, xP_dl = dl_dev.data[1:2]
+        xM = isnothing(xM) ? xM_dl : xM
+        xP = isnothing(xP) ? xP_dl : xP
+    end
+    (; θsP, θsMs, entropy_ζ) = sample_posterior(
+        rng, prob, xM; scenario, gdevs, is_testmode, kwargs...)
    #
    n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario)
-    n_site_pred = size(θsMs,1)
-    is_predict_batch = (n_site_pred == n_batch)
+    n_site_pred = size(θsMs,1) # determined by size(xM)
    @assert size(xP, 2) == n_site_pred
-    f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites=!is_predict_batch)
+    f_batch = get_hybridproblem_PBmodel(prob; scenario)
+    f = n_site_pred == n_batch ? f_batch : create_nsite_applicator(f_batch, n_site_pred)
    if gdevs.gdev_P isa MLDataDevices.AbstractGPUDevice
-        f_dev = fmap(gdevs.gdev_P, f)
+        f_dev = gdevs.gdev_P(f) # fmap(gdevs.gdev_P, f)
    else
        f_dev = f
    end
@@ -323,11 +330,11 @@ function sample_posterior(rng, g, ϕ::AbstractVector, xM::AbstractMatrix;
    cor_ends, pbm_covar_indices,
    is_inferred::Val{is_infer} = Val(false),
-    kwargs...
+    is_testmode,
    ) where is_infer
    ζsP_gpu, ζsMs_gpu, σ = generate_ζ(rng, g, CA.getdata(ϕ), CA.getdata(xM);
        int_μP_ϕg_unc, int_unc,
-        n_MC=n_sample_pred, cor_ends, pbm_covar_indices)
+        n_MC=n_sample_pred, cor_ends, pbm_covar_indices, is_testmode)
    ζsP = cdev(ζsP_gpu)
    ζsMs = cdev(ζsMs_gpu)
    logdetΣ = 2 * sum(log.(σ))
@@ -354,7 +361,9 @@ each MC sample and then transforming each parameter on block across sites.
function generate_ζ(rng, g, ϕ::AbstractVector{FT}, xM::MT;
    int_μP_ϕg_unc::AbstractComponentArrayInterpreter,
    int_unc::AbstractComponentArrayInterpreter,
-    n_MC=3, cor_ends, pbm_covar_indices) where {FT,MT}
+    n_MC=3, cor_ends, pbm_covar_indices,
+    is_testmode,
+    ) where {FT,MT}
    # see documentation of neg_elbo_gtf
    ϕc = int_μP_ϕg_unc(CA.getdata(ϕ))
    μ_ζP = CA.getdata(ϕc.μP)
@@ -364,7 +373,7 @@
    xMP0 = _append_each_covars(xM, CA.getdata(μ_ζP), pbm_covar_indices)
    #Main.@infiltrate_main
-    μ_ζMs0 = g(xMP0, ϕg)
+    μ_ζMs0 = g(xMP0, ϕg; is_testmode)
    ζP_resids, ζMs_parfirst_resids, σ = sample_ζresid_norm(rng, μ_ζP, μ_ζMs0, ϕc.unc;
        n_MC, cor_ends, int_unc)
    if pbm_covar_indices isa SA.SVector{0}
        # do not need to predict again but just add the residuals to μ_ζP and μ_ζMs
@@ -561,10 +570,11 @@ function _create_blockdiag(
    tmp = vcat(Uσ)
end

+# TODO replace by KA.rand when it becomes available, see ones_similar
+# https://github.com/JuliaGPU/KernelAbstractions.jl/issues/488
function _create_randn(rng, ::AbstractVector{T}, dims...) where {T}
    randn(rng, T, dims...)
end
-
#moved to HybridVariationalInferenceCUDAExt
#function _create_randn(rng, ::CUDA.CuVector{T}, dims...) where {T}
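With the new keywords, prediction no longer has to use the training data. A hedged sketch (following the updated `test_HybridProblem.jl` below; `probo` denotes the problem returned by `solve`, and `xM_new`/`xP_new` are hypothetical held-out covariates and drivers):

``` julia
using StableRNGs
rng = StableRNG(111)

# default: covariates and drivers come from the training dataloader
(; y, θsP, θsMs, entropy_ζ) = predict_hvi(rng, probo; scenario, n_sample_pred = 12)
size(y)  # (n_obs, n_site, n_sample_pred)

# override xM and xP to predict for other sites (columns are sites)
res = predict_hvi(rng, probo; scenario, xM = xM_new, xP = xP_new)
```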
diff --git a/src/gf.jl b/src/gf.jl
index 30b5a76..5578bef 100644
--- a/src/gf.jl
+++ b/src/gf.jl
@@ -49,28 +49,77 @@
# #map_f_each_site(f_double, θMs_true, stack(Iterators.repeated(CA.getdata(θP_true), size(θMs_true,2))))

"""
-composition f ∘ transM ∘ g: mechanistic model after machine learning parameter prediction
+    predict_point_hvi([rng], prob::AbstractHybridProblem)
+
+Prediction function for the hybrid variational inference parameter model that
+omits the sampling step and returns the prediction at the mean in
+unconstrained space.
+
+## Arguments
+- `prob`: The problem for which to predict
+
+## Keyword arguments
+- `scenario`
+- `gdevs`
+- `xM`: covariates for the machine-learning model (ML): Matrix (n_θM x n_site_pred).
+    Possibility to override the default from `get_hybridproblem_train_dataloader`.
+- `xP`: model drivers for process based model (PBM): Matrix with (n_site_pred) rows.
+    Possibility to override the default from `get_hybridproblem_train_dataloader`.
+
+Returns a NamedTuple `(; y_pred, θMs, θP)` with entries
+- `y_pred`: Matrix `(n_obs, n_site)` of model predictions.
+- `θP`: ComponentVector of PBM model parameters
+    that are kept constant across sites.
+- `θMs`: ComponentMatrix `(n_site, n_θM)` of PBM model parameters
+    that vary by site.
"""
-function gf(prob::AbstractHybridProblem; scenario = Val(()), kwargs...)
-    train_loader = get_hybridproblem_train_dataloader(prob; scenario)
-    train_loader_dev = gdev_hybridproblem_dataloader(train_loader; scenario)
-    xM, xP = train_loader_dev.data[1:2]
-    gf(prob, xM, xP; scenario, kwargs...)
+function predict_point_hvi(rng, prob::AbstractHybridProblem; scenario=Val(()),
+    gdevs = get_gdev_MP(scenario),
+    xM = nothing, xP = nothing,
+    is_testmode = true,
+    kwargs...
+    )
+    if isnothing(xM) || isnothing(xP)
+        dl = get_hybridproblem_train_dataloader(prob; scenario)
+        dl_dev = gdev_hybridproblem_dataloader(dl; gdevs)
+        xM_dl, xP_dl = dl_dev.data[1:2]
+        xM = isnothing(xM) ? xM_dl : xM
+        xP = isnothing(xP) ? xP_dl : xP
+    end
+    y_pred, θMs, θP = gf(prob, xM, xP; scenario, gdevs, is_testmode, kwargs...)
+    θPc = ComponentArrayInterpreter(prob.θP)(θP)
+    θMsc = ComponentArrayInterpreter((size(θMs,1),), prob.θM)(θMs)
+    (;y_pred, θMs=θMsc, θP=θPc)
end
+
+
+
+# function gf(prob::AbstractHybridProblem; scenario = Val(()), kwargs...)
+#     train_loader = get_hybridproblem_train_dataloader(prob; scenario)
+#     train_loader_dev = gdev_hybridproblem_dataloader(train_loader; scenario)
+#     xM, xP = train_loader_dev.data[1:2]
+#     gf(prob, xM, xP; scenario, kwargs...)
+# end
+"""
+composition f ∘ transM ∘ g: mechanistic model after machine learning parameter prediction
+"""
function gf(prob::AbstractHybridProblem, xM::AbstractMatrix, xP::AbstractMatrix;
    scenario = Val(()),
-    gdev = :use_gpu ∈ _val_value(scenario) ? gpu_device() : identity,
-    cdev = gdev isa MLDataDevices.AbstractGPUDevice ? cpu_device() : identity,
+    gdevs = nothing, #get_gdev_MP(scenario),
    is_inferred::Val{is_infer} = Val(false),
    kwargs...
    ) where is_infer
+    gdevs = isnothing(gdevs) ? get_gdev_MP(scenario) : gdevs
    g, ϕg = get_hybridproblem_MLapplicator(prob; scenario)
    n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario)
-    is_predict_batch = (n_batch == size(xP,2))
-    n_site_pred = is_predict_batch ? n_batch : n_site
-    @assert size(xP, 2) == n_site_pred
+    n_site_pred = size(xP,2)
    @assert size(xM, 2) == n_site_pred
-    f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = !is_predict_batch)
+    f_batch = get_hybridproblem_PBmodel(prob; scenario)
+    f = (n_site_pred == n_batch) ? f_batch : create_nsite_applicator(f_batch, n_site_pred)
+    if gdevs.gdev_P isa MLDataDevices.AbstractGPUDevice
+        f_dev = gdevs.gdev_P(f) # fmap(gdevs.gdev_P, f)
+    else
+        f_dev = f
+    end
    (; θP, θM) = get_hybridproblem_par_templates(prob; scenario)
    (; transP, transM) = get_hybridproblem_transforms(prob; scenario)
    transMs = StackedArray(transM, n_site_pred)
@@ -78,14 +127,15 @@ function gf(prob::AbstractHybridProblem, xM::AbstractMatrix, xP::AbstractMatrix;
    pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario)
    pbm_covar_indices = CA.getdata(intP(1:length(intP))[pbm_covars])
    ζP = inverse(transP)(θP)
-    g_dev, ϕg_dev, ζP_dev = (gdev(g), gdev(ϕg), gdev(CA.getdata(ζP)))
+    gdev, cdev = gdevs.gdev_M, infer_cdev(gdevs)
+    g_dev, ϕg_dev, xM_dev, ζP_dev = gdev(g), gdev(ϕg), gdev(CA.getdata(xM)), gdev(CA.getdata(ζP))
    # most of the properties of prob are not type-inferred
    # hence result is not type-inferred, but may test at this context
    res = is_infer ? Test.@inferred(
        gf(
-            g_dev, transMs, transP, f, xM, xP, ϕg_dev, ζP_dev, pbm_covar_indices;
+            g_dev, transMs, transP, f_dev, xM_dev, xP, ϕg_dev, ζP_dev, pbm_covar_indices;
            cdev, kwargs...)) :
-        gf(g_dev, transMs, transP, f, xM, xP, ϕg_dev, ζP_dev, pbm_covar_indices;
+        gf(g_dev, transMs, transP, f_dev, xM_dev, xP, ϕg_dev, ζP_dev, pbm_covar_indices;
            cdev, kwargs...)
end

@@ -100,7 +150,7 @@ end
function gf(g::AbstractModelApplicator, transMs, transP, f, xM, xP, ϕg, ζP,
    pbm_covar_indices::AbstractVector{<:Integer};
-    cdev)
+    cdev, is_testmode)
    # @show first(xM,5)
    # @show first(ϕg,5)
@@ -110,10 +160,16 @@
    # end
    #xMP = _append_PBM_covars(xM, intP(ζP), pbm_covars)
    xMP = _append_each_covars(xM, CA.getdata(ζP), pbm_covar_indices)
-    θMs = gtrans(g, transMs, xMP, ϕg; cdev)
+    θMs = gtrans(g, transMs, xMP, ϕg; cdev, is_testmode)
+    # transPM = RRuleMonitor("transP", ζP -> transP(ζP))
+    # θP = transPM(CA.getdata(ζP))
    θP = transP(CA.getdata(ζP))
    θP_cpu = cdev(θP)
    y_pred = f(θP_cpu, θMs, xP)
+    # fM = RRuleMonitor("f in gf", (θP_cpu) -> f(θP_cpu, θMs, xP), DI.AutoForwardDiff())
+    # y_pred = fM(θP_cpu)
+    # fM = RRuleMonitor("f in gf", (θP_cpu, θMs) -> f(θP_cpu, θMs, xP))
+    # y_pred = fM(θP_cpu, θMs) # very slow large JvP with θMs
    return y_pred, θMs, θP_cpu
end
@@ -121,17 +177,23 @@
composition transM ∘ g: transformation after machine learning parameter prediction
Provide a `transMs = StackedArray(transM, n_batch)`
"""
-function gtrans(g, transMs, xMP, ϕg; cdev)
+function gtrans(g, transMs, xMP, ϕg; cdev, is_testmode) # TODO remove after removing gf
    # predict the log of the parameters
-    ζMst = g(xMP, ϕg)
+    ζMst = g(xMP, ϕg; is_testmode)
    ζMs = ζMst'
    ζMs_cpu = cdev(ζMs)
    θMs = transMs(ζMs_cpu)
+    if !all(isfinite.(θMs))
+        @info "gtrans: encountered non-finite parameters"
+        #@show θMs, ζMs_cpu, transMs
+        #@show xMP, ϕg, is_testmode
+        #TODO save xMP, ϕg, is_testmode using JLD2
+    end
+    θMs
    #θMs = reduce(hcat, map(transM, eachcol(ζMs_cpu))) # transform each row
end
-
"""
Create a loss function for given
- g(x, ϕ): machine learning model
@@ -168,18 +230,25 @@ function get_loss_gf(g, transM, transP, f, py,
    cdev=cpu_device(),
    pbm_covars, n_site_batch,
    priorsP, priorsM,
    floss_penalty = zero_penalty_loss,
+    is_omit_priors = false,
    kwargs...)
    let g = g, transM = transM, transP = transP, f = f, intϕ = get_concrete(intϕ),
        transMs = StackedArray(transM, n_site_batch),
+        is_omit_priors = is_omit_priors,
        cdev = cdev,
        pbm_covar_indices = CA.getdata(intP(1:length(intP))[pbm_covars]),
-        priorsP = priorsP, priorsM = priorsM, floss_penalty = floss_penalty
+        priorsP = priorsP, priorsM = priorsM, floss_penalty = floss_penalty,
+        cpu_dev = cpu_device() # real cpu, different from infer_cdev(gdevs), which may be identity
        #, intP = get_concrete(intP)
        #inv_transP = inverse(transP),
        kwargs = kwargs

-    function loss_gf(ϕ, xM, xP, y_o, y_unc, i_sites)
+    function loss_gf(ϕ, xM, xP, y_o, y_unc, i_sites; is_testmode)
        ϕc = intϕ(ϕ)
+        # GPUArraysCore.allowscalar(() -> if !all(isfinite.(ϕ))
+        #     @show ϕc.ϕP
+        #     error("invoked loss function loss_gf with non-finite parameters")
+        # end)
        # μ_ζP = ϕc.ϕP
        # xMP = _append_each_covars(xM, CA.getdata(μ_ζP), pbm_covar_indices)
        # ϕ_M = g(xMP, CA.getdata(ϕc.ϕg))
@@ -187,9 +256,14 @@
        # ζP_cpu = cdev(CA.getdata(μ_ζP))
        # ζMs_cpu = cdev(CA.getdata(μ_ζMs))
        # y_pred, _, _ = apply_f_trans(ζP_cpu, ζMs_cpu, f, xP; transM, transP)
+        if !all(isfinite.(ϕ))
+            @info "loss_gf: encountered non-finite ϕ"
+            @show ϕc.ϕP
+            #Main.@infiltrate_main
+        end
        y_pred, θMs_pred, θP_pred = gf(
            g, transMs, transP, f, xM, xP, CA.getdata(ϕc.ϕg), CA.getdata(ϕc.ϕP),
-            pbm_covar_indices; cdev, kwargs...)
+            pbm_covar_indices; cdev, is_testmode, kwargs...)
        #σ = exp.(y_unc ./ 2)
        #nLy = sum(abs2, (y_pred .- y_o) ./ σ)
        nLy = py( y_pred, y_o, y_unc)
@@ -198,10 +272,27 @@
        logpdf_tv = (prior, θ::AbstractVector) -> begin
            map(Base.Fix1(logpdf, prior), θ)::Vector{eltype(θP_pred)}
        end
-        neg_log_prior = -sum(logpdf_t.(priorsP, θP_pred)) - sum(map(
-            (priorMi, θMi) -> sum(logpdf_tv(priorMi, θMi)), priorsM, eachcol(θMs_pred)))
+        #Main.@infiltrate_main
+        #Maybe: move priors to GPU, for now need to move θ to cpu
+        # currently does not work on gpu, moving to cpu has problems with gradient
+        # θP_pred_cpu = cpu_dev(CA.getdata(θP_pred))
+        # θMs_pred_cpu = cpu_dev(CA.getdata(θMs_pred))
+        θP_pred_cpu = CA.getdata(θP_pred)
+        θMs_pred_cpu = CA.getdata(θMs_pred)
+        # TODO account for prior cost on global parameters after debug
+        neg_log_prior = is_omit_priors ? zero(nLy) :
+            -sum(logpdf_t.(priorsP, θP_pred_cpu)) +
+            -sum(map((priorMi, θMi) -> sum(
+                logpdf_tv(priorMi, θMi)), priorsM, eachcol(θMs_pred_cpu)))
+        #neg_log_prior = min(sqrt(floatmax(neg_log_prior0)), neg_log_prior0)
+        if !isfinite(neg_log_prior)
+            @info "loss_gf: encountered non-finite prior density"
+            @show θP_pred, θMs_pred, ϕc.ϕP
+            error("debug get_loss_gf")
+        end
        ϕunc = eltype(θP_pred)[] # no uncertainty parameters optimized
        loss_penalty = floss_penalty(y_pred, θMs_pred, θP_pred, ϕc.ϕg, ϕunc)
+        #@show nLy, neg_log_prior, loss_penalty
        nLjoint_pen = nLy + neg_log_prior + loss_penalty
        return (;nLjoint_pen, y_pred, θMs_pred, θP_pred, nLy, neg_log_prior, loss_penalty)
    end
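The point-prediction counterpart follows the same pattern. A hedged sketch (taken from the updated `test_HybridProblem.jl` below; `probo` again denotes a solved problem):

``` julia
using StableRNGs
rng = StableRNG(111)

(; y_pred, θMs, θP) = predict_point_hvi(rng, probo; scenario)
size(y_pred)  # (n_obs, n_site); θP is a ComponentVector, θMs a (n_site, n_θM) ComponentMatrix
```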
diff --git a/src/logden_normal.jl b/src/logden_normal.jl
index d3cd814..262adea 100644
--- a/src/logden_normal.jl
+++ b/src/logden_normal.jl
@@ -24,11 +24,34 @@ function neg_logden_indep_normal(obs::AbstractArray, μ::AbstractArray, logσ2::
    # optimize argument logσ2 rather than σs for performance
    #nlogL = sum(σfac .* (1/2) .* logσ2 .+ (1/2) .* exp.(.- logσ2) .* abs2.(obs .- μ))
    # specifying logσ2 instead of σ is not transforming a random variable -> no Jacobian
-    obs_data = CA.getdata(obs)
-    μ_data = CA.getdata(μ)
-    nlogL = sum(σfac .* logσ2 .+ abs2.(obs_data .- μ_data) .* exp.(.-logσ2)) / convert(eltype(μ),2)
+    #
+    # obs_data = CA.getdata(obs)
+    # μ_data = CA.getdata(μ)
+    # nlogL = NaNMath.sum( # observations might be NaN for missing
+    #     σfac .* logσ2 .+ abs2.(obs_data .- μ_data) .* exp.(.-logσ2)) / convert(eltype(μ),2)
+    # return (nlogL)
+    #
+    i_finobs = .! isnan.(obs)
+    obs_data = CA.getdata(obs)[i_finobs]
+    μ_data = CA.getdata(μ)[i_finobs]
+    logσ2_fin = logσ2[i_finobs]
+    nlogL = sum( # observations might be NaN for missing
+        σfac .* logσ2_fin .+ abs2.(obs_data .- μ_data) .* exp.(.-logσ2_fin)) / convert(eltype(μ),2)
    return (nlogL)
end
+
+function neg_logden_indep_normal(obs::AbstractGPUArray, μ::AbstractGPUArray, logσ2::AbstractGPUArray{ET};
+    σfac=one(ET)) where ET
+    #cannot use NaNMath.sum on gpu, allocate vectors of non-NaN
+    i_finobs = .! isnan.(obs)
+    obs_data = CA.getdata(obs)[i_finobs]
+    μ_data = CA.getdata(μ)[i_finobs]
+    logσ2_fin = logσ2[i_finobs]
+    nlogL = sum( # observations might be NaN for missing
+        σfac .* logσ2_fin .+ abs2.(obs_data .- μ_data) .* exp.(.-logσ2_fin)) / convert(eltype(μ),2)
+    return (nlogL)
+end
+
# function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2::AbstractVector; kwargs...)
#     nlogLs = map(eachcol(obss), eachcol(preds)) do obs, μ
#         neg_logden_indep_normal(obs, μ, logσ2; kwargs...)
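The masking idea behind the new methods, as a standalone sketch (assumptions: plain arrays, no package types): missing observations enter as NaN and are excluded from the sum, so that both the value and its gradient stay finite:

``` julia
# negative log-density of independent normals, skipping NaN observations
function nll_skipnan(obs, μ, logσ2)
    i = .!isnan.(obs)
    sum(logσ2[i] .+ abs2.(obs[i] .- μ[i]) .* exp.(.-logσ2[i])) / 2
end

obs = [1.0, NaN, 3.0]          # second observation is missing
μ = [1.1, 2.0, 2.7]
nll_skipnan(obs, μ, zeros(3))  # finite: uses only entries 1 and 3
```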
diff --git a/src/util_ca.jl b/src/util_ca.jl
index 224ce87..c378582 100644
--- a/src/util_ca.jl
+++ b/src/util_ca.jl
@@ -31,7 +31,7 @@ component information that might be present in the dimensions.
function compose_axes(axtuples::NamedTuple)
    ls = map(axtuple -> Val(prod(axis_length.(axtuple))), axtuples)
    # to work on types, need to construct value types of intervals
-    intervals = _construct_invervals(;lengths=ls)
+    intervals = _construct_intervals(;lengths=ls)
    named_intervals = (;zip(keys(axtuples),intervals)...)
    axc = map(named_intervals, axtuples) do interval, axtuple
        ax = length(axtuple) == 1 ? axtuple[1] : CA.ShapedAxis(axis_length.(axtuple))
@@ -40,7 +40,7 @@
    CA.Axis(; axc...)
end

-function _construct_invervals(;lengths)
+function _construct_intervals(;lengths)
    reduce((ranges,length) -> _add_interval(;ranges, length),
        Iterators.tail(lengths), init=(Val(1:_val_value(first(lengths))),))
end
diff --git a/src/util_gpu.jl b/src/util_gpu.jl
index 7253cbd..d4f2cba 100644
--- a/src/util_gpu.jl
+++ b/src/util_gpu.jl
@@ -12,11 +12,77 @@ function ones_similar_x(x::AbstractArray, size_ret = size(x))
    Ones{eltype(x)}(size_ret)
end

+# TODO replace CUDA-method in extension after type stability is fixed
+# https://github.com/JuliaGPU/KernelAbstractions.jl/issues/634
+# function ones_similar_x(x::GPUArraysCore.AbstractGPUArray, s = size(x))
+#     backend = get_backend(x)
+#     ans = KernelAbstractions.ones(backend, eltype(x), s)
+#     #ans = ChainRulesCore.@ignore_derivatives KernelAbstractions.ones(backend, eltype(x), s)
+#     # https://juliagpu.github.io/KernelAbstractions.jl/stable/quickstart/#Synchronization
+#     # ChainRulesCore.@ignore_derivatives synchronize(backend)
+#     ans
+# end
+
# handle containers and transformations of Arrays
ones_similar_x(x::CA.ComponentArray, s = size(x)) = ones_similar_x(CA.getdata(x), s)
ones_similar_x(x::LinearAlgebra.Adjoint, s = size(x)) = ones_similar_x(parent(x), s)
ones_similar_x(x::SubArray, s = size(x)) = ones_similar_x(parent(x), s)

+function repeat_rowvector_dummy(x::AbstractMatrix{T}, is_dummy::Union{BitVector,AbstractVector{Bool}};
+    ones_vec = ones_similar_x(x, length(is_dummy)),
+    ) where T
+    ones_vec .* x .+ (is_dummy .* convert(T,NaN))
+    #ones_vec .* x .+ (is_dummy)
+end
+
+function ChainRulesCore.rrule(::typeof(repeat_rowvector_dummy), x, is_dummy::Union{BitVector,AbstractVector{Bool}}; kwargs...)
+    function repeat_rowvector_dummy_pullback(Δy)
+        # only sum the partials across non-dummy rows for each column
+        Δx = sum(Δy[.! is_dummy,:]; dims=1)
+        (NoTangent(), Δx, NoTangent())
+    end
+    return repeat_rowvector_dummy(x, is_dummy; kwargs...), repeat_rowvector_dummy_pullback
+end
+
+function repeat_rowvector_dummy(x::AbstractMatrix{T}, is_dummy::Union{BitMatrix,AbstractMatrix{Bool}};
+    ) where T
+    x .+ (is_dummy .* convert(T,NaN))
+end
+
+function ChainRulesCore.rrule(::typeof(repeat_rowvector_dummy),
+    x, is_dummy::Union{BitMatrix,AbstractMatrix{Bool}}; kwargs...)
+    pullback = if !any(is_dummy)
+        # avoid mapping rows if there is no dummy
+        function repeat_rowvector_dummy_pullback_emptybitmatrix(Δy)
+            Δx = sum(Δy; dims=1)
+            #Main.@infiltrate_main
+            (NoTangent(), Δx, ZeroTangent())
+        end
+    else
+        function repeat_rowvector_dummy_pullback_bitmatrix(Δy)
+            #@info "called rrule for repeat_rowvector_dummy with is_dummy matrix"
+            # only sum the partials across non-dummy rows for each column
+            #Δx = similar(x) # errors in copyto!
need the same as xtvec + Δxt = similar(x') + # TODO think of avoiding allocation of temporary vector + # using generator or StaticArray results in scalar indexing + xtvec = map(eachcol(Δy[:,:]), eachcol(is_dummy)) do Δyi, is_dummi_i + sum(Δyi[.! is_dummi_i]) + end + # gen = (sum(Δyi[.! is_dummi_i]) for (Δyi, is_dummi_i) in zip(eachcol(Δy[:,:]), eachcol(is_dummy))) + #xtvec = @SArray[sum(Δyi[.! is_dummi_i]) for (Δyi, is_dummi_i) in zip(eachcol(Δy[:,:]), eachcol(is_dummy))] + # if !all(isfinite.(xtvec)) + # @info "repeat_rowvector_dummy_pullback_bitmatrix: encountered non-finite gradients" + # end + copyto!(Δxt, xtvec) + (NoTangent(), Δxt', ZeroTangent()) + end + end + return repeat_rowvector_dummy(x, is_dummy; kwargs...), pullback +end + + + diff --git a/test/Project.toml b/test/Project.toml index ab63301..0832140 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,12 +4,15 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DistributionFits = "45214091-1ed4-4409-9bcf-fdb48a05e921" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" diff --git a/test/runtests.jl b/test/runtests.jl index 87bbd4b..99c5bbb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,8 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time begin if GROUP == "All" || GROUP == "Basic" + #@safetestset "test" include("test/test_RRuleMonitor.jl") + @time @safetestset "test_RRuleMonitor" include("test_RRuleMonitor.jl") #@safetestset "test" include("test/test_bijectors_utils.jl") @time @safetestset "test_bijectors_utils" include("test_bijectors_utils.jl") #@safetestset "test" include("test/test_util.jl") @@ -15,6 +17,8 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time @safetestset "test_ComponentArrayInterpreter" include("test_ComponentArrayInterpreter.jl") #@safetestset "test" include("test/test_hybridprobleminterpreters.jl") @time @safetestset "test_hybridprobleminterpreters" include("test_hybridprobleminterpreters.jl") + #@safetestset "test" include("test/test_PBMApplicator.jl") + @time @safetestset "test_PBMApplicator" include("test_PBMApplicator.jl") #@safetestset "test" include("test/test_ModelApplicator.jl") @time @safetestset "test_ModelApplicator" include("test_ModelApplicator.jl") #@safetestset "test" include("test/test_gencovar.jl") @@ -40,6 +44,8 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml # tests that need fixing (but should not be commented) at the end: #@safetestset "test" include("test/test_HybridProblem.jl") @time @safetestset "test_HybridProblem" include("test_HybridProblem.jl") + #@safetestset "test" include("test/test_missingdriver.jl") + @time @safetestset "test_missingdriver" include("test_missingdriver.jl") end end diff --git a/test/test_ComponentArrayInterpreter.jl b/test/test_ComponentArrayInterpreter.jl index bd9a765..f6dabf9 100644 
--- a/test/test_ComponentArrayInterpreter.jl +++ b/test/test_ComponentArrayInterpreter.jl @@ -12,7 +12,7 @@ using Suppressor gdev = Suppressor.@suppress gpu_device() # not loaded CUDA cdev = cpu_device() -@testset "construct StaticComponentArrayInterepreter" begin +@testset "construct StaticComponentArrayInterpreter" begin intv = @inferred CP.StaticComponentArrayInterpreter(CA.ComponentVector(a=1:3, b=reshape(4:9,3,2))) ints = @inferred CP.StaticComponentArrayInterpreter((;a=Val(3), b = Val((3,2)))) # @descend_code_warntype CP.StaticComponentArrayInterpreter((;a=Val(3), b = Val((3,2)))) diff --git a/test/test_HybridProblem.jl b/test/test_HybridProblem.jl index c8b9c2b..c7bb04e 100644 --- a/test/test_HybridProblem.jl +++ b/test/test_HybridProblem.jl @@ -82,11 +82,11 @@ function construct_problem(; scenario::Val{scen}) where scen #g_chain_scaled = app #ϕunc0 = init_hybrid_ϕunc(cor_ends, zero(FT)) pbm_covars = (:covarK2 ∈ scen) ? (:K2,) : () - f_batch = f_sites = PBMSiteApplicator( + f_batch = PBMSiteApplicator( f_doubleMM; θP, θM, θFix=CA.ComponentVector{FT}(), xPvec=xP[:,1]) HybridProblem(θP, θM, g_chain_scaled, ϕg0, - f_batch, f_sites, priors_dict, py, + f_batch, priors_dict, py, transM, transP, train_dataloader, n_covar, n_site, n_batch, cor_ends, pbm_covars, #ϕunc0, @@ -140,7 +140,7 @@ test_without_flux = (scenario) -> begin n_site, n_batch = get_hybridproblem_n_site_and_batch(prob; scenario) train_loader = get_hybridproblem_train_dataloader(prob; scenario) (xM, xP, y_o, y_unc, i_sites) = first(train_loader) - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = false) + f = get_hybridproblem_PBmodel(prob; scenario) py = get_hybridproblem_neg_logden_obs(prob; scenario) par_templates = get_hybridproblem_par_templates(prob; scenario) #f(par_templates.θP, hcat(par_templates.θM, par_templates.θM), xP[1:2]) @@ -156,19 +156,21 @@ test_without_flux = (scenario) -> begin # Pass the site-data for the batches as separate vectors wrapped in a tuple loss_gf = get_loss_gf(g, transM, transP, f, py, intϕ; - pbm_covars, n_site_batch = n_batch, priorsP, priorsM) + pbm_covars, n_site_batch = n_batch, priorsP, priorsM, + ) (_xM, _xP, _y_o, _y_unc, _i_sites) = first(train_loader) - l1 = loss_gf(p0, _xM, _xP, _y_o, _y_unc, _i_sites) + #l1 = loss_gf(p0, _xM, _xP, _y_o, _y_unc, _i_sites; is_testmode = false) l1 = @inferred ( # @descend_code_warntype ( - loss_gf(p0, _xM, _xP, _y_o, _y_unc, _i_sites)) + loss_gf(p0, _xM, _xP, _y_o, _y_unc, _i_sites; is_testmode = true)) tld = first(train_loader) - gr = Zygote.gradient(p -> loss_gf(p, tld...)[1], CA.getdata(p0)) + gr = Zygote.gradient(p -> loss_gf(p, tld...; is_testmode = false)[1], CA.getdata(p0)) @test gr[1] isa Vector () -> begin - optf = Optimization.OptimizationFunction((ϕ, data) -> loss_gf(ϕ, data...)[1], + optf = Optimization.OptimizationFunction( + (ϕ, data) -> loss_gf(ϕ, data...; is_testmode = false)[1], Optimization.AutoZygote()) optprob = OptimizationProblem(optf, p0, train_loader) @@ -198,7 +200,7 @@ gdev = gpu_device() test_with_flux = (scenario) -> begin prob = probc = construct_problem(;scenario); - @testset "HybridPointSolver $(last(CP._val_value(scenario)))" begin + @testset "HybridPointSolver + predict_point_hvi $(last(CP._val_value(scenario)))" begin rng = StableRNG(111) solver = HybridPointSolver(; alg=Adam(0.02)) (; ϕ, resopt, probo) = solve(prob, solver; scenario, rng, @@ -218,12 +220,15 @@ test_with_flux = (scenario) -> begin end)() @test θPo.r0 < 1.5 * θP.r0 @test ϕ.ϕP.K2 < 1.5 * log(θP.K2) + (;y_pred, θMs, θP) = 
predict_point_hvi(rng, probo; scenario) + _,_,y_obs,_ = get_hybridproblem_train_dataloader(prob; scenario).data + @test size(y_pred) == size(y_obs) end; - @testset "HybridPosteriorSolver $(last(CP._val_value(scenario)))" begin + @testset "HybridPosteriorSolver + predict_hvi $(last(CP._val_value(scenario)))" begin rng = StableRNG(111) solver = HybridPosteriorSolver(; alg=Adam(0.02), n_MC=3) - (; ϕ, θP, resopt) = solve(prob, solver; scenario, rng, + (; probo, ϕ, θP, resopt) = solve(prob, solver; scenario, rng, #callback = callback_loss(100), maxiters = 1200, #maxiters = 20 # too small so that it yields error #maxiters=37, # still complains "need to specify maxiters or epochs" @@ -237,6 +242,14 @@ test_with_flux = (scenario) -> begin @test exp(ϕ.μP.K2) == θP.K2 < 1.5 * θP.K2 θP prob.θP + n_sample_pred = 12 + (; y, θsP, θsMs, entropy_ζ) = predict_hvi(rng, probo; scenario, n_sample_pred); + _,_,y_obs,_ = get_hybridproblem_train_dataloader(prob; scenario).data + @test size(y) == (size(y_obs)..., n_sample_pred) + yc = cdev(y) + _ = map(eachslice(yc; dims = 3)) do ycs + @test all(isfinite.(ycs[isfinite.(y_obs)])) + end end; end # test_with flux @@ -321,8 +334,8 @@ test_with_flux_gpu = (scenario) -> begin # moved to solve and predict_hvi # probg = HybridProblem( # probg, - # f_batch = fmap(gdev, probg.f_batch), - # f_allsites = fmap(gdev, probg.f_allsites)) + # f_batch = gdev(probg.f_batch), + # f_allsites = gdev(probg.f_allsites)) #prob = CP.update(probg, transM = identity, transP = identity); solver = HybridPosteriorSolver(; alg=Adam(0.02), n_MC=3) n_site, n_batch = get_hybridproblem_n_site_and_batch(probg; scenario = scenf) diff --git a/test/test_ModelApplicator.jl b/test/test_ModelApplicator.jl index 3b80d62..70521e6 100644 --- a/test/test_ModelApplicator.jl +++ b/test/test_ModelApplicator.jl @@ -1,10 +1,14 @@ using Test using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as HVI using ComponentArrays: ComponentArrays as CA using StatsFuns using Distributions using MLDataDevices, CUDA, cuDNN, GPUArraysCore +gdev = gpu_device() +cdev = cpu_device() + @testset "NullModelApplicator" begin g = NullModelApplicator() c1 = CA.ComponentVector(a = (a1 = 1, a2 = 2:3), b = 3:4) @@ -31,7 +35,6 @@ end; p = normcdf.(μ, σ, y) #hcat(r, p) @test p ≈ r - gdev = gpu_device() #cdev = cpu_device() if gdev isa MLDataDevices.AbstractGPUDevice g_gpu = g |> gdev @@ -41,3 +44,25 @@ end; @test y isa GPUArraysCore.AbstractGPUArray end end; + +@testset "RangeScalingModelApplicator" begin + app = NullModelApplicator() + r = logistic.(randn(Float32, 5)) # 0..1 + lowers = collect(exp.(1.0:5.0)) # different magnitudes + uppers = lowers .* 2 + g = RangeScalingModelApplicator(app, lowers, uppers, eltype(r)) + y = g(r, []) + width = uppers .- lowers + @test y ≈(r .* width .+ lowers) + @test eltype(y) == eltype(r) + #cdev = cpu_device() + if gdev isa MLDataDevices.AbstractGPUDevice + g_gpu = g |> gdev + @test g_gpu.offset isa GPUArraysCore.AbstractGPUArray + @test g_gpu.width isa GPUArraysCore.AbstractGPUArray + r_gpu = r |> gdev + y_dev = g_gpu(r_gpu, []) + @test y_dev isa GPUArraysCore.AbstractGPUArray + @test cdev(y_dev) ≈ y + end +end; diff --git a/test/test_PBMApplicator.jl b/test/test_PBMApplicator.jl new file mode 100644 index 0000000..5df1c71 --- /dev/null +++ b/test/test_PBMApplicator.jl @@ -0,0 +1,95 @@ +using Test +using HybridVariationalInference +using ComponentArrays: ComponentArrays as CA + +import Zygote + +using MLDataDevices, CUDA, cuDNN, GPUArraysCore + +f_pop = 
function(θsc, xPc) + local n_obs = size(xPc, 1) + is_valid = isfinite.(CA.getdata(xPc)) + a1 = is_valid .* CA.getdata(θsc[:,:a1])' + a2 = is_valid .* CA.getdata(θsc[:,:a2])' + # b in θP has been expanded in PopulationApplicator + b = is_valid .* CA.getdata(θsc[:,:b])' + y = a1 .+ log.(a2) .* abs2.(cos.(b .- 0.2)) .* xPc.^2 +end + +() -> begin + include("test/test_scratch.jl") +end + +@testset "PBMPopulationApplicator" begin + n_obs = 3 + n_site = 5 + xPvec = CA.ComponentVector(s1 = 1.0:n_obs) + xPc = xPvec .* ones(n_site)' .+ abs2.(randn(n_obs, n_site) .* 0.1) + θP = CA.ComponentVector(b=3.0) + θM = CA.ComponentVector(a1=2.0,a2=1.0) + θFix = CA.ComponentVector(c=1.5) + # + θMs = (ones(n_site) .* θM') .+ abs2.(randn(n_site, length(θM)) .* 0.1) + θs = hcat(ones(n_site) .* θP', θMs) + y_obs = f_pop(θs, xPc) + g = PBMPopulationApplicator(f_pop, n_site; θP, θM, θFix, xPvec) + ret = g(θP, θMs, xPc) + @test ret ≈ y_obs + + gr = Zygote.gradient((θP, θMs) -> sum(g(θP, θMs, xPc)), CA.getdata(θP), CA.getdata(θMs)) + + xPc_NaN = copy(xPc); xPc_NaN[2:3,1] .= NaN + ret2 = g(θP, θMs, xPc_NaN) + gr = Zygote.gradient((θP, θMs) -> sum(g(θP, θMs, xPc_NaN)), CA.getdata(θP), CA.getdata(θMs)) + @test all(isfinite.(gr[1])) + + n_site_m = 4 + gm = create_nsite_applicator(g, n_site_m) + retm = gm(θP, θMs[1:n_site_m,:], xPc[:,1:n_site_m]) + @test retm ≈ y_obs[:,1:n_site_m] +end; + +f_global = function(θsc, θgc, xPc) + local n_obs = size(xPc, 1) + #is_dummy = isnan.(CA.getdata(xPc)) + is_valid = isfinite.(CA.getdata(xPc)) + # rep_fac = ones_similar_x(θsc, n_obs) + # a1 = rep_fac .* CA.getdata(θsc[:,:a1])' + # a2 = rep_fac .* CA.getdata(θsc[:,:a2])' + #a1 = repeat_rowvector_dummy(CA.getdata(θsc[:,:a1])', is_dummy) + #a2 = repeat_rowvector_dummy(CA.getdata(θsc[:,:a2])', is_dummy) + a1 = is_valid .* CA.getdata(θsc[:,:a1])' + a2 = is_valid .* CA.getdata(θsc[:,:a2])' + b = CA.getdata(θgc.b) .* (is_valid) + y = a1 .+ log.(a2) .* abs2.(cos.(b .- 0.2)) .* xPc.^2 +end + +@testset "PBMPopulationGlobalApplicator" begin + n_obs = 3 + n_site = 5 + xPvec = CA.ComponentVector(s1 = 1.0:n_obs) + xPc = xPvec .* ones(n_site)' .+ abs2.(randn(n_obs, n_site) .* 0.1) + θP = CA.ComponentVector(b=3.0) + θM = CA.ComponentVector(a1=2.0,a2=1.0) + θFix = CA.ComponentVector(c=1.5) + # + θMs = (ones(n_site) .* θM') .+ abs2.(randn(n_site, length(θM)) .* 0.1) + y_obs = f_global(θMs, θP, xPc) + g = PBMPopulationGlobalApplicator(f_global, n_site; θP, θM, θFix, xPvec) + ret = g(θP, θMs, xPc) + @test ret ≈ y_obs + + gr = Zygote.gradient((θP, θMs) -> sum(g(θP, θMs, xPc)), CA.getdata(θP), CA.getdata(θMs)) + + xPc_NaN = copy(xPc); xPc_NaN[2:3,1] .= NaN + ret2 = g(θP, θMs, xPc_NaN) + gr = Zygote.gradient((θP, θMs) -> sum(g(θP, θMs, xPc_NaN)), CA.getdata(θP), CA.getdata(θMs)) + @test all(isfinite.(gr[1])) # \thetaP + @test all(isfinite.(gr[2])) # solves finite gradient for a2 for first site + + n_site_m = 4 + gm = create_nsite_applicator(g, n_site_m) + retm = gm(θP, θMs[1:n_site_m,:], xPc[:,1:n_site_m]) + @test retm ≈ y_obs[:,1:n_site_m] +end; + diff --git a/test/test_RRuleMonitor.jl b/test/test_RRuleMonitor.jl new file mode 100644 index 0000000..8825c8c --- /dev/null +++ b/test/test_RRuleMonitor.jl @@ -0,0 +1,66 @@ +using Test +using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as HVI +using ComponentArrays: ComponentArrays as CA +# using MLDataDevices +# import CUDA, cuDNN +using DifferentiationInterface: DifferentiationInterface as DI +import Zygote +import ForwardDiff + + +function ftest(x) + 3 .* abs2.(x) 
+end + +function ftest2(x1,x2) + x1 .* abs2.(x2) +end + + +@testset "RRuleMonitor one argument" begin + x = collect(1.0:3.0) + y = ftest(x) + m = RRuleMonitor("ftest", ftest) + y2 = m(x) + @test y2 == y + gr = Zygote.gradient(x -> sum(ftest(x)), x)[1] + gr2 = Zygote.gradient(x -> sum(m(x)), x)[1] + @test gr2 == gr +end + +@testset "RRuleMonitor two argument" begin + x1 = collect(3.1:0.1:3.3) + x2 = collect(1.0:3.0) + y = ftest2(x1, x2) + m = RRuleMonitor("ftest2", ftest2) + y2 = m(x1, x2) + @test y2 == y + gr = Zygote.gradient((x1,x2) -> sum(ftest2(x1,x2)), x1, x2) + gr2 = Zygote.gradient((x1,x2) -> sum(m(x1,x2)), x1, x2) + @test gr2 == gr + md = RRuleMonitor("ftest2_Forward", ftest2, DI.AutoForwardDiff()) + gr3 = Zygote.gradient((x1,x2) -> sum(md(x1,x2)), x1, x2) + @test all(gr3 .≈ gr) +end + +function ftestsqrt(x) + is_pos = x .>= zero(x) + sqrt.(is_pos .* x) .+ .!is_pos .* convert(eltype(x), NaN) +end +@testset "RRuleMonitor non-finite" begin + x = collect(2.0:-1.0:-1.0) + y = ftestsqrt(x) + m = RRuleMonitor("ftestsqrt", ftestsqrt) + y2 = m(x) + @test isequal(y2,y) + gr = Zygote.gradient(x -> sum(ftestsqrt(x)), x)[1] + @test_throws ErrorException Zygote.gradient(x -> sum(m(x)), x) + #@test isequal(gr2,gr) + # + md = RRuleMonitor("ftestsqrt", ftestsqrt, DI.AutoForwardDiff()) + @test_throws ErrorException Zygote.gradient(x -> sum(md(x)), x) +end + + + diff --git a/test/test_bijectors_utils.jl b/test/test_bijectors_utils.jl index 01a62f7..bc5c8de 100644 --- a/test/test_bijectors_utils.jl +++ b/test/test_bijectors_utils.jl @@ -1,6 +1,7 @@ using Test using HybridVariationalInference using HybridVariationalInference: HybridVariationalInference as CP +using StatsFuns using Bijectors @@ -51,6 +52,21 @@ end; @test dys == dy end; +@testset "Logistic" begin + c3 = HybridVariationalInference.Logistic() + c3s = Stacked((c3,c3), (1:3,4:4)) + y1 = @inferred c3(x) + y2 = @inferred c3s(x) + @test all(inverse(c3)(y2) .≈ x) + @test all(inverse(c3s)(y2) .≈ x) + # test logabsdetjac + gr = Zygote.gradient(x -> sum(logistic.(x)), x)[1] + logjac = Bijectors.logabsdetjac(c3s, x) + @test logjac ≈ sum(log.(gr)) + y2b, logjac2= Bijectors.with_logabsdet_jacobian(c3s, x) + @test y2b == y2 + @test logjac2== logjac +end; if gdev isa MLDataDevices.AbstractGPUDevice xd = gdev(x) diff --git a/test/test_doubleMM.jl b/test/test_doubleMM.jl index b4d6936..60a36e1 100644 --- a/test/test_doubleMM.jl +++ b/test/test_doubleMM.jl @@ -91,7 +91,10 @@ end # yg = CP.DoubleMM.f_doubleMM(θg, xPMg, intθ); θvecg = gdev(θvec); # errors without ";" xPMg = CP.apply_preserve_axes(gdev, xPM); + yg = fy(θvecg, xPMg) yg = @inferred fy(θvecg, xPMg); + #@usingany Cthulhu + #@descend_code_warntype fy(θvecg, xPMg) @test cdev(yg) == y_exp ygradg = Zygote.gradient(θv -> sum(fy(θv, xPMg)), θvecg)[1]; @test ygradg isa CA.ComponentArray @@ -128,10 +131,10 @@ end # θg = gdev(θ) # xPMg = gdev(xPM) # yg = CP.DoubleMM.f_doubleMM(θg, xPMg, intθ); - θvecg = gdev(θvec) - xPMg = gdev(xPM) - y_og = gdev(y_o) - y_uncg = gdev(y_unc) + θvecg = gdev(θvec); + xPMg = gdev(xPM); + y_og = gdev(y_o); + y_uncg = gdev(y_unc); costg = fcost(θvecg, xPMg, y_og, y_uncg) @test costg ≈ cost ygradg = Zygote.gradient(θv -> fcost(θv, xPMg, y_og, y_uncg), θvecg)[1]; # errors without ";" @@ -197,8 +200,8 @@ end g, ϕg0 = get_hybridproblem_MLapplicator(prob; scenario) (; transP, transM) = get_hybridproblem_transforms(prob; scenario) n_site, n_site_batch = get_hybridproblem_n_site_and_batch(prob; scenario) - f = get_hybridproblem_PBmodel(prob; scenario, use_all_sites = false) - f2 = 
get_hybridproblem_PBmodel(prob; scenario, use_all_sites = true) + f = get_hybridproblem_PBmodel(prob; scenario) + f2 = create_nsite_applicator(f, n_site) py = get_hybridproblem_neg_logden_obs(prob; scenario) priors = get_hybridproblem_priors(prob; scenario) priorsP = [priors[k] for k in keys(par_templates.θP)] @@ -223,14 +226,14 @@ end pbm_covars, n_site_batch = n_batch, priorsP, priorsM) loss_gf_site = get_loss_gf(g, transM, transP, f2, py, intϕ; pbm_covars, n_site_batch = n_site, priorsP, priorsM) - nLjoint = @inferred first(loss_gf(p0, first(train_loader)...)) + nLjoint = @inferred first(loss_gf(p0, first(train_loader)...; is_testmode=true)) (xM_batch, xP_batch, y_o_batch, y_unc_batch, i_sites_batch) = first(train_loader) # @usingany Cthulhu # @descend_code_warntype loss_gf(p0, xM_batch, xP_batch, y_o_batch, y_unc_batch, i_sites_batch) Zygote.gradient( p0 -> first(loss_gf( - p0, xM_batch, xP_batch, y_o_batch, y_unc_batch, i_sites_batch)), CA.getdata(p0)) - optf = Optimization.OptimizationFunction((ϕ, data) -> first(loss_gf(ϕ, data...)), + p0, xM_batch, xP_batch, y_o_batch, y_unc_batch, i_sites_batch; is_testmode=false)), CA.getdata(p0)) + optf = Optimization.OptimizationFunction((ϕ, data) -> first(loss_gf(ϕ, data...; is_testmode=false)), Optimization.AutoZygote()) optprob = OptimizationProblem(optf, CA.getdata(p0), train_loader) @@ -239,7 +242,7 @@ end optprob, Adam(0.02), maxiters = 2000) (;nLjoint_pen, y_pred, θMs_pred, θP_pred, nLy, neg_log_prior, loss_penalty) = loss_gf_site( - res.u, train_loader.data...) + res.u, train_loader.data...; is_testmode=true) #(nLjoint, y_pred, θMs_pred, θP, nLy, neg_log_prior, loss_penalty) = loss_gf(p0, xM, xP, y_o, y_unc); θMs_pred = CA.ComponentArray(θMs_pred, CA.getaxes(θMs_true')) #TODO @test isapprox(par_templates.θP, intϕ(res.u).ϕP, rtol = 0.15) diff --git a/test/test_elbo.jl b/test/test_elbo.jl index 022d99a..14bd99d 100644 --- a/test/test_elbo.jl +++ b/test/test_elbo.jl @@ -54,8 +54,8 @@ test_scenario = (scenario) -> begin # prediction by g(ϕg, XM) does not correspond to θMs_true, randomly initialized # only the magnitude is there because of NormalScaling and prior g, ϕg0 = get_hybridproblem_MLapplicator(probc; scenario) - f = get_hybridproblem_PBmodel(probc; scenario, use_all_sites=false) - f_pred = get_hybridproblem_PBmodel(probc; scenario, use_all_sites=true) + f = get_hybridproblem_PBmodel(probc; scenario) + f_pred = create_nsite_applicator(f, n_site) n_θM, n_θP = values(map(length, par_templates)) @@ -99,7 +99,7 @@ test_scenario = (scenario) -> begin CP.generate_ζ( rng, g, ϕ_ini, xM[:, 1:n_batch]; n_MC, cor_ends, pbm_covar_indices, - int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc) + int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc, is_testmode = false) ) @testset "generate_ζ $(last(CP._val_value(scenario)))" begin @@ -116,7 +116,8 @@ test_scenario = (scenario) -> begin _ζsP, _ζsMs, _σ = CP.generate_ζ( rng, g, ϕ, xM[:, 1:n_batch]; n_MC=8, cor_ends, pbm_covar_indices, - int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc) + int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc, + is_testmode = true) sum(_ζsP) + sum(_ζsMs) + sum(_σ) end, CA.getdata(ϕ_ini)) @test gr[1] isa Vector @@ -156,7 +157,8 @@ test_scenario = (scenario) -> begin CP.generate_ζ( rng, g, _ϕ, xM_batch; n_MC = n_predict, cor_ends, pbm_covar_indices, - int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc) + int_unc=interpreters.unc, int_μP_ϕg_unc=interpreters.μP_ϕg_unc, + is_testmode = true) ) ζMs_g = g(xM_batch, 
+@testset "repeat_rowvector_dummy" begin
+    x = collect(1.0:5.0)
+    is_dummy = [false, false, true, true]
+    xm = CP.repeat_rowvector_dummy(x', is_dummy)
+    @test xm[1,:] == xm[2,:] == x
+    @test all(isnan.(xm[3:4,:]))
+    #
+    gr = Zygote.gradient(x -> sum(CP.repeat_rowvector_dummy(x', is_dummy)), x)[1]
+    @test gr == fill(2.0, size(x))
+    gr = Zygote.gradient(x -> sum(CP.repeat_rowvector_dummy(x', is_dummy).^2), x)[1]
+    @test gr == 2 .*2 .*x
+    gr = Zygote.gradient(x -> sum(CP.repeat_rowvector_dummy(x', is_dummy) .* 2.0), x)[1]
+    @test gr == fill(2*2.0,5)
+    gr = Zygote.gradient(x -> sum(abs2, CP.repeat_rowvector_dummy(x', is_dummy)), x)[1]
+    @test gr == 2 .*2 .*x
+    gr = Zygote.gradient(x -> sum(sin, CP.repeat_rowvector_dummy(x', is_dummy)), x)[1]
+    @test gr == 2 .* cos.(x)
+    gr = (Zygote.gradient(x) do x
+        y = CP.repeat_rowvector_dummy(x', is_dummy)
+        z = sum(abs2, y[1,:]) + 3*sum(y[2,:])
+    end)[1]
+    @test gr == 2 .*x .+ 3
+    #
+    gdev = gpu_device()
+    cdev = cpu_device()
+    x_dev = gdev(x)
+    is_dummy_dev = gdev(is_dummy)
+    #is_dummy_dev = x_dev .>= 4.0
+    xm_dev = CP.repeat_rowvector_dummy(x_dev', is_dummy_dev)
+    gr = Zygote.gradient(x -> sum(sin, CP.repeat_rowvector_dummy(x', is_dummy_dev)), x_dev)[1];
+    @test cdev(gr) ≈ 2 .* cos.(x)
+end
+
+@testset "repeat_rowvector_dummymatrix" begin
+    x = collect(1.0:5.0)
+    is_dummy = fill(false, (4, length(x)))
+    is_dummy[3:4, 2] .= true
+    xm = CP.repeat_rowvector_dummy(x', is_dummy)
+    @test xm[:,[1,3,4,5]] == repeat(x[[1,3,4,5]]', 4)
+    @test xm[1:2,2] == fill(x[2], 2)
+    @test all(isnan.(xm[3:4,2]))
+    #
+    #tmp = Zygote.gradient(is_dummy -> sum(repeat_rowvector_dummy(x', is_dummy)), is_dummy)
+    gr = Zygote.gradient(x -> sum(CP.repeat_rowvector_dummy(x', is_dummy)), x)[1]
+    @test gr == [4,2,4,4,4.0]
+    gr = Zygote.gradient(x -> sum(CP.repeat_rowvector_dummy(x', is_dummy).^2), x)[1]
+    @test gr == [4,2,4,4,4.0] .* 2.0 .*x
+    gr = Zygote.gradient(x -> sum(sin, CP.repeat_rowvector_dummy(x', is_dummy)), x)[1]
+    @test gr == [4,2,4,4,4.0] .* cos.(x)
+    gr = (Zygote.gradient(x) do x
+        y = CP.repeat_rowvector_dummy(x', is_dummy)
+        z = sum(abs2, y[1,:]) + 3*sum(y[2:end,:])
+    end)[1]
+    @test gr == 2 .* x .+ [3,1,3,3,3.0] .* 3
+    #
+    x_dev = gdev(x)
+    is_dummy_dev = gdev(is_dummy)
+    #is_dummy_dev = x_dev .>= 4.0
+    xm_dev = CP.repeat_rowvector_dummy(x_dev', is_dummy_dev)
+    #tmp = Zygote.gradient(is_dummy -> sum(repeat_rowvector_dummy(x', is_dummy)), is_dummy)
+    gr = Zygote.gradient(x -> sum(sin, CP.repeat_rowvector_dummy(x', is_dummy_dev)), x_dev)[1];
+    @test cdev(gr) ≈ [4,2,4,4,4.0] .* cos.(x)
+end
+
+
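+# test_driverNaN fits a HybridProblem under a :driverNAN scenario (some driver
+# values missing) and checks that the optimized parameters and the predictions
+# at observed positions stay finite.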
+function test_driverNaN(scenario::Val{scen}) where scen
+    prob = HybridProblem(DoubleMM.DoubleMMCase(); scenario);
+    if (:use_rangescaling ∈ scen)
+        @test prob.g isa RangeScalingModelApplicator
+    end
+    solver_point = HybridPointSolver(; alg=Adam(0.02))
+    rng = StableRNG(111)
+    (;ϕ, resopt, probo) = solve(prob, solver_point; rng,
+        #callback = callback_loss(100), # output during fitting
+        #callback = callback_loss(10), # output during fitting
+        epochs = 2,
+        is_omit_priors = (:f_on_gpu ∈ scen), # prior computation does not work on gpu
+        scenario,
+    );
+    @test all(isfinite.(ϕ))
+    (;y_pred, θMs, θP) = predict_point_hvi(rng, probo; scenario);
+    _,_,y_obs,_ = get_hybridproblem_train_dataloader(prob; scenario).data
+    @test size(y_pred) == size(y_obs)
+    y_predc = cdev(y_pred)
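+    # predictions may be NaN where drivers are missing, but they should be
+    # finite wherever the observations are finite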
+    @test all(isfinite.(y_predc[isfinite.(y_obs)]))
+    #
+    # takes long; only activate when results look suspicious
+    () -> begin
+        solver = HybridPosteriorSolver(; alg=Adam(0.02), n_MC=3)
+        (; probo, interpreters) = solve(prob, solver; rng,
+            callback = callback_loss(10), # output during fitting
+            epochs = 2,
+            scenario,
+        );
+        @test all(isfinite.(probo.θP))
+        n_sample_pred = 12
+        (; y, θsP, θsMs, entropy_ζ) = predict_hvi(rng, probo; scenario, n_sample_pred);
+        @test size(y) == (size(y_pred)..., n_sample_pred)
+        yc = cdev(y)
+        _ = map(eachslice(yc; dims = 3)) do ycs
+            @test all(isfinite.(ycs[isfinite.(y_obs)]))
+        end
+    end
+end
+
+@testset "HybridPointSolver driverNaN cpu" begin
+    scenario = Val((:driverNAN,))
+    test_driverNaN(scenario)
+end
+
+# @testset "HybridPointSolver driverNaN ML on gpu" begin
+#     scenario = Val((:driverNAN, :use_Lux, :use_gpu))
+#     test_driverNaN(scenario)
+# end
+
+@testset "HybridPointSolver driverNaN also PBM on gpu" begin
+    scenario = Val((:driverNAN, :use_rangescaling, :use_Lux, :use_dropout, :use_gpu, :f_on_gpu))
+    test_driverNaN(scenario)
+end
+
diff --git a/test/test_util_ca.jl b/test/test_util_ca.jl
index 1370c9c..a4871ad 100644
--- a/test/test_util_ca.jl
+++ b/test/test_util_ca.jl
@@ -6,7 +6,7 @@ using ComponentArrays: ComponentArrays as CA
 @testset "compose_axes" begin
     @test (@inferred CP._add_interval(;ranges=(Val(1:3),), length = Val(2))) == (Val(1:3), Val(4:5))
     ls = Val.((3,1,2))
-    @test (@inferred CP._construct_invervals(;lengths=ls)) == Val.((1:3, 4:4, 5:6))
+    @test (@inferred CP._construct_intervals(;lengths=ls)) == Val.((1:3, 4:4, 5:6))
     v1 = CA.ComponentVector(A=1:3)
     v2 = CA.ComponentVector(B=1:2)
     v3 = CA.ComponentVector(P=(x=1, y=2), Ms=zeros(3,2))
diff --git a/test/test_util_gpu.jl b/test/test_util_gpu.jl
index 04a2b8e..71ebec8 100644
--- a/test/test_util_gpu.jl
+++ b/test/test_util_gpu.jl
@@ -1,25 +1,25 @@
 using Test
 using HybridVariationalInference: HybridVariationalInference as HVI
-using ComponentArrays
+using ComponentArrays: ComponentArrays as CA
 using MLDataDevices
 import CUDA, cuDNN
 using FillArrays
 
-@testset "ones_similar_x" begin 
+@testset "ones_similar_x" begin
     A = rand(Float64, 3, 4);
-    @test HVI.ones_similar_x(A, 3) isa FillArrays.AbstractFill #Vector
-    @test HVI.ones_similar_x(A, size(A,1)) isa FillArrays.AbstractFill #Vector#Vector
+    @test @inferred HVI.ones_similar_x(A, 3) isa FillArrays.AbstractFill #Vector
+    @test @inferred HVI.ones_similar_x(A, size(A,1)) isa FillArrays.AbstractFill #Vector
 end
 
 gdev = gpu_device()
 if gdev isa MLDataDevices.CUDADevice
     @testset "ones_similar_x" begin
         B = CUDA.rand(Float32, 5, 2); # GPU matrix
-        @test HVI.ones_similar_x(B, size(B,1)) isa CUDA.CuArray
-        @test HVI.ones_similar_x(ComponentVector(b=B), size(B,1)) isa CUDA.CuArray
-        @test HVI.ones_similar_x(B', size(B,1)) isa CUDA.CuArray
-        @test HVI.ones_similar_x(@view(B[:,2]), size(B,1)) isa CUDA.CuArray
-        @test HVI.ones_similar_x(ComponentVector(b=B)[:,1], size(B,1)) isa CUDA.CuArray
+        @test @inferred HVI.ones_similar_x(B, size(B,1)) isa CUDA.CuArray
+        @test @inferred HVI.ones_similar_x(CA.ComponentVector(b=B), size(B,1)) isa CUDA.CuArray
+        @test @inferred HVI.ones_similar_x(B', size(B,1)) isa CUDA.CuArray
+        @test @inferred HVI.ones_similar_x(@view(B[:,2]), size(B,1)) isa CUDA.CuArray
+        @test @inferred HVI.ones_similar_x(CA.ComponentVector(b=B)[:,1], size(B,1)) isa CUDA.CuArray
     end
 end