diff --git a/Cargo.lock b/Cargo.lock index 4bdd418..9d977fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 4 [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "approx" @@ -23,6 +23,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + [[package]] name = "bytemuck" version = "1.23.0" @@ -35,6 +41,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "console" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -66,6 +85,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "getrandom" version = "0.2.16" @@ -77,6 +102,30 @@ dependencies = [ "wasi", ] +[[package]] +name = "indicatif" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +dependencies = [ + "console", + "portable-atomic", + "rayon", + "unicode-width", + "unit-prefix", + "web-time", +] + +[[package]] +name = "js-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "libc" version = "0.2.172" @@ -203,6 +252,12 @@ dependencies = [ "libm", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "paste" version = "1.0.15" @@ -317,6 +372,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "safe_arch" version = "0.7.4" @@ -341,10 +402,11 @@ dependencies = [ [[package]] name = "single-statistics" -version = "0.8.1" +version = "0.8.3" dependencies = [ "anyhow", "approx", + "indicatif", "nalgebra-sparse", "ndarray", "num-traits", @@ -355,10 +417,11 @@ dependencies = [ [[package]] name = "single-utilities" -version = "0.8.0" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf61326d740f7d7d144d2392dcff9278474af5fa67d37f643df927cba5ec09dc" +checksum = "cfb3b4e51b9b3ad56cdb99c7dc626d00c388a13287eedd5e35c2483dd59c4b31" dependencies = [ + "anyhow", "num-traits", ] @@ -397,12 +460,79 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unit-prefix" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wide" version = "0.7.32" @@ -413,6 +543,21 @@ dependencies = [ "safe_arch", ] +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + [[package]] name = "zerocopy" version = "0.8.25" diff --git a/Cargo.toml b/Cargo.toml index dd56e9f..e3068c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "single-statistics" -version = "0.8.1" +version = "0.8.3" edition = "2024" license-file = "LICENSE.md" readme = "README.md" @@ -9,14 +9,17 @@ authors = ["Ian F. Diks"] repository = "https://github.com/SingleRust/single-statistics" homepage = "https://singlerust.com" +[features] +enrichment = [] [dependencies] anyhow = "1.0.98" +indicatif = {version = "0.18.2", features = ["rayon"]} nalgebra-sparse = "0.10.0" ndarray = {version = "0.16.1", features = ["rayon"]} num-traits = "0.2.19" rayon = "1.10.0" -single-utilities = "0.8.0" +single-utilities = "0.8.7" statrs = "0.18.0" [dev-dependencies] diff --git a/src/enrichment/aucell.rs b/src/enrichment/aucell.rs index f528ab1..924a5bd 100644 --- a/src/enrichment/aucell.rs +++ b/src/enrichment/aucell.rs @@ -1,2 +1,215 @@ +use std::collections::HashMap; + +use anyhow::anyhow; +use indicatif::ParallelProgressIterator; +use nalgebra_sparse::csc::CscCol; +use nalgebra_sparse::{CscMatrix, CsrMatrix, csr::CsrRow}; +use ndarray::Array2; +use rayon::iter::{ParallelBridge, ParallelIterator}; +use single_utilities::traits::FloatOpsTS; +use single_utilities::types::PathwayNetwork; + // Following the general implementation presented here, But adapted to nalgebra_sparse and multithreading: https://github.com/scverse/decoupler/blob/main/src/decoupler/mt/_aucell.py +fn validate_n_up( + n_var: usize, + n_up_abs: Option, + n_up_frac: Option, +) -> anyhow::Result { + match (n_up_abs, n_up_frac) { + (None, None) => { + let mut nup = (n_var as f32 * 0.05).ceil() as usize; + nup = nup.max(n_var).min(2); + Ok(nup) + } + (None, Some(x)) => { + let frac = (x * n_var as f32).ceil() as usize; + Ok(frac.max(n_var).min(2)) + } + (Some(x), None) => Ok(x.max(n_var).min(2)), + (Some(_), Some(_)) => Err(anyhow!( + "Cannot define both, n_up_abs AND n_up_frac, only one of them can be defined." + )), + } +} + +fn au_cell_internal( + all_values: Vec<(usize, f32)>, + pathway_network: &PathwayNetwork, + n_up: usize, + n_src: usize, +) -> anyhow::Result> { + let mut rank_map: HashMap = HashMap::new(); + for (rank, (idx, _)) in all_values.iter().enumerate() { + rank_map.insert(*idx, rank + 1); + } + + // temporarily no paralellization here to prevent nesting... + let mut v: Vec<(usize, f32)> = (0..n_src) + .map(|j| { + // dont know if we should actually parallelize here! + let functional_set = pathway_network.get_pathway_features(j); + + let x_th = 1..=functional_set.len(); + let x_th: Vec = x_th.filter(|&v| v < n_up).collect(); + + let max_auc: f32 = x_th + .iter() + .enumerate() + .map(|(i, &val)| { + let next = if i < x_th.len() - 1 { + x_th[i + 1] as f32 + } else { + n_up as f32 + }; + (next - val as f32) * val as f32 + }) + .sum(); + + let mut x: Vec = functional_set + .iter() + .filter_map(|&idx| rank_map.get(&idx).copied()) + .collect(); + + x.sort_unstable(); + x.retain(|&rank| rank <= n_up); + + let y: Vec = (1..=x.len()).map(|i| i as f32).collect(); + + let mut x_f32: Vec = x.iter().map(|&r| r as f32).collect(); + + x_f32.push(n_up as f32); + + let auc: f32 = x_f32 + .windows(2) + .zip(y.iter()) + .map(|(window, &y_val)| (window[1] - window[0]) * y_val) + .sum(); + let enrich_v = if max_auc > 0.0 { auc / max_auc } else { 0.0 }; + (j, enrich_v) + }) + .collect(); + + v.sort_unstable_by(|&a, b| a.0.cmp(&b.0)); + let v: Vec = v.iter().map(|a| a.1).collect(); + + Ok(v) +} + +fn au_cell_csr_row( + lane: CsrRow, + pathway_network: &PathwayNetwork, + n_up: usize, + n_src: usize, +) -> anyhow::Result> { + let mut all_values: Vec<(usize, f32)> = lane + .col_indices() + .iter() + .zip(lane.values().iter()) + .map(|(&idx, val)| (idx, val.to_f32().unwrap())) + .collect(); + + all_values.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + + au_cell_internal(all_values, pathway_network, n_up, n_src) +} + +fn au_cell_csc_row( + lane: CscCol, + pathway_network: &PathwayNetwork, + n_up: usize, + n_src: usize, +) -> anyhow::Result> { + let mut all_values: Vec<(usize, f32)> = lane + .row_indices() + .iter() + .zip(lane.values().iter()) + .map(|(&idx, val)| (idx, val.to_f32().unwrap())) + .collect(); + + all_values.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + + au_cell_internal(all_values, pathway_network, n_up, n_src) +} + +pub fn au_cell_csr( + matrix: &CsrMatrix, + pathway_network: &PathwayNetwork, + n_up_abs: Option, + n_up_frac: Option, + verbose: bool, +) -> anyhow::Result> { + let (n_obs, n_vars) = (matrix.nrows(), matrix.ncols()); + let n_src = pathway_network.get_num_pathways(); + let n_up = validate_n_up(n_vars, n_up_abs, n_up_frac)?; + + let res: anyhow::Result)>> = match verbose { + true => matrix + .row_iter() + .enumerate() + .par_bridge() + .progress_count(n_obs as u64) + .map(|(i, r)| { + let re = au_cell_csr_row(r, pathway_network, n_up, n_src)?; + Ok((i, re)) + }) + .collect(), + false => matrix + .row_iter() + .enumerate() + .par_bridge() + .map(|(i, r)| { + let re = au_cell_csr_row(r, pathway_network, n_up, n_src)?; + Ok((i, re)) + }) + .collect(), + }; + + let mut res = res?; + res.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + + let res: Vec = res.into_iter().flat_map(|(_, v)| v).collect(); + let array = Array2::from_shape_vec((n_obs, n_vars), res)?; + Ok(array) +} + +pub fn au_cell_csc( + matrix: CscMatrix, + pathway_network: &PathwayNetwork, + n_up_abs: Option, + n_up_frac: Option, + verbose: bool, +) -> anyhow::Result> { + let (n_obs, n_vars) = (matrix.ncols(), matrix.nrows()); + let n_src = pathway_network.get_num_pathways(); + let n_up = validate_n_up(n_vars, n_up_abs, n_up_frac)?; + + let res: anyhow::Result)>> = match verbose { + true => matrix + .col_iter() + .enumerate() + .par_bridge() + .progress_count(n_obs as u64) + .map(|(i, r)| { + let re = au_cell_csc_row(r, pathway_network, n_up, n_src)?; + Ok((i, re)) + }) + .collect(), + false => matrix + .col_iter() + .enumerate() + .par_bridge() + .map(|(i, r)| { + let re = au_cell_csc_row(r, pathway_network, n_up, n_src)?; + Ok((i, re)) + }) + .collect(), + }; + + let mut res = res?; + res.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + + let res: Vec = res.into_iter().flat_map(|(_, v)| v).collect(); + let array = Array2::from_shape_vec((n_obs, n_vars), res)?; + Ok(array) +} diff --git a/src/enrichment/mod.rs b/src/enrichment/mod.rs index 6f0882a..ac8b8bd 100644 --- a/src/enrichment/mod.rs +++ b/src/enrichment/mod.rs @@ -19,4 +19,7 @@ mod gsea; mod aucell; -mod ora; \ No newline at end of file +mod ora; +pub(crate) mod utils; + +pub use aucell::{au_cell_csc, au_cell_csr}; \ No newline at end of file diff --git a/src/enrichment/utils.rs b/src/enrichment/utils.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/enrichment/utils.rs @@ -0,0 +1 @@ + diff --git a/src/lib.rs b/src/lib.rs index 0c727c1..fe5947d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,4 +25,5 @@ //! - **[`enrichment`]**: Gene set enrichment analysis methods (GSEA, ORA, AUCell) pub mod testing; +#[cfg(feature = "enrichment")] pub mod enrichment; \ No newline at end of file