Skip to content

Commit

Permalink
format
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Aug 11, 2020
1 parent 12e266a commit c5e8aa0
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 45 deletions.
60 changes: 31 additions & 29 deletions libradicl/src/pugutils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ where
/// equivalence class labels of all vertices in the arboresence).
fn collapse_vertices(
v: u32,
vset: &HashSet::<u32>, // the set of vertices already covered
vset: &HashSet<u32>, // the set of vertices already covered
g: &petgraph::graphmap::GraphMap<(u32, u32), (), petgraph::Directed>,
eqmap: &EqMap,
) -> (Vec<u32>, u32) {
Expand Down Expand Up @@ -110,14 +110,14 @@ fn collapse_vertices(
let n = g.to_index(nv) as u32;

// check if we should add this vertex or not
// vset contains the the current set of
// *uncovered* vertices in this component (i.e. those
// vset contains the the current set of
// *uncovered* vertices in this component (i.e. those
// that we still need to explain by some molecule).
// so, if n is *not* in vset, then it is not in the
// uncovered set, and so it has already been
// so, if n is *not* in vset, then it is not in the
// uncovered set, and so it has already been
// explained / covered.
//
// if n hasn't been covered yet, then
//
// if n hasn't been covered yet, then
// check if we've seen n in this traversal
// yet. The `insert()` method returns true
// if the set didn't have the element, false
Expand Down Expand Up @@ -327,44 +327,44 @@ pub(super) fn get_num_molecules_trivial_discard_all_ambig(
counts
}

/// given the connected component (subgraph) of `g` defined by the
/// vertices in `vertex_ids`, apply the cell-ranger-like algorithm
/// given the connected component (subgraph) of `g` defined by the
/// vertices in `vertex_ids`, apply the cell-ranger-like algorithm
/// within this subgraph.
fn get_num_molecules_large_component(
g: &petgraph::graphmap::GraphMap<(u32, u32), (), petgraph::Directed>,
eq_map: &EqMap,
vertex_ids: &[u32],
eq_map: &EqMap,
vertex_ids: &[u32],
tid_to_gid: &[u32],
num_genes: usize,
log: &slog::Logger
log: &slog::Logger,
) -> Vec<u32> {
let mut counts = vec![0u32; num_genes];

// TODO: better capacity
let mut umi_gene_count_vec: Vec<(u64, u32, u32)> = vec![];

// build a temporary hashmap from each
// equivalence class id in the current subgraph
// to the set of (UMI, frequency) pairs contained
// build a temporary hashmap from each
// equivalence class id in the current subgraph
// to the set of (UMI, frequency) pairs contained
// in the subgraph
let mut tmp_map = HashMap::<u32, Vec<(u64, u32)>>::new();

// for each vertex id in the subgraph
for vertex_id in vertex_ids {
// get the corresponding vertex which is
// get the corresponding vertex which is
// an (eq_id, UMI index) pair
let vert = g.from_index(*vertex_id as usize);
// add the corresponding (UMI, frequency) pair to the map
// add the corresponding (UMI, frequency) pair to the map
// for this eq_id
let umis = tmp_map.entry(vert.0).or_insert_with(Vec::new);
umis.push( eq_map.eqc_info[vert.0 as usize].umis[vert.1 as usize] );
umis.push(eq_map.eqc_info[vert.0 as usize].umis[vert.1 as usize]);
}

for (k, v) in tmp_map.iter() {
// get the (umi, count) pairs
let umis = v;//&eqinfo.umis;
let eqid = k;//&eqinfo.eq_num;
// project the transcript ids to gene ids
let umis = v; //&eqinfo.umis;
let eqid = k; //&eqinfo.eq_num;
// project the transcript ids to gene ids
let mut gset: Vec<u32> = eq_map
.refs_for_eqc(*eqid)
.iter()
Expand Down Expand Up @@ -476,7 +476,6 @@ fn get_num_molecules_large_component(
counts
}


/// Given the digraph `g` representing the PUGs within the current
/// cell, the EqMap `eqmap` to decode all equivalence classes
/// and the transcript-to-gene map `tid_to_gid`, apply the parsimonious
Expand All @@ -497,7 +496,7 @@ pub(super) fn get_num_molecules(

/*
if petgraph::algo::is_cyclic_directed(g) {
warn!(log, "\n\ngraph contains a cycle!\n\n");
warn!(log, "\n\ngraph contains a cycle!\n\n");
}
*/

Expand Down Expand Up @@ -532,12 +531,15 @@ pub(super) fn get_num_molecules(

for (_comp_label, comp_verts) in comps.iter() {
if comp_verts.len() > 1 {

if comp_verts.len() > 1000 {
warn!(log, "\n\nfound connected component with {} vertices\n\n", comp_verts.len());
let gene_increments =
get_num_molecules_large_component(
g, eqmap, comp_verts, tid_to_gid, num_genes, log);
warn!(
log,
"\n\nfound connected component with {} vertices\n\n",
comp_verts.len()
);
let gene_increments = get_num_molecules_large_component(
g, eqmap, comp_verts, tid_to_gid, num_genes, log,
);
for (gn, val) in gene_increments.iter().enumerate() {
if *val > 0 {
let e = gene_eqclass_hash.entry(vec![gn as u32]).or_insert(0);
Expand All @@ -546,11 +548,11 @@ pub(super) fn get_num_molecules(
}
continue;
}

// vset will hold the set of vertices that are
// covered.
let mut vset = HashSet::<u32>::from_iter(comp_verts.iter().cloned());


// we will remove covered vertices from vset until they are
// all gone (until all vertices have been covered)
while !vset.is_empty() {
Expand Down
46 changes: 30 additions & 16 deletions libradicl/src/quant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crossbeam_queue::ArrayQueue;
// use fasthash::sea;
use needletail::bitkmer::*;
use scroll::Pwrite;
use smallvec::{SmallVec, smallvec};
use smallvec::{smallvec, SmallVec};
use std::collections::HashMap;
use std::fs;
use std::fs::File;
Expand All @@ -44,15 +44,15 @@ use self::libradicl::utils::*;

/// Extracts the parsimonious UMI graphs (PUGs) from the
/// equivalence class map for a given cell.
/// The returned graph is a directed graph (potentially with
/// bidirected edges) where each node consists of an (equivalence
/// class, UMI ID) pair. Note, crucially, that the UMI ID is simply
/// the rank of the UMI in the list of all distinct UMIs for this
/// equivalence class. There is a directed edge between any pair of
/// The returned graph is a directed graph (potentially with
/// bidirected edges) where each node consists of an (equivalence
/// class, UMI ID) pair. Note, crucially, that the UMI ID is simply
/// the rank of the UMI in the list of all distinct UMIs for this
/// equivalence class. There is a directed edge between any pair of
/// vertices whose set of transcripts overlap and whose UMIs are within
/// a Hamming distance of 1 of each other. If one node has more than
/// twice the frequency of the other, the edge is directed from the
/// more frequent to the less freuqent node. Otherwise, edges are
/// a Hamming distance of 1 of each other. If one node has more than
/// twice the frequency of the other, the edge is directed from the
/// more frequent to the less freuqent node. Otherwise, edges are
/// added in both directions.
fn extract_graph(
eqmap: &EqMap,
Expand Down Expand Up @@ -87,7 +87,7 @@ fn extract_graph(

let mut graph = DiGraphMap::<(u32, u32), ()>::new();
let mut hset = Vec::with_capacity(eqmap.num_eq_classes());
let mut idxvec : SmallVec<[u32; 128]> = SmallVec::new();
let mut idxvec: SmallVec<[u32; 128]> = SmallVec::new();

// insert all of the nodes up front to avoid redundant
// checks later.
Expand Down Expand Up @@ -158,10 +158,14 @@ fn extract_graph(

//hset.clear();
//hset.resize(eqmap.num_eq_classes(), 0u8);
for i in &idxvec { hset[*i as usize] = 0u8; }
for i in &idxvec {
hset[*i as usize] = 0u8;
}
let stf = idxvec.len() > 128;
idxvec.clear();
if stf { idxvec.shrink_to_fit(); }
if stf {
idxvec.shrink_to_fit();
}

// for every reference id in this eq class
for r in eqmap.refs_for_eqc(eqid as u32) {
Expand Down Expand Up @@ -457,8 +461,13 @@ pub fn quantify(
}
ResolutionStrategy::Parsimony => {
let g = extract_graph(&eq_map, &log);
let gene_eqc =
pugutils::get_num_molecules(&g, &eq_map, &tid_to_gid, num_genes, &log);
let gene_eqc = pugutils::get_num_molecules(
&g,
&eq_map,
&tid_to_gid,
num_genes,
&log,
);
counts = em_optimize(
&gene_eqc,
&mut unique_evidence,
Expand All @@ -470,8 +479,13 @@ pub fn quantify(
}
ResolutionStrategy::Full => {
let g = extract_graph(&eq_map, &log);
let gene_eqc =
pugutils::get_num_molecules(&g, &eq_map, &tid_to_gid, num_genes, &log);
let gene_eqc = pugutils::get_num_molecules(
&g,
&eq_map,
&tid_to_gid,
num_genes,
&log,
);
counts = em_optimize(
&gene_eqc,
&mut unique_evidence,
Expand Down

0 comments on commit c5e8aa0

Please sign in to comment.