Skip to content

Commit

Permalink
per cell writing
Browse files Browse the repository at this point in the history
  • Loading branch information
k3yavi committed Aug 5, 2020
1 parent 0eecc2d commit 8aa0395
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
1 change: 1 addition & 0 deletions libradicl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ needletail = "0.4"
num-format = "0.4.0"
statrs = "0.13.0"
rand = "0.7.3"
flate2 = "1.0.16"
sprs = { git = "https://github.com/k3yavi/sprs" } #"0.8"
sce = { git = "https://github.com/k3yavi/SingleCellExperiment" }
52 changes: 32 additions & 20 deletions libradicl/src/quant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ use std::io::{BufReader, BufWriter};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};

use flate2::write::GzEncoder;
use flate2::Compression;

use self::libradicl::em::em_optimize;
use self::libradicl::pugutils;
use self::libradicl::schema::{EqMap, PUGEdgeType, ResolutionStrategy};
Expand Down Expand Up @@ -365,17 +368,22 @@ pub fn quantify(
// well need a protected handle to write out the barcode
let bc_path = output_path.join("barcodes.txt");
let bc_file = fs::File::create(bc_path)?;
let bc_writer = Arc::new(Mutex::new(BufWriter::new(bc_file)));

let mat_path = output_path.join("counts.eds.gz");
let buffered = BufWriter::new(fs::File::create(mat_path)?);
let mat_file = GzEncoder::new(buffered, Compression::default());

let bc_writer = Arc::new(Mutex::new((BufWriter::new(bc_file), mat_file)));

// TODO: guess capacity better
// TODO: in the future, we may not want to hold the
// entire triplet matrix in memory at once?
// @k3yavi: Changing this to usize for testing
// this handle is protected since threads will need to update it
let omat = Arc::new(Mutex::new(TriMatI::<f32, usize>::new((
num_genes,
hdr.num_chunks as usize,
))));
//let omat = Arc::new(Mutex::new(TriMatI::<f32, usize>::new((
// num_genes,
// hdr.num_chunks as usize,
//))));

// for each worker, spawn off a thread
for _worker in 0..n_workers {
Expand All @@ -391,7 +399,7 @@ pub fn quantify(
let bc_type = bc_type;
let umi_type = umi_type;
// will need a shared handle to the count matrix
let omatrix = Arc::clone(&omat);
//let omatrix = Arc::clone(&omat);
// and the barcode file
let bcout = bc_writer.clone();
// and will need to know the barcode length
Expand All @@ -404,6 +412,8 @@ pub fn quantify(
let mut unique_evidence = vec![false; num_genes];
let mut no_ambiguity = vec![false; num_genes];
let mut eq_map = EqMap::new(ref_count);
let mut omat = TriMatI::<f32, usize>::new((num_genes, 1));

// pop from the work queue until everything is
// processed
while cells_remaining.load(Ordering::SeqCst) > 0 {
Expand Down Expand Up @@ -469,20 +479,22 @@ pub fn quantify(
// done clearing

// update the matrix
{
let mut omat = omatrix.lock().unwrap();
for (i, v) in counts.iter().enumerate() {
if *v > 0.0 {
omat.add_triplet(i, cell_num, *v);
}
for (i, v) in counts.into_iter().enumerate() {
if v > 0.0 {
omat.add_triplet(i, cell_num, v);
}
}

// write to barcode file
{
let bc_mer: BitKmer = (bc, bclen as u8);
let mut bc_writer = bcout.lock().unwrap();
writeln!(&mut bc_writer, "{}\t{}", cell_num, unsafe {

// writing the files
let writer = &mut *bcout.lock().unwrap();
sce::eds::append_writer(&mut writer.1, &omat.to_csr())
.expect("can't write matrix");

// write to barcode file
writeln!(&mut writer.0, "{}\t{}", cell_num, unsafe {
std::str::from_utf8_unchecked(&bitmer_to_bytes(bc_mer)[..])
})
.expect("can't write to barcode file.");
Expand Down Expand Up @@ -523,10 +535,10 @@ pub fn quantify(
// waiting
}

let mat_path = output_path.join("counts.eds.gz");
{
let omat = omat.lock().unwrap();
sce::eds::writer(&mat_path, &omat.to_csr())?;
}
//let mat_path = output_path.join("counts.eds.gz");
//{
// let omat = omat.lock().unwrap();
// sce::eds::writer(&mat_path, &omat.to_csr())?;
//}
Ok(())
}

0 comments on commit 8aa0395

Please sign in to comment.