Skip to content

Commit

Permalink
use expected orientation (ori) in generate-permit-list and propagate it to collate via metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Aug 18, 2020
1 parent c921562 commit 30a37bc
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 53 deletions.
4 changes: 2 additions & 2 deletions libradicl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fasthash = "0.4.0"
slog = "2.5.2"
slog-term = "2.6.0"
slog-async = "2.5.0"
serde = "1.0.114"
serde = { version = "1.0.114", features = ["derive"] }
bincode = "1.3.1"
csv = "1.1.3"
indicatif = "0.15.0"
Expand All @@ -29,4 +29,4 @@ flate2 = "1.0.16"
smallvec = "1.4.1"
serde_json = "1.0.57"
sprs = "0.8.1"
sce = { git = "https://github.com/k3yavi/SingleCellExperiment" }
sce = { git = "https://github.com/k3yavi/SingleCellExperiment" }
18 changes: 17 additions & 1 deletion libradicl/src/cellfilter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ use self::slog::crit;
use self::slog::info;

use crate as libradicl;
use bio_types::strand::Strand;
use fasthash::sea::Hash64;
use fasthash::RandomState;
use libradicl::exit_codes;
use needletail::bitkmer::*;
use num_format::{Locale, ToFormattedString};
use serde_json::json;
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
Expand Down Expand Up @@ -175,6 +177,7 @@ pub fn generate_permit_list(
input_file: String,
output_dir: String,
filter_meth: CellFilterMethod,
expected_ori: Strand,
//top_k: Option<usize>,
//valid_bc_file: Option<String>,
//use_knee_distance: bool,
Expand Down Expand Up @@ -242,7 +245,7 @@ pub fn generate_permit_list(

for _ in 0..(hdr.num_chunks as usize) {
let c = libradicl::Chunk::from_bytes(&mut br, &bc_type, &umi_type);
libradicl::update_barcode_hist(&mut hm, &c);
libradicl::update_barcode_hist(&mut hm, &c, &expected_ori);
num_reads += c.reads.len();
}

Expand Down Expand Up @@ -349,6 +352,19 @@ pub fn generate_permit_list(
bincode::serialize_into(&mut s_writer, &full_permit_list)
.expect("couldn't serialize permit list.");

let meta_info = json!({
"expected_ori" : expected_ori.strand_symbol()
});

let m_path = parent.join("generate_permit_list.json");
let mut m_file = std::fs::File::create(&m_path).expect("could not create metadata file.");

let meta_info_string =
serde_json::to_string_pretty(&meta_info).expect("could not format json.");
m_file
.write_all(meta_info_string.as_bytes())
.expect("cannot write to generate_permit_list.json file");

info!(
log,
"total number of corrected barcodes : {}",
Expand Down
34 changes: 29 additions & 5 deletions libradicl/src/collate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ extern crate serde;
extern crate slog;

use self::indicatif::{ProgressBar, ProgressStyle};
use self::slog::info;
use self::slog::{crit, info};
use bio_types::strand::Strand;
use serde_json::json;
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
Expand All @@ -21,13 +22,35 @@ pub fn collate(
input_dir: String,
rad_file: String,
max_records: u32,
expected_ori: Strand,
//expected_ori: Strand,
log: &slog::Logger,
) -> Result<(), Box<dyn std::error::Error>> {
let parent = std::path::Path::new(&input_dir);

// open the metadata file and read the json
let meta_data_file = File::open(parent.join("generate_permit_list.json"))
.expect("could not open the generate_permit_list.json file.");
let mdata: serde_json::Value = serde_json::from_reader(meta_data_file)?;

// next line is ugly — should be a better way. We need a char to
// get the strand, so we get the correct field as a `str` then
// use the chars iterator and get the first char.
let ori_str: char = mdata["expected_ori"]
.as_str()
.unwrap()
.chars()
.next()
.unwrap();
let expected_ori = match Strand::from_char(&ori_str) {
Ok(s) => s,
Err(e) => {
crit!(log, "invalid metadata {}.", e);
std::process::exit(1);
}
};

let mut ofile = File::create(parent.join("map.collated.rad")).unwrap();
let i_file = File::open(&rad_file).unwrap();

let mut br = BufReader::new(i_file);

let hdr = libradicl::RADHeader::from_bytes(&mut br);
Expand All @@ -37,10 +60,11 @@ pub fn collate(

info!(
log,
"paired : {:?}, ref_count : {:?}, num_chunks : {:?}",
"paired : {:?}, ref_count : {:?}, num_chunks : {:?}, expected_ori : {:?}",
hdr.is_paired != 0,
hdr.ref_count,
hdr.num_chunks
hdr.num_chunks,
expected_ori
);
// file-level
let fl_tags = libradicl::TagSection::from_bytes(&mut br);
Expand Down
4 changes: 2 additions & 2 deletions libradicl/src/em.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,8 @@ pub fn run_bootstrap(
sample_var[i] = (alphas_square[i] / num_bootstraps as f32) - (mean_alpha * mean_alpha);
}

bootstraps.push(sample_mean.clone());
bootstraps.push(sample_var.clone());
bootstraps.push(sample_mean);
bootstraps.push(sample_var);
}

bootstraps
Expand Down
25 changes: 22 additions & 3 deletions libradicl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ impl ReadRecord {
rec.refs.push(v & utils::MASK_TOP_BIT_U32);
}
}

// make sure these are sorted in this step.
quickersort::sort(&mut rec.refs[..]);
rec
Expand Down Expand Up @@ -365,7 +366,6 @@ pub fn process_corrected_cb_chunk<T: Read>(
reader.read_exact(&mut buf).unwrap();
let _nbytes = buf.pread::<u32>(0).unwrap();
let nrec = buf.pread::<u32>(4).unwrap();

// for each record, read it
for _ in 0..(nrec as usize) {
let rr = ReadRecord::from_bytes_keep_ori(reader, &bct, &umit, expected_ori);
Expand Down Expand Up @@ -574,9 +574,28 @@ impl RADHeader {
pub fn update_barcode_hist(
hist: &mut HashMap<u64, u64, fasthash::RandomState<fasthash::sea::Hash64>>,
chunk: &Chunk,
expected_ori: &Strand,
) {
for r in &chunk.reads {
*hist.entry(r.bc).or_insert(0) += 1;
match expected_ori {
Strand::Unknown => {
for r in &chunk.reads {
*hist.entry(r.bc).or_insert(0) += 1;
}
}
Strand::Forward => {
for r in &chunk.reads {
if r.dirs.iter().any(|&x| x) {
*hist.entry(r.bc).or_insert(0) += 1;
}
}
}
Strand::Reverse => {
for r in &chunk.reads {
if r.dirs.iter().any(|&x| !x) {
*hist.entry(r.bc).or_insert(0) += 1;
}
}
}
}
}

Expand Down
6 changes: 4 additions & 2 deletions libradicl/src/quant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ extern crate slog;
use self::indicatif::{ProgressBar, ProgressStyle};
use self::petgraph::prelude::*;
#[allow(unused_imports)]
use self::slog::crit;
use self::slog::info;
use self::slog::{crit, info, warn};
use crate as libradicl;
use crossbeam_queue::ArrayQueue;

Expand Down Expand Up @@ -513,6 +512,9 @@ pub fn quantify(
cells_remaining.fetch_sub(1, Ordering::SeqCst);
let mut nbr = BufReader::new(&buf[..]);
let mut c = libradicl::Chunk::from_bytes(&mut nbr, &bc_type, &umi_type);
if c.reads.is_empty() {
warn!(log, "Discovered empty chunk; should not happen! cell_num = {}, _nbyte = {}, nrec = {}", cell_num, _nbyte, nrec);
}
let bc = c.reads.first().expect("chunk with no reads").bc;
eq_map.init_from_chunk(&mut c);

Expand Down
77 changes: 39 additions & 38 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ fn main() {
.version(version)
.author(crate_authors)
.arg(Arg::from("-i, --input=<input> 'input RAD file'"))
.arg(Arg::from("-d, --expected-ori=[expected-ori] 'the expected orientation of alignments'"))
.arg(Arg::from(
"-o, --output-dir=<output-dir> 'output directory'",
))
Expand Down Expand Up @@ -79,9 +80,9 @@ fn main() {
.arg(Arg::from("-i, --input-dir=<input-dir> 'input directory made by generate-permit-list'"))
.arg(Arg::from("-r, --rad-file=<rad-file> 'the RAD file to be collated'"))
.arg(Arg::from("-m, --max-records=[max-records] 'the maximum number of read records to keep in memory at once'")
.default_value("10000000"))
.arg(Arg::from("-e, --expected-ori=[expected-ori] 'the expected orientation of alignments'")
.default_value("fw"));
.default_value("10000000"));
//.arg(Arg::from("-e, --expected-ori=[expected-ori] 'the expected orientation of alignments'")
// .default_value("fw"));

let quant_app = App::new("quant")
.about("Quantify expression from a collated RAD file")
Expand Down Expand Up @@ -129,6 +130,39 @@ fn main() {
.value_of_t("output-dir")
.expect("no input directory specified");

let valid_ori: bool;
let expected_ori = match t.value_of("expected-ori").unwrap().to_uppercase().as_str() {
"RC" => {
valid_ori = true;
Strand::Reverse
}
"FW" => {
valid_ori = true;
Strand::Forward
}
"BOTH" => {
valid_ori = true;
Strand::Unknown
}
"EITHER" => {
valid_ori = true;
Strand::Unknown
}
_ => {
valid_ori = false;
Strand::Unknown
}
};

if !valid_ori {
crit!(
log,
"{} is not a valid option for --expected-ori",
expected_ori
);
std::process::exit(1);
}

let mut fmeth = CellFilterMethod::KneeFinding;

let expect_cells: Option<usize> = match t.value_of_t("expect-cells") {
Expand Down Expand Up @@ -161,50 +195,17 @@ fn main() {
}
Err(_) => None,
};
let nc = generate_permit_list(input_file, output_dir, fmeth, &log).unwrap();
let nc = generate_permit_list(input_file, output_dir, fmeth, expected_ori, &log).unwrap();
if nc == 0 {
warn!(log, "found 0 corrected barcodes; please check the input.");
}
}

if let Some(ref t) = opts.subcommand_matches("collate") {
let valid_ori: bool;
let expected_ori = match t.value_of("expected-ori").unwrap().to_uppercase().as_str() {
"RC" => {
valid_ori = true;
Strand::Reverse
}
"FW" => {
valid_ori = true;
Strand::Forward
}
"BOTH" => {
valid_ori = true;
Strand::Unknown
}
"EITHER" => {
valid_ori = true;
Strand::Unknown
}
_ => {
valid_ori = false;
Strand::Unknown
}
};

if !valid_ori {
crit!(
log,
"{} is not a valid option for --expected-ori",
expected_ori
);
std::process::exit(1);
}

let input_dir: String = t.value_of_t("input-dir").unwrap();
let rad_file: String = t.value_of_t("rad-file").unwrap();
let max_records: u32 = t.value_of_t("max-records").unwrap();
libradicl::collate::collate(input_dir, rad_file, max_records, expected_ori, &log)
libradicl::collate::collate(input_dir, rad_file, max_records, &log)
.expect("could not collate.");
}

Expand Down

0 comments on commit 30a37bc

Please sign in to comment.