Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ codegen-units = 1
[patch.crates-io]
lexical-util = { path = "vendor/lexical-util" }
noodles-bam = { path = "../noodles/noodles-bam" }
noodles-bcf = { path = "../noodles/noodles-bcf" }
noodles-bgzf = { path = "../noodles/noodles-bgzf" }
noodles-core = { path = "../noodles/noodles-core" }
noodles-cram = { path = "../noodles/noodles-cram" }
Expand Down
5 changes: 3 additions & 2 deletions rust/bioscript-cli/src/cli_bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::{
use bioscript_formats::{
GenotypeLoadOptions, GenotypeSourceFormat, GenotypeStore, InferredSex, InspectOptions,
PrepareRequest, SexDetectionConfidence, SexInference, inspect_file, prepare_indexes,
shell_flags,
shell_flags, convert_23andme_grch37_to_grch38,
};
use bioscript_runtime::{BioscriptRuntime, RuntimeConfig, StageTiming};
use bioscript_schema::{
Expand Down Expand Up @@ -57,7 +57,7 @@ fn run_cli() -> Result<(), String> {
Ok(())
}

const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--observations-file <path>] [--asset id=path] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n bioscript report <manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--allow-md5-mismatch] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n bioscript review <manifest.yaml|package.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n bioscript import-package <package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n bioscript validate-variants <path> [--report <file>]\n bioscript validate-panels <path> [--report <file>]\n bioscript validate-assays <path> [--report <file>]\n bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|cram] [--cache-dir <path>]\n bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]";
const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--observations-file <path>] [--asset id=path] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|bcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n bioscript report <manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|bcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--allow-md5-mismatch] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n bioscript review <manifest.yaml|package.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n bioscript import-package <package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n bioscript validate-variants <path> [--report <file>]\n bioscript validate-panels <path> [--report <file>]\n bioscript validate-assays <path> [--report <file>]\n bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|bcf|cram] [--cache-dir <path>]\n bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]\n bioscript liftover-23andme <input.txt> <output.txt> [--unmapped <unmapped.tsv>]";

struct CliOptions {
script_path: Option<PathBuf>,
Expand Down Expand Up @@ -90,6 +90,7 @@ fn dispatch_subcommand(args: &[String]) -> Result<bool, String> {
"validate-assays" => run_validate_assays(rest).map(|()| true),
"prepare" => run_prepare(rest).map(|()| true),
"inspect" => run_inspect(rest).map(|()| true),
"liftover-23andme" => run_liftover_23andme(rest).map(|()| true),
_ => Ok(false),
}
}
Expand Down
90 changes: 90 additions & 0 deletions rust/bioscript-cli/src/cli_commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,53 @@ fn run_inspect(args: Vec<String>) -> Result<(), String> {
Ok(())
}

/// Runs the `liftover-23andme` subcommand.
///
/// Expected arguments: `<input.txt> <output.txt> [--unmapped <unmapped.tsv>]`.
/// The two positional paths are bound in order (input first, then output).
/// When `--unmapped` is not given, the report path is derived from the output
/// path via `Path::with_extension("unmapped.tsv")`.
///
/// The conversion itself is delegated to `convert_23andme_grch37_to_grch38`;
/// on success its counters and the two destination paths are printed to
/// stdout as `key=value` lines.
///
/// # Errors
///
/// Returns an error string when:
/// - `--unmapped` is the last argument and has no value,
/// - an unrecognised `--option` or a third positional argument is supplied,
/// - either positional path is missing (the usage text is returned),
/// - the underlying conversion fails (its error is stringified).
fn run_liftover_23andme(args: Vec<String>) -> Result<(), String> {
    const LIFTOVER_USAGE: &str =
        "usage: bioscript liftover-23andme <input.txt> <output.txt> [--unmapped <unmapped.tsv>]";

    let mut input: Option<PathBuf> = None;
    let mut output: Option<PathBuf> = None;
    let mut unmapped: Option<PathBuf> = None;

    let mut iter = args.into_iter();
    // `while let` instead of `for`: the `--unmapped` arm pulls its value from
    // the same iterator.
    while let Some(arg) = iter.next() {
        match arg.as_str() {
            "--unmapped" => {
                unmapped = Some(PathBuf::from(
                    iter.next().ok_or("--unmapped requires a path")?,
                ));
            }
            // Reject unknown options up front. Without this guard a mistyped
            // flag would be bound as the input path and the real input would
            // shift into the output slot, where it could be overwritten.
            flag if flag.starts_with("--") => {
                return Err(format!("unexpected argument: {flag}"));
            }
            other if input.is_none() => input = Some(PathBuf::from(other)),
            other if output.is_none() => output = Some(PathBuf::from(other)),
            other => return Err(format!("unexpected argument: {other}")),
        }
    }

    let (Some(input), Some(output)) = (input, output) else {
        return Err(LIFTOVER_USAGE.to_owned());
    };
    let unmapped = unmapped.unwrap_or_else(|| output.with_extension("unmapped.tsv"));

    let stats = convert_23andme_grch37_to_grch38(&input, &output, &unmapped)
        .map_err(|err| err.to_string())?;
    println!("total_markers={}", stats.total_markers);
    println!("mapped={}", stats.mapped);
    println!("unmapped={}", stats.unmapped);
    println!(
        "reverse_strand_genotypes={}",
        stats.reverse_strand_genotypes
    );
    println!("output={}", output.display());
    println!("unmapped_report={}", unmapped.display());
    Ok(())
}

fn run_validate_variants(args: Vec<String>) -> Result<(), String> {
let mut path: Option<PathBuf> = None;
let mut report_path: Option<PathBuf> = None;
Expand Down Expand Up @@ -321,6 +368,20 @@ alleles:
assert!(run_inspect(vec!["sample.cram".to_owned(), "--input-index".to_owned()])
.unwrap_err()
.contains("--input-index requires"));

assert!(run_liftover_23andme(Vec::new())
.unwrap_err()
.contains("usage"));
assert!(run_liftover_23andme(vec!["input.txt".to_owned()])
.unwrap_err()
.contains("usage"));
assert!(run_liftover_23andme(vec![
"input.txt".to_owned(),
"output.txt".to_owned(),
"--unmapped".to_owned(),
])
.unwrap_err()
.contains("--unmapped requires"));
}

#[test]
Expand Down Expand Up @@ -365,6 +426,35 @@ alleles:
fs::remove_dir_all(dir).unwrap();
}

#[test]
fn liftover_23andme_command_uses_bundled_chain() {
    // Scratch directory holding the fixture input and both generated files.
    let workdir = temp_dir("liftover");
    let source_path = workdir.join("genome.txt");
    let lifted_path = workdir.join("genome.grch38.txt");
    let report_path = workdir.join("unmapped.tsv");

    // Minimal fixture: a build-37 header comment followed by one marker row.
    let fixture = "# We are using reference human assembly build 37\nrs1800437\t19\t46181392\tCG\n";
    fs::write(&source_path, fixture).unwrap();

    // Positional input/output first, then an explicit unmapped-report path.
    let cli_args = vec![
        source_path.display().to_string(),
        lifted_path.display().to_string(),
        "--unmapped".to_owned(),
        report_path.display().to_string(),
    ];
    run_liftover_23andme(cli_args).unwrap();

    // The output must carry the liftover header and the re-positioned marker.
    let lifted_text = fs::read_to_string(&lifted_path).unwrap();
    assert!(lifted_text.contains("# Coordinates lifted"));
    assert!(lifted_text.contains("rs1800437\t19\t45678134\tCG"));

    // The unmapped report is expected to mention a `reason` field.
    let report_text = fs::read_to_string(&report_path).unwrap();
    assert!(report_text.contains("reason"));

    fs::remove_dir_all(workdir).unwrap();
}

#[test]
fn validate_panels_and_assays_cover_report_and_error_paths() {
let dir = temp_dir("panels-assays");
Expand Down
7 changes: 6 additions & 1 deletion rust/bioscript-cli/src/report_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,11 @@ fn loader_with_inspection(
let mut loader = base.clone();
if loader.format.is_none() {
loader.format = if inspection.container == bioscript_formats::FileContainer::Zip {
Some(GenotypeSourceFormat::Zip)
if inspection.detected_kind == bioscript_formats::DetectedKind::Bcf {
Some(GenotypeSourceFormat::Bcf)
} else {
Some(GenotypeSourceFormat::Zip)
}
} else {
match inspection.detected_kind {
bioscript_formats::DetectedKind::AlignmentBam => {
Expand All @@ -322,6 +326,7 @@ fn loader_with_inspection(
Some(GenotypeSourceFormat::Cram)
}
bioscript_formats::DetectedKind::Vcf => Some(GenotypeSourceFormat::Vcf),
bioscript_formats::DetectedKind::Bcf => Some(GenotypeSourceFormat::Bcf),
bioscript_formats::DetectedKind::GenotypeText => Some(GenotypeSourceFormat::Text),
_ => None,
}
Expand Down
2 changes: 1 addition & 1 deletion rust/bioscript-formats/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ crate-type = ["rlib"]
[dependencies]
bioscript-core = { path = "../bioscript-core" }
flate2 = "1.1.9"
noodles = { version = "0.110.0", features = ["bam", "bgzf", "core", "cram", "csi", "fasta", "sam", "tabix", "vcf"] }
noodles = { version = "0.110.0", features = ["bam", "bcf", "bgzf", "core", "cram", "csi", "fasta", "sam", "tabix", "vcf"] }
zip = { version = "2.2.0", default-features = false, features = ["deflate"] }

[lints.clippy]
Expand Down
Binary file not shown.
Loading
Loading