-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #374 from AlexsLemonade/development
Merging in `development` for `v0.5.2` release
- Loading branch information
Showing
92 changed files
with
2,762 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
|
||
# Stub-run the Nextflow workflow on pull requests targeting the main and
# development branches, and keep the Nextflow logs as a build artifact.
name: Check nextflow stub

on:
  pull_request:
    branches:
      - main
      - development

jobs:
  nf-stub-check:
    runs-on: ubuntu-22.04
    steps:

      - name: Checkout repo
        uses: actions/checkout@v3

      - name: Check Nextflow workflow
        uses: docker://nextflow/nextflow:21.10.6
        with:
          args: nextflow -log stub-run.log run main.nf -stub -profile stub -ansi-log false

      - name: Check Nextflow with checkpoints from previous run
        uses: docker://nextflow/nextflow:21.10.6
        with:
          args: nextflow -log checkpoint-run.log run main.nf -stub -profile stub -ansi-log false

      # Must run even when a Nextflow step above failed: otherwise this step is
      # skipped and the upload step below has no nextflow-runs.log to upload,
      # losing the log in exactly the case where it is needed.
      - name: Join log files
        if: ${{ always() }}
        run: cat stub-run.log checkpoint-run.log > nextflow-runs.log

      - name: Upload nextflow log
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: nextflow-log
          path: nextflow-runs.log
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env nextflow | ||
nextflow.enable.dsl=2 | ||
|
||
include { annotate_celltypes } from './modules/classify-celltypes.nf' | ||
|
||
// parameter checks
// Every check runs before the workflow is stopped, so all missing inputs are
// reported in a single run rather than one at a time.
param_error = false

if (!file(params.run_metafile).exists()) {
  log.error("The 'run_metafile' file '${params.run_metafile}' can not be found.")
  param_error = true
}

if (!file(params.celltype_refs_metafile).exists()) {
  log.error("The 'celltype_refs_metafile' file '${params.celltype_refs_metafile}' can not be found.")
  param_error = true
}

// stop before the workflow starts if any parameter check failed
if (param_error) {
  System.exit(1)
}
|
||
workflow {

  // Decide which runs were requested. A project selection takes priority over
  // any run ids, so the id list stays empty whenever a project is given.
  selected_ids = params.project ? [] : (params.run_ids?.tokenize(',') ?: [])
  use_all_runs = selected_ids[0] == "All"
  if (use_all_runs) {
    log.info("Executing workflow for all runs in the run metafile.")
  }

  // Read the run metadata table and keep only the requested libraries/projects
  processed_sce_ch = Channel.fromPath(params.run_metafile)
    .splitCsv(header: true, sep: '\t')
    // keep just the metadata fields used below, under shorter names
    .map{ row -> [
      run_id: row.scpca_run_id,
      library_id: row.scpca_library_id,
      sample_id: row.scpca_sample_id,
      project_id: row.scpca_project_id,
      submitter: row.submitter,
      technology: row.technology,
      seq_unit: row.seq_unit,
    ]}
    .filter{ meta -> meta.seq_unit in ['cell', 'nucleus'] }
    // keep only single-cell data, dropping any CITE-seq or multiplexed data
    .filter{ meta -> meta.technology.startsWith("10Xv") }
    // a row is kept when it matches any of the requested ids or the project
    .filter{ meta ->
      use_all_runs ||
        (meta.run_id in selected_ids) ||
        (meta.library_id in selected_ids) ||
        (meta.sample_id in selected_ids) ||
        (meta.submitter == params.project) ||
        (meta.project_id == params.project)
    }
    // pair each metadata map with its processed rds file, the input expected
    // by cell type annotation
    .map{ meta ->
      def processed_rds = file("${params.results_dir}/${meta.project_id}/${meta.sample_id}/${meta.library_id}_processed.rds")
      tuple(meta, processed_rds)
    }

  annotate_celltypes(processed_sce_ch)
}
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# This script is used to classify and annotate cells using SingleR | ||
|
||
# import libraries | ||
suppressPackageStartupMessages({ | ||
library(optparse) | ||
library(SingleCellExperiment) | ||
}) | ||
|
||
# Command line interface --------------------------------------------------------

# every option accepted by this script; `--threads` is the only one with a
# default value
option_list <- list(
  make_option(
    c("-i", "--input_sce_file"),
    type = "character",
    help = "path to rds file with input sce object"
  ),
  make_option(
    c("-o", "--output_sce_file"),
    type = "character",
    help = "path to output rds file to store processed sce object. Must end in .rds"
  ),
  make_option(
    "--singler_models",
    type = "character",
    help = "list of models generated for use with SingleR. Each input file contains
a list of models generated from a single reference, one each for each label type:
`label.main`, `label.fine`, and `label.ont`."
  ),
  make_option(
    "--seed",
    type = "integer",
    help = "A random seed for reproducibility."
  ),
  make_option(
    c("-t", "--threads"),
    type = "integer",
    default = 1,
    help = "Number of multiprocessing threads to use."
  )
)

# parse the command line into a named list of option values
option_parser <- OptionParser(option_list = option_list)
opt <- parse_args(option_parser)
|
||
# Set up -----------------------------------------------------------------------

# set seed
# the command line flag is `--seed`, so the parsed value lives in `opt$seed`;
# when the flag is not given this is NULL and R picks a fresh seed
set.seed(opt$seed)

# check that the input file was provided and exists
# (the NULL check avoids an unrelated "argument is of length zero" error when
# the option is omitted)
if (is.null(opt$input_sce_file) || !file.exists(opt$input_sce_file)) {
  stop("Missing input SCE file")
}

# check that references all exist
# `--singler_models` is split on commas into individual model file paths
model_files <- unlist(stringr::str_split(opt$singler_models, ","))
missing_files <- model_files[!file.exists(model_files)]
if (length(missing_files) > 0) {
  # report the missing paths as part of the error so they are actually shown
  stop(
    "Please make sure that all provided SingleR models exist.\n",
    "Missing model file(s): ", paste(missing_files, collapse = ", ")
  )
}
|
||
# pick the BiocParallel backend: multicore only when more than one thread was
# requested, serial otherwise
use_multicore <- opt$threads > 1
bp_param <- if (use_multicore) {
  BiocParallel::MulticoreParam(opt$threads)
} else {
  BiocParallel::SerialParam()
}
|
||
# read in input rds file
sce <- readr::read_rds(opt$input_sce_file)

# read in references as a list of lists
# each file contains a named list of models generated using the same reference dataset
# but unique labels in the reference dataset

# the reference name is the file name without its `_model.rds` suffix; the
# pattern is escaped and anchored so only a literal trailing suffix is removed
model_names <- stringr::str_remove(basename(model_files), "_model\\.rds$")
names(model_files) <- model_names
model_list <- purrr::map(model_files, readr::read_rds) |>
  # ensure we have label type before reference name
  # example: label.main_HumanPrimaryCellAtlasData
  # where `label.main` is the name of the model stored in the file and
  # `HumanPrimaryCellAtlasData` is the name of the reference used for each file
  # containing a list of models
  purrr::imap(\(ref_models, ref_name) {
    names(ref_models) <- glue::glue("{names(ref_models)}_{ref_name}")
    ref_models
  }) |>
  # collapse the per-file lists into one flat list of models
  purrr::flatten()
|
||
# SingleR classify -------------------------------------------------------------

# wrapper that classifies the cells in `sce` against a single trained model,
# holding the remaining SingleR settings fixed
classify_sce <- function(trained_model) {
  SingleR::classifySingleR(
    test = sce,
    trained = trained_model,
    fine.tune = TRUE,
    BPPARAM = bp_param
  )
}

# run SingleR once per provided model; results keep the names of `model_list`
all_singler_results <- purrr::map(model_list, classify_sce)
|
||
# Annotate sce -----------------------------------------------------------------

# collect the pruned labels from every SingleR result, one column per model
pruned_label_cols <- purrr::map_dfc(
  all_singler_results,
  \(singler_result) singler_result$pruned.labels
)
all_annotations_df <- DataFrame(pruned_label_cols)

# append the annotation columns to the existing cell-level data
colData(sce) <- cbind(colData(sce), all_annotations_df)

# keep the complete SingleR output alongside the object
metadata(sce)$singler_results <- all_singler_results

# write the annotated sce to the requested output file
readr::write_rds(
  sce,
  opt$output_sce_file,
  compress = "gz"
)
|
Oops, something went wrong.