-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #272 from LCR-BCCRC/module/fishhook/1.0
Module/fishhook/1.0
- Loading branch information
Showing
7 changed files
with
487 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
name: fishhook | ||
channels: | ||
- conda-forge | ||
- defaults | ||
dependencies: | ||
- _libgcc_mutex=0.1=conda_forge | ||
- _openmp_mutex=4.5=2_gnu | ||
- _r-mutex=1.0.0=anacondar_1 | ||
- _sysroot_linux-64_curr_repodata_hack=3=haa98f57_10 | ||
- binutils_impl_linux-64=2.38=h2a08ee3_1 | ||
- binutils_linux-64=2.38.0=hc2dff05_0 | ||
- blas=1.0=openblas | ||
- bwidget=1.9.11=1 | ||
- bzip2=1.0.8=h7b6447c_0 | ||
- c-ares=1.19.0=h5eee18b_0 | ||
- ca-certificates=2023.05.30=h06a4308_0 | ||
- cairo=1.16.0=hb05425b_4 | ||
- curl=7.88.1=h5eee18b_1 | ||
- expat=2.4.9=h6a678d5_0 | ||
- fontconfig=2.14.1=h4c34cd2_2 | ||
- freetype=2.12.1=h4a9f257_0 | ||
- fribidi=1.0.10=h7b6447c_0 | ||
- gcc_impl_linux-64=11.2.0=h1234567_1 | ||
- gcc_linux-64=11.2.0=h5c386dc_0 | ||
- gfortran_impl_linux-64=11.2.0=h1234567_1 | ||
- gfortran_linux-64=11.2.0=hc2dff05_0 | ||
- glib=2.69.1=he621ea3_2 | ||
- graphite2=1.3.14=h295c915_1 | ||
- gxx_impl_linux-64=11.2.0=h1234567_1 | ||
- gxx_linux-64=11.2.0=hc2dff05_0 | ||
- harfbuzz=4.3.0=hf52aaf7_1 | ||
- icu=58.2=he6710b0_3 | ||
- jpeg=9e=h5eee18b_1 | ||
- kernel-headers_linux-64=3.10.0=h57e8cba_10 | ||
- krb5=1.20.1=h143b758_1 | ||
- ld_impl_linux-64=2.38=h1181459_1 | ||
- lerc=3.0=h295c915_0 | ||
- libcurl=7.88.1=h251f7ec_1 | ||
- libdeflate=1.17=h5eee18b_0 | ||
- libedit=3.1.20221030=h5eee18b_0 | ||
- libev=4.33=h7f8727e_1 | ||
- libffi=3.4.4=h6a678d5_0 | ||
- libgcc-devel_linux-64=11.2.0=h1234567_1 | ||
- libgcc-ng=13.1.0=he5830b7_0 | ||
- libgfortran-ng=11.2.0=h00389a5_1 | ||
- libgfortran5=11.2.0=h1234567_1 | ||
- libgomp=13.1.0=he5830b7_0 | ||
- libnghttp2=1.52.0=h2d74bed_1 | ||
- libopenblas=0.3.21=h043d6bf_0 | ||
- libpng=1.6.39=h5eee18b_0 | ||
- libssh2=1.10.0=hdbd6064_2 | ||
- libstdcxx-devel_linux-64=11.2.0=h1234567_1 | ||
- libstdcxx-ng=13.1.0=hfd8a6a1_0 | ||
- libtiff=4.5.0=h6a678d5_2 | ||
- libuuid=1.41.5=h5eee18b_0 | ||
- libwebp-base=1.2.4=h5eee18b_1 | ||
- libxcb=1.15=h7f8727e_0 | ||
- libxml2=2.10.3=hcbfbd50_0 | ||
- lz4-c=1.9.4=h6a678d5_0 | ||
- make=4.2.1=h1bed415_1 | ||
- ncurses=6.4=h6a678d5_0 | ||
- openssl=3.0.9=h7f8727e_0 | ||
- pango=1.50.7=h05da053_0 | ||
- pcre=8.45=h295c915_0 | ||
- pcre2=10.37=he7ceb23_1 | ||
- pip=23.1.2=py311h06a4308_0 | ||
- pixman=0.40.0=h7f8727e_1 | ||
- python=3.11.3=h955ad1f_1 | ||
- r-askpass=1.1=r42h76d94ec_0 | ||
- r-assertthat=0.2.1=r42hc72bb7e_4 | ||
- r-base=4.2.0=h1ae530e_0 | ||
- r-base64enc=0.1_3=r42h57805ef_1006 | ||
- r-brew=1.0_8=r42hc72bb7e_2 | ||
- r-brio=1.1.3=r42h76d94ec_0 | ||
- r-bslib=0.5.0=r42hc72bb7e_1 | ||
- r-cachem=1.0.6=r42h76d94ec_0 | ||
- r-callr=3.7.3=r42hc72bb7e_1 | ||
- r-cli=3.6.1=r42ha503ecb_1 | ||
- r-clipr=0.8.0=r42hc72bb7e_2 | ||
- r-commonmark=1.8.0=r42h76d94ec_0 | ||
- r-cpp11=0.4.3=r42hc72bb7e_1 | ||
- r-crayon=1.5.2=r42hc72bb7e_2 | ||
- r-credentials=1.3.2=r42hc72bb7e_2 | ||
- r-curl=4.3.2=r42h76d94ec_0 | ||
- r-desc=1.4.2=r42hc72bb7e_2 | ||
- r-devtools=2.4.5=r42hc72bb7e_1 | ||
- r-diffobj=0.3.5=r42h76d94ec_0 | ||
- r-digest=0.6.29=r42h884c59f_0 | ||
- r-downlit=0.4.2=r42hc72bb7e_2 | ||
- r-ellipsis=0.3.2=r42h76d94ec_0 | ||
- r-evaluate=0.21=r42hc72bb7e_1 | ||
- r-fansi=1.0.3=r42h76d94ec_0 | ||
- r-fastmap=1.1.0=r42h884c59f_0 | ||
- r-fontawesome=0.5.1=r42hc72bb7e_1 | ||
- r-fs=1.5.2=r42h884c59f_0 | ||
- r-gert=1.6.0=r42h76d94ec_0 | ||
- r-gh=1.4.0=r42hc72bb7e_1 | ||
- r-gitcreds=0.1.2=r42hc72bb7e_2 | ||
- r-glue=1.6.2=r42h76d94ec_0 | ||
- r-highr=0.10=r42hc72bb7e_1 | ||
- r-htmltools=0.5.5=r42ha503ecb_1 | ||
- r-htmlwidgets=1.6.2=r42hc72bb7e_1 | ||
- r-httpuv=1.6.11=r42ha503ecb_1 | ||
- r-httr=1.4.6=r42hc72bb7e_1 | ||
- r-httr2=0.2.3=r42hc72bb7e_1 | ||
- r-ini=0.3.1=r42hc72bb7e_1005 | ||
- r-jquerylib=0.1.4=r42hc72bb7e_2 | ||
- r-jsonlite=1.8.0=r42h76d94ec_0 | ||
- r-knitr=1.39=r42h6115d3f_0 | ||
- r-later=1.3.1=r42ha503ecb_1 | ||
- r-lifecycle=1.0.1=r42h142f84f_0 | ||
- r-magrittr=2.0.3=r42h76d94ec_0 | ||
- r-memoise=2.0.1=r42hc72bb7e_2 | ||
- r-mime=0.12=r42h76d94ec_0 | ||
- r-miniui=0.1.1.1=r42hc72bb7e_1004 | ||
- r-openssl=2.0.2=r42h76d94ec_0 | ||
- r-pillar=1.9.0=r42hc72bb7e_1 | ||
- r-pkgbuild=1.4.2=r42hc72bb7e_0 | ||
- r-pkgconfig=2.0.3=r42hc72bb7e_3 | ||
- r-pkgdown=2.0.7=r42hc72bb7e_1 | ||
- r-pkgload=1.3.2=r42hc72bb7e_1 | ||
- r-praise=1.0.0=r42hc72bb7e_1007 | ||
- r-prettyunits=1.1.1=r42hc72bb7e_3 | ||
- r-processx=3.7.0=r42h06615bd_1 | ||
- r-profvis=0.3.8=r42h57805ef_0 | ||
- r-promises=1.2.0.1=r42ha503ecb_2 | ||
- r-ps=1.7.0=r42h76d94ec_0 | ||
- r-purrr=0.3.4=r42h76d94ec_0 | ||
- r-r6=2.5.1=r42hc72bb7e_2 | ||
- r-ragg=1.2.5=r42hd65d3ba_0 | ||
- r-rappdirs=0.3.3=r42h76d94ec_0 | ||
- r-rcmdcheck=1.4.0=r42h785f33e_2 | ||
- r-rcpp=1.0.10=r42ha503ecb_1 | ||
- r-rematch2=2.1.2=r42hc72bb7e_3 | ||
- r-remotes=2.4.2=r42hc72bb7e_2 | ||
- r-rlang=1.1.1=r42ha503ecb_1 | ||
- r-rmarkdown=2.14=r42h6115d3f_0 | ||
- r-roxygen2=7.2.3=r42ha503ecb_1 | ||
- r-rprojroot=2.0.3=r42hc72bb7e_0 | ||
- r-rstudioapi=0.14=r42hc72bb7e_2 | ||
- r-rversions=2.1.2=r42hc72bb7e_2 | ||
- r-sass=0.4.6=r42ha503ecb_1 | ||
- r-sessioninfo=1.2.2=r42hc72bb7e_2 | ||
- r-shiny=1.7.4=r42h785f33e_1 | ||
- r-sourcetools=0.1.7_1=r42ha503ecb_1 | ||
- r-stringi=1.7.6=r42h884c59f_0 | ||
- r-stringr=1.4.0=r42h6115d3f_0 | ||
- r-sys=3.4=r42h76d94ec_0 | ||
- r-systemfonts=1.0.4=r42haf97adc_2 | ||
- r-testthat=3.1.4=r42h884c59f_0 | ||
- r-textshaping=0.3.6=r42h884c59f_0 | ||
- r-tibble=3.1.7=r42h76d94ec_0 | ||
- r-tinytex=0.45=r42hc72bb7e_1 | ||
- r-urlchecker=1.0.1=r42hc72bb7e_2 | ||
- r-usethis=2.2.1=r42hc72bb7e_0 | ||
- r-utf8=1.2.2=r42h76d94ec_0 | ||
- r-vctrs=0.4.1=r42h884c59f_0 | ||
- r-waldo=0.5.1=r42hc72bb7e_1 | ||
- r-whisker=0.4.1=r42hc72bb7e_1 | ||
- r-withr=2.5.0=r42hc72bb7e_2 | ||
- r-xfun=0.31=r42h76d94ec_0 | ||
- r-xml2=1.3.3=r42h884c59f_0 | ||
- r-xopen=1.0.0=r42hc72bb7e_1005 | ||
- r-xtable=1.8_4=r42hc72bb7e_5 | ||
- r-yaml=2.3.5=r42h76d94ec_0 | ||
- r-zip=2.2.0=r42h76d94ec_0 | ||
- readline=8.2=h5eee18b_0 | ||
- setuptools=67.8.0=py311h06a4308_0 | ||
- sqlite=3.41.2=h5eee18b_0 | ||
- sysroot_linux-64=2.17=h57e8cba_10 | ||
- tk=8.6.12=h1ccaba5_0 | ||
- tktable=2.10=h14c3975_0 | ||
- wheel=0.38.4=py311h06a4308_0 | ||
- xz=5.4.2=h5eee18b_0 | ||
- zlib=1.2.13=h5eee18b_0 | ||
- zstd=1.5.5=hc292b87_0 | ||
prefix: /home/jacky/miniconda3/envs/fishhook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
lcr-modules: | ||
|
||
fishhook: | ||
inputs: | ||
# Available wildcards: {seq_type} | ||
master_maf: "_UPDATE_" | ||
sample_sets: "_UPDATE_" | ||
|
||
sample_set: ["_UPDATE_"] | ||
seq_types: ["genome", "capture"] | ||
include_non_coding: True | ||
prepare_mafs: "{SCRIPTSDIR}/generate_smg_inputs/1.0/generate_smg_inputs.R" | ||
src_dir: "{MODSDIR}/src/" | ||
|
||
options: | ||
include_silent_mutation: False #Include silent mutations in maf file | ||
tiles_size: 1000 #Size of the tiles the genome is split into; leave blank if you want to use a gene list as the hypotheses instead | ||
gene_list: "" #If you do not provide a tile size, provide a gene list in GTF format (e.g., GENCODE v19 genes for GRCh37: gencode.v19.genes.gtf) so that FishHook performs a gene-centric analysis | ||
gene_list_only_protein_coding: True #Only include protein coding genes in gene list | ||
covariates: "" #Supply a covariate file in BED format, or leave blank to run FishHook without covariates | ||
|
||
conda_envs: | ||
prepare_mafs: "{REPODIR}/envs/gatk/gatkR.yaml" | ||
fishhook: "{MODSDIR}/envs/fishhook.yaml" | ||
|
||
threads: | ||
fishhook: 1 | ||
|
||
resources: | ||
fishhook: | ||
mem_mb: 16000 | ||
|
||
pairing_config: | ||
genome: | ||
run_paired_tumours: True | ||
run_unpaired_tumours_with: "unmatched_normal" | ||
run_paired_tumours_as_unpaired: False | ||
capture: | ||
run_paired_tumours: True | ||
run_unpaired_tumours_with: "unmatched_normal" | ||
run_paired_tumours_as_unpaired: False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../../envs/fishhook/fishhook.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
#!/usr/bin/env snakemake | ||
|
||
|
||
##### ATTRIBUTION ##### | ||
|
||
|
||
# Original Author: Jacky Yiu | ||
# Module Author: Jacky Yiu | ||
# Contributors: N/A | ||
|
||
|
||
##### SETUP ##### | ||
|
||
# Import package with useful functions for developing analysis modules | ||
import oncopipe as op | ||
|
||
# Check that the installed oncopipe is recent enough for this module. Copy this
# whole section into any module that relies on newer oncopipe features.
import sys  # imported explicitly: every failure path below calls sys.exit()
import pkg_resources

min_oncopipe_version = "1.0.11"

# `packaging` is not yet declared as a dependency of LCR-modules or oncopipe, so
# it may be absent from the environment; stop with install instructions if so.
try:
    from packaging import version
except ModuleNotFoundError:
    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")

# Compare the installed oncopipe version against the minimum and abort, with
# red-coloured messages (ANSI escape codes), when the installed one is older.
current_version = pkg_resources.get_distribution("oncopipe").version
if version.parse(current_version) < version.parse(min_oncopipe_version):
    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
|
||
# End of dependency checking section | ||
|
||
# Register the module with oncopipe and keep the returned module-specific
# configuration in `CFG`, i.e. a shortcut to `config["lcr-modules"]["fishhook"]`.
CFG = op.setup_module(
    name="fishhook",
    version="1.0",
    subdirectories=["inputs", "fishhook", "outputs"],
)
|
||
# Rules that run locally (not submitted as jobs) when using a compute cluster.
# NOTE(review): _run_fishhook is listed here although it declares `threads` and
# a `mem_mb` resource -- confirm that running it locally is intended.
localrules:
    _fishhook_input_maf, _fishhook_input_subsets,
    _fishhook_prepare_maf,
    _install_fishhook, _run_fishhook,
    _fishhook_output_tsv, _fishhook_all,
|
||
|
||
##### RULES ##### | ||
|
||
|
||
# Link the configured master MAF into the module results directory
# (under '00-inputs/'), one link per {seq_type}.
rule _fishhook_input_maf:
    input:
        maf = CFG["inputs"]["master_maf"]
    output:
        maf = CFG["dirs"]["inputs"] + "maf/{seq_type}/input.maf"
    run:
        # Absolute symlink from the source MAF to the module-local path
        op.absolute_symlink(input.maf, output.maf)
|
||
# Link the configured sample-sets table into the module results directory
# (under '00-inputs/').
rule _fishhook_input_subsets:
    input:
        sample_sets = CFG["inputs"]["sample_sets"]
    output:
        sample_sets = CFG["dirs"]["inputs"] + "sample_sets/sample_sets.tsv"
    run:
        # Absolute symlink from the source table to the module-local path
        op.absolute_symlink(input.sample_sets, output.sample_sets)
|
||
# Build the per-sample-set MAF that FishHook takes as input by running the
# configured `prepare_mafs` R script on the linked MAFs of every seq_type.
rule _fishhook_prepare_maf:
    input:
        # One linked MAF per configured seq_type; {sample_set} is left unexpanded
        maf = expand(
            str(rules._fishhook_input_maf.output.maf),
            allow_missing=True,
            seq_type=CFG["seq_types"]
        ),
        # ancient(): a newer timestamp on the sample-sets table alone does not
        # trigger a re-run
        sample_sets = ancient(str(rules._fishhook_input_subsets.output.sample_sets))
    output:
        maf = temp(CFG["dirs"]["inputs"] + "maf/{sample_set}.maf"),
        contents = CFG["dirs"]["inputs"] + "maf/{sample_set}.maf.content"
    params:
        script = CFG["prepare_mafs"],
        # Upper-cased string form of the config value (e.g. True -> "TRUE")
        include_non_coding = str(CFG["include_non_coding"]).upper()
    conda:
        CFG["conda_envs"]["prepare_mafs"]
    log:
        stdout = CFG["logs"]["inputs"] + "{sample_set}/prepare_maf.stdout.log",
        stderr = CFG["logs"]["inputs"] + "{sample_set}/prepare_maf.stderr.log"
    shell:
        op.as_one_line("""
        Rscript {params.script}
        {input.maf}
        {input.sample_sets}
        $(dirname {output.maf})/
        {wildcards.sample_set}
        FishHook
        {params.include_non_coding}
        > {log.stdout} 2> {log.stderr}
        """)
|
||
|
||
# Install FishHook (plus ComplexHeatmap and gTrack) into the module's conda
# environment with devtools::install_github; it is only available from GitHub,
# not through conda/CRAN/BiocManager. An empty sentinel file is touched once the
# R command succeeds so that downstream rules can depend on the installation.
rule _install_fishhook:
    output:
        complete = CFG["dirs"]["inputs"] + "fishhook_installed.success"
    conda:
        CFG["conda_envs"]["fishhook"]
    log:
        input = CFG["logs"]["inputs"] + "install_fishhook.log"
    shell:
        # `2>&1` sends stderr to the log as well, so that error output from the
        # installation is recorded and not just stdout.
        """
        R -q -e 'devtools::install_github(c("jokergoo/ComplexHeatmap","mskilab/gTrack", "mskilab/fishHook"))' >> {log.input} 2>&1 &&
        touch {output.complete}"""
|
||
|
||
# Run FishHook on the prepared MAF of each sample set through an R script,
# inside the FishHook conda environment and once the install sentinel exists.
# NOTE(review): the script path starts with "scr/" whereas the default config
# defines src_dir as "{MODSDIR}/src/" -- confirm the directory name.
rule _run_fishhook:
    input:
        # Sentinel written by _install_fishhook; ancient() means a newer
        # timestamp on it alone does not trigger a re-run
        fishhook = ancient(str(rules._install_fishhook.output.complete)),
        maf = str(rules._fishhook_prepare_maf.output.maf)
    output:
        # NOTE(review): named `tsv` but the file carries a .maf extension
        tsv = CFG["dirs"]["fishhook"] + "{sample_set}/fishhook.output.maf"
    conda:
        CFG["conda_envs"]["fishhook"]
    threads:
        CFG["threads"]["fishhook"]
    resources:
        **CFG["resources"]["fishhook"]
    params:
        # Names and order kept as-is: presumably consumed by run_fishhook.R
        # (including the `coveriate` spelling) -- verify before renaming
        tiles_size = CFG["options"]["tiles_size"],
        coveriate = CFG["options"]["covariates"],
        include_silent = CFG["options"]["include_silent_mutation"],
        gene_list = CFG["options"]["gene_list"],
        gene_list_only_protein_coding = CFG["options"]["gene_list_only_protein_coding"]
    script:
        "scr/R/run_fishhook.R"
|
||
|
||
# Expose the FishHook result of each sample set in the module results
# directory (under '99-outputs/') through a relative symlink.
rule _fishhook_output_tsv:
    input:
        tsv = str(rules._run_fishhook.output.tsv)
    output:
        tsv = CFG["dirs"]["outputs"] + "tsv/{sample_set}/{sample_set}.fishhook.tsv"
    run:
        op.relative_symlink(input.tsv, output.tsv, in_module=True)
|
||
|
||
# Target rule: requests the output symlink of every configured sample set,
# which in turn triggers the rules that produce them.
rule _fishhook_all:
    input:
        expand(
            str(rules._fishhook_output_tsv.output.tsv),
            sample_set=CFG["sample_set"]
        )
|
||
|
||
##### CLEANUP ##### | ||
|
||
|
||
# Final clean-up: store the module-specific configuration on disk and
# delete the `CFG` variable.
op.cleanup_module(CFG)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../../schemas/base/base-1.0.yaml |
Oops, something went wrong.