Skip to content

Commit

Permalink
Merge pull request #272 from LCR-BCCRC/module/fishhook/1.0
Browse files Browse the repository at this point in the history
Module/fishhook/1.0
  • Loading branch information
Kdreval committed Jul 11, 2023
2 parents 94cdc89 + d35c4ed commit 4ebe1be
Show file tree
Hide file tree
Showing 7 changed files with 487 additions and 0 deletions.
177 changes: 177 additions & 0 deletions envs/fishhook/fishhook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
name: fishhook
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- _r-mutex=1.0.0=anacondar_1
- _sysroot_linux-64_curr_repodata_hack=3=haa98f57_10
- binutils_impl_linux-64=2.38=h2a08ee3_1
- binutils_linux-64=2.38.0=hc2dff05_0
- blas=1.0=openblas
- bwidget=1.9.11=1
- bzip2=1.0.8=h7b6447c_0
- c-ares=1.19.0=h5eee18b_0
- ca-certificates=2023.05.30=h06a4308_0
- cairo=1.16.0=hb05425b_4
- curl=7.88.1=h5eee18b_1
- expat=2.4.9=h6a678d5_0
- fontconfig=2.14.1=h4c34cd2_2
- freetype=2.12.1=h4a9f257_0
- fribidi=1.0.10=h7b6447c_0
- gcc_impl_linux-64=11.2.0=h1234567_1
- gcc_linux-64=11.2.0=h5c386dc_0
- gfortran_impl_linux-64=11.2.0=h1234567_1
- gfortran_linux-64=11.2.0=hc2dff05_0
- glib=2.69.1=he621ea3_2
- graphite2=1.3.14=h295c915_1
- gxx_impl_linux-64=11.2.0=h1234567_1
- gxx_linux-64=11.2.0=hc2dff05_0
- harfbuzz=4.3.0=hf52aaf7_1
- icu=58.2=he6710b0_3
- jpeg=9e=h5eee18b_1
- kernel-headers_linux-64=3.10.0=h57e8cba_10
- krb5=1.20.1=h143b758_1
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libcurl=7.88.1=h251f7ec_1
- libdeflate=1.17=h5eee18b_0
- libedit=3.1.20221030=h5eee18b_0
- libev=4.33=h7f8727e_1
- libffi=3.4.4=h6a678d5_0
- libgcc-devel_linux-64=11.2.0=h1234567_1
- libgcc-ng=13.1.0=he5830b7_0
- libgfortran-ng=11.2.0=h00389a5_1
- libgfortran5=11.2.0=h1234567_1
- libgomp=13.1.0=he5830b7_0
- libnghttp2=1.52.0=h2d74bed_1
- libopenblas=0.3.21=h043d6bf_0
- libpng=1.6.39=h5eee18b_0
- libssh2=1.10.0=hdbd6064_2
- libstdcxx-devel_linux-64=11.2.0=h1234567_1
- libstdcxx-ng=13.1.0=hfd8a6a1_0
- libtiff=4.5.0=h6a678d5_2
- libuuid=1.41.5=h5eee18b_0
- libwebp-base=1.2.4=h5eee18b_1
- libxcb=1.15=h7f8727e_0
- libxml2=2.10.3=hcbfbd50_0
- lz4-c=1.9.4=h6a678d5_0
- make=4.2.1=h1bed415_1
- ncurses=6.4=h6a678d5_0
- openssl=3.0.9=h7f8727e_0
- pango=1.50.7=h05da053_0
- pcre=8.45=h295c915_0
- pcre2=10.37=he7ceb23_1
- pip=23.1.2=py311h06a4308_0
- pixman=0.40.0=h7f8727e_1
- python=3.11.3=h955ad1f_1
- r-askpass=1.1=r42h76d94ec_0
- r-assertthat=0.2.1=r42hc72bb7e_4
- r-base=4.2.0=h1ae530e_0
- r-base64enc=0.1_3=r42h57805ef_1006
- r-brew=1.0_8=r42hc72bb7e_2
- r-brio=1.1.3=r42h76d94ec_0
- r-bslib=0.5.0=r42hc72bb7e_1
- r-cachem=1.0.6=r42h76d94ec_0
- r-callr=3.7.3=r42hc72bb7e_1
- r-cli=3.6.1=r42ha503ecb_1
- r-clipr=0.8.0=r42hc72bb7e_2
- r-commonmark=1.8.0=r42h76d94ec_0
- r-cpp11=0.4.3=r42hc72bb7e_1
- r-crayon=1.5.2=r42hc72bb7e_2
- r-credentials=1.3.2=r42hc72bb7e_2
- r-curl=4.3.2=r42h76d94ec_0
- r-desc=1.4.2=r42hc72bb7e_2
- r-devtools=2.4.5=r42hc72bb7e_1
- r-diffobj=0.3.5=r42h76d94ec_0
- r-digest=0.6.29=r42h884c59f_0
- r-downlit=0.4.2=r42hc72bb7e_2
- r-ellipsis=0.3.2=r42h76d94ec_0
- r-evaluate=0.21=r42hc72bb7e_1
- r-fansi=1.0.3=r42h76d94ec_0
- r-fastmap=1.1.0=r42h884c59f_0
- r-fontawesome=0.5.1=r42hc72bb7e_1
- r-fs=1.5.2=r42h884c59f_0
- r-gert=1.6.0=r42h76d94ec_0
- r-gh=1.4.0=r42hc72bb7e_1
- r-gitcreds=0.1.2=r42hc72bb7e_2
- r-glue=1.6.2=r42h76d94ec_0
- r-highr=0.10=r42hc72bb7e_1
- r-htmltools=0.5.5=r42ha503ecb_1
- r-htmlwidgets=1.6.2=r42hc72bb7e_1
- r-httpuv=1.6.11=r42ha503ecb_1
- r-httr=1.4.6=r42hc72bb7e_1
- r-httr2=0.2.3=r42hc72bb7e_1
- r-ini=0.3.1=r42hc72bb7e_1005
- r-jquerylib=0.1.4=r42hc72bb7e_2
- r-jsonlite=1.8.0=r42h76d94ec_0
- r-knitr=1.39=r42h6115d3f_0
- r-later=1.3.1=r42ha503ecb_1
- r-lifecycle=1.0.1=r42h142f84f_0
- r-magrittr=2.0.3=r42h76d94ec_0
- r-memoise=2.0.1=r42hc72bb7e_2
- r-mime=0.12=r42h76d94ec_0
- r-miniui=0.1.1.1=r42hc72bb7e_1004
- r-openssl=2.0.2=r42h76d94ec_0
- r-pillar=1.9.0=r42hc72bb7e_1
- r-pkgbuild=1.4.2=r42hc72bb7e_0
- r-pkgconfig=2.0.3=r42hc72bb7e_3
- r-pkgdown=2.0.7=r42hc72bb7e_1
- r-pkgload=1.3.2=r42hc72bb7e_1
- r-praise=1.0.0=r42hc72bb7e_1007
- r-prettyunits=1.1.1=r42hc72bb7e_3
- r-processx=3.7.0=r42h06615bd_1
- r-profvis=0.3.8=r42h57805ef_0
- r-promises=1.2.0.1=r42ha503ecb_2
- r-ps=1.7.0=r42h76d94ec_0
- r-purrr=0.3.4=r42h76d94ec_0
- r-r6=2.5.1=r42hc72bb7e_2
- r-ragg=1.2.5=r42hd65d3ba_0
- r-rappdirs=0.3.3=r42h76d94ec_0
- r-rcmdcheck=1.4.0=r42h785f33e_2
- r-rcpp=1.0.10=r42ha503ecb_1
- r-rematch2=2.1.2=r42hc72bb7e_3
- r-remotes=2.4.2=r42hc72bb7e_2
- r-rlang=1.1.1=r42ha503ecb_1
- r-rmarkdown=2.14=r42h6115d3f_0
- r-roxygen2=7.2.3=r42ha503ecb_1
- r-rprojroot=2.0.3=r42hc72bb7e_0
- r-rstudioapi=0.14=r42hc72bb7e_2
- r-rversions=2.1.2=r42hc72bb7e_2
- r-sass=0.4.6=r42ha503ecb_1
- r-sessioninfo=1.2.2=r42hc72bb7e_2
- r-shiny=1.7.4=r42h785f33e_1
- r-sourcetools=0.1.7_1=r42ha503ecb_1
- r-stringi=1.7.6=r42h884c59f_0
- r-stringr=1.4.0=r42h6115d3f_0
- r-sys=3.4=r42h76d94ec_0
- r-systemfonts=1.0.4=r42haf97adc_2
- r-testthat=3.1.4=r42h884c59f_0
- r-textshaping=0.3.6=r42h884c59f_0
- r-tibble=3.1.7=r42h76d94ec_0
- r-tinytex=0.45=r42hc72bb7e_1
- r-urlchecker=1.0.1=r42hc72bb7e_2
- r-usethis=2.2.1=r42hc72bb7e_0
- r-utf8=1.2.2=r42h76d94ec_0
- r-vctrs=0.4.1=r42h884c59f_0
- r-waldo=0.5.1=r42hc72bb7e_1
- r-whisker=0.4.1=r42hc72bb7e_1
- r-withr=2.5.0=r42hc72bb7e_2
- r-xfun=0.31=r42h76d94ec_0
- r-xml2=1.3.3=r42h884c59f_0
- r-xopen=1.0.0=r42hc72bb7e_1005
- r-xtable=1.8_4=r42hc72bb7e_5
- r-yaml=2.3.5=r42h76d94ec_0
- r-zip=2.2.0=r42h76d94ec_0
- readline=8.2=h5eee18b_0
- setuptools=67.8.0=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- sysroot_linux-64=2.17=h57e8cba_10
- tk=8.6.12=h1ccaba5_0
- tktable=2.10=h14c3975_0
- wheel=0.38.4=py311h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- zstd=1.5.5=hc292b87_0
# NOTE: the machine-specific `prefix:` entry written by `conda env export` has been
# removed on purpose. It pointed at the exporting user's home directory and is not
# needed to recreate the environment (the target location is chosen at creation time).
41 changes: 41 additions & 0 deletions modules/fishhook/1.0/config/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Default configuration for the fishhook module (version 1.0).
# Every "_UPDATE_" placeholder must be overridden in the project configuration.
lcr-modules:

    fishhook:
        inputs:
            # Available wildcards: {seq_type}
            master_maf: "_UPDATE_"
            sample_sets: "_UPDATE_"

        sample_set: ["_UPDATE_"]
        seq_types: ["genome", "capture"]
        include_non_coding: True
        prepare_mafs: "{SCRIPTSDIR}/generate_smg_inputs/1.0/generate_smg_inputs.R"
        src_dir: "{MODSDIR}/src/"

        options:
            include_silent_mutation: False # Include silent mutations in the MAF file
            tiles_size: 1000 # Size of the tiles the genome is split into; leave blank if you want to use a gene list as hypotheses instead
            gene_list: "" # If you do not provide a tile size, provide a gene list in GTF format (e.g. GENCODE v19 genes for GRCh37: gencode.v19.genes.gtf) so FishHook can do a gene-centric analysis
            gene_list_only_protein_coding: True # Only include protein-coding genes from the gene list
            covariates: "" # Supply a covariate file in BED format; leave blank to run FishHook without covariates

        conda_envs:
            prepare_mafs: "{REPODIR}/envs/gatk/gatkR.yaml"
            fishhook: "{MODSDIR}/envs/fishhook.yaml"

        threads:
            fishhook: 1

        resources:
            fishhook:
                mem_mb: 16000

        pairing_config:
            genome:
                run_paired_tumours: True
                run_unpaired_tumours_with: "unmatched_normal"
                run_paired_tumours_as_unpaired: False
            capture:
                run_paired_tumours: True
                run_unpaired_tumours_with: "unmatched_normal"
                run_paired_tumours_as_unpaired: False
1 change: 1 addition & 0 deletions modules/fishhook/1.0/envs/fishhook.yaml
171 changes: 171 additions & 0 deletions modules/fishhook/1.0/fishhook.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env snakemake


##### ATTRIBUTION #####


# Original Author: Jacky Yiu
# Module Author: Jacky Yiu
# Contributors: N/A


##### SETUP #####

# Import package with useful functions for developing analysis modules
import oncopipe as op

# Dependency guard: abort early when the installed oncopipe is older than the
# version this module needs. Copy this whole section into any module that uses
# new features in oncopipe.
# NOTE(review): `sys` is not imported in this section; presumably it is already
# available in the Snakefile namespace -- confirm.
import pkg_resources

min_oncopipe_version = "1.0.11"

try:
    from packaging import version
except ModuleNotFoundError:
    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")

# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe

current_version = pkg_resources.get_distribution("oncopipe").version
if version.parse(current_version) < version.parse(min_oncopipe_version):
    # Both messages are wrapped in the same ANSI escape sequences and printed on separate lines
    print(
        '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m',
        '\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m',
        sep="\n",
    )
    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")

# End of dependency checking section

# Initialise the module through oncopipe and keep the module-specific
# configuration in `CFG` (a shortcut to `config["lcr-modules"]["fishhook"]`).
# The rules below read their directories from `CFG["dirs"]` and `CFG["logs"]`
# using the subdirectory names passed here.
CFG = op.setup_module(name="fishhook", version="1.0", subdirectories=["inputs", "fishhook", "outputs"])

# Rules listed here are executed on the submitting host rather than being
# dispatched as jobs when the workflow runs on a compute cluster.
# NOTE(review): `_run_fishhook` is listed here although it requests
# `mem_mb: 16000` by default -- confirm that running it locally is intended.
localrules:
    _fishhook_input_maf,
    _fishhook_input_subsets,
    _fishhook_prepare_maf,
    _install_fishhook,
    _run_fishhook,
    _fishhook_output_tsv,
    _fishhook_all,


##### RULES #####


# Link the configured master MAF (one per {seq_type}) into the module's
# inputs directory so that downstream rules read from a stable location
rule _fishhook_input_maf:
    input:
        maf = CFG["inputs"]["master_maf"],
    output:
        maf = CFG["dirs"]["inputs"] + "maf/{seq_type}/input.maf",
    run:
        op.absolute_symlink(input.maf, output.maf)

# Link the configured sample-set table into the module's inputs directory
rule _fishhook_input_subsets:
    input:
        sample_sets = CFG["inputs"]["sample_sets"],
    output:
        sample_sets = CFG["dirs"]["inputs"] + "sample_sets/sample_sets.tsv",
    run:
        op.absolute_symlink(input.sample_sets, output.sample_sets)

# Build the per-sample-set MAF that FishHook consumes.
# The helper R script receives, in order: the linked master MAF(s) for every
# configured seq_type, the sample-set table, the output directory, the sample
# set name, the literal mode string "FishHook", and the include-non-coding flag.
rule _fishhook_prepare_maf:
    input:
        maf = expand(
            str(rules._fishhook_input_maf.output.maf),
            seq_type=CFG["seq_types"],
            allow_missing=True,
        ),
        # ancient(): timestamp changes of the sample-set table alone do not trigger a re-run
        sample_sets = ancient(str(rules._fishhook_input_subsets.output.sample_sets)),
    output:
        maf = temp(CFG["dirs"]["inputs"] + "maf/{sample_set}.maf"),
        contents = CFG["dirs"]["inputs"] + "maf/{sample_set}.maf.content",
    log:
        stdout = CFG["logs"]["inputs"] + "{sample_set}/prepare_maf.stdout.log",
        stderr = CFG["logs"]["inputs"] + "{sample_set}/prepare_maf.stderr.log",
    conda:
        CFG["conda_envs"]["prepare_mafs"]
    params:
        # Upper-cased string form of the boolean (e.g. "TRUE") passed on the command line
        include_non_coding = str(CFG["include_non_coding"]).upper(),
        script = CFG["prepare_mafs"],
    shell:
        op.as_one_line("""
        Rscript {params.script}
        {input.maf}
        {input.sample_sets}
        $(dirname {output.maf})/
        {wildcards.sample_set}
        FishHook
        {params.include_non_coding}
        > {log.stdout} 2> {log.stderr}
        """)


# Install fishHook (and its GitHub-hosted dependencies) into the module's conda
# environment. fishHook is only available from GitHub, not through
# conda/CRAN/BiocManager, hence the explicit install step.
# A sentinel file is touched only when the R command exits successfully.
# NOTE(review): the sentinel lives in the results directory, not in the conda
# environment itself -- if the environment is rebuilt the sentinel may be stale.
rule _install_fishhook:
    output:
        complete = CFG["dirs"]["inputs"] + "fishhook_installed.success"
    conda:
        CFG["conda_envs"]["fishhook"]
    log:
        input = CFG["logs"]["inputs"] + "install_fishhook.log"
    shell:
        # stderr is redirected as well (2>&1): R writes installation progress and
        # error messages to stderr, which would otherwise be missing from the log.
        """
        R -q -e 'devtools::install_github(c("jokergoo/ComplexHeatmap","mskilab/gTrack", "mskilab/fishHook"))' >> {log.input} 2>&1 &&
        touch {output.complete}"""


# Run FishHook on the prepared MAF of one sample set via the module's R script.
# The run depends on the installation sentinel so that fishHook is installed
# in the conda environment before the script is executed.
rule _run_fishhook:
    input:
        # Reference the install rule's output directly instead of rebuilding its path by hand;
        # ancient() keeps a re-touched sentinel from triggering re-runs
        fishhook = ancient(str(rules._install_fishhook.output.complete)),
        maf = str(rules._fishhook_prepare_maf.output.maf)
    output:
        # NOTE(review): the output is named `tsv` but the file carries a `.maf` extension -- confirm intended
        tsv = CFG["dirs"]["fishhook"] + "{sample_set}/fishhook.output.maf"
    conda:
        CFG["conda_envs"]["fishhook"]
    threads:
        CFG["threads"]["fishhook"]
    resources:
        **CFG["resources"]["fishhook"]
    params:
        tiles_size = CFG["options"]["tiles_size"],
        # Spelling of `coveriate` is kept as-is: the R script presumably looks this name up -- verify before renaming
        coveriate = CFG["options"]["covariates"],
        include_silent = CFG["options"]["include_silent_mutation"],
        gene_list = CFG["options"]["gene_list"],
        gene_list_only_protein_coding = CFG["options"]["gene_list_only_protein_coding"]
    script:
        # NOTE(review): directory is spelled "scr" here while the config defines src_dir as ".../src/" -- confirm the path
        "scr/R/run_fishhook.R"


# Expose the FishHook result of each sample set in the module's outputs
# directory through a relative symlink
rule _fishhook_output_tsv:
    input:
        tsv = str(rules._run_fishhook.output.tsv),
    output:
        tsv = CFG["dirs"]["outputs"] + "tsv/{sample_set}/{sample_set}.fishhook.tsv",
    run:
        op.relative_symlink(input.tsv, output.tsv, in_module=True)


# Target rule: requests the output symlink for every configured sample set,
# which in turn drives all upstream rules of the module
rule _fishhook_all:
    input:
        expand(
            str(rules._fishhook_output_tsv.output.tsv),
            sample_set=CFG["sample_set"],
        )


##### CLEANUP #####


# Perform some clean-up tasks, including storing the module-specific
# configuration on disk and deleting the `CFG` variable.
# This must stay the last statement of the module: every rule above reads `CFG`.
op.cleanup_module(CFG)
1 change: 1 addition & 0 deletions modules/fishhook/1.0/schemas/base-1.0.yaml

0 comments on commit 4ebe1be

Please sign in to comment.