Skip to content

Commit

Permalink
Merge pull request #22 from NBISweden/develop
Browse files Browse the repository at this point in the history
v2.3
  • Loading branch information
johnne committed Jun 10, 2021
2 parents 513685a + 5569f9b commit e007b32
Show file tree
Hide file tree
Showing 29 changed files with 806 additions and 945 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/main.yml
Expand Up @@ -63,6 +63,9 @@ jobs:
run: |
bash .test/scripts/prep_eggnog.sh
bash .test/scripts/prep_taxonomy.sh
mkdir -p results/annotation/emapper-test
cp .test/data/emapper-out.tsv results/annotation/emapper-test/emapper-test.emapper.annotations
snakemake --config sample_list=.test/config/emapper-test.tsv --use-conda -j 2 --configfile .test/config/annotate.yaml -p results/annotation/emapper-test/{kos,modules,pathways,enzymes}.parsed.tsv
snakemake --use-conda -j 2 --configfile .test/config/annotate.yaml -p annotate
rm -r results/assembly examples/data/sample*
# Test Metabat2 with Megahit (200k reads/sample)
Expand Down
2 changes: 2 additions & 0 deletions .test/config/emapper-test.tsv
@@ -0,0 +1,2 @@
sample unit assembly fq1 fq2
sample1 1 emapper-test R1 R2
2 changes: 2 additions & 0 deletions .test/config/kraken.yaml
Expand Up @@ -224,6 +224,7 @@ kraken:
# download a prebuilt kraken2 database from the CCB servers
# choose from "minikraken_8GB","16S_Greengenes","16S_RDP","16S_Silva"
prebuilt: "16S_Greengenes"
prebuilt_url: "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz"
# if you already have access to a built kraken database you may specify the
# database path here (path must contain hash.k2d, opts.k2d and taxo.k2d files)
custom: ""
Expand All @@ -241,6 +242,7 @@ centrifuge:
prebuilt: "p_compressed+h+v"
# if you already have access to a built centrifuge database you may specify
# the path to it under "custom" below.
prebuilt_url: ""
custom: ""
# minimum score for classifications by centrifuge.
# because centrifuge doesn't have a filtering algorithm,
Expand Down
5 changes: 5 additions & 0 deletions .test/data/emapper-out.tsv
@@ -0,0 +1,5 @@
#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs
micpun 296587.XP_002502426.1 3.47e-243 667.0 COG0837@1|root,2QSB1@2759|Eukaryota,37UJV@33090|Viridiplantae,34HS7@3041|Chlorophyta 3041|Chlorophyta G Glucokinase - - - - - - - - - - - iRC1080.CRv4_Au5_s1_g1623_t1 Glucokinase
tr|K4QD32|K4QD32_STREQ 759913.SDSE_1735 7.26e-208 575.0 COG1940@1|root,COG1940@2|Bacteria,1UZ80@1239|Firmicutes,4HD5J@91061|Bacilli,1M9XA@119603|Streptococcus dysgalactiae group 91061|Bacilli GK ROK family ypbG - 2.7.1.2 ko:K00845 ko00010,ko00052,ko00500,ko00520,ko00521,ko00524,ko01100,ko01110,ko01120,ko01130,ko01200,map00010,map00052,map00500,map00520,map00521,map00524,map01100,map01110,map01120,map01130,map01200 M00001,M00549 R00299,R01600,R01786 RC00002,RC00017 ko00000,ko00001,ko00002,ko01000 - - - ROK
1000565.METUNv1_03812 1000565.METUNv1_03812 9.6e-207 714.5 COG0012@1|root,COG0012@2|Bacteria,1MVM4@1224|Proteobacteria,2VJ1W@28216|Betaproteobacteria,2KUD2@206389|Rhodocyclales 206389|Rhodocyclales J ATPase that binds to both the 70S ribosome and the 50S ribosomal subunit in a nucleotide-independent manner ychF - - ko:K06942 - - - - ko00000,ko03009 - - - MMR_HSR1,YchF-GTPase_C
362663.ECP_0061 362663.ECP_0061 0.0 1624.8 COG0417@1|root,COG0417@2|Bacteria,1MVY9@1224|Proteobacteria,1RMQ1@1236|Gammaproteobacteria,3XPER@561|Escherichia 1236|Gammaproteobacteria L DNA polymerase polB GO:0003674,GO:0003824,GO:0003887,GO:0004518,GO:0004527,GO:0004529,GO:0004536,GO:0006139,GO:0006259,GO:0006260,GO:0006261,GO:0006281,GO:0006725,GO:0006807,GO:0006950,GO:0006974,GO:0007154,GO:0008150,GO:0008152,GO:0008296,GO:0008408,GO:0009058,GO:0009059,GO:0009432,GO:0009605,GO:0009987,GO:0009991,GO:0016740,GO:0016772,GO:0016779,GO:0016787,GO:0016788,GO:0016796,GO:0016895,GO:0018130,GO:0019438,GO:0031668,GO:0033554,GO:0034061,GO:0034641,GO:0034645,GO:0034654,GO:0043170,GO:0044237,GO:0044238,GO:0044249,GO:0044260,GO:0044271,GO:0045004,GO:0045005,GO:0046483,GO:0050896,GO:0051716,GO:0071496,GO:0071704,GO:0071897,GO:0090304,GO:0090305,GO:0140097,GO:1901360,GO:1901362,GO:1901576 2.7.7.7 ko:K02336 - - - - ko00000,ko01000,ko03400 - - - DNA_pol_B,DNA_pol_B_exo1
3 changes: 3 additions & 0 deletions Dockerfile
Expand Up @@ -9,6 +9,9 @@ SHELL ["/bin/bash", "-c"]
# Set workdir
WORKDIR /analysis

# Set tmpdir
ENV TMPDIR="/scratch"

RUN apt-get update && \
apt-get install -y --no-install-recommends curl && apt-get clean

Expand Down
9 changes: 7 additions & 2 deletions config/config.yaml
Expand Up @@ -60,6 +60,9 @@ cutadapt:
rev_adapter_sequence: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
# maximum allowed error rate as value between 0 and 1 (no. of errors divided by length of matching region)
error_rate: 0.1
# extra parameters to pass to cutadapt
# for instance you can run cutadapt with quality trimming by specifying "-q 10" here
extra_params: ""

# parameters for sortmerna
sortmerna:
Expand Down Expand Up @@ -254,9 +257,10 @@ kraken:
standard_db: False
# download a prebuilt kraken2 database from the CCB servers
# choose from "minikraken_8GB","16S_Greengenes","16S_RDP","16S_Silva"
prebuilt: "minikraken_8GB"
prebuilt: "Standard-8"
# if you already have access to a built kraken database you may specify the
# database path under "custom" below (path must contain hash.k2d, opts.k2d and
# taxo.k2d files)
prebuilt_url: "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_8gb_20200919.tar.gz"
custom: ""
# should kraken2 run with reduced memory requirements?
# setting reduce_memory to True makes kraken2 run in "--memory-mapping" mode
Expand All @@ -269,9 +273,10 @@ centrifuge:
# CCB servers. choose from:
# "p+h+v", "nt_2018_2_12", "nt_2018_3_3", "p_compressed+h+v" or "p_compressed_2018_4_15"
# see http://ccb.jhu.edu/software/centrifuge/ for more info
prebuilt: "p_compressed+h+v"
prebuilt: "refseq-compressed"
# if you already have access to a built centrifuge database you may specify
# the path to it under "custom" below.
prebuilt_url: "https://genome-idx.s3.amazonaws.com/centrifuge/p_compressed%2Bh%2Bv.tar.gz"
custom: ""
# minimum score for classifications by centrifuge.
# because centrifuge doesn't have a filtering algorithm,
Expand Down
9 changes: 5 additions & 4 deletions environment.yml
Expand Up @@ -7,10 +7,11 @@ channels:

dependencies:
# core packages for workflow
- python=3.7.6
- snakemake-minimal=5.18.1
- pandas=1.0.1
- biopython=1.76
- python>=3.7
- snakemake-minimal>=5.18
- mamba
- pandas
- biopython

# packages required to create snakemake reports
- pygments
Expand Down
6 changes: 2 additions & 4 deletions workflow/Snakefile
Expand Up @@ -21,13 +21,11 @@ def all_input(wildcards):
wanted_input = []

if config["run_preprocessing"] or config["preprocessing"]["fastqc"]:
wanted_input.append(opj(config["paths"]["results"], "report",
"samples_report.html"))
wanted_input.append(results+"/report/samples_report.html")

if config["run_assembly"]:
# add assembly stats
wanted_input.append(opj(config["paths"]["results"], "report",
"assembly", "assembly_stats.pdf"))
wanted_input.append(results+"/report/assembly/assembly_stats.pdf")
# get annotation input
wanted_input += annotation_input(config, assemblies)
# get binning input
Expand Down
4 changes: 2 additions & 2 deletions workflow/envs/annotation.yml
Expand Up @@ -3,9 +3,9 @@ channels:
- conda-forge
- defaults
dependencies:
- python=2.7.15
- python=3.7.6
- prodigal=2.6.3
- pfam_scan=1.6
- eggnog-mapper=2.0.1
- eggnog-mapper=2.1.2
- infernal=1.1.2
- trnascan-se=2.0.5
1 change: 1 addition & 0 deletions workflow/envs/concoct.yml
Expand Up @@ -6,3 +6,4 @@ dependencies:
- python=3.7.6
- biopython=1.76
- concoct=1.1.0
- samtools=1.9
1 change: 1 addition & 0 deletions workflow/envs/metabat.yml
Expand Up @@ -5,3 +5,4 @@ channels:
- defaults
dependencies:
- metabat2=2.14
- libdeflate=1.0
2 changes: 1 addition & 1 deletion workflow/envs/plotting.yml
Expand Up @@ -4,6 +4,6 @@ channels:
- defaults
dependencies:
- python=3.8.2
- seaborn=0.10.1
- seaborn=0.11.1
- pandas=1.0.3
- jupyter=1.0.0
3 changes: 2 additions & 1 deletion workflow/envs/preprocess.yml
Expand Up @@ -10,4 +10,5 @@ dependencies:
- fastqc=0.11.9
- multiqc=1.8
- fastuniq=1.1
- bowtie2=2.3.5.1
- bowtie2=2.4.1
- tbb=2020.3
1 change: 1 addition & 0 deletions workflow/envs/quantify.yml
Expand Up @@ -8,3 +8,4 @@ dependencies:
- samtools=1.9
- picard=2.21.9
- bowtie2=2.4.1
- tbb=2020.3

0 comments on commit e007b32

Please sign in to comment.