Skip to content

Commit

Permalink
feat: QC (#6)
Browse files Browse the repository at this point in the history
* added qc standard

* added_qc

* reduce conda envs

* fixed linting and formatting

* changed .config in testing and del  normFactor log

* fixed formatting

* fixed spp

* added samtools to env

* fixed qc and peak calling for both se and pe
  • Loading branch information
DavideBrex committed Nov 22, 2023
1 parent d05a8dc commit b956ba0
Show file tree
Hide file tree
Showing 21 changed files with 1,281 additions and 47 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ report/**
Dockerfile
config/samples_sheet_ER.csv
config/samples_sheet_bap1.csv
dag.svg
report.html
config/config_bap1.yaml
config/samples_sheet_bap1Short.csv
14 changes: 9 additions & 5 deletions .test/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ trimming: true

aligner: bowtie

#type of normalization to perform to generate the bigwig files (one from RAW, Orlando , RX-Input)
normalization_type: "RX-Input"

resources:
ref:
#if bowtie index is not available leave only "" , it will be created
Expand Down Expand Up @@ -50,17 +53,18 @@ params:
memory: 1
deeptools:
#change this according to: https://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html
effective_genome_length: 2701495761
effective_genome_length: 11000000
#read extension, only for SE
read_extension: 75
peakCalling:
#specify the ucsc genome name (hg38, mm10, dm6, etc.)
genome: "hg38"
chrom_sizes: "data/hg38.chromsizes"
macs2:
pvalue: 1e-5
pvalue: 0.05
epic2:
fdr: 0.0005
#effective genome fraction, change this according to https://github.com/biocore-ntnu/epic2/blob/master/epic2/effective_sizes/hg38_50.txt
# in the github it is indicated as Effective genome size, and you need to know the read length in order to set it
egf: 0.869
fdr: 0.05
edd:
fdr: 0.05

Expand Down
12 changes: 8 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ trimming: true

aligner: bowtie

#type of normalization to perform to generate the bigwig files (one from RAW, Orlando , RX-Input)
normalization_type: "RX-Input"

resources:
ref:
#if bowtie index is not available leave only "" , it will be created
Expand Down Expand Up @@ -54,13 +57,14 @@ params:
#read extension, only for SE
read_extension: 75
peakCalling:
#specify the ucsc genome name (hg38, mm10, dm6, etc.)
genome: "hg38"
chrom_sizes: ".test/data/hg38.chromsizes"
macs2:
pvalue: 1e-5
pvalue: 0.05
epic2:
fdr: 0.0005
#effective genome fraction, change this according to https://github.com/biocore-ntnu/epic2/blob/master/epic2/effective_sizes/hg38_50.txt
# in the github it is indicated as Effective genome size, and you need to know the read length in order to set it
egf: 0.869
fdr: 0.05
edd:
fdr: 0.05

Expand Down
4 changes: 2 additions & 2 deletions config/samples_sheet.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ H3K4me3_untreated,1,H3K4me3,Input_untreated,1,narrow,.test/data/fastq/H3K4me3_un
H3K4me3_untreated,1,H3K4me3,Input_untreated,1,narrow,.test/data/fastq/H3K4me3_untreated-1_Homo_sapiens_L2.fastq.gz,
H3K4me3_untreated,2,H3K4me3,Input_untreated,1,narrow,.test/data/fastq/H3K4me3_untreated-2_Homo_sapiens_L1.fastq.gz,
H3K4me3_untreated,2,H3K4me3,Input_untreated,1,narrow,.test/data/fastq/H3K4me3_untreated-2_Homo_sapiens_L2.fastq.gz,
H3K4me3_EGF,1,H3K4me3,Input_EGF,1,narrow,.test/data/fastq/H3K4me3_EGF-1_Homo_sapiens.fastq.gz,
H3K4me3_EGF,2,H3K4me3,Input_EGF,2,narrow,.test/data/fastq/H3K4me3_EGF-2_Homo_sapiens.fastq.gz,
H3K4me3_EGF,1,H3K4me3,Input_EGF,1,broad,.test/data/fastq/H3K4me3_EGF-1_Homo_sapiens.fastq.gz,
H3K4me3_EGF,2,H3K4me3,Input_EGF,2,broad,.test/data/fastq/H3K4me3_EGF-2_Homo_sapiens.fastq.gz,
H3K9me2_untreated,1,H3K9me2,Input_untreated,1,very-broad,.test/data/fastq/H3K9me2_untreated-1_Homo_sapiens.fastq.gz,
H3K9me2_EGF,1,H3K9me2,Input_EGF,1,very-broad,.test/data/fastq/H3K9me2_EGF-1_Homo_sapiens.fastq.gz,
Input_untreated,1,,,,,.test/data/fastq/Input-untreated-1_Homo_sapiens_ChIP-Seq.fastq.gz,
Expand Down
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ include: "rules/trim.smk"
include: "rules/align.smk"
include: "rules/createBigWigs.smk"
include: "rules/callPeaks.smk"
include: "rules/qc.smk"


rule all:
Expand Down
10 changes: 10 additions & 0 deletions workflow/envs/Renv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: Renv
channels:
- conda-forge
- bioconda
- nodefaults ## block user/system channels
dependencies:
- r-base=4.1.0
- r-spp
- samtools
- gawk
7 changes: 0 additions & 7 deletions workflow/envs/pysam.yaml

This file was deleted.

8 changes: 8 additions & 0 deletions workflow/envs/qc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- fastqc
- multiqc
- deeptools
6 changes: 5 additions & 1 deletion workflow/envs/epic2.yaml → workflow/envs/various.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
name: epic2
name: various
channels:
- conda-forge
- bioconda
dependencies:
- epic2
- pysam
- chromap
- bedtools
- chip-r

6 changes: 4 additions & 2 deletions workflow/rules/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ if config["aligner"] == "bowtie":
outdir
),
benchmark:
"{}results/.benchmarks/{{id}}.align.benchmark.txt".format(outdir)
"{}results/.benchmarks/{{id}}_spike.align.benchmark.txt".format(outdir)
shell:
"""
bowtie -p {threads} {params.bowtie} -x {params.index} {params.inputsel} 2> {log.align} \
Expand All @@ -90,8 +90,10 @@ rule clean_spike:
sample_ref="{}results/bam/{{id}}.clean.bam".format(outdir),
sample_spike="{}results/bam_spike/{{id}}_spike.clean.bam".format(outdir),
conda:
"../envs/pysam.yaml"
"../envs/various.yaml"
log:
"{}results/logs/spike/{{id}}.removeSpikeDups".format(outdir),
benchmark:
"{}results/.benchmarks/{{id}}.clean_spike.benchmark.txt".format(outdir)
script:
"../scripts/remove_spikeDups.py"
21 changes: 18 additions & 3 deletions workflow/rules/callPeaks.smk
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ rule macs2_callNarrowPeak:
+ " --pvalue "
+ config["params"]["peakCalling"]["macs2"]["pvalue"]
+ " --keep-dup all",
benchmark:
"{}results/.benchmarks/{{sample}}.macs2.benchmark.txt".format(outdir)
wrapper:
"v2.6.0/bio/macs2/callpeak"

Expand All @@ -38,22 +40,30 @@ rule epic2_callBroadPeaks:
broadPeaks="{}results/peakCalling/epic2/{{sample}}_broadPeaks.bed".format(
outdir
),
summary="{}results/logs/peakCalling/epic2/{{sample}}_summary.txt".format(outdir),
log:
"{}results/logs/peakCalling/epic2/{{sample}}_callpeak.log".format(outdir),
params:
fdr=config["params"]["peakCalling"]["epic2"]["fdr"],
genome=config["params"]["peakCalling"]["genome"],
egf=config["params"]["peakCalling"]["epic2"]["egf"],
chrom_size=config["params"]["peakCalling"]["chrom_sizes"],
benchmark:
"{}results/.benchmarks/{{sample}}.epic2.benchmark.txt".format(outdir)
conda:
"../envs/epic2.yaml"
"../envs/various.yaml"
shell:
"""
epic2 -t {input.treatment} \
-c {input.control} \
--guess-bampe \
-fdr {params.fdr} \
--genome {params.genome} \
--chromsizes {params.chrom_size} \
--effective-genome-fraction {params.egf} \
--output {output.broadPeaks} \
2> {log}
# Count the number of peaks and create a summary file
echo "Total Peaks: $(wc -l < {output.broadPeaks})" > {output.summary}
"""


Expand All @@ -67,13 +77,16 @@ rule edd_callVeryBroadPeaks:
veryBroadPeaks="{}results/peakCalling/edd/{{sample}}/{{sample}}_peaks.bed".format(
outdir
),
summary="{}results/logs/peakCalling/edd/{{sample}}_summary.txt".format(outdir),
log:
"{}results/logs/peakCalling/edd/{{sample}}_callpeak.log".format(outdir),
params:
chrom_size=config["params"]["peakCalling"]["chrom_sizes"],
fdr=config["params"]["peakCalling"]["edd"]["fdr"],
blacklist=config["resources"]["ref"]["blacklist"],
output_dir="{}results/peakCalling/edd/{{sample}}".format(outdir),
benchmark:
"{}results/.benchmarks/{{sample}}.edd.benchmark.txt".format(outdir)
conda:
"../envs/edd.yaml"
threads: 8
Expand All @@ -88,4 +101,6 @@ rule edd_callVeryBroadPeaks:
2> {log}
mv {params.output_dir}/log.txt {params.output_dir}/{wildcards.sample}_runlog.txt
mv {params.output_dir}/{wildcards.sample}_runlog.txt $(dirname {log})
# Count the number of peaks and create a summary file
echo "Total Peaks: $(wc -l < {output.veryBroadPeaks})" > {output.summary}
"""
Loading

0 comments on commit b956ba0

Please sign in to comment.