Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
improving multiqc report, deeptools runtime, and other details
Browse files Browse the repository at this point in the history
  • Loading branch information
tovahmarkowitz committed Apr 22, 2019
1 parent 282b258 commit 51e94d6
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 28 deletions.
8 changes: 6 additions & 2 deletions Results-template/Scripts/createtable
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
'''
Author: Skyler Kuhn
Date created: 08/18/2018
Last modified: 08/23/2018
Last modified: 04/17/2019 by Tovah Markowitz: markowitzte@nih.gov
Email: kuhnsa@nih.gov
About: This program takes standard input from the concatenation of each *qc.metric file.The results
Expand Down Expand Up @@ -30,10 +30,14 @@ def file2table():
pass

df = pd.DataFrame(tabledict)
df.index.name = 'SampleName'
df.reset_index(inplace=True)
#print(df[['NSC', 'FRiP', 'PCB1', 'PCB2', 'RSC']]) #re-order columns
#cols = df.columns.tolist() # view df columns names
#orderedcols = ordercolumns(cols)
print(df.to_string())
# print(df.to_string())
print(df.to_string(index=False,justify="left"))


if __name__ == '__main__':
file2table()
Expand Down
7 changes: 5 additions & 2 deletions Results-template/Scripts/jaccard_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

def split_infiles(infiles):
# breaks the infile string with space-delimited file names and creates a list
infileList = infiles.split(" ")
infileList = infiles.strip("\'").strip('\"').split(" ")
if len(infileList) == 1:
infileList = infileList[0].split(";")
return(infileList)

def loop_jaccard(infileList, genomefile):
Expand All @@ -32,6 +34,7 @@ def loop_jaccard(infileList, genomefile):
out = []
for z in range(nfiles):
fileA = infileList[z]
print("fileA is: " + fileA)
a = BedTool(fileA)
a = a.sort(g=genomefile)
for y in range(z+1,nfiles):
Expand Down Expand Up @@ -68,7 +71,7 @@ def main():

parser = optparse.OptionParser(description=desc)

parser.add_option('-i', dest='infiles', default='', help='A space-delimited list of \
parser.add_option('-i', dest='infiles', default='', help='A space- or semicolon-delimited list of \
input files for analysis.')
parser.add_option('-o', dest='outfile', default='', help='The name of the output file \
where all the jaccard score information will be saved.')
Expand Down
4 changes: 2 additions & 2 deletions Results-template/Scripts/ngsqc_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def name_outimage(directory, ext, group):
"""this version uses ext to create the output file name
"""
if group == "":
outfileName = directory + "/NGSQC." + ext + ".pdf"
outfileName = directory + "/NGSQC." + ext + ".png"
else:
outfileName = directory + "/" + group + ".NGSQC." + ext + ".pdf"
outfileName = directory + "/" + group + ".NGSQC." + ext + ".png"
return outfileName

#############################################
Expand Down
54 changes: 32 additions & 22 deletions Rules/InitialChIPseqQC.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ if se == 'yes' :
expand(join(workpath,deeptools_dir,"{group}.metagene_profile.{ext}.pdf"), zip, group=deepgroups,ext=deepexts),
expand(join(workpath,deeptools_dir,"{group}.TSS_profile.{ext}.pdf"), zip, group=deepgroups,ext=deepexts),
# ngsqc
expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.pdf"),group=groups),
expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.png"),group=groups),
# preseq
expand(join(workpath,preseq_dir,"{name}.ccurve"),name=samples),
# QC Table
expand(join(workpath,"QC","{name}.nrf"), name=samples),
expand(join(workpath,"QC","{name}.qcmetrics"), name=samples),
join(workpath,"QCTable.txt"),
join(workpath,"QC","QCTable.txt"),


rule trim_se: # actually trim, filter polyX and remove black listed reads
Expand Down Expand Up @@ -327,13 +327,13 @@ if pe == 'yes':
expand(join(workpath,deeptools_dir,"{group}.metagene_profile.{ext}.pdf"), zip, group=deepgroups,ext=deepexts),
expand(join(workpath,deeptools_dir,"{group}.TSS_profile.{ext}.pdf"), zip, group=deepgroups,ext=deepexts),
# ngsqc
expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.pdf"),group=groups),
expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.png"),group=groups),
# preseq
expand(join(workpath,preseq_dir,"{name}.ccurve"),name=samples),
# QC Table
expand(join(workpath,"QC","{name}.nrf"), name=samples),
expand(join(workpath,"QC","{name}.qcmetrics"), name=samples),
join(workpath,"QCTable.txt"),
join(workpath,"QC","QCTable.txt"),


rule trim_pe: # trim, remove PolyX and remove BL reads
Expand Down Expand Up @@ -445,7 +445,6 @@ samtools flagstat {output.outbam2} > {output.flagstat2}
out6=join(workpath,bam_dir,"{name}.bwa.Q5.duplic"),
params:
rname='pl:dedup',
batch='--mem=98g --time=10:00:00 --gres=lscratch:800',
picardver=config['bin'][pfamily]['tool_versions']['PICARDVER'],
samtoolsver=config['bin'][pfamily]['tool_versions']['SAMTOOLSVER'],
javaram='16g',
Expand Down Expand Up @@ -482,7 +481,6 @@ rule ppqt:
pdf= join(workpath,bam_dir,"{name}.sorted{ext}pdf"),
params:
rname="pl:ppqt",
batch='--mem=24g --time=10:00:00 --gres=lscratch:800',
samtoolsver=config['bin'][pfamily]['tool_versions']['SAMTOOLSVER'],
rver=config['bin'][pfamily]['tool_versions']['RVER'],
run:
Expand All @@ -507,7 +505,6 @@ rule bam2bw:
outbw=join(workpath,bw_dir,"{name}.{ext}.RPGC.bw"),
params:
rname="pl:bam2bw",
batch='--mem=24g --time=10:00:00 --gres=lscratch:800',
reflen=config['references'][pfamily]['REFLEN'],
deeptoolsver=config['bin'][pfamily]['tool_versions']['DEEPTOOLSVER'],
run:
Expand Down Expand Up @@ -547,9 +544,9 @@ rule deeptools_prep:
bam=expand(join(workpath,bam_dir,"{name}.{ext}.bam"),name=samples,ext=extensions2),
bw2=expand(join(workpath,bw_dir,"{name}.sorted.Q5DD.RPGC.inputnorm.bw"),name=sampleswinput)
output:
expand(join(workpath,bw_dir,"{ext}.deeptools_prep"),ext=extensions),
expand(join(workpath,bam_dir,"{group}.{ext}.deeptools_prep"),group=groups,ext=extensions2),
expand(join(workpath,bw_dir,"{group2}.{ext2}.deeptools_prep"), zip, group2=deepgroups,ext2=deepexts),
temp(expand(join(workpath,bw_dir,"{ext}.deeptools_prep"),ext=extensions)),
temp(expand(join(workpath,bam_dir,"{group}.{ext}.deeptools_prep"),group=groups,ext=extensions2)),
temp(expand(join(workpath,bw_dir,"{group2}.{ext2}.deeptools_prep"), zip, group2=deepgroups,ext2=deepexts)),
params:
rname="pl:deeptools_prep",
batch="--mem=10g --time=1:00:00",
Expand Down Expand Up @@ -626,15 +623,15 @@ rule deeptools_fingerprint:
params:
rname="pl:deeptools_fingerprint",
deeptoolsver=config['bin'][pfamily]['tool_versions']['DEEPTOOLSVER'],
batch="--cpus-per-task=8"
nthreads="8"
run:
import re
commoncmd="module load {params.deeptoolsver}; module load python;"
listfile=list(map(lambda z:z.strip().split(),open(input.prep,'r').readlines()))
ext=listfile[0][0]
bams=listfile[1]
labels=listfile[2]
cmd="plotFingerprint -b "+" ".join(bams)+" --labels "+" ".join(labels)+" -p 4 --skipZeros --outQualityMetrics "+output.metrics+" --plotFile "+output.image
cmd="plotFingerprint -b "+" ".join(bams)+" --labels "+" ".join(labels)+" -p "+params.nthreads+" --skipZeros --outQualityMetrics "+output.metrics+" --plotFile "+output.image
if se == "yes":
cmd+=" -e 200"
shell(commoncmd+cmd)
Expand All @@ -649,15 +646,15 @@ rule deeptools_fingerprint_Q5DD:
params:
rname="pl:deeptools_fingerprint_Q5DD",
deeptoolsver=config['bin'][pfamily]['tool_versions']['DEEPTOOLSVER'],
batch="--cpus-per-task=8"
nthreads="8"
run:
import re
commoncmd="module load {params.deeptoolsver}; module load python;"
listfile=list(map(lambda z:z.strip().split(),open(input[0],'r').readlines()))
ext=listfile[0][0]
bams=listfile[1]
labels=listfile[2]
cmd="plotFingerprint -b "+" ".join(bams)+" --labels "+" ".join(labels)+" -p 4 --skipZeros --outQualityMetrics "+output.metrics+" --plotFile "+output.image+" --outRawCounts "+output.raw
cmd="plotFingerprint -b "+" ".join(bams)+" --labels "+" ".join(labels)+" -p "+params.nthreads+" --skipZeros --outQualityMetrics "+output.metrics+" --plotFile "+output.image+" --outRawCounts "+output.raw
if se == "yes":
cmd+=" -e 200"
shell(commoncmd+cmd)
Expand All @@ -676,7 +673,8 @@ rule deeptools_genes:
params:
rname="pl:deeptools_genes",
deeptoolsver=config['bin'][pfamily]['tool_versions']['DEEPTOOLSVER'],
prebed=config['references'][pfamily]['GENEINFO']
prebed=config['references'][pfamily]['GENEINFO'],
nthreads="16"
run:
import re
commoncmd="module load {params.deeptoolsver}; module load python;"
Expand All @@ -685,8 +683,8 @@ rule deeptools_genes:
bws=listfile[1]
labels=listfile[2]
cmd1="awk -v OFS='\t' -F'\t' '{{print $1, $2, $3, $5, \".\", $4}}' "+params.prebed+" > "+output.bed
cmd2="computeMatrix scale-regions -S "+" ".join(bws)+" -R "+output.bed+" -p 4 --upstream 1000 --regionBodyLength 2000 --downstream 1000 --skipZeros -o "+output.metamat+" --samplesLabel "+" ".join(labels)
cmd3="computeMatrix reference-point -S "+" ".join(bws)+" -R "+output.bed+" -p 4 --referencePoint TSS --upstream 3000 --downstream 3000 --skipZeros -o "+output.TSSmat+" --samplesLabel "+" ".join(labels)
cmd2="computeMatrix scale-regions -S "+" ".join(bws)+" -R "+output.bed+" -p "+params.nthreads+" --upstream 1000 --regionBodyLength 2000 --downstream 1000 --skipZeros -o "+output.metamat+" --samplesLabel "+" ".join(labels)
cmd3="computeMatrix reference-point -S "+" ".join(bws)+" -R "+output.bed+" -p "+params.nthreads+" --referencePoint TSS --upstream 3000 --downstream 3000 --skipZeros -o "+output.TSSmat+" --samplesLabel "+" ".join(labels)
cmd4="plotHeatmap -m "+output.metamat+" -out "+output.metaheat+" --colorMap 'PuOr_r' --yAxisLabel 'average RPGC' --regionsLabel 'genes' --legendLocation 'none'"
cmd5="plotHeatmap -m "+output.TSSmat+" -out "+output.TSSheat+" --colorMap 'RdBu_r' --yAxisLabel 'average RPGC' --regionsLabel 'genes' --legendLocation 'none'"
cmd6="plotProfile -m "+output.metamat+" -out "+output.metaline+" --plotHeight 15 --plotWidth 15 --perGroup --yAxisLabel 'average RPGC' --plotType 'se' --legendLocation upper-right"
Expand Down Expand Up @@ -851,7 +849,8 @@ rule ngsqc_plot:
input:
ngsqc=expand(join(workpath,"QC","{name}.sorted.Q5DD.NGSQC_report.txt"),name=samples),
output:
out=expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.pdf"),group=groups),
png=expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.png"),group=groups),
txt=join(workpath,"QC","NGSQC.sorted.Q5DD.txt")
params:
rname="pl:ngsqc_plot",
script=join(workpath,"Scripts","ngsqc_plot.py"),
Expand All @@ -861,15 +860,18 @@ rule ngsqc_plot:
cmd1 = ""
cmd2 = ""
cmd3 = ""
cmd4 = ""
for group in groups:
labels = [ sample for sample in samples if sample in groupdatawinput[group] ]
cmd1 = cmd1 + "mkdir " + group + "; "
for label in labels:
cmd1 = cmd1 + "cp " + workpath + "/QC/" + label + ".sorted.Q5DD.NGSQC_report.txt " + group + "; "
cmd2 = cmd2 + "python " + params.script + " -d '" + group + "' -e 'sorted.Q5DD' -g '" + group + "'; "
cmd3 = cmd3 + "mv " + group + "/" + group + ".NGSQC.sorted.Q5DD.pdf " + workpath + "/QC/" + group + ".NGSQC.sorted.Q5DD.pdf" + "; "
cmd3 = cmd3 + "if [ -f NGSQC.sorted.Q5DD.txt ]; then tail -n +2 " + group + "/NGSQC.sorted.Q5DD.txt >> NGSQC.sorted.Q5DD.txt; else cp " + group + "/NGSQC.sorted.Q5DD.txt NGSQC.sorted.Q5DD.txt; fi; "
cmd4 = cmd4 + "mv " + group + "/" + group + ".NGSQC.sorted.Q5DD.png " + workpath + "/QC/" + group + ".NGSQC.sorted.Q5DD.png" + "; "
cmd5 = "mv NGSQC.sorted.Q5DD.txt " + workpath + "/QC/NGSQC.sorted.Q5DD.txt; "
shell(commoncmd1)
shell(commoncmd2 + cmd1 + cmd2 + cmd3)
shell(commoncmd2 + cmd1 + cmd2 + cmd3 + cmd4 + cmd5)

rule QCstats:
input:
Expand Down Expand Up @@ -906,7 +908,7 @@ rule QCTable:
inputstring=" ".join(expand(join(workpath,"QC","{name}.qcmetrics"), name=samples)),
filterCollate=join(workpath,"Scripts","createtable"),
output:
qctable=join(workpath,"QCTable.txt"),
qctable=join(workpath,"QC","QCTable.txt"),
threads: 16
shell: """
cat {params.inputstring} | {params.filterCollate} > {output.qctable}
Expand All @@ -919,10 +921,11 @@ rule multiqc:
expand(join(workpath,bam_dir,"{name}.sorted.Q5DD.bam.flagstat"), name=samples),
expand(join(workpath,bam_dir,"{name}.sorted.Q5.bam.flagstat"), name=samples),
expand(join(workpath,bam_dir,"{name}.{ext}.ppqt"),name=samples,ext=extensions2),
join(workpath,"QCTable.txt"),
join(workpath,"QC","QCTable.txt"),
join(workpath,"rawQC"),
join(workpath,"QC"),
expand(join(workpath,deeptools_dir,"{group}.fingerprint.raw.sorted.Q5DD.tab"),group=groups),
expand(join(workpath,"QC","{group}.NGSQC.sorted.Q5DD.png"),group=groups),
output:
join(workpath,"Reports","multiqc_report.html")
params:
Expand All @@ -933,7 +936,14 @@ rule multiqc:
threads: 1
shell: """
module load {params.multiqc}
RE="(QC/.*NGSQC.sorted.Q5DD).png"
for ngsqc in `ls QC/*.NGSQC.sorted.Q5DD.png`;
do
if [[ $ngsqc =~ $RE ]]; then root=${{BASH_REMATCH[1]}}; fi
cp $ngsqc ${{root}}_mqc.png
done
cd Reports && multiqc -f -c {params.qcconfig} --interactive -e cutadapt --ignore {params.dir} -d ../
rm ../QC/*_mqc.png
"""


9 changes: 9 additions & 0 deletions cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@
"deeptools": {
"mem": "24g"
},
"deeptools_fingerprint": {
"threads":"8"
},
"deeptools_fingerprint_Q5DD": {
"threads":"8"
},
"deeptools_genes": {
"threads":"16"
},
"deeptools_prep": {
"mem": "4g",
"threads": "2"
Expand Down

0 comments on commit 51e94d6

Please sign in to comment.