Skip to content

Commit

Permalink
qsub params; other bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
gavinha committed Aug 3, 2018
1 parent 57216bf commit f89eb2e
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 23 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Viswanathan SR*, Ha G*, Hoff A*, et al. Structural Alterations Driving Castratio
Gavin Ha
Fred Hutchinson Cancer Research Center
contact: <gavinha@gmail.com> or <gha@fredhutch.org>
Date: July 26, 2018
Date: August 3, 2018

## Requirements
### Software packages or libraries
Expand Down Expand Up @@ -62,7 +62,7 @@ snakemake -s TitanCNA.snakefile -np
# run the workflow locally using 5 cores
snakemake -s TitanCNA.snakefile --cores 5
# run the workflow on qsub using a maximum of 50 jobs. Broad UGER cluster parameters can be set directly in config/cluster.sh.
snakemake -s TitanCNA.snakefile --cluster-sync "qsub" -j 50 --jobscript config/cluster.sh
snakemake -s TitanCNA.snakefile --cluster-sync "qsub -l h_vmem={params.mem},h_rt={params.runtime}" -j 50 --jobscript config/cluster.sh
```
This will also run both `moleculeCoverage.snakefile` and `getPhasedAlleleCounts.snakefile` which generate the necessary inputs for `TitanCNA.snakfile`.

Expand Down
15 changes: 12 additions & 3 deletions TitanCNA.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ rule runTitanCNA:
alphaR=config["TitanCNA_alphaR"],
#alleleModel=config["TitanCNA_alleleModel"],
txnExpLen=config["TitanCNA_txnExpLen"],
plotYlim=config["TitanCNA_plotYlim"]
plotYlim=config["TitanCNA_plotYlim"],
mem=config["TitanCNA_mem"],
runtime=config["TitanCNA_runtime"],
pe=config["TitanCNA_numCores"]
log:
"logs/titan/titanCNA_ploidy{ploidy}/{tumor}_cluster{clustNum}.log"
shell:
Expand All @@ -77,7 +80,10 @@ rule combineTitanAndIchorCNA:
combineScript=config["TitanCNA_combineTitanIchorCNA"],
libdir=config["TitanCNA_libdir"],
centromere=config["centromere"],
gender=config["gender"]
gender=config["gender"],
mem=config["std_mem"],
runtime=config["std_runtime"],
pe=config["std_numCores"]
log:
"logs/titan/titanCNA_ploidy{ploidy}/{tumor}_cluster{clustNum}.combineTitanIchorCNA.log"
shell:
Expand All @@ -92,7 +98,10 @@ rule selectSolution:
solutionsTxt="results/titan/optimalClusterSolution.txt",
params:
solutionRscript=config["TitanCNA_selectSolutionRscript"],
threshold=config["TitanCNA_solutionThreshold"]
threshold=config["TitanCNA_solutionThreshold"],
mem=config["std_mem"],
runtime=config["std_runtime"],
pe=config["std_numCores"]
log:
"logs/titan/optSolution/selectSolution.log"
shell:
Expand Down
6 changes: 5 additions & 1 deletion code/getMoleculeCoverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ normalizeMaleX <- as.logical(opt$normalizeMaleX)
includeHOMD <- as.logical(opt$includeHOMD)
fracReadsInChrYForMale <- opt$fracReadsInChrYForMale
outDir <- opt$outDir
outPlotDir <- paste0(outDir, "/", patientID)
libdirTitanCNA <- opt$libdirTitanCNA
libdirIchorCNA <- opt$libdirIchorCNA
plotFileType <- opt$plotFileType
Expand Down Expand Up @@ -142,6 +143,8 @@ if (is.null(centromere) || centromere == "None" || centromere == "NULL"){ # no c
}
centromere <- read.delim(centromere,header=T,stringsAsFactors=F,sep="\t")

save.image(outImage)

## LOAD IN WIG FILES ##
numSamples <- 1
tumour_counts <- list()
Expand All @@ -150,6 +153,7 @@ for (i in 1:numSamples) {
id <- patientID
## create output directories for each sample ##
dir.create(paste0(outDir, "/"), recursive = TRUE)
dir.create(paste0(outPlotDir, "/"), recursive = TRUE)
### LOAD TUMOUR AND NORMAL FILES ###
message("Loading tumour files from ", tumour_file)
tumour_doc <- loadBXcountsFromBEDDir(tumour_file, chrs = chrsAll, minReads = minReadsPerBX)
Expand Down Expand Up @@ -302,7 +306,7 @@ for (n in normal){
hmmResults.cor$results$n[s, iter] <- 1.0
}
## plot solution ##
outPlotFile <- paste0(outDir, "/", id, "/", id, "_genomeWide_", "n", n, "-p", p)
outPlotFile <- paste0(outPlotDir, "/", id, "_genomeWide_", "n", n, "-p", p)
mainName[counter] <- paste0(id, ", n: ", n, ", p: ", p, ", log likelihood: ", signif(hmmResults.cor$results$loglik[hmmResults.cor$results$iter], digits = 4))
plotGWSolution(hmmResults.cor, s=s, outPlotFile=outPlotFile, plotFileType=plotFileType,
plotYLim=plotYLim, estimateScPrevalence=estimateScPrevalence, main=mainName[counter])
Expand Down
14 changes: 8 additions & 6 deletions code/getPhasedHETSitesFromLLRVCF.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ option_list <- list(
make_option(c("--genomeStyle"), type = "character", default="NCBI", help = "Chr naming convention. NCBI (e.g. 1) or UCSC (e.g. chr1). Default: [%default]"),
make_option(c("-s","--snpDB"), type="character", default=NULL, help= "VCF file of list of known SNPs (e.g. HapMap, 1000 Genomes, dbSNP)"),
make_option(c("--altCountField"), type="character", default="AD", help="Alternate allele count field name in genotype. [Default: %default]"),
make_option(c("-o","--outVCFgz"), type = "character", help = "Output VCF file with suffix \"_phasedHets.vcf\" [Required]"),
make_option(c("-o","--outVCF"), type = "character", help = "Output VCF file with suffix \"_phasedHets.vcf\" [Required]"),
make_option(c("--libdir"), type="character", default=NULL, help="Path to TitanCNA package directory. If provided, will source scripts after loading installed TitanCNA package.")
)

Expand All @@ -29,6 +29,7 @@ opt <- parse_args(parseobj)
print(opt)

library(TitanCNA)
library(GenomeInfoDb)
library(VariantAnnotation)

vcfFile <- opt$inVCF
Expand All @@ -39,8 +40,8 @@ chrs <- eval(parse(text = opt$chrs))
minQUAL <- opt$minQuality
minDepth <- opt$minDepth
minVAF <- opt$minVAF
outVCFgz <- opt$outVCFgz
outVCF <- gsub("gz", "", outVCFgz)
outVCF <- opt$outVCF
outVCFgz <- paste0(outVCF, ".gz")
libdir <- opt$libdir
altCountField <- opt$altCountField

Expand All @@ -59,14 +60,14 @@ filterFlags <- c("PASS", "10X_RESCUED_MOLECULE_HIGH_DIVERSITY")
#minQUAL <- 100
keepGenotypes <- c("1|0", "0|1", "0/1")

save.image("tmp.RData")


#vcfFiles <- list.files(LRdir, pattern = "_phased_variants.vcf.gz$", full.name = TRUE)

#for (i in 1:length(vcfFiles)){
#id <- gsub("_phased_variants.vcf.gz", "", basename(vcfFile))
hap <- getHaplotypesFromVCF(vcfFile,
chrs = chrs, build = genomeBuild, style = genomeStyle,
chrs = chrs, build = genomeBuild, genomeStyle = genomeStyle,
filterFlags = filterFlags, minQUAL = minQUAL, minDepth = minDepth,
minVAF = minVAF, keepGenotypes = keepGenotypes,
altCountField = altCountField, snpDB = snpFile)
Expand All @@ -75,10 +76,11 @@ hap <- getHaplotypesFromVCF(vcfFile,
## remove BX genotype field to make things faster
geno(hap$vcf)$BX <- NULL

message("Writing to file: ", outVCF)
writeVcf(hap$vcf, filename = outVCF)
bgzipFile <- bgzip(outVCF, dest = outVCFgz, overwrite = TRUE)
indexTabix(bgzipFile, format = "vcf")

# outFile <- paste0(outDir, "/", id, "_phasedGR.rds")
# saveRDS(hap$geno, file = outFile)

Expand Down
17 changes: 15 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@ gender: male
bamFileName: phased_possorted_bam.bam
phaseVariantFileName: phased_variants.vcf.gz

## params for each step ##
## standard parameters for qsub memory, runtime, pe (parallel environment)
# use what your scheduler designates for parallel environment; does not apply to local run
# invoke using:
# snakemake -s TitanCNA.snakefile --cluster-sync "qsub -l h_vmem={params.mem},h_rt={params.runtime} {params.pe}" -j 200 --jobscript config/cluster.sh
std_mem: 4G
std_runtime: "03:00:00"
std_numCores: -pe smp 1 -binding linear:1

## params for each step ##
## bxTools ##
bx_mapQual: 60
bx_bedFileRoot: data/10kb_hg38/10kb_hg38
Expand All @@ -41,13 +48,14 @@ molCov_maxCN: 8
het_minVCFQuality: 100
het_minDepth: 10
het_minVAF: 0.25
het_mem: 16G
het_runtime: "06:00:00"

## bam filters for tumor allelic counts ##
het_minBaseQuality: 10
het_minMapQuality: 20

## TitanCNA params ##
TitanCNA_numCores: 1
TitanCNA_maxNumClonalClusters: 1
TitanCNA_chrs: c(1:22, \"X\")
TitanCNA_normalInit: 0.5
Expand All @@ -61,4 +69,9 @@ TitanCNA_alphaR: 5000
TitanCNA_txnExpLen: 1e15
TitanCNA_plotYlim: c(-2,4)
TitanCNA_solutionThreshold: 0.05
TitanCNA_mem: 16G
TitanCNA_runtime: "10:00:00"
TitanCNA_numCores: -pe smp 1 -binding linear:1



12 changes: 9 additions & 3 deletions getPhasedAlleleCounts.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ rule getHETsites:
minQual=config["het_minVCFQuality"],
minDepth=config["het_minDepth"],
minVAF=config["het_minVAF"],
libdir=config["TitanCNA_libdir"]
libdir=config["TitanCNA_libdir"],
mem=config["het_mem"],
runtime=config["het_runtime"],
pe=config["std_numCores"]
log:
"logs/phasedCounts/hetPosns/{tumor}.phasedHETsites.log"
shell:
"Rscript {params.getHETsitesScript} --inVCF {input} --genomeBuild {params.genomeBuild} -- genomeStyle {params.genomeStyle} --snpDB {params.snpDB} --minQuality {params.minQual} --minDepth {params.minDepth} --minVAF {params.minVAF} --altCountField AD --libdir {params.libdir} --outVCFgz {output} 2> {log}"
"Rscript {params.getHETsitesScript} --inVCF {input} --genomeBuild {params.genomeBuild} --genomeStyle {params.genomeStyle} --snpDB {params.snpDB} --minQuality {params.minQual} --minDepth {params.minDepth} --minVAF {params.minVAF} --altCountField AD --libdir {params.libdir} --outVCF {output} 2> {log}"


rule getAlleleCountsByChr:
Expand All @@ -43,7 +46,10 @@ rule getAlleleCountsByChr:
countScript=config["phaseCounts_counts_script"],
mapQ=config["het_minMapQuality"],
baseQ=config["het_minBaseQuality"],
vcfQ=config["het_minVCFQuality"]
vcfQ=config["het_minVCFQuality"],
mem=config["std_mem"],
runtime=config["std_runtime"],
pe=config["std_numCores"]
log:
"logs/phasedCounts/tumCounts/{tumor}/{tumor}.tumCounts.{chr}.log"
shell:
Expand Down
17 changes: 11 additions & 6 deletions moleculeCoverage.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ rule correctMolCov:
input:
expand("results/bxTile/{samples}/{samples}.bxTile.{chr}.bed", samples=config["samples"], chr=CHRS),
#expand("results/moleculeCoverage/{tumor}/{tumor}.cna.seg", tumor=config["pairings"]),
#expand("results/moleculeCoverage/{tumor}/{tumor}.seg.txt", tumor=config["pairings"]),
#expand("results/moleculeCoverage/{tumor}/{tumor}.params.txt", tumor=config["pairings"]),
expand("results/moleculeCoverage/{tumor}/{tumor}.BXcounts.txt", tumor=config["pairings"]),
expand("results/bxTile/{samples}/", samples=config["samples"])

Expand All @@ -33,8 +35,9 @@ rule bxTile:
samTools=config["samTools"],
mapQual=config["bx_mapQual"],
bedFile=config["bx_bedFileRoot"]
resources:
mem=4
mem=config["std_mem"],
runtime=config["std_runtime"],
pe=config["std_numCores"]
log:
"logs/bxTile/{samples}/{samples}.bxTile.{chr}.log"
shell:
Expand All @@ -48,7 +51,8 @@ rule moleculeCoverage:
output:
corrDepth="results/moleculeCoverage/{tumor}/{tumor}.BXcounts.txt",
cna="results/moleculeCoverage/{tumor}/{tumor}.cna.seg",
#segTxt="results/moleculeCoverage/{tumor}/{tumor}.seg.txt",
segTxt="results/moleculeCoverage/{tumor}/{tumor}.seg.txt",
paramTxt="results/moleculeCoverage/{tumor}/{tumor}.params.txt",
outDir="results/moleculeCoverage/{tumor}/",
params:
molCovScript=config["molCov_script"],
Expand All @@ -61,9 +65,10 @@ rule moleculeCoverage:
mapwig=config["molCov_mapWig"],
titanLibDir=config["TitanCNA_libdir"],
ichorLibDir=config["ichorCNA_libdir"],
centromere=config["centromere"]
resources:
mem=4
centromere=config["centromere"],
mem=config["std_mem"],
runtime=config["std_runtime"],
pe=config["std_numCores"]
log:
"logs/moleculeCoverage/{tumor}.molCov.log"
shell:
Expand Down

0 comments on commit f89eb2e

Please sign in to comment.