-
Notifications
You must be signed in to change notification settings - Fork 0
/
ChIPQCrds.R
executable file
·129 lines (126 loc) · 4.32 KB
/
ChIPQCrds.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#=================================================================
# r-argparse Argument Parser
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads r-argparse package and creates a command line parser. Parses
#arguments into a named list. Universal arguments for input bam/
#csv path, output rds path, and list of chromosomes for analysis.
#Single mode exclusive argument for peaks file path, multiple mode
#exclusive argument for number of worker cpus.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
library("argparse")
QCparser <- ArgumentParser()
QCparser$add_argument(
"-i", "--input",
type = "character",
default = NULL,
help = "Path to input sample (.bam) or sample sheet (.csv) to analyze."
)
QCparser$add_argument(
"-p", "--peaks",
type = "character",
default = NULL,
help = "Path to called peaks for sample reads. [Single Sample (.bam)]"
)
QCparser$add_argument(
"-o", "--output",
type = "character",
default = "Analysis.rds",
help = "Path to output rds file to generate."
)
QCparser$add_argument(
"-c", "--chromosomes",
type = "character", nargs="*",
default = NULL,
help = "Space separated list of chromosomes to analyze. Analyzes all if none specified."
)
QCparser$add_argument(
"-w", "--workers",
type = "integer",
default = 1,
help = "Number of worker cpus. [Multiple Samples (.csv)]"
)
QCargs <- QCparser$parse_args()
#=================================================================
#=================================================================
# Parameter Processing
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Determines script mode based on whether script is handling a
#single sample or multiple samples. Loads necessary arguments
#into their own variables. If necessary, makes columns of input
#csv into names to handle hyphens.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
if (grepl(QCargs$input, pattern = ".bam")) {
QCmode <- "single"
} else if (grepl(QCargs$input, pattern = ".csv")) {
QCmode <- "multiple"
} else {
stop("Invalid input format; must be .bam (single sample) or .csv (multiple samples).")
}
switch(QCmode,
single = {
QCreads <- QCargs$input
QCpeaks <- QCargs$peaks
},
multiple = {
QCsampleSheet <- read.csv(QCargs$input)
MNblacklist <- c("Replicate", "bamReads", "bamControl", "Peaks")
for (MNcol in colnames(QCsampleSheet)) {
if (!(MNcol %in% MNblacklist) && !is.null(MNcol)) {
QCsampleSheet[, MNcol] <- make.names(QCsampleSheet[, MNcol])
}
}
QCworkers <- QCargs$workers
}
)
QCrdsPath <- QCargs$output
QCchromosomes <- QCargs$chromosomes
#=================================================================
#=================================================================
# BiocParallel Processor Determination
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads BiocParallel package and creates a serial or parallel
#processor based on the requested number of worker cpus.
#Registers processor for use. (Multiple mode only)
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
if (QCmode == "multiple") {
library(BiocParallel)
if (QCworkers>=2) {
QCprocessor <- MulticoreParam(
workers = QCworkers,
progressbar = TRUE,
jobname = "QCrds"
)
} else {
QCprocessor <- SerialParam()
}
register(QCprocessor)
}
#=================================================================
#=================================================================
# ChIPQC Analysis and Output
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads ChIPQC package and runs analysis in sample or standard mode
#depending on input type. Outputs analysis to rds file and prints
#location.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
library("ChIPQC")
switch(QCmode,
single = {
QCanalysis <- ChIPQCsample(
reads = QCreads,
peaks = QCpeaks,
annotation = NULL,
chromosomes=QCchromosomes
)
},
multiple = {
QCanalysis <- ChIPQC(
experiment = QCsampleSheet,
annotation = NULL,
chromosomes = QCchromosomes
)
}
)
saveRDS(QCanalysis, file=QCrdsPath)
print(paste("Analysis saved to rds file:", QCrdsPath))
#=================================================================