/
assembly.R
162 lines (154 loc) · 4.59 KB
/
assembly.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#' Create an assembly for a custom JBrowse view
#'
#' Creates the necessary configuration string for an
#' indexed fasta or bgzip fasta so that it can be used
#' as the assembly in a JBrowse custom linear genome view.
#'
#' The string returned by \code{assembly} is stringified JSON.
#' JBrowseR is an interface to JBrowse 2, which receives its
#' configuration in JSON format. The stringified JSON returned
#' by \code{assembly} is parsed into a JavaScript object in the
#' browser, and is used to configure the genome browser.
#'
#' It is important to note that while only the fasta file is
#' passed as an argument, \code{assembly} assumes that a fasta
#' index of the same name is located with the fasta file (as
#' well as a gzi file in the case of a bgzip fasta).
#'
#' For example:
#'
#' \code{assembly("data/hg38.fa")}
#'
#' Assumes that \code{data/hg38.fa.fai} also exists.
#'
#' \code{assembly("data/hg38.fa.gz", bgzip = TRUE)}
#'
#' Assumes that \code{data/hg38.fa.gz.fai} and \code{data/hg38.fa.gz.gzi} both exist.
#'
#' This is a JBrowse 2 convention, and the default naming output of samtools
#' and bgzip.
#'
#' For more information on creating these files, visit
#' \url{https://jbrowse.org/jb2/docs/quickstart_web#adding-a-genome-assembly}
#'
#' @param assembly_data the URL to your fasta file
#' @param bgzip whether or not your fasta is bgzip compressed
#' @param aliases a vector of strings of the aliases for the assembly
#' @param refname_aliases the URL to a file containing reference
#' name aliases. For more info see
#' \url{https://jbrowse.org/jb2/docs/config_guide#configuring-reference-name-aliasing}
#'
#' @return a character vector of JBrowseR assembly configuration
#' @export
#'
#' @examples
#' assembly("https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", bgzip = TRUE)
assembly <- function(assembly_data, bgzip = FALSE, aliases = NULL, refname_aliases = NULL) {
  # Plain indexed fasta: the config only references the fasta and its
  # .fai index.
  if (!bgzip) {
    return(fa_assembly(assembly_data, aliases, refname_aliases))
  }

  # bgzip-compressed fasta: the config additionally references a .gzi index.
  bgzip_fa_assembly(assembly_data, aliases, refname_aliases)
}
# Build the stringified JSON assembly config for an uncompressed, indexed
# fasta. The fasta index is referenced as `<assembly_data>.fai`.
fa_assembly <- function(assembly_data, aliases, refname_aliases) {
  # These locals are the values substituted for the {placeholders} in the
  # template below, so their names must match the placeholders.
  name <- get_name(assembly_data)
  aliases <- get_aliases(aliases)
  refname_aliases <- get_refname_aliases(refname_aliases)

  # Doubled braces are literal braces in the rendered output; single braces
  # mark values to interpolate.
  template <- stringr::str_c(
    "{{ ",
    '"name": "{name}", ',
    "{aliases}",
    '"sequence": {{ ',
    '"type": "ReferenceSequenceTrack", ',
    '"trackId": "{name}-ReferenceSequenceTrack", ',
    '"adapter": {{ ',
    '"type": "IndexedFastaAdapter", ',
    '"fastaLocation": {{ ',
    '"uri": "{assembly_data}" ',
    "}}, ",
    '"faiLocation": {{ ',
    '"uri": "{assembly_data}.fai" ',
    "}} ",
    "}} ",
    "}} ",
    "{refname_aliases}",
    "}}"
  )

  # The rendered string is parsed as JSON on the client side and used as the
  # assembly value; return it as a plain character vector.
  config <- stringr::str_glue(template)
  as.character(config)
}
# Build the stringified JSON assembly config for a bgzip-compressed fasta.
# The indexes are referenced as `<assembly_data>.fai` and
# `<assembly_data>.gzi`.
bgzip_fa_assembly <- function(assembly_data, aliases, refname_aliases) {
  # These locals are the values substituted for the {placeholders} in the
  # template below, so their names must match the placeholders.
  name <- get_name(assembly_data)
  aliases <- get_aliases(aliases)
  refname_aliases <- get_refname_aliases(refname_aliases)

  # Doubled braces are literal braces in the rendered output; single braces
  # mark values to interpolate.
  template <- stringr::str_c(
    "{{ ",
    '"name": "{name}", ',
    "{aliases}",
    '"sequence": {{ ',
    '"type": "ReferenceSequenceTrack", ',
    '"trackId": "{name}-ReferenceSequenceTrack", ',
    '"adapter": {{ ',
    '"type": "BgzipFastaAdapter", ',
    '"fastaLocation": {{ ',
    '"uri": "{assembly_data}" ',
    "}}, ",
    '"faiLocation": {{ ',
    '"uri": "{assembly_data}.fai" ',
    "}}, ",
    '"gziLocation": {{ ',
    '"uri": "{assembly_data}.gzi" ',
    "}} ",
    "}} ",
    "}} ",
    "{refname_aliases}",
    "}}"
  )

  # The rendered string is parsed as JSON on the client side and used as the
  # assembly value; return it as a plain character vector.
  config <- stringr::str_glue(template)
  as.character(config)
}
# Build the `"aliases": [...], ` fragment of the assembly config.
#
#   c("hg19", "GRCh37") -> "aliases": ["hg19", "GRCh37"],   (with trailing ", ")
#   NULL                -> ""                               (no fragment)
#
# Each alias is emitted as a JSON string. Backslashes and double quotes
# inside an alias are backslash-escaped so the alias cannot terminate its
# JSON string early and corrupt the surrounding config.
get_aliases <- function(aliases) {
  if (is.null(aliases)) {
    return("")
  }

  # Prefix every `\` and `"` with a backslash (JSON string escaping).
  escaped <- gsub('(["\\\\])', "\\\\\\1", as.character(aliases))

  # sprintf() is vectorised and yields character(0) for empty input, so an
  # empty alias vector renders as an empty JSON array.
  quoted <- sprintf('"%s"', escaped)

  paste0('"aliases": [', paste(quoted, collapse = ", "), "], ")
}
# Build the `, "refNameAliases": {...} ` fragment of the assembly config.
#
# Returns "" when no alias file is given; otherwise a fragment (with a
# leading comma) that points a RefNameAliasAdapter at the given URI.
get_refname_aliases <- function(refname_aliases) {
  # No alias file supplied: contribute nothing to the assembly config.
  if (is.null(refname_aliases)) {
    return("")
  }

  # Doubled braces are literal braces in the rendered output; single braces
  # mark the value to interpolate.
  template <- stringr::str_c(
    ', "refNameAliases": {{ ',
    '"adapter": {{ ',
    '"type": "RefNameAliasAdapter", ',
    '"location": {{ ',
    '"uri": "{refname_aliases}" ',
    "}} ",
    "}} ",
    "}} "
  )

  fragment <- stringr::str_glue(template)
  as.character(fragment)
}