Skip to content

Commit 08dd242

Browse files
committed
- Use RMariaDB instead of RMySQL in makeTxDbFromUCSC() and
makeTxDbFromEnsembl() to fetch data from UCSC or Ensembl.
- Various tweaks and a small speed-up to makeTxDbFromUCSC().
1 parent edb6576 commit 08dd242

File tree

8 files changed

+114
-131
lines changed

8 files changed

+114
-131
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
Package: GenomicFeatures
22
Title: Tools for making and manipulating transcript centric annotations
3-
Version: 1.31.6
3+
Version: 1.31.7
44
Encoding: UTF-8
55
Author: M. Carlson, H. Pagès, P. Aboyoun, S. Falcon, M. Morgan,
66
D. Sarkar, M. Lawrence
@@ -16,13 +16,13 @@ Description: A set of tools and methods for making and manipulating
1616
provided for extracting the desired features in a convenient
1717
format.
1818
Maintainer: Bioconductor Package Maintainer <maintainer@bioconductor.org>
19-
Depends: BiocGenerics (>= 0.1.0), S4Vectors (>= 0.17.28), IRanges (>= 2.13.15),
19+
Depends: BiocGenerics (>= 0.1.0), S4Vectors (>= 0.17.29), IRanges (>= 2.13.15),
2020
GenomeInfoDb (>= 1.15.4), GenomicRanges (>= 1.31.10),
2121
AnnotationDbi (>= 1.41.4)
2222
Imports: methods, utils, stats, tools, DBI, RSQLite (>= 2.0), RCurl,
2323
XVector (>= 0.19.7), Biostrings (>= 2.47.6), rtracklayer (>= 1.39.7),
2424
biomaRt (>= 2.17.1), Biobase (>= 2.15.1)
25-
Suggests: RMySQL, org.Mm.eg.db, org.Hs.eg.db,
25+
Suggests: RMariaDB, org.Mm.eg.db, org.Hs.eg.db,
2626
BSgenome, BSgenome.Hsapiens.UCSC.hg19 (>= 1.3.17),
2727
BSgenome.Celegans.UCSC.ce2,
2828
BSgenome.Dmelanogaster.UCSC.dm3 (>= 1.3.17),

R/Ensembl-utils.R

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,8 @@
88
### sequences in the reference genome associated with a particular dataset
99
### and Ensembl release (e.g. for dataset "hsapiens_gene_ensembl" and Ensembl
1010
### release "64").
11-
### Note that querying the Ensembl MySQL server (via RMySQL) would probably
12-
### be a better way to access this stuff but that would mean one more
13-
### dependency for the GenomicFeatures package. With some potential
14-
### complications like: (a) no RMySQL Windows binary on CRAN, and (b) depending
15-
### on RMySQL *and* RSQLite has its own pitfalls.
11+
### Note that querying the Ensembl MySQL server (via RMariaDB) would probably
12+
### be a better way to access this stuff.
1613
###
1714
### Ensembl Core Schema Documentation:
1815
### http://www.ensembl.org/info/docs/api/core/core_schema.html

R/UCSC-utils.R

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@ lookup_organism_by_UCSC_genome <- function(genome)
99
genome <- gsub("\\d+$", "", genome)
1010

1111
## Fetch all UCSC genomes with:
12-
## library(RMySQL)
13-
## dbconn <- dbConnect(MySQL(), username="genome",
12+
## library(RMariaDB)
13+
## dbconn <- dbConnect(MariaDB(), username="genome",
1414
## host="genome-mysql.soe.ucsc.edu", port=3306)
1515
## genomes <- sort(dbGetQuery(dbconn, "SHOW DATABASES")[[1L]])
1616
## unique(gsub("\\d+$", "", genomes))
@@ -153,10 +153,11 @@ UCSC_dbselect <- function(dbname, from, columns=NULL, where=NULL,
153153
stopifnot(isSingleString(where))
154154
SQL <- paste(SQL, "WHERE", where)
155155
}
156-
dbconn <- dbConnect(RMySQL::MySQL(), dbname=dbname,
157-
username="genome",
158-
host=server,
159-
port=3306)
160-
suppressWarnings(dbGetQuery(dbconn, SQL))
156+
dbconn <- dbConnect(RMariaDB::MariaDB(), dbname=dbname,
157+
username="genome",
158+
host=server,
159+
port=3306)
160+
on.exit(dbDisconnect(dbconn))
161+
dbGetQuery(dbconn, SQL)
161162
}
162163

R/makeTxDbFromEnsembl.R

Lines changed: 14 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -35,19 +35,10 @@
3535
dbname
3636
}
3737

38-
.fix_numeric_cols <- function(df)
39-
{
40-
col_idx <- which(sapply(df, is.numeric))
41-
df[col_idx] <- lapply(df[col_idx], as.integer)
42-
df
43-
}
44-
45-
.RMySQL_select <- function(dbconn, columns, from)
38+
.dbselect <- function(dbconn, columns, from)
4639
{
4740
SQL <- sprintf("SELECT %s FROM %s", paste0(columns, collapse=","), from)
48-
## Not sure systematic conversion of numeric to int is actually a
49-
## good idea (risk of overflow?)
50-
.fix_numeric_cols(suppressWarnings(dbGetQuery(dbconn, SQL)))
41+
dbGetQuery(dbconn, SQL)
5142
}
5243

5344
.seq_region_columns <- c(
@@ -69,7 +60,7 @@
6960
)
7061
columns <- c(paste0("transcript.", transcript_columns), "gene.stable_id")
7162
from <- "transcript LEFT JOIN gene USING(gene_id)"
72-
transcripts <- .RMySQL_select(dbconn, columns, from)
63+
transcripts <- .dbselect(dbconn, columns, from)
7364
colnames(transcripts) <- c("tx_id",
7465
"tx_name",
7566
"seq_region_id",
@@ -93,7 +84,7 @@
9384
"seq_end", # relative to last exon
9485
"transcript_id"
9586
)
96-
.RMySQL_select(dbconn, columns, "translation")
87+
.dbselect(dbconn, columns, "translation")
9788
}
9889

9990
### 'has_cds' must be a logical vector.
@@ -161,7 +152,7 @@
161152
)
162153
columns <- c("transcript_id", "rank", exon_columns)
163154
from <- "exon_transcript INNER JOIN exon USING(exon_id)"
164-
splicings <- .RMySQL_select(dbconn, columns, from)
155+
splicings <- .dbselect(dbconn, columns, from)
165156
colnames(splicings) <- c("tx_id",
166157
"exon_rank",
167158
"exon_id",
@@ -185,7 +176,7 @@
185176
## extract this value from Ensembl
186177
id0 <- 6L
187178
columns <- c("seq_region_id", "attrib_type_id", "value")
188-
seq_region_attrib <- .RMySQL_select(dbconn, columns, "seq_region_attrib")
179+
seq_region_attrib <- .dbselect(dbconn, columns, "seq_region_attrib")
189180
seq_region_attrib$seq_region_id[seq_region_attrib$attrib_type_id == id0]
190181
}
191182

@@ -213,7 +204,7 @@
213204
setdiff(seq_region_columns, using_column),
214205
setdiff(coord_system_columns, using_column))
215206
from <- "seq_region INNER JOIN coord_system USING(coord_system_id)"
216-
seq_region <- .RMySQL_select(dbconn, "*", from)
207+
seq_region <- .dbselect(dbconn, "*", from)
217208
stopifnot(identical(colnames(seq_region), joined_columns))
218209
colnames(seq_region)[6:9] <- paste0("coord_system_",
219210
colnames(seq_region)[6:9])
@@ -259,14 +250,15 @@ makeTxDbFromEnsembl <- function(organism="Homo sapiens",
259250
circ_seqs=DEFAULT_CIRC_SEQS,
260251
server="ensembldb.ensembl.org")
261252
{
262-
if (!requireNamespace("RMySQL", quietly=TRUE))
263-
stop(wmsg("Couldn't load the RMySQL package. You need to install ",
264-
"the RMySQL package in order to use makeTxDbFromEnsembl()."))
253+
if (!requireNamespace("RMariaDB", quietly=TRUE))
254+
stop(wmsg("Couldn't load the RMariaDB package. ",
255+
"You need to install the RMariaDB package ",
256+
"in order to use makeTxDbFromEnsembl()."))
265257

266258
dbname <- .lookup_dbname(organism, release=release)
267-
dbconn <- dbConnect(RMySQL::MySQL(), dbname=dbname,
268-
username="anonymous",
269-
host=server)
259+
dbconn <- dbConnect(RMariaDB::MariaDB(), dbname=dbname,
260+
username="anonymous",
261+
host=server)
270262
on.exit(dbDisconnect(dbconn))
271263

272264
transcripts <- .fetch_Ensembl_transcripts(dbconn)

0 commit comments

Comments
 (0)