Skip to content

Commit

Permalink
fn dbpedia_get_wikidata_uris()
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Sep 21, 2023
1 parent 4c172c5 commit 885c7b4
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 3 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: dbpedia
Type: Package
Title: R wrapper for DBpedia Spotlight
Version: 0.0.1.9001
Version: 0.0.1.9002
Date: 2023-09-21
Authors@R: c(
person("Andreas", "Blaette", role = c("aut", "cre"), email = "andreas.blaette@uni-due.de", comment = c(ORCID = "0000-0001-8970-8010")),
Expand All @@ -21,7 +21,8 @@ Imports:
Suggests:
markdown (>= 1.5),
rmarkdown,
knitr
knitr,
SPARQL
VignetteBuilder: knitr
SystemRequirements: docker
LazyData: true
Expand All @@ -34,6 +35,7 @@ Encoding: UTF-8
Collate:
'dbpedia.R'
'utils.R'
'wikidata.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import(methods)
importFrom(RcppCWB,cl_struc2str)
importFrom(cli,cli_alert_danger)
importFrom(cli,cli_alert_warning)
importFrom(cli,cli_progress_bar)
importFrom(cli,cli_progress_done)
importFrom(cli,cli_progress_step)
importFrom(cli,cli_progress_update)
importFrom(data.table,`:=`)
importFrom(data.table,as.data.table)
importFrom(data.table,setDT)
Expand Down
87 changes: 87 additions & 0 deletions R/wikidata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#' Get Wikidata URIs for DBpedia URIs.
#'
#' Queries a DBpedia SPARQL endpoint for the `owl:sameAs` links of the
#' DBpedia resources in `x` and keeps those links that point to
#' "www.wikidata.org". The input is processed in chunks of at most `limit`
#' URIs, with one request per chunk.
#'
#' @param x A character vector with DBpedia URIs.
#' @param optional Optional information to retrieve (passed as length-one
#'   character vector, e.g. 'municipalityCode'). It is inserted into the query
#'   as a `dbo:` property and returned in column `key`.
#' @param endpoint Endpoint to query (a length-one `character` vector).
#' @param limit Single numeric value with maximum size of chunks to process at
#'   a time. The value is also used as the `LIMIT` of each query.
#' @param wait A numeric value passed into `Sys.sleep()` to slow down sequence
#'   of requests (and avoid denial of service). Defaults to 1.
#' @param progress Whether to show progress bar (`logical` value).
#' @return The `results` element of the `SPARQL::SPARQL()` return value for
#'   each chunk, combined with `rbind()`. The first column is renamed to
#'   `dbpedia_uri` and a column `wikidata_id` (derived from column
#'   `wikidata_uri`) is added. `NULL` if there are no chunks to process.
#' @examples
#' \donttest{
#' dbpedia_ids <- c(
#'   "http://de.dbpedia.org/resource/Killesberg",
#'   "http://de.dbpedia.org/resource/Ljubljana",
#'   "http://de.dbpedia.org/resource/Velbert"
#' )
#' dbpedia_get_wikidata_uris(
#'   dbpedia_ids,
#'   optional = "municipalityCode",
#'   endpoint = "http://de.dbpedia.org/sparql",
#'   wait = 0,
#'   limit = 2,
#'   progress = TRUE
#' )
#' }
#' @importFrom cli cli_progress_bar cli_progress_done cli_progress_update
dbpedia_get_wikidata_uris <- function(x, optional, endpoint, limit = 100, wait = 1, progress = FALSE){

  # SPARQL is a suggested (optional) dependency, so check availability at
  # runtime rather than importing it.
  if (!requireNamespace("SPARQL", quietly = TRUE)){
    stop(
      "R package SPARQL required but not available. ",
      "SPARQL is currently not at CRAN, but can be installed from the archive"
    )
  }

  stopifnot(
    is.character(x),
    is.character(endpoint), length(endpoint) == 1L,
    is.numeric(limit), length(limit) == 1L, limit >= 1,
    is.logical(progress), length(progress) == 1L, !is.na(progress),
    is.numeric(wait), length(wait) == 1L, wait >= 0
  )

  if (!missing(optional)){
    stopifnot(is.character(optional), length(optional) == 1L)
    optional <- sprintf('OPTIONAL { ?item dbo:%s ?key . }', optional)
  } else {
    optional <- ""
  }

  # Placeholders: (1) the VALUES list of URIs, (2) the OPTIONAL clause (may be
  # empty), (3) the LIMIT.
  # NOTE(review): LIMIT equals the chunk size; if one item can match several
  # rows, results of a full chunk might be truncated - confirm this is intended.
  template <- 'SELECT distinct ?item ?wikidata_uri ?key
WHERE {
VALUES ?item {%s}
?item owl:sameAs ?wikidata_uri
%s
FILTER(regex(str(?wikidata_uri), "www.wikidata.org" ) )}
LIMIT %d'

  chunks <- as_chunks(x = x, size = limit)

  # Preallocate the result list instead of growing it inside the loop.
  retval_li <- vector("list", length(chunks))

  if (progress) cli_progress_bar("Tasks", total = length(chunks), type = "tasks")

  # seq_along() yields an empty sequence if there are no chunks, whereas
  # 1L:length(chunks) would iterate over c(1, 0).
  for (i in seq_along(chunks)){
    # Update the progress bar only if one has been created: without an active
    # bar, cli_progress_update() fails.
    if (progress) cli_progress_update()

    query <- sprintf(
      template,
      paste(sprintf("<%s>", chunks[[i]]), collapse = " "),
      optional,
      limit
    )

    Sys.sleep(wait)

    res <- SPARQL::SPARQL(url = endpoint, query = query)[["results"]]
    # Reduce the URI to the trailing Q-identifier; the pattern expects values
    # that end with "/Q<digits>>", i.e. URIs wrapped in angle brackets.
    res[["wikidata_id"]] <- gsub(
      "^.*\\/(Q\\d+)>$", "\\1",
      res[["wikidata_uri"]]
    )
    # The first column holds the queried ?item, i.e. the DBpedia URI.
    colnames(res)[1] <- "dbpedia_uri"
    retval_li[[i]] <- res
  }

  if (progress) cli_progress_done()

  do.call(rbind, retval_li)
}
2 changes: 1 addition & 1 deletion man/as_chunks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions man/dbpedia_get_wikidata_uris.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 885c7b4

Please sign in to comment.