diff --git a/DESCRIPTION b/DESCRIPTION
index 50bf679..4240b57 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,8 @@ Suggests:
     markdown (>= 1.5),
     rmarkdown,
     knitr,
-    SPARQL
+    SPARQL,
+    WikidataQueryServiceR
 VignetteBuilder: knitr
 SystemRequirements: docker
 LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
index 87d6377..22d52d6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
 export(as_annotation)
+export(dbpedia_get_wikidata_uris)
+export(wikidata_query)
 exportMethods(get_dbpedia_links)
 import(methods)
 importFrom(RcppCWB,cl_struc2str)
diff --git a/NEWS.md b/NEWS.md
index 39524e1..a698723 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,9 @@
-## dbpedia v0.0.1.9001
+## dbpedia v0.0.1.9001-v0.0.1.9002
 
 * New auxiliary function `as_chunks()`.
+* New function `dbpedia_get_wikidata_uris()`.
+* New function `wikidata_query()` as high-level wrapper for
+`WikidataQueryServiceR::query_wikidata()`.
 
 ## dbpedia v0.0.1
 
diff --git a/R/wikidata.R b/R/wikidata.R
index 3d78543..c9b7fa1 100644
--- a/R/wikidata.R
+++ b/R/wikidata.R
@@ -9,6 +9,7 @@
 #' @param wait A numeric value passed into `Sys.sleep()` to slow down sequence
 #' of requests (and avoid denial of service). Defaults to 100.
 #' @param progress Whether to show progress bar (`logical` value).
+#' @export
 #' @examples
 #' \donttest{
 #' dbpedia_ids <- c(
@@ -83,5 +84,80 @@ dbpedia_get_wikidata_uris <- function(x, optional, endpoint, limit = 100, wait =
 
   if (progress) cli_progress_done()
 
+  do.call(rbind, retval_li)
+}
+
+
+#' Query Wikidata endpoint for additional information.
+#'
+#' This is a wrapper for `WikidataQueryServiceR::query_wikidata()` to get
+#' additional information for known wikidata IDs.
+#'
+#' @return A `tibble`.
+#' @param x A `character` vector of wikidata ids.
+#' @param id Wikidata ID for information to retrieve (`character` vector).
+#' @param limit Maximum number of wikidata IDs to be sent to endpoint at a time.
+#' @param progress Whether to show progress information (`logical` value).
+#' @param wait Seconds to pause before each request (numeric, `>= 0`).
+#' @export
+#' @examples
+#' \donttest{
+#' wikidata_ids <- c("Q1741365", "Q3840", "Q437")
+#' wikidata_query(
+#'   wikidata_ids,
+#'   id = "P439", # German municipality key
+#'   wait = 0,
+#'   limit = 2,
+#'   progress = TRUE
+#' )
+#' }
+wikidata_query <- function(x, id, limit = 100L, wait = 1, progress = FALSE){
+
+  # WikidataQueryServiceR is only a suggested package: check at runtime.
+  if (!requireNamespace("WikidataQueryServiceR", quietly = TRUE)){
+    stop("R package WikidataQueryServiceR required but not available. ")
+  }
+
+  stopifnot(
+    is.vector(x), is.character(x),
+    is.character(id), length(id) == 1L,
+    is.numeric(limit), length(limit) == 1L, limit > 0,
+    is.numeric(wait), length(wait) == 1L, wait >= 0,
+    is.logical(progress), length(progress) == 1L
+  )
+
+  template <- 'SELECT ?item ?label ?key ?keyLabel
+    WHERE {
+      VALUES ?item { %s }
+      OPTIONAL { ?item wdt:%s ?key . }
+      ?item rdfs:label ?label
+      filter(lang(?label) = "de")
+      SERVICE wikibase:label { bd:serviceParam wikibase:language "de". }
+  }'
+
+  chunks <- as_chunks(x = x, size = limit)
+  retval_li <- vector("list", length(chunks))
+
+  if (progress) cli_progress_bar("Tasks", total = length(chunks), type = "tasks")
+  for (i in seq_along(chunks)){
+    # A progress bar only exists if it has been requested above.
+    if (progress) cli_progress_update()
+    query <- sprintf(
+      template,
+      paste0("wd:", chunks[[i]], collapse = " "),
+      id
+    )
+
+    Sys.sleep(wait)
+
+    retval_li[[i]] <- WikidataQueryServiceR::query_wikidata(
+      sparql_query = query,
+      format = "simple"
+    )
+    colnames(retval_li[[i]])[1] <- "wikidata_id"
+  }
+
+  if (progress) cli_progress_done()
+
   do.call(rbind, retval_li)
 }
\ No newline at end of file
diff --git a/man/wikidata_query.Rd b/man/wikidata_query.Rd
new file mode 100644
index 0000000..b892155
--- /dev/null
+++ b/man/wikidata_query.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikidata.R
+\name{wikidata_query}
+\alias{wikidata_query}
+\title{Query Wikidata endpoint for additional information.}
+\usage{
+wikidata_query(x, id, limit = 100L, wait = 1, progress = FALSE)
+}
+\arguments{
+\item{x}{A \code{character} vector of wikidata ids.}
+
+\item{id}{Wikidata ID for information to retrieve (\code{character} vector).}
+
+\item{limit}{Maximum number of wikidata IDs to be sent to endpoint at a time.}
+
+\item{wait}{Seconds to pause before each request (numeric, \code{>= 0}).}
+
+\item{progress}{Whether to show progress information (\code{logical} value).}
+}
+\value{
+A \code{tibble}.
+}
+\description{
+This is a wrapper for \code{WikidataQueryServiceR::query_wikidata()} to get
+additional information for known wikidata IDs.
+}
+\examples{
+\donttest{
+wikidata_ids <- c("Q1741365", "Q3840", "Q437")
+wikidata_query(
+  wikidata_ids,
+  id = "P439", # German municipality key
+  wait = 0,
+  limit = 2,
+  progress = TRUE
+)
+}
+}