From a52d1fbd66e67b19a42ce6c56e2b03afcbc4ada3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Bl=C3=A4tte?=
 <andreasblatte@MBP-von-Andreas.fritz.box>
Date: Mon, 26 Feb 2024 16:12:53 +0100
Subject: [PATCH] Drop native pipe (|>) for R oldrel compatibility (#38)

---
 NAMESPACE               |  1 +
 R/dbpedia.R             | 65 ++++++++++++++++++++---------------------
 R/utils.R               | 19 +++++-------
 R/wikidata.R            | 15 +++++++---
 R/xml.R                 | 17 ++++++-----
 man/get_dbpedia_uris.Rd |  5 ++--
 man/wikidata_uris.Rd    | 15 +++++++---
 7 files changed, 75 insertions(+), 62 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 619ae60..7719f46 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -57,6 +57,7 @@ importFrom(tibble,as_tibble)
 importFrom(utils,URLencode)
 importFrom(xml2,read_xml)
 importFrom(xml2,xml_attr)
+importFrom(xml2,xml_children)
 importFrom(xml2,xml_find_all)
 importFrom(xml2,xml_set_attrs)
 importFrom(xml2,xml_text)
diff --git a/R/dbpedia.R b/R/dbpedia.R
index 55116e9..8f7d89c 100644
--- a/R/dbpedia.R
+++ b/R/dbpedia.R
@@ -155,6 +155,7 @@ as_annotation <- function(x){
 #' @param feature_tag ...
 #' @importFrom stringi stri_c
 #' @importFrom NLP Annotation
+#' @importFrom xml2 xml_children
 to_annotation = function(nodes, xml, token_tags, feature_tag) {
   
   if (inherits(nodes, "xml_nodeset")) {
@@ -169,8 +170,10 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
     
   } else {
     
-    token_elements <- nodes |>
-      xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = token_tags))
+    token_elements <- xml2::xml_find_all(
+        nodes,
+        xpath = namespaced_xpath(xml = xml, tags = token_tags)
+      )
     
     # make token annotation data annotation
     
@@ -210,9 +213,10 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
     # data.frame split to rwos
     
     token_feat_dataframe <- data.frame(word = toks, id = tok_ids)
-    token_feat_list <- split(token_feat_dataframe, seq(nrow(token_feat_dataframe))) |>
-      unname()
-    
+    token_feat_list <- unname(
+      split(token_feat_dataframe, seq(nrow(token_feat_dataframe)))
+    )
+
     token_annotation <- NLP::Annotation(
       seq_along(tok_ids), # IDs must be integer, which is a bit unfortunate
       rep("word", length(tok_ids)),
@@ -224,49 +228,45 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
     # and add feature elements if chosen
     
     if (!is.null(feature_tag)) {
-      feature_elements <- nodes |>
-        xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = feature_tag))
+      feature_elements <-  xml2::xml_find_all(
+        nodes,
+        xpath = namespaced_xpath(xml = xml, tags = feature_tag)
+      )
     } else {
       feature_elements <- NULL
     }
     
     if (length(feature_elements) > 0) {
       
-      
-      feature_ids <- sapply(feature_elements, function(element) {
-        xml2::xml_find_first(element,
-                             xpath = namespaced_xpath(xml = xml, tags = token_tags)) |>
-          xml2::xml_attr("id") 
-      }
-      )
+      feature_ids <- sapply(
+        feature_elements,
+        function(element) {
+          el <- xml2::xml_find_first(
+            element,
+            xpath = namespaced_xpath(xml = xml, tags = token_tags)
+          )
+          xml2::xml_attr(el, "id") 
+        })
       
       feature_ids <- sprintf("%s_%s", feature_ids, feature_tag)
       
       # get attributes of features
       feature_ids <- feature_ids # name has no ID. We use the first word ID (assuming that there are no overlaps?)
       feature_kinds <- xml2::xml_attr(feature_elements, "type")
-      feature_texts <- sapply(feature_elements, function(feat) {
-        xml2::xml_children(feat) |>
-          xml2::xml_text() |>
-          paste(collapse = " ")
-      }
+      feature_texts <- sapply(
+        feature_elements,
+        function(feat) paste(xml_text(xml_children(feat)), collapse = " ")
       )
       
       # get spans for features
       
-      entity_spans <- sapply(feature_elements, function(element) {
-        child_id <- element |>
-          xml2::xml_children() |>
-          xml2::xml_attr("id")
-        
+      entity_spans <- t(sapply(feature_elements, function(element) {
+        child_id <- xml_attr(xml_children(element), "id")
         child_idx <- which(tok_ids %in% child_id)
         child_start <- min(start_positions[child_idx])
         child_end <- max(end_positions[child_idx])
-        
-        matrix(c(child_start, child_end), nrow = 1, ncol = 2)
-        
-      }
-      ) |> t()
+        matrix(c(child_start, child_end), nrow = 1L, ncol = 2L)
+      }))
       
       
       feature_annotation <- NLP::Annotation(
@@ -300,7 +300,7 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
     
     # make string
     word_with_ws <- paste(toks, ifelse(is.na(tok_joins), " ", ""), sep = "")
-    s <- stringi::stri_c(word_with_ws, collapse = "") |> trimws()
+    s <- trimws(stringi::stri_c(word_with_ws, collapse = ""))
     
     # add segment id as metadata (should work if segment is NULL as the TEI has
     # an ID as well).
@@ -705,13 +705,12 @@ setMethod("get_dbpedia_uris", "subcorpus_bundle", function(x, language = getOpti
 #' 
 #' # Process quanteda corpus 
 #' library(quanteda)
-#' uritab <- data_char_ukimmig2010 |>
-#'   corpus() |>
+#' uritab <- data_char_ukimmig2010 %>%
+#'   corpus() %>%
 #'   get_dbpedia_uris(
 #'     verbose = FALSE,
 #'     config = httr::config(http_version = 1.1)
 #'   )
-#'   
 #' @rdname get_dbpedia_uris
 setMethod(
   "get_dbpedia_uris",
diff --git a/R/utils.R b/R/utils.R
index 8e7a5cd..63513d8 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -222,23 +222,20 @@ map_types_to_class <- function(x, mapping_vector, other = "MISC", verbose = TRUE
     # types is a list of lists. Transform to single character vector.
     type_list <- unlist(types, recursive = FALSE)
 
-    types_with_class <- lapply(seq_along(type_list), function(i) {
+    types_with_class_raw <- lapply(seq_along(type_list), function(i) {
       list_name <- names(type_list)[[i]]
       list_elements <- type_list[[i]]
       paste0(list_name, ":", list_elements)
-    }) |>
-      unlist() |>
-      intersect(mapping_vector)
+    })
+    types_with_class <- intersect(unlist(types_with_class_raw), mapping_vector)
 
-    if (length(types_with_class) > 0) {
+    if (length(types_with_class) > 0L) {
       match_idx <- which(mapping_vector %in% types_with_class)
 
-      class_name <- mapping_vector |>
-        names() |>
-        _[match_idx] |>
-        unique() |>
-        sort() |>
-        paste(collapse = "|")
+      class_name <- paste(
+        sort(unique(names(mapping_vector)[match_idx])),
+        collapse = "|"
+      )
 
     } else {
       class_name <- other
diff --git a/R/wikidata.R b/R/wikidata.R
index 5bc9d09..5ca79c7 100644
--- a/R/wikidata.R
+++ b/R/wikidata.R
@@ -263,10 +263,17 @@ setGeneric(
 #' 
 #' httr::set_config(httr::config(ssl_verifypeer = 0L))
 #'
-#' uritab <- data_char_ukimmig2010 |>
-#'   corpus() |>
-#'   get_dbpedia_uris(progress = TRUE) %>% 
-#'   add_wikidata_uris(endpoint = "https://dbpedia.org/sparql/", progress = TRUE, chunksize = 100) %>% 
+#' uritab <- data_char_ukimmig2010 %>%
+#'   corpus() %>%
+#'   get_dbpedia_uris(
+#'     progress = TRUE,
+#'     config = httr::config(http_version = 1.1)
+#'   ) %>% 
+#'   add_wikidata_uris(
+#'     endpoint = "https://dbpedia.org/sparql/",
+#'     progress = TRUE,
+#'     chunksize = 100
+#'   ) %>% 
 #'   wikidata_query(id = "P31")
 #' }
 #'   
diff --git a/R/xml.R b/R/xml.R
index c53ea4b..0e423c3 100644
--- a/R/xml.R
+++ b/R/xml.R
@@ -26,11 +26,12 @@ xml_enrich <- function(xml,
 ) {
 
   # get all nodes which might contain entities
-  nodes <- xml |>
-    xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = token_tags))
+  nodes <- xml2::xml_find_all(
+    xml,
+    xpath = namespaced_xpath(xml = xml, tags = token_tags)
+  )
 
-  node_ids <- nodes |>
-    xml2::xml_attr("id")
+  node_ids <- xml2::xml_attr(nodes, "id")
 
   # for each annotation, extract identified words 
 
@@ -46,9 +47,11 @@ xml_enrich <- function(xml,
       # if there is no feature tag, pre-annotated named entities weren't
       # provided. Add identified named entities to tokens.
 
-      annotation_id <- annotation_dt[i, ][["original_id"]] |>
-        strsplit(split = "\\|") |>
-        unlist()
+      annotation_id <- unlist(strsplit(
+        annotation_dt[i, ][["original_id"]],
+        split = "\\|"
+      )
+      )
 
       # there could be additional values such as the type?
       nodes_idx <- which(node_ids %in% annotation_id)
diff --git a/man/get_dbpedia_uris.Rd b/man/get_dbpedia_uris.Rd
index 573f6ca..e4192ac 100644
--- a/man/get_dbpedia_uris.Rd
+++ b/man/get_dbpedia_uris.Rd
@@ -227,11 +227,10 @@ uritab <- corpus("REUTERS") \%>\%
 
 # Process quanteda corpus 
 library(quanteda)
-uritab <- data_char_ukimmig2010 |>
-  corpus() |>
+uritab <- data_char_ukimmig2010 \%>\%
+  corpus() \%>\%
   get_dbpedia_uris(
     verbose = FALSE,
     config = httr::config(http_version = 1.1)
   )
-  
 }
diff --git a/man/wikidata_uris.Rd b/man/wikidata_uris.Rd
index 991d1c5..1dfad0c 100644
--- a/man/wikidata_uris.Rd
+++ b/man/wikidata_uris.Rd
@@ -80,10 +80,17 @@ options(dbpedia.endpoint = "http://api.dbpedia-spotlight.org/en/annotate")
 
 httr::set_config(httr::config(ssl_verifypeer = 0L))
 
-uritab <- data_char_ukimmig2010 |>
-  corpus() |>
-  get_dbpedia_uris(progress = TRUE) \%>\% 
-  add_wikidata_uris(endpoint = "https://dbpedia.org/sparql/", progress = TRUE, chunksize = 100) \%>\% 
+uritab <- data_char_ukimmig2010 \%>\%
+  corpus() \%>\%
+  get_dbpedia_uris(
+    progress = TRUE,
+    config = httr::config(http_version = 1.1)
+  ) \%>\% 
+  add_wikidata_uris(
+    endpoint = "https://dbpedia.org/sparql/",
+    progress = TRUE,
+    chunksize = 100
+  ) \%>\% 
   wikidata_query(id = "P31")
 }