-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.R
56 lines (55 loc) · 1.87 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#' Find unique (taxon) occurrence records
#'
#' Subset a dataset to unique spatial localities or locality-taxon combinations.
#'
#' The `na.rm` argument applies to coordinate values and, if `taxVar`
#' is supplied, to taxon values. If `na.rm = FALSE`, any `NA` values will be
#' retained and treated as their own value. Note that `divvy` ignores any rows
#' with missing coordinates for the subsampling functions [cookies()],
#' [clustr()], and [bandit()].
#'
#' @inheritParams sdSumry
#' @param na.rm Should records missing information be removed?
#' Default is yes.
#'
#' @return An object with the same class and columns as `dat`, containing the
#' subset of rows representing unique coordinates (if only `xy` supplied)
#' or unique taxon-site combinations (if `taxVar` is also supplied).
#' The first record at each spatial locality is retained,
#' or if `taxVar` is specified, the first record of each taxon at a locality.
#'
#' @examples
#' # generate occurrence data
#' x <- rep(1, 10)
#' y <- c(rep(1, 5), 2:6)
#' sp <- c(rep(letters[1:3], 2),
#' rep(letters[4:5], 2))
#' obs <- data.frame(x, y, sp)
#'
#' # compare original and unique datasets:
#' # rows 4 and 5 removed as duplicates of rows 1 and 2, respectively
#' obs
#' uniqify(obs, taxVar = 3, xy = 1:2)
#'
#' # using taxon identifications or other third variable is optional
#' uniqify(obs, xy = c('x', 'y'))
#'
#' # caution - data outside the taxon and occurrence variables
#' # will be lost where associated with duplicate occurrences
#' obs$notes <- letters[11:20]
#' uniqify(obs, 1:2, 3)
#' # the notes 'n' and 'o' are absent in the output data
#'
#' @export
uniqify <- function(dat, xy, taxVar = NULL, na.rm = TRUE){
args <- xy
if (! is.null(taxVar)){
args <- c(taxVar, args)
}
if (na.rm){
compl <- stats::complete.cases( dat[,args] )
dat <- dat[compl, ]
}
dupes <- duplicated( dat[,args] )
dat[ !dupes, ]
}