Skip to content

Commit

Permalink
Added new 'deduplicate_labels()' function.
Browse files Browse the repository at this point in the history
  • Loading branch information
melff committed Mar 14, 2020
1 parent 676bfb6 commit bfaad78
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: memisc
Type: Package
Title: Management of Survey Data and Presentation of Analysis Results
Version: 0.99.22
Date: 2020-02-13
Version: 0.99.23
Date: 2020-03-14
Author: Martin Elff (with contributions from Christopher N. Lawrence, Dave Atkins, Jason W. Morgan, Achim Zeileis)
Maintainer: Martin Elff <memisc@elff.eu>
Description: An infrastructure for the management of survey data including
Expand Down
5 changes: 5 additions & 0 deletions pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,8 @@ export(set_measurement)

export(Mean,Median,Min,Max,Weighted.Mean,
Var,StdDev,Cov,Cor,Range)

export(deduplicate_labels)
S3method(deduplicate_labels,default)
S3method(deduplicate_labels,item)
S3method(deduplicate_labels,item.list)
73 changes: 73 additions & 0 deletions pkg/R/dedup-labels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
combine_duplicated_labels <- function(x){
dl <- duplicated_labels(x)
l <- labels(x)
for(i in seq_along(dl)){
old_val <- dl[[i]]
new_val <- old_val[1]
drop_val <- old_val[-1]
x[x %in% old_val] <- new_val
drop <- l@values %in% drop_val
l@.Data <- l@.Data[!drop]
l@values <- l@values[!drop]
labels(x) <- l
}
x
}


prefix_duplicated_labels <- function(x,
pattern="%d. %s",
...){
dl <- duplicated_labels(x)
if(length(dl)){
l <- labels(x)
dedup_lab <- sprintf(pattern,l@values,l@.Data)
l@.Data <- dedup_lab
labels(x) <- l
}
x
}


postfix_duplicated_labels <- function(x,
pattern="%s (%d)",
...){
dl <- duplicated_labels(x)
l <- labels(x)
for(i in seq_along(dl)){
dup_lab <- names(dl)[i]
dup_val <- dl[[i]]
ii <- match(dup_val,l@values)
dedup_lab <- sprintf(pattern,dup_lab,dup_val)
l@.Data[ii] <- dedup_lab
}
labels(x) <- l
x
}

deduplicate_labels <- function(x,...) UseMethod("deduplicate_labels")
deduplicate_labels.default <- function(x,...) return(x)

deduplicate_labels.item <- function(x,method=c("combine codes",
"prefix values",
"postfix values"),...){
method <- match.arg(method)
# browser()
switch(method,
"combine codes"=combine_duplicated_labels(x),
"prefix values"=prefix_duplicated_labels(x,...),
"postfix values"=postfix_duplicated_labels(x,...))
}

deduplicate_labels.item.list <- function(x,...){
n <- ncol(x)
for(i in 1:n){
x.i <- x[[i]]
if(length(labels(x.i)) &&
length(duplicated_labels(x.i))){
x.i <- deduplicate_labels(x.i,...)
x@.Data[[i]] <- x.i
}
}
x
}
5 changes: 4 additions & 1 deletion pkg/inst/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
2019-01-02:
2020-03-14:
- Added 'deduplicate_labels()' function to deal with duplicated value labels.

2020-01-02:
- Added convenience wrappers 'Mean()', 'Var()', etc. to basic stats
functions such as 'mean()', 'var()', etc.
- 'rename()', 'relabel()', 'dimrename()', etc. now also work without
Expand Down
3 changes: 2 additions & 1 deletion pkg/inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
allow to create codebooks for single items/variables in
imported data files.
\item A \code{duplicated_labels} function allows to show and
describe duplicated labels.
describe duplicated labels and a \code{deduplicate_labels}
function allows to get rid of such duplicates.
\item New operators \code{\%#\%}, \code{\%##\%}, and \code{\%@\%} to
manipulate annotations and other attributes.
\item A \code{List} function adds names to its elements by deparsing
Expand Down
76 changes: 76 additions & 0 deletions pkg/man/dedup-labels.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
\name{deduplicate_labels}
\alias{deduplicate_labels}
\alias{deduplicate_labels.item}
\alias{deduplicate_labels.item.list}
\title{Handle duplicated labels}
\description{
The function \code{deduplicate_labels} can be used with "item" objects,
"importer" objects or "data.set" objects to deal with
duplicate labels,
i.e. labels that are attached to more than
one code. There are several ways to de-duplicate labels: by combining
values that share their label or by making labels duplicate labels distinct.
}
\usage{
deduplicate_labels(x,\dots)
\S3method{deduplicate_labels}{item}(x,
method=c("combine codes",
"prefix values",
"postfix values"),\dots)
# Applicable to 'importer' objects and 'data.set' objects
\S3method{deduplicate_labels}{item.list}(x,\dots)
}
\arguments{
\item{x}{an item with value labels or that contains items with
value labels}
\item{method}{a character string that determines the method to
make value labels unique.}
\item{\dots}{other arguments, passed to specific methods of the
generic function.}
}
\value{The function \code{deduplicate_labels} a copy of \code{x}
that has unqiue value labels.
}
\examples{
x1 <- as.item(rep(1:5,4),
labels=c(
A = 1,
A = 2,
B = 3,
B = 4,
C = 5
),
annotation = c(
description="Yet another test"
))

x2 <- as.item(rep(1:4,5),
labels=c(
i = 1,
ii = 2,
iii = 3,
iii = 4
),
annotation = c(
description="Still another test"
))

x3 <- as.item(rep(1:2,10),
labels=c(
a = 1,
b = 2
),
annotation = c(
description="Still another test"
))

codebook(deduplicate_labels(x1))
codebook(deduplicate_labels(x1,method="prefix"))
codebook(deduplicate_labels(x1,method="postfix"))

ds <- data.set(x1,x2,x3)
codebook(deduplicate_labels(ds))
codebook(deduplicate_labels(ds,method="prefix"))
codebook(deduplicate_labels(ds,method="postfix"))

}

0 comments on commit bfaad78

Please sign in to comment.