In [2]:
library(glstudy)
library(grpreg)
library(grplasso)
library(sparsegl)
library(gglasso)
library(microbenchmark)
library(progress)


Attaching package: ‘glstudy’


The following object is masked _by_ ‘.GlobalEnv’:

    objective




In [3]:
# Group-lasso objective
#   0.5 * ||y - X beta||_2^2 + lmda * sum_i penalty[i] * ||beta_{g_i}||_2
#
# Arguments:
#   X            design matrix; multiplied with `beta` via `%*%`.
#   y            response vector.
#   groups       0-based offset of the first coefficient of each group.
#   group_sizes  number of coefficients in each group (same order as `groups`).
#   beta         coefficient vector.
#   lmda         scalar multiplier applied to the penalty term.
#   penalty      per-group penalty factor (same order as `groups`).
#
# Returns the objective value.
objective <- function(X, y, groups, group_sizes, beta, lmda, penalty)
{
    # seq_along() + vapply() keep the "no groups" case well defined (penalty
    # term of 0) instead of looping over 1:0 and indexing with NA.
    group_norms <- vapply(seq_along(groups), function(i) {
        first <- groups[i] + 1
        last <- groups[i] + group_sizes[i]
        sqrt(sum(beta[first:last] ** 2))
    }, numeric(1))
    reg <- sum(penalty[seq_along(groups)] * group_norms)
    0.5 * sum((y - X %*% beta) ** 2) + lmda * reg
}

In [4]:
# Simulate a grouped linear-regression data set.
#
# Arguments:
#   seed        RNG seed passed to set.seed().
#   n           number of rows of X.
#   p           number of columns of X.
#   sparsity    each true coefficient is zeroed with this probability
#               (it is kept with probability 1 - sparsity).
#   group_size  number of consecutive columns per group; the last group
#               additionally absorbs the remainder when p is not a multiple
#               of group_size.
#
# Returns a list with
#   X            n x p matrix whose columns were passed through scale().
#   y            mean-centered response, X %*% beta + standard normal noise.
#   groups       0-based offset of the first column of each group.
#   group_sizes  number of columns in each group.
generate.data <- function(
    seed,
    n, 
    p,
    sparsity,
    group_size
)
{
    set.seed(seed)
    X <- matrix(rnorm(n * p), n, p)
    X <- apply(X, 2, scale)
    beta <- runif(p, -1, 1) * rbinom(p, 1, 1-sparsity)
    y <- as.numeric(X %*% beta + rnorm(n))
    y <- y - mean(y)

    # At least one group: when p < group_size the integer division is 0 and
    # `0:(0 - 1)` would otherwise create a bogus group with a negative offset
    # and negative size. All p columns then form a single group.
    n_groups <- max(1L, as.integer(p / group_size))

    # Group boundaries: the start offset of every group followed by p.
    bounds <- c((seq_len(n_groups) - 1L) * group_size, p)
    group_sizes <- diff(bounds)
    groups <- bounds[-length(bounds)]

    list(
        X=X,
        y=y,
        groups=groups,
        group_sizes=group_sizes
    )
}

# Time `group_basil` against other group-lasso packages on simulated data and
# record the objective each solver reaches at the last regularization value.
#
# Arguments:
#   seed        seed forwarded to generate.data() (the same seed is used for
#               every entry of `p`).
#   n           number of observations.
#   p           number of features; may be a vector, one benchmark per entry.
#   sparsity    forwarded to generate.data().
#   group_size  forwarded to generate.data().
#   subset      names of the competing packages to run; each must be one of
#               "grpreg", "grplasso", "sparsegl", "gglasso".
#
# Returns a list with
#   times    length(p) x 5 matrix of run times in seconds (one timed run per
#            package); columns of packages that were not run stay NA.
#   objs     length(p) x 5 matrix of objective() values at the last lambda.
#   n_lmdas  number of lambdas returned by group_basil for each p.
bench.packages <- function(
    seed=0,
    n=100,
    p=100,
    sparsity=0.95,
    group_size=10,
    subset=c("grpreg", "grplasso", "sparsegl", "gglasso")
)
{
    ps <- p
    pkg_names <- c("grpglmnet", "grpreg", "grplasso", "sparsegl", "gglasso")
    times <- matrix(NA, length(ps), length(pkg_names))
    objs <- matrix(NA, length(ps), length(pkg_names))
    n_lmdas <- rep(NA, length(ps))
    colnames(times) <- pkg_names
    colnames(objs) <- pkg_names
    
    # One wrapper per competing package, all sharing the same signature.
    # NOTE(review): the lambda rescalings (1/length(y), 2x) presumably map
    # group_basil's lambdas onto each package's loss convention -- confirm
    # against the respective package documentation.
    other_fs <- list(
        "grpreg"=function(X,y,group,lmdas,penalty) grpreg(X, y, group, penalty='grLasso', family='gaussian', lambda=lmdas / length(y), eps=1e-7),
        "grplasso"=function(X,y,group,lmdas,penalty) grplasso(X, y, index=as.integer(group), lambda=2*lmdas, center=FALSE, standardize=FALSE, model=LinReg(), control=grpl.control(trace=0)),
        "sparsegl"=function(X,y,group,lmdas,penalty) sparsegl(X, y, group=as.integer(group), family='gaussian', lambda=lmdas / length(y), intercept=FALSE, standardize=FALSE, pf_group=penalty, asparse=0),
        "gglasso"=function(X,y,group,lmdas,penalty) gglasso(X, y, group=as.integer(group), loss='ls', lambda=lmdas / length(y), pf=penalty)
    )

    # Fail fast on a misspelled package name rather than after the (possibly
    # long) group_basil run with an opaque "attempt to apply non-function".
    unknown <- setdiff(subset, names(other_fs))
    if (length(unknown) > 0) {
        stop("unknown package(s) in 'subset': ", paste(unknown, collapse=", "), call.=FALSE)
    }

    pb <- txtProgressBar(min = 0,      # Minimum value of the progress bar
                         max = length(ps), # Maximum value of the progress bar
                         style = 3,    # Progress bar style (also available style = 1 and style = 2)
                         width = 50,   # Progress bar width. Defaults to getOption("width")
                         char = "=")   # Character used to create the bar
    # Close the bar even when one of the solvers throws.
    on.exit(close(pb), add = TRUE)

    for (i in seq_along(ps)) {
        p <- ps[i]

        # generate data
        data <- generate.data(seed, n, p, sparsity, group_size)
        X <- data$X
        y <- data$y
        groups <- data$groups
        group_sizes <- data$group_sizes
        penalty <- sqrt(group_sizes)
        
        # run mine; the assignment inside the timed expression keeps the fit
        # available afterwards
        times[i, 1] <- summary(microbenchmark({grpglmnet.out <- group_basil(X, y, groups, group_sizes, penalty=penalty)}, times=1L, unit='ns'))$mean
        lmdas <- grpglmnet.out$lmdas
        n_lmdas[i] <- length(lmdas)

        # prepare input for other packages: one group label per column
        group <- as.factor(rep(seq_along(groups), times=group_sizes))

        # run other packages on the lambda sequence produced by group_basil
        other_outs <- list()
        for (ss in subset) {
            times[i, ss] <- summary(microbenchmark({other_outs[[ss]] <- other_fs[[ss]](X, y, group, lmdas, penalty)}, times=1L, unit='ns'))$mean
        }

        # save last objectives
        last_idx <- length(lmdas)
        objs[i, 1] <- objective(X, y, groups, group_sizes, grpglmnet.out$beta[,last_idx], lmdas[last_idx], penalty)
        for (ss in subset) {
            fit <- other_outs[[ss]]
            coefs <- switch(
                ss,
                # first row is skipped (presumably the intercept -- confirm)
                "grpreg"=fit$beta[-1, last_idx],
                "grplasso"=fit$coefficients[, last_idx],
                fit$beta[, last_idx]
            )
            objs[i, ss] <- objective(X, y, groups, group_sizes, coefs, lmdas[last_idx], penalty)
        }
        
        setTxtProgressBar(pb, i)
    }
    
    # microbenchmark reported nanoseconds; convert to seconds
    times <- times * 1e-9
    
    list(
        times=times,
        objs=objs,
        n_lmdas=n_lmdas
    )
}

In [17]:
# Benchmark every package for p = 2^4, ..., 2^16 features.
p <- 2^(4:16)
out <- bench.packages(p = p)



In [36]:
# Larger problems (p = 2^17, 2^18); grplasso is not part of this run.
p.large <- 2^(17:18)
out.large <- bench.packages(
    p = p.large,
    subset = c("sparsegl", "gglasso", "grpreg")
)



In [37]:
# Largest problems (p = 2^19, 2^20); only sparsegl and gglasso are compared.
p.large2 <- 2^(19:20)
out.large2 <- bench.packages(
    p = p.large2,
    subset = c("sparsegl", "gglasso")
)



In [38]:
# Stack the results of the three benchmark runs, in increasing order of p.
times <- do.call(rbind, lapply(list(out, out.large, out.large2), function(run) run$times))
objs <- do.call(rbind, lapply(list(out, out.large, out.large2), function(run) run$objs))
n_lmdas <- unlist(lapply(list(out, out.large, out.large2), function(run) run$n_lmdas))

In [39]:
# Save the combined results as comma-separated files without row or column
# headers.
write.table(
    times,
    file = "data/gl_packages_time.csv",
    sep = ",", row.names = FALSE, col.names = FALSE
)
write.table(
    objs,
    file = "data/gl_packages_obj.csv",
    sep = ",", row.names = FALSE, col.names = FALSE
)
write.table(
    n_lmdas,
    file = "data/gl_packages_n_lmda.csv",
    sep = ",", row.names = FALSE, col.names = FALSE
)

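Now let's try group matrices that are not full rank: with the default n = 100 observations and a group size of 300, each group has more columns than rows.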

In [7]:
# Groups of 300 columns with the default n = 100, for p = 2^10, ..., 2^12.
p <- 2^(10:12)
out.nfr <- bench.packages(group_size = 300, p = p)



In [8]:
# Display the result list (times, objs, n_lmdas) of the group_size = 300 run.
out.nfr

grpglmnet,grpreg,grplasso,sparsegl,gglasso
0.07253239,0.03545208,0.2592905,0.2959534,0.4476103
0.1363746,0.06978451,0.5932974,0.7928653,1.0632244
0.1651402,0.15784388,0.8101893,0.8235419,1.2208115

grpglmnet,grpreg,grplasso,sparsegl,gglasso
106.496,121.0182,106.4958,106.4958,106.4958
290.725,340.2109,290.7251,290.7245,290.7245
717.3426,829.9833,717.3431,717.3419,717.3419
