add c.DataSetList(); check_format()

IOHprofiler · Oct 1, 2018 · 2f3bd3f · 2f3bd3f
1 parent cf3dea8
commit 2f3bd3f
Show file tree

Hide file tree

Showing 7 changed files with 698 additions and 159 deletions.
diff --git a/C/align.cc b/C/align.cc
@@ -0,0 +1,68 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+// For each run k, step through data[k] until column idxTarget of the buffered row is >= t, then report that row's idxEvals value.
+// [[Rcpp::export]]
+NumericVector align_by_target_inner_loop(double t, int idxEvals, int idxTarget, 
+  List data, NumericVector index, NumericMatrix next_lines, NumericVector curr_eval) {
+
+  int N = data.size();
+  NumericVector out = clone(curr_eval);  // deep copy, so curr_eval itself is not modified
+  // one pass per element of data; element k is read as a numeric matrix
+  for (int k = 0; k < N; k++) {
+    NumericMatrix d = as<NumericMatrix>(data[k]);
+    int n_row = d.nrow();
+    int n_col = d.ncol();
+    int iter = index[k];  // row position stored for run k by the caller / a previous call
+    // scan forward until the target t is met or the rows of d run out
+    while (true) {
+      if (next_lines(k, idxTarget) >= t) {
+        out[k] = next_lines(k, idxEvals);
+        break;
+      }
+      // target not met yet: copy the next row of d into row k of next_lines, if there is one
+      if (iter < (n_row - 1)) {
+        iter++;
+        for (int j = 0; j < n_col; j++) {
+          next_lines(k, j) = d(iter, j);
+        }
+      } else {
+        break;  // no rows left: out[k] keeps the value copied from curr_eval
+      }
+    }
+    index[k] = iter;  // store the position back so a later call can resume from it
+  }
+  return out;
+}
+// For each run k, step through data[k] until the buffered idxEvals value exceeds r and store that row's idxTarget value in curr_fvalues[k].
+// TODO: verify this function
+// [[Rcpp::export]]
+void align_by_runtime_inner_loop(int r, int idxEvals, int idxTarget, 
+  List data, NumericVector n_rows, NumericVector index, NumericMatrix next_lines,
+  NumericVector curr_fvalues) {
+
+  int N = data.size();
+  for (int k = 0; k < N; k++) {
+    NumericMatrix d = as<NumericMatrix>(data[k]);
+    int n_row = n_rows[k];  // row count is supplied by the caller rather than read from d
+    int iter = index[k];
+    int n_col = d.ncol();
+    // an NA in the idxEvals column of next_lines marks run k as exhausted; such runs are skipped
+    while (!NumericVector::is_na(next_lines(k, idxEvals))) {
+      if (next_lines(k, idxEvals) > r) {
+        curr_fvalues[k] = next_lines(k, idxTarget);
+        break;
+      }
+      // budget r not exceeded yet: copy the next row of d into row k of next_lines, if there is one
+      if (iter < (n_row - 1)) {
+        iter++;
+        for (int j = 0; j < n_col; j++) {
+          next_lines(k, j) = d(iter, j);
+        }
+      } else {
+        curr_fvalues[k] = next_lines(k, idxTarget);  // out of rows: record the last buffered row's value
+        next_lines(k, idxEvals) = NA_REAL;  // flag the run as exhausted, which also ends the while loop
+      }
+    }
+    index[k] = iter;  // store the position back so a later call can resume from it
+  }
+}
diff --git a/README.html b/README.html
diff --git a/README.md b/README.md
@@ -1,14 +1,79 @@
-# Iterative Optimization Heuristics Profiler
+# Post-processing Tool
 
-This is the post-processing module of the project __Iterative Optimization Heuristics Profiler__ (IOHProfiler)
+This is the post-processing tool of the project __Iterative Optimization Heuristics Profiler__ (IOHProfiler). This tool provides a web-based interface to analyze and visualize the benchmark data collected from previous experiments. Importantly, we __do support__ the widely used [COCO](https://github.com/numbbo/coco) data format (aka Black-Box Optimization Benchmarking).
 
-# INSTALL
+# Installation
+This software is mainly written in __R__. To run it directly from the source code, please install the R environment first. The binary files and installation manual for R can be found at [https://cran.r-project.org/](https://cran.r-project.org/).
 
-## Requirements
+After the R environment is correctly installed on your machine, several R packages are needed to run the software. Please start up the __R console__, which can be done (in case you're not familiar with R) by either executing the command `R` in your system terminal or opening the R application. Once that is done, please copy-paste the following commands into the R console and execute them to install all dependencies.
+
+```r
+install.packages('shiny')
+install.packages('shinyjs')
+install.packages('shinydashboard')
+install.packages('magrittr')
+install.packages('dplyr')
+install.packages('reshape2')
+install.packages('data.table')
+install.packages('markdown')
+install.packages('devtools')
+install.packages('Rcpp')
+devtools::install_github("ropensci/plotly")
+```
+Please make sure those packages are correctly installed by monitoring the (verbose) prompting messages on the console.
+
+Then, please clone this repository onto your own system. To start the post-processing module, please execute the following command in the R console:
+```r
+shiny::runApp('/path/to/the/clone/folder')
+```
+
+# Online Service
+Alternatively, we have built a server to put this tool online, which is currently hosted in __Leiden University__. The server can be accessed via
+
+
+# Data Preparation
+Data preparation is fairly easy for this tool. Just compress the data folder obtained from the experiment and upload it. Currently, we support two data formats:
+* IOHProfiler: our own csv-based format 
+* COCO: data format of the widely used COCO framework
 
-# Example
+# Programming Interface
+In addition to the graphical user interface, it is possible to directly call several procedures to analyze the data.
+
+* To read and align all the data sets in a folder:
+```console
+> ds <- read_dir('/path/to/data/folder')
+> ds
+DataSetList:
+1: DataSet((1+1)-Cholesky-CMA on f1 2D)
+2: DataSet((1+1)-Cholesky-CMA on f1 5D)
+3: DataSet((1+1)-Cholesky-CMA on f1 10D)
+4: DataSet((1+1)-Cholesky-CMA on f1 20D)
+5: DataSet((1+1)-Cholesky-CMA on f10 2D)
+6: DataSet((1+1)-Cholesky-CMA on f10 5D)
+7: DataSet((1+1)-Cholesky-CMA on f10 10D)
+8: DataSet((1+1)-Cholesky-CMA on f10 20D)
+9: DataSet((1+1)-Cholesky-CMA on f11 2D)
+10: DataSet((1+1)-Cholesky-CMA on f11 5D)
+```
+The return value is a list of __DataSets__. Each data set consists of:
+
+  1. runtime samples (aligned by target values), 
+  2. target values (aligned by runtime) and 
+  3. aligned endogenous parameter values of your optimization algorithm (aligned by target values).
+
+* To get a summary of one data set (e.g., the runtime distribution):
+```console
+> summarise_runtime(ds[[1]], ftarget = 1e-1, maximization = FALSE)
+               algId       f(x) runs  mean median       sd 2% 5% 10% 25% 50% 75% 90% 95% 98%
+1 (1+1)-Cholesky-CMA 0.09986529   80 36.55   37.5 17.11236  4  5  14  22  37  49  57  67  68
+```
 
 # TODO
+* [ ] convert data processing code into a package
+* [ ] add more statistical tests
+
+# Contact
+If you have any questions, comments or suggestions, please don't hesitate to contact us via <wangronin@gmail.com> or <h.wang@liacs.leidenuniv.nl>.  
 
 # Cite us
 

diff --git a/plot.R b/plot.R
@@ -2,12 +2,11 @@
 # 1. plotly 2. ggplot
 # Author: Hao Wang
 # Email: wangronin@gmail.com
-# 
+
 
 library(plotly)
 library(ggplot2)
 
-
 plot_ly_default <- function(title = NULL,
                             x.title = NULL,
                             y.title = NULL) {

diff --git a/pproc.R b/pproc.R
@@ -5,7 +5,6 @@
 # Email: wangronin@gmail.com
 # 
 # Remark:
-#   1. library 'itertool' is way two slow and thus is not used here
 #   2. Rcpp is used for the data alignment function
 
 library(magrittr)
@@ -16,6 +15,7 @@ source('readFiles.R')
 
 # TODO: perhaps migrate to data.table for speed concern and simplicity
 # TODO: find better name to replace FCE
+# TODO: general issue: maybe separate DataSetList class from DataSet class
 
 # constructor of S3 class 'DataSet' ---------------------------
 # Attributes
@@ -403,16 +403,17 @@ read_dir <- function(path, verbose = T, print_fun = NULL, maximization = TRUE,
               format = format, subsampling = subsampling)
 }
 
-# S3 constructoer of the 'DataSetList' 
+# TODO: find a better name for this function
+# TODO: implement this
+load_index <- function(file) {
+  # placeholder: the body is empty, so a call currently ignores `file` and returns NULL
+}
+
+# S3 constructor of the 'DataSetList' 
 # Attributes
 #   funId
 #   DIM
 #   algId
-#   Precision
-#   datafile
-#   instance
-#   maxEvals
-#   finalFunEvals
 DataSetList <- function(path = NULL, verbose = T, print_fun = NULL, maximization = TRUE,
                         format = 'IOHProfiler', subsampling = FALSE) {
   if (is.null(path))
@@ -476,24 +477,40 @@ DataSetList <- function(path = NULL, verbose = T, print_fun = NULL, maximization
     }
   }
 
-  # TODO: sort all DataSet by key order: algId, funcId and DIM
+  # TODO: sort all DataSet by multiple attributes: algId, funcId and DIM
   class(object) %<>% c('DataSetList')
   attr(object, 'DIM') <- DIM
   attr(object, 'funcId') <- funcId
   attr(object, 'algId') <- algId
   object
 }
 
-# TODO: implement this 
-# c.DataSetList <- function(...) {
-#   browser()
-# }
+c.DataSetList <- function(...) {
+  # Combine the arguments into one DataSetList. TODO: maybe remove duplicated datasets in the future
+  # drop the zero-length arguments first
+  dsl <- list(...)
+  dsl <- dsl[sapply(dsl, length) != 0]  
+  # nothing left to combine: return() without an argument yields NULL
+  if (length(dsl) == 0)
+    return()
+  # flatten one level only: the members of each argument become the members of the result
+  object <- unlist(dsl, recursive = F)
+  class(object) %<>% c('DataSetList')
+  # concatenate each attribute across the arguments, in argument order
+  for (attr_str in c('DIM', 'funcId', 'algId')) {
+    attr(object, attr_str) <- unlist(lapply(dsl, function(x) attr(x, attr_str)))
+  }
+  object
+}
 
 `[.DataSetList` <- function(x, i, drop = FALSE) {
-  obj <- unclass(x)[i]
+  # subset the underlying plain list; unclass() keeps `[` from dispatching back to this method
+  obj <- unclass(x)[i] 
   class(obj) %<>% c('DataSetList')
+  # NOTE: the `drop` argument is accepted but not used in this body
+  # index the DIM, funcId and algId attributes with the same i so they stay aligned with the elements
   attr(obj, 'DIM') <- attr(x, 'DIM')[i]
-  attr(obj, 'funId') <- attr(x, 'funId')[i]
+  attr(obj, 'funcId') <- attr(x, 'funcId')[i]
   attr(obj, 'algId') <- attr(x, 'algId')[i]
   obj
 }