Added kernel stuff to readme

CollinErickson · Feb 11, 2024 · 81761f7 · 81761f7
1 parent 45caef2
commit 81761f7
Show file tree

Hide file tree

Showing 9 changed files with 136 additions and 111 deletions.
diff --git a/R/GauPro_S3.R b/R/GauPro_S3.R
@@ -23,38 +23,6 @@ predict.GauPro <- function(object, XX, se.fit=F, covmat=F, split_speed=T, ...) {
   object$predict(XX=XX, se.fit=se.fit, covmat=covmat, split_speed=split_speed)
 }
 
-#' if (F) {
-#'   # Plot is automatically dispatched, same with print and format
-#'   #' Plot for class GauPro
-#'   #'
-#'   #' @param x Object of class GauPro
-#'   #' @param ... Additional parameters
-#'   #'
-#'   #' @return Nothing
-#'   #' @export
-#'   #'
-#'   #' @examples
-#'   #' n <- 12
-#'   #' x <- matrix(seq(0,1,length.out = n), ncol=1)
-#'   #' y <- sin(2*pi*x) + rnorm(n,0,1e-1)
-#'   #' gp <- GauPro(X=x, Z=y, parallel=FALSE)
-#'   #' if (requireNamespace("MASS", quietly = TRUE)) {
-#'   #'   plot(gp)
-#'   #' }
-#'   #'
-#'   plot.GauPro <- function(x,  ...) {
-#'     x$plot(...)
-#'     # if (x$D == 1) {
-#'     #   x$cool1Dplot(...)
-#'     # } else if (x$D == 2) {
-#'     #   x$plot2D(...)
-#'     # } else {
-#'     #   # stop("No plot method for higher than 2 dimension")
-#'     #   x$plotmarginal()
-#'     # }
-#'   }
-#' }
-
 #' Summary for GauPro object
 #'
 #' @param object GauPro R6 object

diff --git a/R/kernel_Matern52.R b/R/kernel_Matern52.R
@@ -1,5 +1,10 @@
 #' Matern 5/2 Kernel R6 class
 #'
+#'
+#' \eqn{k(x, y) = s2 * (1 + t1 + t1^2 / 3) * exp(-t1) }
+#' where
+#' \eqn{t1 = sqrt(5) * sqrt(sum(theta * (x-y)^2))}
+#'
 #' @docType class
 #' @importFrom R6 R6Class
 #' @export

diff --git a/R/kernel_model.R b/R/kernel_model.R
@@ -495,14 +495,21 @@ GauPro_kernel_model <- R6::R6Class(
       # Update K, Kinv, mu_hat, and s2_hat, maybe nugget too
       self$K <- self$kernel$k(self$X) + diag(self$kernel$s2 * self$nug,
                                              self$N)
+      nugincreased <- FALSE
       while(T) {
         try.chol <- try(self$Kchol <- chol(self$K), silent = T)
         if (!inherits(try.chol, "try-error")) {break}
-        warning("Can't Cholesky, increasing nugget #7819553")
+        nugincreased <- TRUE
+        # warning("Can't Cholesky, increasing nugget #7819553")
         oldnug <- self$nug
         self$nug <- max(1e-8, 2 * self$nug)
         self$K <- self$K + diag(self$kernel$s2 * (self$nug - oldnug),
                                 self$N)
+        # cat("Increasing nugget to get invertibility from ", oldnug, ' to ',
+        #     self$nug, "\n")
+      }
+      if (nugincreased) {
+        warning("Can't Cholesky, increasing nugget #7819553")
         cat("Increasing nugget to get invertibility from ", oldnug, ' to ',
             self$nug, "\n")
       }

diff --git a/R/kernel_periodic.R b/R/kernel_periodic.R
@@ -4,6 +4,8 @@
 #'
 #' \eqn{k(x, y) = s2 * exp(-sum(alpha*sin(p * (x-y))^2))}
 #'
+#' \eqn{$k(x, y) = \sigma^2 * \exp(-\sum(\alpha_i*sin(p * (x_i-y_i))^2))}
+#'
 #' @docType class
 #' @importFrom R6 R6Class
 #' @export

diff --git a/README.Rmd b/README.Rmd
@@ -107,6 +107,39 @@ output.
 plot(dm)
 ```
 
+### Constructing a kernel
+
+In this case, the kernel was chosen automatically by looking at which dimensions
+were continuous and which were discrete, and then using a Matern 5/2 on the 
+continuous dimensions (1,5), and separate ordered factor kernels on the other
+dimensions since those columns in the data frame are all ordinal factors.
+We can construct our own kernel using products and sums of any kernels,
+making sure that the continuous kernels ignore the factor dimensions.
+
+Suppose we want to construct a kernel for this example that uses the power
+exponential kernel for the two continuous dimensions, ordered kernels on 
+`cut` and `color`, and a Gower kernel on `clarity`.
+First we construct the power exponential kernel that ignores the 3 factor
+dimensions.
+Then we construct 
+
+```{r diamond_construct_kernel}
+cts_kernel <- IgnoreIndsKernel$new(k=PowerExp$new(D=2), ignoreinds = c(2,3,4))
+factor_kernel2 <- OrderedFactorKernel$new(D=5, xindex=2, nlevels=nlevels(diamonds_subset[[2]]))
+factor_kernel3 <- OrderedFactorKernel$new(D=5, xindex=3, nlevels=nlevels(diamonds_subset[[3]]))
+factor_kernel4 <- GowerFactorKernel$new(D=5, xindex=4, nlevels=nlevels(diamonds_subset[[4]]))
+
+# Multiply them
+diamond_kernel <- cts_kernel * factor_kernel2 * factor_kernel3 * factor_kernel4
+```
+
+Now we can pass this kernel into `gpkm` and it will use it.
+
+```{r diamond_construct_kernel_fit}
+dm <- gpkm(price ~ carat + cut + color + clarity + depth,
+           diamonds_subset, kernel=diamond_kernel)
+dm$plotkernel()
+```
 
 
 
@@ -139,14 +172,35 @@ proper types, then the default kernel will properly handle it by applying
 the numeric kernel to the numeric inputs and
 the factor kernel to the factor and character inputs.
 
+| Kernel | Code | Continuous/<br />discrete | Equation | Notes |
+| --- | --- | --- | --- | --- |
+| Gaussian | `Gaussian` | cts |  | Often causes issues since it assumes infinite differentiability. Experts don't recommend using it. |
+| Matern 3/2 | `Matern32` | cts | | Assumes one time differentiability. This is often too low of an assumption. |
+| Matern 5/2 | `Matern52` | cts | | Assumes two time differentiability. Generally the best. |
+| Exponential | `Exponential` | cts | |  Equivalent to Matern 1/2. Assumes no differentiability. |
+| Triangle | `Triangle` | cts | | |
+| Power exponential | `PowerExp` | cts | | |
+| Periodic | `Periodic` | cts | $k(x, y) = \sigma^2 * \exp(-\sum(\alpha_i*sin(p * (x_i-y_i))^2))$ | The only kernel that takes advantage of periodic data. But there is often incoherance far apart, so you will likely want to multiply by one of the standard kernels. |
+| Cubic | `Cubic` | cts | | |
+| Rational quadratic | `RatQuad` | cts | | |
+| Latent factor kernel | `LatentFactorKernel` | factor | | This embeds each discrete value into a low dimensional space and calculates the distances in that space. This works well when there are many discrete values. |
+| Ordered factor kernel | `OrderedFactorKernel` | factor | | This maintains the order of the discrete values. E.g., if there are 3 levels, it will ensure that 1 and 2 have a higher correlation than 1 and 3. This is similar to embedding into a latent space with 1 dimension and requiring the values to be kept in numerical order. |
+| Factor kernel | `FactorKernel` | factor | | This fits a parameter for every pair of possible values. E.g., if there are 4 discrete values, it will fit 6 (4 choose 2) values. This doesn't scale well. When there are many discrete values, use any of the other factor kernels. |
+| Gower factor kernel | `GowerFactorKernel` | factor | $k(x,y) = \begin{cases} 1,  & \text{if } x=y \\ p, & \text{if } x \neq y  \end{cases}$ | This is a very simple factor kernel. For the relevant dimension, the correlation will either be 1 if the value are the same, or $p$ if they are different. |
+| Ignore indices | `IgnoreInds` | N/A | | Use this to create a kernel that ignores certain dimensions. Useful when you want to fit different kernel types to different dimensions or when there is a mix of continuous and discrete dimensions. |
+
+Factor kernels: note that these all only work on a single dimension. If there
+are multiple factor dimensions in your input, then they each will be given a
+different factor kernel.
+
 ## Combining kernels
 
 Kernels can be combined by multiplying or adding them directly.
 
 The following example uses the product of a periodic and a Matern 5/2 kernel
 to fit periodic data.
 
-```{r combine seed}
+```{r combine seed, include=F}
 set.seed(99)
 ```
 
@@ -158,4 +212,7 @@ gp <- gpkm(x, y, kernel=Periodic$new(D=1)*Matern52$new(D=1), nug.min=1e-6)
 gp$plot()
 ```
 
+For an example of a more complex kernel being constructed,
+see the diamonds section above.
+
 
diff --git a/README.md b/README.md
@@ -113,6 +113,42 @@ plot(dm)
 
 ![](tools/README-plot_dm-1.png)<!-- -->
 
+### Constructing a kernel
+
+In this case, the kernel was chosen automatically by looking at which
+dimensions were continuous and which were discrete, and then using a
+Matern 5/2 on the continuous dimensions (1,5), and separate ordered
+factor kernels on the other dimensions since those columns in the data
+frame are all ordinal factors. We can construct our own kernel using
+products and sums of any kernels, making sure that the continuous
+kernels ignore the factor dimensions.
+
+Suppose we want to construct a kernel for this example that uses the
+power exponential kernel for the two continuous dimensions, ordered
+kernels on `cut` and `color`, and a Gower kernel on `clarity`. First we
+construct the power exponential kernel that ignores the 3 factor
+dimensions. Then we construct
+
+``` r
+cts_kernel <- IgnoreIndsKernel$new(k=PowerExp$new(D=2), ignoreinds = c(2,3,4))
+factor_kernel2 <- OrderedFactorKernel$new(D=5, xindex=2, nlevels=nlevels(diamonds_subset[[2]]))
+factor_kernel3 <- OrderedFactorKernel$new(D=5, xindex=3, nlevels=nlevels(diamonds_subset[[3]]))
+factor_kernel4 <- GowerFactorKernel$new(D=5, xindex=4, nlevels=nlevels(diamonds_subset[[4]]))
+
+# Multiply them
+diamond_kernel <- cts_kernel * factor_kernel2 * factor_kernel3 * factor_kernel4
+```
+
+Now we can pass this kernel into `gpkm` and it will use it.
+
+``` r
+dm <- gpkm(price ~ carat + cut + color + clarity + depth,
+           diamonds_subset, kernel=diamond_kernel)
+dm$plotkernel()
+```
+
+![](tools/README-diamond_construct_kernel_fit-1.png)<!-- -->
+
 ## Using kernels
 
 A key modeling decision for Gaussian process models is the choice of
@@ -134,17 +170,34 @@ default kernel will properly handle it by applying the numeric kernel to
 the numeric inputs and the factor kernel to the factor and character
 inputs.
 
+| Kernel                | Code                  | Continuous/<br />discrete | Equation                                                                             | Notes                                                                                                                                                                                                                                                                       |
+|-----------------------|-----------------------|---------------------------|--------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Gaussian              | `Gaussian`            | cts                       |                                                                                      | Often causes issues since it assumes infinite differentiability. Experts don’t recommend using it.                                                                                                                                                                          |
+| Matern 3/2            | `Matern32`            | cts                       |                                                                                      | Assumes one time differentiability. This is often too low of an assumption.                                                                                                                                                                                                 |
+| Matern 5/2            | `Matern52`            | cts                       |                                                                                      | Assumes two time differentiability. Generally the best.                                                                                                                                                                                                                     |
+| Exponential           | `Exponential`         | cts                       |                                                                                      | Equivalent to Matern 1/2. Assumes no differentiability.                                                                                                                                                                                                                     |
+| Triangle              | `Triangle`            | cts                       |                                                                                      |                                                                                                                                                                                                                                                                             |
+| Power exponential     | `PowerExp`            | cts                       |                                                                                      |                                                                                                                                                                                                                                                                             |
+| Periodic              | `Periodic`            | cts                       | $k(x, y) = \sigma^2 * \exp(-\sum(\alpha_i*sin(p * (x_i-y_i))^2))$                    | The only kernel that takes advantage of periodic data. But there is often incoherance far apart, so you will likely want to multiply by one of the standard kernels.                                                                                                        |
+| Cubic                 | `Cubic`               | cts                       |                                                                                      |                                                                                                                                                                                                                                                                             |
+| Rational quadratic    | `RatQuad`             | cts                       |                                                                                      |                                                                                                                                                                                                                                                                             |
+| Latent factor kernel  | `LatentFactorKernel`  | factor                    |                                                                                      | This embeds each discrete value into a low dimensional space and calculates the distances in that space. This works well when there are many discrete values.                                                                                                               |
+| Ordered factor kernel | `OrderedFactorKernel` | factor                    |                                                                                      | This maintains the order of the discrete values. E.g., if there are 3 levels, it will ensure that 1 and 2 have a higher correlation than 1 and 3. This is similar to embedding into a latent space with 1 dimension and requiring the values to be kept in numerical order. |
+| Factor kernel         | `FactorKernel`        | factor                    |                                                                                      | This fits a parameter for every pair of possible values. E.g., if there are 4 discrete values, it will fit 6 (4 choose 2) values. This doesn’t scale well. When there are many discrete values, use any of the other factor kernels.                                        |
+| Gower factor kernel   | `GowerFactorKernel`   | factor                    | $k(x,y) = \begin{cases} 1, & \text{if } x=y \\ p, & \text{if } x \neq y \end{cases}$ | This is a very simple factor kernel. For the relevant dimension, the correlation will either be 1 if the value are the same, or $p$ if they are different.                                                                                                                  |
+| Ignore indices        | `IgnoreInds`          | N/A                       |                                                                                      | Use this to create a kernel that ignores certain dimensions. Useful when you want to fit different kernel types to different dimensions or when there is a mix of continuous and discrete dimensions.                                                                       |
+
+Factor kernels: note that these all only work on a single dimension. If
+there are multiple factor dimensions in your input, then they each will
+be given a different factor kernel.
+
 ## Combining kernels
 
 Kernels can be combined by multiplying or adding them directly.
 
 The following example uses the product of a periodic and a Matern 5/2
 kernel to fit periodic data.
 
-``` r
-set.seed(99)
-```
-
 ``` r
 x <- 1:20
 y <- sin(x) + .1*x^1.3
@@ -154,3 +207,6 @@ gp$plot()
 ```
 
 ![](tools/README-combine_periodic-1.png)<!-- -->
+
+For an example of a more complex kernel being constructed, see the
+diamonds section above.
diff --git a/man/summary.GauPro.Rd b/man/summary.GauPro.Rd
diff --git a/scratch/ToDo.md b/scratch/ToDo.md
@@ -1,14 +1,12 @@
 # GauPro to do
 
-* readme should have list of all kernels
-
-* readme example of combining kernels
-
 * EI/CorEI/AugEI/qEI: nopt, test, doc
   * Use t-dist
 
 * Add documentation for kernels, esp. factor ones
 
+* Add readme/documentation for trends
+
 * Speed up triangle, ratquad, periodic, powerexp k/grad
 
 * Knowledge gradient: multiple starts for optim
@@ -40,10 +38,4 @@ diff(range(Z))^2. Or else give message to normalize.
 
 * Standardize for X.
 
-* plotmarginal w/ factors needs level names
-
 * factor kernel: clean up jitter/start
-
-* Fix GHA/Linux bug with cubic
-
-* increasing nugget to get invertibility, only print last
diff --git a/tools/README-diamond_construct_kernel_fit-1.png b/tools/README-diamond_construct_kernel_fit-1.png