revamp gradients implementations #34

Merged · 5 commits · Sep 27, 2016
Changes from 4 commits
2 changes: 1 addition & 1 deletion REQUIRE
@@ -2,5 +2,5 @@ julia 0.4
Optim 0.5-
PDMats 0.3-
Distances 0.2-
ArrayViews 0.6-
ScikitLearnBase 0.0.1
Compat 0.9.2
96 changes: 79 additions & 17 deletions src/GP.jl
@@ -81,41 +81,103 @@ fit!(gp::GP, x::Vector{Float64}, y::Vector{Float64}) = fit!(gp, x', y)
function update_mll!(gp::GP)
μ = mean(gp.m,gp.X)
Σ = cov(gp.k, gp.X, gp.data)
gp.cK = PDMat(Σ + exp(2*gp.logNoise)*eye(gp.nobsv) + 1e-8*eye(gp.nobsv))
for i in 1:gp.nobsv
## add observation noise
Σ[i,i] += exp(2*gp.logNoise) + 1e-8
end
gp.cK = PDMat(Σ)
gp.alpha = gp.cK \ (gp.y - μ)
gp.mLL = -dot((gp.y - μ),gp.alpha)/2.0 - logdet(gp.cK)/2.0 - gp.nobsv*log(2π)/2.0 # Marginal log-likelihood
end
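
For reference, the quantity stored in `gp.mLL` above is the standard GP marginal log-likelihood, with `K` the noise-augmented covariance assembled in the loop and α = K⁻¹(y − μ):

```latex
\log p(y \mid X, \theta)
  = -\tfrac{1}{2}\,(y-\mu)^{\top} K^{-1} (y-\mu)
    - \tfrac{1}{2}\,\log\lvert K\rvert
    - \tfrac{n}{2}\,\log 2\pi,
\qquad
K = \Sigma + \bigl(e^{2\,\mathrm{logNoise}} + 10^{-8}\bigr) I .
```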

# Update gradient of marginal log likelihood
# modification of update_mll! that reuses existing matrices to avoid
# unnecessary memory allocations, which speeds things up significantly
function update_mll!!(gp::GP)
Σbuffer = gp.cK.mat
μ = mean(gp.m,gp.X)
cov!(Σbuffer, gp.k, gp.X, gp.data)
for i in 1:gp.nobsv
Σbuffer[i,i] += exp(2*gp.logNoise) + 1e-8
end
chol_buffer = gp.cK.chol.factors
copy!(chol_buffer, Σbuffer)
chol = cholfact!(Symmetric(chol_buffer))
gp.cK = PDMats.PDMat(Σbuffer, chol)
gp.alpha = gp.cK \ (gp.y - μ)
gp.mLL = -dot((gp.y - μ),gp.alpha)/2.0 - logdet(gp.cK)/2.0 - gp.nobsv*log(2π)/2.0 # Marginal log-likelihood
end
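
A minimal sketch of the same allocation-free pattern outside the `GP` type, using the in-place `cov!` added below in `src/kernels/kernels.jl` (a kernel `k`, data matrix `X`, and the scalars `nobsv` and `logNoise` are assumed to be in scope; buffer names are illustrative):

```julia
Σbuf    = Array(Float64, nobsv, nobsv)   # reusable covariance buffer
cholbuf = Array(Float64, nobsv, nobsv)   # reusable Cholesky storage

cov!(Σbuf, k, X)                         # overwrite the buffer with the kernel matrix
for i in 1:nobsv
    Σbuf[i,i] += exp(2*logNoise) + 1e-8  # observation noise plus jitter
end
copy!(cholbuf, Σbuf)
chol = cholfact!(Symmetric(cholbuf))     # factorize in place, no fresh allocation
cK = PDMats.PDMat(Σbuf, chol)            # wrap matrix + existing factorization
```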

function update_mll_and_dmll!(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true)
update_mll!(gp::GP)
gp.dmLL = Array(Float64, noise + mean*num_params(gp.m) + kern*num_params(gp.k))
@doc """
get_ααinvcKI!(ααinvcKI::Matrix, cK::AbstractPDMat, α::Vector)

# Calculate Gradient with respect to hyperparameters
# Description
Computes α*α'-cK\eye(nobsv) in-place, avoiding any memory allocation

#Derivative wrt the observation noise
# Arguments:
* `ααinvcKI::Matrix` the matrix to be overwritten
* `cK::AbstractPDMat` the covariance matrix of the GP (supplied by gp.cK)
* `α::Vector` the alpha vector of the GP (defined as cK \ (Y-μ), and supplied by gp.alpha)
* nobsv
"""
function get_ααinvcKI!(ααinvcKI::Matrix, cK::AbstractPDMat, α::Vector)
nobsv = length(α)
size(ααinvcKI) == (nobsv, nobsv) || throw(ArgumentError(
@sprintf("Buffer for ααinvcKI should be a %dx%d matrix, not %dx%d",
nobsv, nobsv,
size(ααinvcKI,1), size(ααinvcKI,2))))
ααinvcKI[:,:] = 0.0
@inbounds for i in 1:nobsv
ααinvcKI[i,i] = -1.0
end
A_ldiv_B!(cK.chol, ααinvcKI)
LinAlg.BLAS.ger!(1.0, α, α, ααinvcKI)
end
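
The matrix assembled here is reused for every hyperparameter because it is exactly the factor appearing in the standard gradient identity for the marginal log-likelihood; the code obtains −K⁻¹ by solving against −I in place with `A_ldiv_B!`, then adds αα′ via the rank-1 BLAS update `ger!`:

```latex
\frac{\partial}{\partial \theta_j} \log p(y \mid X, \theta)
  = \tfrac{1}{2}\,\operatorname{tr}\!\Bigl[\bigl(\alpha\alpha^{\top} - K^{-1}\bigr)\,
      \frac{\partial K}{\partial \theta_j}\Bigr],
\qquad \alpha = K^{-1}(y - \mu).
```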
""" Update gradient of marginal log-likelihood """
function update_mll_and_dmll!(gp::GP,
Kgrad::Matrix{Float64},
ααinvcKI::Matrix{Float64}
;
noise::Bool=true, # include gradient component for the logNoise term
mean::Bool=true, # include gradient components for the mean parameters
kern::Bool=true, # include gradient components for the spatial kernel parameters
)
size(Kgrad) == (gp.nobsv, gp.nobsv) || throw(ArgumentError(
@sprintf("Buffer for Kgrad should be a %dx%d matrix, not %dx%d",
gp.nobsv, gp.nobsv,
size(Kgrad,1), size(Kgrad,2))))
update_mll!!(gp)
n_mean_params = num_params(gp.m)
n_kern_params = num_params(gp.k)
gp.dmLL = Array(Float64, noise + mean*n_mean_params + kern*n_kern_params)
logNoise = gp.logNoise
get_ααinvcKI!(ααinvcKI, gp.cK, gp.alpha)

i=1
if noise
gp.dmLL[1] = exp(2*gp.logNoise)*trace((gp.alpha*gp.alpha' - gp.cK \ eye(gp.nobsv)))
gp.dmLL[i] = exp(2.0*logNoise)*trace(ααinvcKI)
i+=1
end

# Derivative wrt the mean hyperparameters; loop over them in the same way as the kernel hyperparameters
if mean
Mgrads = grad_stack(gp.m, gp.X)
for i in 1:num_params(gp.m)
gp.dmLL[i+noise] = dot(Mgrads[:,i],gp.alpha)
for j in 1:n_mean_params
gp.dmLL[i] = dot(Mgrads[:,j],gp.alpha)
i += 1
end
end

# Derivative of marginal log-likelihood with respect to kernel hyperparameters
if kern
Kgrads = grad_stack(gp.k, gp.X, gp.data) # [dK/dθᵢ]
for i in 1:num_params(gp.k)
gp.dmLL[i+mean*num_params(gp.m)+noise] = trace((gp.alpha*gp.alpha' - gp.cK \ eye(gp.nobsv))*Kgrads[:,:,i])/2
for iparam in 1:n_kern_params
grad_slice!(Kgrad, gp.k, gp.X, gp.data, iparam)
gp.dmLL[i] = vecdot(Kgrad,ααinvcKI)/2.0
i+=1
end
end
end

function update_mll_and_dmll!(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true)
Kgrad = Array(Float64, gp.nobsv, gp.nobsv)
ααinvcKI = Array(Float64, gp.nobsv, gp.nobsv)
update_mll_and_dmll!(gp, Kgrad, ααinvcKI, noise=noise,mean=mean,kern=kern)
end
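
A hedged usage sketch of the buffered entry point: the two n×n work arrays are allocated once and reused across repeated evaluations, e.g. inside an optimizer's objective/gradient callbacks (assumes an already-constructed `GP` object `gp`):

```julia
Kgrad    = Array(Float64, gp.nobsv, gp.nobsv)   # buffer for one ∂K/∂θ slice
ααinvcKI = Array(Float64, gp.nobsv, gp.nobsv)   # buffer for αα' - K⁻¹

for iter in 1:50                                # stand-in for an optimizer loop
    # ... set gp's hyperparameters for this iterate ...
    update_mll_and_dmll!(gp, Kgrad, ααinvcKI; noise=true, mean=true, kern=true)
    # gp.mLL now holds the objective, gp.dmLL its gradient
end
```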

@doc """
# Description
6 changes: 4 additions & 2 deletions src/GaussianProcesses.jl
@@ -1,5 +1,7 @@
module GaussianProcesses
using Optim, PDMats, Distances, ArrayViews
using Optim, PDMats, Distances
using Compat
import Compat: view, cholfact!

import Base: +, *
import Base: rand, rand!, mean, cov, push!
@@ -19,7 +21,7 @@ include("optimize.jl")
# This approach to loading supported plotting packages is taken from the "KernelDensity" package
macro glue(pkg)
path = joinpath(dirname(@__FILE__),"glue",string(pkg,".jl"))
init = symbol(string(pkg,"_init"))
init = Symbol(string(pkg,"_init"))
quote
$(esc(init))() = Base.include($path)
isdefined(Main,$(QuoteNode(pkg))) && $(esc(init))()
13 changes: 9 additions & 4 deletions src/glue/ScikitLearn.jl
@@ -46,9 +46,14 @@ ScikitLearnBase.clone(k::Kernel) = deepcopy(k)
## Vector{Float64}. We use it to implement ScikitLearnBase.get_params, which
## must return a Symbol => value dictionary. Likewise for set_params!

function add_prefix(pref, di)
newdi = typeof(di)()
for (param,value) in di
newdi[Symbol(pref, param)] = value
end
return newdi
end
function ScikitLearnBase.get_params(gp::GP)
add_prefix(pref, di) =
Dict([symbol(pref, param)=>value for (param, value) in di])
merge(add_prefix(:m_, ScikitLearnBase.get_params(gp.m)),
add_prefix(:k_, ScikitLearnBase.get_params(gp.k)),
Dict(:logNoise=>gp.logNoise))
@@ -68,9 +73,9 @@ function ScikitLearnBase.set_params!(gp::GP; params...)
for (name, value) in params
sname = string(name)
if startswith(sname, "m_")
m_params[symbol(sname[3:end])] = value
m_params[Symbol(sname[3:end])] = value
elseif startswith(sname, "k_")
k_params[symbol(sname[3:end])] = value
k_params[Symbol(sname[3:end])] = value
else
@assert name == :logNoise "Unknown parameter passed to set_params!: $name"
gp.logNoise = value
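A hedged sketch of how the prefixed parameter dictionary is meant to round-trip through `get_params`/`set_params!` (assumes a `GP` object `gp` with the ScikitLearn glue loaded; the exact key names depend on the chosen mean and kernel types, so only `:logNoise` below is guaranteed to exist):

```julia
params = ScikitLearnBase.get_params(gp)   # Dict(:logNoise => ..., :m_... => ..., :k_... => ...)

# tweak a value and push the whole set back; the m_/k_ prefixes route each
# entry to the mean or kernel object respectively
params[:logNoise] = -1.0
ScikitLearnBase.set_params!(gp; params...)
```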
78 changes: 66 additions & 12 deletions src/kernels/kernels.jl
@@ -37,6 +37,10 @@ function cov(k::Kernel, X::Matrix{Float64}, Y::Matrix{Float64})
d(x,y) = cov(k, x, y)
return map_column_pairs(d, X, Y)
end
function cov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, Y::Matrix{Float64})
d(x,y) = cov(k, x, y)
return map_column_pairs!(cK, d, X, Y)
end

"""
# Description
@@ -57,18 +61,58 @@ function cov(k::Kernel, X::Matrix{Float64}, data::EmptyData)
d(x,y) = cov(k, x, y)
return map_column_pairs(d, X)
end
function cov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::EmptyData)
d(x,y) = cov(k, x, y)
return map_column_pairs!(cK, d, X)
end

cov(k::Kernel, X::Matrix{Float64}) = cov(k, X, KernelData(k, X))
cov!(cK:: AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}) = cov!(cK, k, X, KernelData(k, X))
#=function cov!(cK::Matrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)=#
#= cK[:,:] = cov(k,X,data)=#
#=end=#
function addcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64})
cK[:,:] .+= cov(k, X, KernelData(k, X))
return cK
end
function addcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)
cK[:,:] .+= cov(k, X, data)
return cK
end
function multcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64})
cK[:,:] .*= cov(k, X, KernelData(k, X))
return cK
end
function multcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)
cK[:,:] .*= cov(k, X, data)
return cK
end
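
The `addcov!`/`multcov!` variants accumulate into an existing buffer instead of overwriting it, presumably so that composite kernels can assemble a sum or product of components without intermediate allocations. A hedged sketch of that pattern (`components::Vector{Kernel}` is illustrative, not the package's actual composite-kernel layout):

```julia
# Accumulate a sum-kernel covariance into one preallocated buffer.
function sum_cov!(cK::AbstractMatrix{Float64}, components::Vector{Kernel}, X::Matrix{Float64})
    fill!(cK, 0.0)
    for k in components
        addcov!(cK, k, X)   # add each component's covariance in place
    end
    return cK
end
```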


function grad_slice!(dK::AbstractMatrix, k::Kernel, X::Matrix{Float64}, data::KernelData, p::Int)
dim = size(X,1)
nobsv = size(X,2)
npars = num_params(k)
@inbounds for j in 1:nobsv
@simd for i in 1:j
dK[i,j] = dKij_dθp(k,X,data,i,j,p,dim)
dK[j,i] = dK[i,j]
end
end
return dK
end
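
A brief usage sketch: `grad_slice!` writes one symmetric n×n gradient slice per hyperparameter, so a caller can sweep over parameters with a single reusable buffer rather than materialising the full n×n×p stack, mirroring the loop in `update_mll_and_dmll!` above (`k`, `X`, `data` and `nobsv` are assumed in scope):

```julia
dK = Array(Float64, nobsv, nobsv)        # one reusable gradient slice
for p in 1:num_params(k)
    grad_slice!(dK, k, X, data, p)       # dK now holds ∂K/∂θ_p
    # ... consume dK here, e.g. accumulate tr((αα' - K⁻¹) * dK) / 2 ...
end
```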
# Calculates the stack [dk / dθᵢ] of kernel matrix gradients
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::EmptyData)
d, nobsv = size(X)
for j in 1:nobsv, i in 1:nobsv
@inbounds stack[i,j,:] = grad_kern(k, X[:,i], X[:,j])
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::KernelData)
npars = num_params(k)
for p in 1:npars
grad_slice!(view(stack,:,:,p),k,X,data,p)
end
return stack
end

function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64})
grad_stack!(stack, k, X, KernelData(k, X))
end
grad_stack(k::Kernel, X::Matrix{Float64}) = grad_stack(k, X, KernelData(k, X))
function grad_stack(k::Kernel, X::Matrix{Float64}, data::KernelData)
n = num_params(k)
n_obsv = size(X, 2)
@@ -77,25 +121,33 @@ function grad_stack(k::Kernel, X::Matrix{Float64}, data::KernelData)
return stack
end

function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64})
grad_stack!(stack, k, X, KernelData(k, X))
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::EmptyData)
dim = size(X,1)
nobsv = size(X,2)
for p in 1:num_params(k)
@inbounds for j in 1:nobsv
@simd for i in 1:j
stack[i,j,p] = dKij_dθp(k,X,i,j,p,dim)
stack[j,i,p] = stack[i,j,p]
end
end
end
return stack
end

grad_stack(k::Kernel, X::Matrix{Float64}) = grad_stack(k, X, KernelData(k, X))

##############################
# Parameter name definitions #
##############################

# This generates names like [:ll_1, :ll_2, ...] for parameter vectors
get_param_names(n::Int, prefix::Symbol) = [symbol(prefix, :_, i) for i in 1:n]
get_param_names(n::Int, prefix::Symbol) = [Symbol(prefix, :_, i) for i in 1:n]
get_param_names(v::Vector, prefix::Symbol) = get_param_names(length(v), prefix)

# Fallback. Yields names like :Matl2Iso_param_1 => 0.5
# Ideally this is never used, because the names are uninformative.
get_param_names(obj::Union{Kernel, Mean}) =
get_param_names(num_params(obj),
symbol(typeof(obj).name.name, :_param_))
Symbol(typeof(obj).name.name, :_param_))

""" `composite_param_names(objects, prefix)`, where `objects` is a
vector of kernels/means, calls `get_param_names` on each object and prefixes the
@@ -114,7 +166,7 @@ yields
function composite_param_names(objects, prefix)
p = Symbol[]
for (i, obj) in enumerate(objects)
append!(p, [symbol(prefix, i, :_, sym) for sym in get_param_names(obj)])
append!(p, [Symbol(prefix, i, :_, sym) for sym in get_param_names(obj)])
end
p
end
@@ -130,6 +182,8 @@ function show(io::IO, k::Kernel, depth::Int = 0)
print(io, "\n")
end

num_params(k::Kernel) = throw(ArgumentError("Undefined number of parameters"))

include("stationary.jl")

include("lin.jl") # Linear covariance function
8 changes: 8 additions & 0 deletions src/kernels/lin.jl
@@ -1,5 +1,13 @@
# Linear covariance function

@inline dotijp(X::Matrix{Float64}, i::Int, j::Int, p::Int) = X[p,i]*X[p,j]
@inline function dotij(X::Matrix, i::Int, j::Int, dim::Int)
s=zero(eltype(X))
@inbounds @simd for p in 1:dim
s+=dotijp(X,i,j,p)
end
return s
end
include("lin_iso.jl")
include("lin_ard.jl")
