revamp gradients implementations #34

Merged · 5 commits · Sep 27, 2016
Changes from 4 commits
2 changes: 1 addition & 1 deletion REQUIRE
@@ -2,5 +2,5 @@ julia 0.4
Optim 0.5-
PDMats 0.3-
Distances 0.2-
ArrayViews 0.6-
ScikitLearnBase 0.0.1
Compat 0.9.2
96 changes: 79 additions & 17 deletions src/GP.jl
@@ -81,41 +81,103 @@ fit!(gp::GP, x::Vector{Float64}, y::Vector{Float64}) = fit!(gp, x', y)
function update_mll!(gp::GP)
μ = mean(gp.m,gp.X)
Σ = cov(gp.k, gp.X, gp.data)
gp.cK = PDMat(Σ + exp(2*gp.logNoise)*eye(gp.nobsv) + 1e-8*eye(gp.nobsv))
for i in 1:gp.nobsv
## add observation noise
Σ[i,i] += exp(2*gp.logNoise) + 1e-8
end
gp.cK = PDMat(Σ)
gp.alpha = gp.cK \ (gp.y - μ)
gp.mLL = -dot((gp.y - μ),gp.alpha)/2.0 - logdet(gp.cK)/2.0 - gp.nobsv*log(2π)/2.0 # Marginal log-likelihood
end
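
For reference, the quantity stored in `gp.mLL` above is the standard GP marginal log-likelihood, with `K` the noise-augmented covariance assembled in the loop and α = K⁻¹(y − μ):

```latex
\log p(y \mid X, \theta)
  = -\tfrac{1}{2}\,(y-\mu)^{\top} K^{-1} (y-\mu)
    - \tfrac{1}{2}\,\log\lvert K\rvert
    - \tfrac{n}{2}\,\log 2\pi,
\qquad
K = \Sigma + \bigl(e^{2\,\mathrm{logNoise}} + 10^{-8}\bigr) I .
```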

# Update gradient of marginal log likelihood
# modification of update_mll! that reuses existing matrices to avoid
# unnecessary memory allocations, which speeds things up significantly
function update_mll!!(gp::GP)
Σbuffer = gp.cK.mat
μ = mean(gp.m,gp.X)
cov!(Σbuffer, gp.k, gp.X, gp.data)
for i in 1:gp.nobsv
Σbuffer[i,i] += exp(2*gp.logNoise) + 1e-8
end
chol_buffer = gp.cK.chol.factors
copy!(chol_buffer, Σbuffer)
chol = cholfact!(Symmetric(chol_buffer))
gp.cK = PDMats.PDMat(Σbuffer, chol)
gp.alpha = gp.cK \ (gp.y - μ)
gp.mLL = -dot((gp.y - μ),gp.alpha)/2.0 - logdet(gp.cK)/2.0 - gp.nobsv*log(2π)/2.0 # Marginal log-likelihood
end
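
A minimal sketch of the same allocation-free pattern outside the `GP` type, using the in-place `cov!` added below in `src/kernels/kernels.jl` (a kernel `k`, data matrix `X`, and the scalars `nobsv` and `logNoise` are assumed to be in scope; buffer names are illustrative):

```julia
Σbuf    = Array(Float64, nobsv, nobsv)   # reusable covariance buffer
cholbuf = Array(Float64, nobsv, nobsv)   # reusable Cholesky storage

cov!(Σbuf, k, X)                         # overwrite the buffer with the kernel matrix
for i in 1:nobsv
    Σbuf[i,i] += exp(2*logNoise) + 1e-8  # observation noise plus jitter
end
copy!(cholbuf, Σbuf)
chol = cholfact!(Symmetric(cholbuf))     # factorize in place, no fresh allocation
cK = PDMats.PDMat(Σbuf, chol)            # wrap matrix + existing factorization
```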

function update_mll_and_dmll!(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true)
update_mll!(gp::GP)
gp.dmLL = Array(Float64, noise + mean*num_params(gp.m) + kern*num_params(gp.k))
@doc """
get_ααinvcKI!(ααinvcKI::Matrix, cK::AbstractPDMat, α::Vector)

# Calculate Gradient with respect to hyperparameters
# Description
Computes α*α'-cK\eye(nobsv) in-place, avoiding any memory allocation

#Derivative wrt the observation noise
# Arguments:
* `ααinvcKI::Matrix` the matrix to be overwritten
* `cK::AbstractPDMat` the covariance matrix of the GP (supplied by gp.cK)
* `α::Vector` the alpha vector of the GP (defined as cK \ (Y-μ), and supplied by gp.alpha)
* nobsv
"""
function get_ααinvcKI!(ααinvcKI::Matrix, cK::AbstractPDMat, α::Vector)
nobsv = length(α)
size(ααinvcKI) == (nobsv, nobsv) || throw(ArgumentError(
@sprintf("Buffer for ααinvcKI should be a %dx%d matrix, not %dx%d",
nobsv, nobsv,
size(ααinvcKI,1), size(ααinvcKI,2))))
ααinvcKI[:,:] = 0.0
@inbounds for i in 1:nobsv
ααinvcKI[i,i] = -1.0
end
A_ldiv_B!(cK.chol, ααinvcKI)
LinAlg.BLAS.ger!(1.0, α, α, ααinvcKI)
end
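
The matrix assembled here is reused for every hyperparameter because it is exactly the factor appearing in the standard gradient identity for the marginal log-likelihood; the code obtains −K⁻¹ by solving against −I in place with `A_ldiv_B!`, then adds αα′ via the rank-1 BLAS update `ger!`:

```latex
\frac{\partial}{\partial \theta_j} \log p(y \mid X, \theta)
  = \tfrac{1}{2}\,\operatorname{tr}\!\Bigl[\bigl(\alpha\alpha^{\top} - K^{-1}\bigr)\,
      \frac{\partial K}{\partial \theta_j}\Bigr],
\qquad \alpha = K^{-1}(y - \mu).
```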
""" Update gradient of marginal log-likelihood """
function update_mll_and_dmll!(gp::GP,
Kgrad::Matrix{Float64},
ααinvcKI::Matrix{Float64}
;
noise::Bool=true, # include gradient component for the logNoise term
mean::Bool=true, # include gradient components for the mean parameters
kern::Bool=true, # include gradient components for the spatial kernel parameters
)
size(Kgrad) == (gp.nobsv, gp.nobsv) || throw(ArgumentError(
@sprintf("Buffer for Kgrad should be a %dx%d matrix, not %dx%d",
gp.nobsv, gp.nobsv,
size(Kgrad,1), size(Kgrad,2))))
update_mll!!(gp)
n_mean_params = num_params(gp.m)
n_kern_params = num_params(gp.k)
gp.dmLL = Array(Float64, noise + mean*n_mean_params + kern*n_kern_params)
logNoise = gp.logNoise
get_ααinvcKI!(ααinvcKI, gp.cK, gp.alpha)

i=1
if noise
gp.dmLL[1] = exp(2*gp.logNoise)*trace((gp.alpha*gp.alpha' - gp.cK \ eye(gp.nobsv)))
gp.dmLL[i] = exp(2.0*logNoise)*trace(ααinvcKI)
i+=1
end

# Derivative wrt the mean hyperparameters; loop over them in the same way as the kernel hyperparameters
if mean
Mgrads = grad_stack(gp.m, gp.X)
for i in 1:num_params(gp.m)
gp.dmLL[i+noise] = dot(Mgrads[:,i],gp.alpha)
for j in 1:n_mean_params
gp.dmLL[i] = dot(Mgrads[:,j],gp.alpha)
i += 1
end
end

# Derivative of marginal log-likelihood with respect to kernel hyperparameters
if kern
Kgrads = grad_stack(gp.k, gp.X, gp.data) # [dK/dθᵢ]
for i in 1:num_params(gp.k)
gp.dmLL[i+mean*num_params(gp.m)+noise] = trace((gp.alpha*gp.alpha' - gp.cK \ eye(gp.nobsv))*Kgrads[:,:,i])/2
for iparam in 1:n_kern_params
grad_slice!(Kgrad, gp.k, gp.X, gp.data, iparam)
gp.dmLL[i] = vecdot(Kgrad,ααinvcKI)/2.0
i+=1
end
end
end

function update_mll_and_dmll!(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true)
Kgrad = Array(Float64, gp.nobsv, gp.nobsv)
ααinvcKI = Array(Float64, gp.nobsv, gp.nobsv)
update_mll_and_dmll!(gp, Kgrad, ααinvcKI, noise=noise,mean=mean,kern=kern)
end
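
A hedged usage sketch of the buffered entry point: the two n×n work arrays are allocated once and reused across repeated evaluations, e.g. inside an optimizer's objective/gradient callbacks (assumes an already-constructed `GP` object `gp`):

```julia
Kgrad    = Array(Float64, gp.nobsv, gp.nobsv)   # buffer for one ∂K/∂θ slice
ααinvcKI = Array(Float64, gp.nobsv, gp.nobsv)   # buffer for αα' - K⁻¹

for iter in 1:50                                # stand-in for an optimizer loop
    # ... set gp's hyperparameters for this iterate ...
    update_mll_and_dmll!(gp, Kgrad, ααinvcKI; noise=true, mean=true, kern=true)
    # gp.mLL now holds the objective, gp.dmLL its gradient
end
```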

@doc """
# Description
6 changes: 4 additions & 2 deletions src/GaussianProcesses.jl
@@ -1,5 +1,7 @@
module GaussianProcesses
using Optim, PDMats, Distances, ArrayViews
using Optim, PDMats, Distances
using Compat
import Compat: view, cholfact!

import Base: +, *
import Base: rand, rand!, mean, cov, push!
@@ -19,7 +21,7 @@ include("optimize.jl")
# This approach to loading supported plotting packages is taken from the "KernelDensity" package
macro glue(pkg)
path = joinpath(dirname(@__FILE__),"glue",string(pkg,".jl"))
init = symbol(string(pkg,"_init"))
init = Symbol(string(pkg,"_init"))
quote
$(esc(init))() = Base.include($path)
isdefined(Main,$(QuoteNode(pkg))) && $(esc(init))()
13 changes: 9 additions & 4 deletions src/glue/ScikitLearn.jl
@@ -46,9 +46,14 @@ ScikitLearnBase.clone(k::Kernel) = deepcopy(k)
## Vector{Float64}. We use it to implement ScikitLearnBase.get_params, which
## must return a Symbol => value dictionary. Likewise for set_params!

function add_prefix(pref, di)
newdi = typeof(di)()
for (param,value) in di
newdi[Symbol(pref, param)] = value
end
return newdi
end
function ScikitLearnBase.get_params(gp::GP)
add_prefix(pref, di) =
Dict([symbol(pref, param)=>value for (param, value) in di])
merge(add_prefix(:m_, ScikitLearnBase.get_params(gp.m)),
add_prefix(:k_, ScikitLearnBase.get_params(gp.k)),
Dict(:logNoise=>gp.logNoise))
@@ -68,9 +73,9 @@ function ScikitLearnBase.set_params!(gp::GP; params...)
for (name, value) in params
sname = string(name)
if startswith(sname, "m_")
m_params[symbol(sname[3:end])] = value
m_params[Symbol(sname[3:end])] = value
elseif startswith(sname, "k_")
k_params[symbol(sname[3:end])] = value
k_params[Symbol(sname[3:end])] = value
else
@assert name == :logNoise "Unknown parameter passed to set_params!: $name"
gp.logNoise = value
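A hedged sketch of how the prefixed parameter dictionary is meant to round-trip through `get_params`/`set_params!` (assumes a `GP` object `gp` with the ScikitLearn glue loaded; the exact key names depend on the chosen mean and kernel types, so only `:logNoise` below is guaranteed to exist):

```julia
params = ScikitLearnBase.get_params(gp)   # Dict(:logNoise => ..., :m_... => ..., :k_... => ...)

# tweak a value and push the whole set back; the m_/k_ prefixes route each
# entry to the mean or kernel object respectively
params[:logNoise] = -1.0
ScikitLearnBase.set_params!(gp; params...)
```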
78 changes: 66 additions & 12 deletions src/kernels/kernels.jl
@@ -37,6 +37,10 @@ function cov(k::Kernel, X::Matrix{Float64}, Y::Matrix{Float64})
d(x,y) = cov(k, x, y)
return map_column_pairs(d, X, Y)
end
function cov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, Y::Matrix{Float64})
d(x,y) = cov(k, x, y)
return map_column_pairs!(cK, d, X, Y)
end

"""
# Description
@@ -57,18 +61,58 @@ function cov(k::Kernel, X::Matrix{Float64}, data::EmptyData)
d(x,y) = cov(k, x, y)
return map_column_pairs(d, X)
end
function cov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::EmptyData)
d(x,y) = cov(k, x, y)
return map_column_pairs!(cK, d, X)
end

cov(k::Kernel, X::Matrix{Float64}) = cov(k, X, KernelData(k, X))
cov!(cK:: AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}) = cov!(cK, k, X, KernelData(k, X))
#=function cov!(cK::Matrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)=#
#= cK[:,:] = cov(k,X,data)=#
#=end=#
function addcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64})
cK[:,:] .+= cov(k, X, KernelData(k, X))
return cK
end
function addcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)
cK[:,:] .+= cov(k, X, data)
return cK
end
function multcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64})
cK[:,:] .*= cov(k, X, KernelData(k, X))
return cK
end
function multcov!(cK::AbstractMatrix{Float64}, k::Kernel, X::Matrix{Float64}, data::KernelData)
cK[:,:] .*= cov(k, X, data)
return cK
end
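
The `addcov!`/`multcov!` variants accumulate into an existing buffer instead of overwriting it, presumably so that composite kernels can assemble a sum or product of components without intermediate allocations. A hedged sketch of that pattern (`components::Vector{Kernel}` is illustrative, not the package's actual composite-kernel layout):

```julia
# Accumulate a sum-kernel covariance into one preallocated buffer.
function sum_cov!(cK::AbstractMatrix{Float64}, components::Vector{Kernel}, X::Matrix{Float64})
    fill!(cK, 0.0)
    for k in components
        addcov!(cK, k, X)   # add each component's covariance in place
    end
    return cK
end
```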


function grad_slice!(dK::AbstractMatrix, k::Kernel, X::Matrix{Float64}, data::KernelData, p::Int)
dim = size(X,1)
nobsv = size(X,2)
npars = num_params(k)
@inbounds for j in 1:nobsv
@simd for i in 1:j
dK[i,j] = dKij_dθp(k,X,data,i,j,p,dim)
dK[j,i] = dK[i,j]
end
end
return dK
end
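
A brief usage sketch: `grad_slice!` writes one symmetric n×n gradient slice per hyperparameter, so a caller can sweep over parameters with a single reusable buffer rather than materialising the full n×n×p stack, mirroring the loop in `update_mll_and_dmll!` above (`k`, `X`, `data` and `nobsv` are assumed in scope):

```julia
dK = Array(Float64, nobsv, nobsv)        # one reusable gradient slice
for p in 1:num_params(k)
    grad_slice!(dK, k, X, data, p)       # dK now holds ∂K/∂θ_p
    # ... consume dK here, e.g. accumulate tr((αα' - K⁻¹) * dK) / 2 ...
end
```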
# Calculates the stack [dk / dθᵢ] of kernel matrix gradients
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::EmptyData)
d, nobsv = size(X)
for j in 1:nobsv, i in 1:nobsv
@inbounds stack[i,j,:] = grad_kern(k, X[:,i], X[:,j])
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::KernelData)
npars = num_params(k)
for p in 1:npars
grad_slice!(view(stack,:,:,p),k,X,data,p)
end
return stack
end

function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64})
grad_stack!(stack, k, X, KernelData(k, X))
end
grad_stack(k::Kernel, X::Matrix{Float64}) = grad_stack(k, X, KernelData(k, X))
function grad_stack(k::Kernel, X::Matrix{Float64}, data::KernelData)
n = num_params(k)
n_obsv = size(X, 2)
@@ -77,25 +121,33 @@ function grad_stack(k::Kernel, X::Matrix{Float64}, data::KernelData)
return stack
end

function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64})
grad_stack!(stack, k, X, KernelData(k, X))
function grad_stack!(stack::AbstractArray, k::Kernel, X::Matrix{Float64}, data::EmptyData)
dim = size(X,1)
nobsv = size(X,2)
for p in 1:num_params(k)
@inbounds for j in 1:nobsv
@simd for i in 1:j
stack[i,j,p] = dKij_dθp(k,X,i,j,p,dim)
stack[j,i,p] = stack[i,j,p]
end
end
end
return stack
end

grad_stack(k::Kernel, X::Matrix{Float64}) = grad_stack(k, X, KernelData(k, X))

##############################
# Parameter name definitions #
##############################

# This generates names like [:ll_1, :ll_2, ...] for parameter vectors
get_param_names(n::Int, prefix::Symbol) = [symbol(prefix, :_, i) for i in 1:n]
get_param_names(n::Int, prefix::Symbol) = [Symbol(prefix, :_, i) for i in 1:n]
get_param_names(v::Vector, prefix::Symbol) = get_param_names(length(v), prefix)

# Fallback. Yields names like :Matl2Iso_param_1 => 0.5
# Ideally this is never used, because the names are uninformative.
get_param_names(obj::Union{Kernel, Mean}) =
get_param_names(num_params(obj),
symbol(typeof(obj).name.name, :_param_))
Symbol(typeof(obj).name.name, :_param_))

""" `composite_param_names(objects, prefix)`, where `objects` is a
vector of kernels/means, calls `get_param_names` on each object and prefixes the
@@ -114,7 +166,7 @@ yields
function composite_param_names(objects, prefix)
p = Symbol[]
for (i, obj) in enumerate(objects)
append!(p, [symbol(prefix, i, :_, sym) for sym in get_param_names(obj)])
append!(p, [Symbol(prefix, i, :_, sym) for sym in get_param_names(obj)])
end
p
end
@@ -130,6 +182,8 @@ function show(io::IO, k::Kernel, depth::Int = 0)
print(io, "\n")
end

num_params(k::Kernel) = throw(ArgumentError("Undefined number of parameters"))

include("stationary.jl")

include("lin.jl") # Linear covariance function
8 changes: 8 additions & 0 deletions src/kernels/lin.jl
@@ -1,5 +1,13 @@
# Linear covariance function

@inline dotijp(X::Matrix{Float64}, i::Int, j::Int, p::Int) = X[p,i]*X[p,j]
@inline function dotij(X::Matrix, i::Int, j::Int, dim::Int)
s=zero(eltype(X))
@inbounds @simd for p in 1:dim
s+=dotijp(X,i,j,p)
end
return s
end
include("lin_iso.jl")
include("lin_ard.jl")
