In [None]:
# This file is part of Theano Geometry
#
# Copyright (C) 2017, Stefan Sommer (sommer@di.ku.dk)
# https://bitbucket.org/stefansommer/theanogeometry
#
# Theano Geometry is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Theano Geometry is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Theano Geometry. If not, see <http://www.gnu.org/licenses/>.
#

# Manifold PPCA on $\mathbb{S}^2$ and ellipsoids
arXiv:1801.10341 [math.ST] http://arxiv.org/abs/1801.10341

Stefan Sommer

In [None]:
# Move to the parent directory (presumably the repository root, so that the
# `src.*` imports in the following cells resolve -- TODO confirm).
%cd ..
# Name of the surface; the next cell loads S2 for 'S2' and the ellipsoid for
# any other value. It is also used as a suffix in the saved figure names.
surface = 'S2'
# surface = 'ellipsoid'

In [None]:
# Select and construct the manifold M used by every cell below.
# The surface name is compared with `==`: `is` tests object identity, which
# only matches string literals by accident of interning and triggers a
# SyntaxWarning on Python >= 3.8.
if surface == 'S2':
    from src.manifolds.S2 import *
    M = S2(use_spherical_coords=True,chart_center='x')
else:
    from src.manifolds.ellipsoid import *
    M = Ellipsoid(params=[2.,1.,1.],chart_center='x')
print(M)

from src.plotting import *
# %matplotlib notebook

## Riemannian Geodesics

In [None]:
# Riemannian structure
from src.Riemannian import metric
metric.initialize(M,truncate_high_order_derivatives=True)

# Base point x, a vector v built from the Cholesky factor of M.gsharpf(x),
# and p = M.flatf(x,v).
x = np.array([np.pi/2,np.pi/2])
_gsharp_factor = np.linalg.cholesky(M.gsharpf(x))
_coefficients = np.array([np.sqrt(2)/2,-np.sqrt(2)/4])*np.pi*.85
v = np.dot(_gsharp_factor,_coefficients)
p = M.flatf(x,v)

for _label,_value in (("x = ",x),("v = ",v),("p = ",p)):
    print(_label,_value)

In [None]:
# 2nd order geodesic equation
from src.Riemannian import geodesic
geodesic.initialize(M)

# xs is the output of M.Exptf for the point x and vector v
# (presumably the sampled geodesic path -- confirm against src.Riemannian).
xs = M.Exptf(x,v)

newfig()
M.plot()
M.plotx(xs,v,linewidth = 1.5, s=50)
plt.savefig('Riemannian_geodesic_' + surface + '.pdf')
plt.show()

# Keep the vector in `vv` and rebind `v` to the last entry of xs; later
# cells use `v` as the target point and `vv` as the original vector.
# NOTE(review): because `v` is rebound, this cell is not idempotent -- running
# it a second time passes the previous end point to M.Exptf in place of the
# vector and overwrites `vv` with it. Use Restart & Run All.
vv = v
v = xs[-1]

## Brownian Motion

In [None]:
# srng.seed(42)
# NOTE(review): the seed is commented out, so the sampled paths below differ
# between runs.
# n_steps is a shared value defined outside this notebook (presumably the
# number of time steps of the integrators -- TODO confirm).
n_steps.set_value(200) # set to 400 for more accurate plots

In [None]:
# coordinate form
from src.stochastics import Brownian_coords
Brownian_coords.initialize(M)

# One sample path started at x; dWsf(M.dim.eval()) is passed as the second
# argument (presumably the noise increments). This overwrites the `xs` of the
# geodesic cell.
(ts,xs) = M.Brownian_coordsf(x,dWsf(M.dim.eval()))
newfig()
M.plot()
M.plotx(xs)
plt.savefig('Riemannian_Brownian_motion_' + surface + '.pdf')
plt.show()

## Anisotropic Process

In [None]:
from src.framebundle import FM
from src.stochastics import stochastic_development
FM.initialize(M)
stochastic_development.initialize(M)

# covariance
# nu: Cholesky factor of M.gsharpf(x) with its columns scaled by (1, .1)
nu = np.dot(np.linalg.cholesky(M.gsharpf(x)),np.diag((1.,.1)),)
print(nu)
# u stacks the point x and the flattened frame nu into one vector
u = np.concatenate((x,nu.flatten()))

# plot with frame
(ts,us) = M.stochastic_developmentf(u,dWsf(M.dim.eval()))
newfig()
M.plot()
M.plotFMx(us)
plt.savefig('Anisotropic_FM_' + surface + '.pdf')
plt.show()

# plot only trajectory
# (the first M.dim.eval() columns of us, i.e. the part that x occupies in u)
newfig()
M.plot()
M.plotx(us[:,0:M.dim.eval()])
plt.savefig('Anisotropic_M_' + surface + '.pdf')
plt.show()

## Samples and Density Estimate

In [None]:
# plot sample data with trajectories
# Draw K full paths of the development started at u and keep, for each path,
# the first M.dim.eval() columns at every time step.
K = 8
obss = np.zeros((K,n_steps.eval(),M.dim.eval()))
# srng.seed(422)
i = 0
while i < K:
    try:
        (ts,us) = M.stochastic_developmentf(u,dWsf(M.dim.eval()))
        obss[i] = us[:,0:M.dim.eval()]
        i += 1
    # A path that fails with a linear-algebra error is discarded and redrawn
    # (i is not advanced). LinAlgError is referenced through its public
    # location; np.linalg.linalg is a private module path.
    except np.linalg.LinAlgError:
        pass

# plot samples
colormap = plt.get_cmap('winter')
colors=[colormap(k) for k in np.linspace(0, 1, K)]
newfig()
M.plot()
M.plotFMx(u)
for i in range(K):
    M.plotx(obss[i],linewidth=.5,color=colors[i])
plt.savefig('Samples_with_trajectory_' + surface + '.pdf')
plt.show()

In [None]:
# covariance
# nu: Cholesky factor of M.gsharpf(x) with its columns scaled by (pi, .1);
# replaces the frame defined in the anisotropic-process cell.
nu = np.dot(np.linalg.cholesky(M.gsharpf(x)),np.diag((np.pi,.1)),)
print(nu)
# u stacks the point x and the flattened frame nu into one vector
u = np.concatenate((x,nu.flatten()))

In [None]:
# sample data
# Draw K end points of the development started at u. Each end point is mapped
# through M.Ff and M.get_coordsf(., x) before being stored.
K = 256 # or 2048
obss = np.zeros((K,M.dim.eval()))
# srng.seed(422)
i = 0
while i < K:
    try:
        (ts,us) = M.stochastic_developmentf(u,dWsf(M.dim.eval()))
        obss[i] = M.get_coordsf(M.Ff(us[-1][0:M.dim.eval()]),x)
        i += 1
    # A draw that fails with a linear-algebra error is discarded and redrawn.
    # LinAlgError is referenced through its public location; np.linalg.linalg
    # is a private module path.
    except np.linalg.LinAlgError:
        pass

# plot samples
newfig()
M.plot()
M.plotFMx(u)
for i in range(K):
    M.plotx(obss[i])
plt.savefig('Samples' + surface + '.pdf')
plt.show()

In [None]:
# Logarithm map
from src.Riemannian import Log
Log.initialize(M)

# Tangent PCA
from src.statistics.tangent_PCA import *

from src.utils import *
from sklearn.decomposition import PCA

# tangent_PCA receives the manifold, a wrapper around M.Logf, the point x and
# the sampled observations; the result is reused by the "display in nu
# coordinates" cell through pca.transformed_Logs.
pca = tangent_PCA(M, lambda *args: M.Logf(*args),x,obss)
print(pca.get_covariance())

# scatter plot of the first two columns of pca.transformed_Logs
plt.scatter(pca.transformed_Logs[:, 0], pca.transformed_Logs[:, 1])
plt.axis('equal')
plt.show()

In [None]:
# plot estimated density of the observations obss
newfig()
plot_density_estimate(M,obss,limits=[-np.pi,np.pi,0,np.pi],pts=100,alpha=.8,bandwidth=.15) # general ellipsoidal coordinates (note: very long computation time)
plt.savefig('Density_' + surface + '.pdf')
plt.show()

## Principal Components and Conditioned Processes

In [None]:
# Delyon/Hu guided process
from src.stochastics.guided_process import *

# Riemannian Brownian motion
# guide function
# phi(g,v) = -(g-v): the difference from the current value g to the target v
phi = lambda g,v: -(g-v)
x0 = M.element()
# get_guided_likelihood returns a pair; the first entry is used symbolically
# (inside U below) and the second, suffixed `f`, is called with numeric input.
(Brownian_coords_guided,Brownian_coords_guidedf) = get_guided_likelihood(
    M, M.sde_Brownian_coords, phi,
    lambda x: theano.tensor.slinalg.Cholesky()(M.gsharp(x)),
    x0)

# One guided path from x towards v (v is the geodesic end point set in the
# geodesic cell). This overwrites `xs`.
(ts,xs,log_likelihood,log_varphi) = Brownian_coords_guidedf(x,v,dWsf(M.dim.eval()))[:4]
print("log likelihood: ", log_likelihood[-1], ", log varphi: ", log_varphi[-1])

newfig()
M.plot()
M.plotx(xs)
M.plotx(v,color='k')
plt.show()

In [None]:
# HMC sampler
from src.statistics.HMC import *

def U(w,x,v):
    """Potential used by the HMC sampler for the guided coordinate process.

    w, x and v are forwarded to Brownian_coords_guided as its third, first and
    second argument. The result is minus the product of the last entries of
    the log-likelihood and log-varphi sequences it returns.

    NOTE(review): the combination is a product, not a sum, of the two final
    values; kept exactly as in the original.
    """
    # The original also computed T.sum(phi(x, v) ** 2) into an unused local;
    # it had no effect on the returned expression and has been removed.
    (_,_,log_likelihood,log_varphi) = Brownian_coords_guided(x,v,w)[:4]
    return -log_likelihood[-1]*log_varphi[-1]

# numeric counterparts of U, obtained through get_df
thetas = ()
ve = M.element()
(dU,dUf) = get_df(U,dWt,thetas,(x0,ve))

def Uf(*p):
    """First entry of dUf's output."""
    return dUf(*p)[0]

def dwUf(*p):
    """Second entry of dUf's output."""
    return dUf(*p)[1]

def dthetaUf(*p):
    """All entries of dUf's output from the third one on."""
    return dUf(*p)[2:]

In [None]:
# run HMC
# K HMC steps on the noise w, starting from zero noise. For every step the
# guided path, its guided noise and the product of the final log-likelihood
# and log-varphi values are stored. `xs` from the guided-process cell is only
# used for its shape.
w = 0*dWsf(M.dim.eval())
K = 10
L = 4
epsilon = .2/L
ws = np.zeros((K,) + w.shape)
xss = np.zeros((K,) + xs.shape)
vals = np.zeros(K)
for k in range(K):
    (ts,xs,log_likelihood,log_varphi,w_guided) = Brownian_coords_guidedf(x,v,w)[:5]
    ws[k] = w_guided
    xss[k] = xs
    vals[k] = log_likelihood[-1]*log_varphi[-1]

    w = HMC_step_numeric(Uf, dwUf, lambda: dWsf(M.dim.eval()), epsilon, L, w, (x, v))
#     (accept,new_w) = HMC_stepf(w,epsilon,L)
#     if accept[0]:
#         w = new_w

# colors
# A matplotlib colormap called with floats expects values in [0, 1]. The
# original multiplied the normalised values by 255, which sends everything but
# the minimum past 1 and therefore to the same top colour.
cmap = cm.jet
vals_range = np.max(vals)-np.min(vals)
cs = cmap((vals-np.min(vals))/vals_range if vals_range > 0 else np.zeros_like(vals))

# plot result
newfig()
M.plot()
M.plotx(v,color='k')
for k in range(K):
    M.plotx(xss[k],color=cs[k])
plt.show()

# noise
plt.figure()
for k in range(K):
    plt.plot(ws[k][:,0],ws[k][:,1],color=cs[k])
plt.show()

In [None]:
# Delyon/Hu guided version of development Brownian motion

# guide function
# phi(u,v): inverse of the frame part of u (entries from M.dim on, reshaped to
# M.dim rows) applied to the difference -(u[0:M.dim]-v).
# This rebinds `phi`, replacing the guide function of the coordinate version.
phi = lambda u,v: T.tensordot(T.nlinalg.MatrixInverse()(u[M.dim:].reshape((M.dim,-1))),-(u[0:M.dim]-v).flatten(),(1,0))
u0 = M.FM_element()
# The guide function and the matrix inside A are wrapped in
# theano.gradient.disconnected_grad; the variants without the wrapper are kept
# as comments.
(Brownian_development_guided,Brownian_development_guidedf) = get_guided_likelihood(
    M, M.sde_development, 
#     phi,
    lambda u,v: theano.gradient.disconnected_grad(phi(u,v)),
    lambda u: u[M.dim:].reshape((M.dim,-1)), 
    u0,
    A=lambda u, v, w: T.tensordot(v[0:M.dim], T.tensordot(theano.gradient.disconnected_grad(T.nlinalg.MatrixInverse()(T.tensordot(u[M.dim:].reshape((M.dim,-1)), u[M.dim:].reshape((M.dim,-1)), (1, 1)))), w[0:M.dim], 1), 1)
#     A=lambda u, v, w: T.tensordot(v[0:M.dim], T.tensordot(T.nlinalg.MatrixInverse()(T.tensordot(u[M.dim:].reshape((M.dim,-1)), u[M.dim:].reshape((M.dim,-1)), (1, 1))), w[0:M.dim], 1), 1)    
)

# # initial frame bundle element
# nu = np.linalg.cholesky(M.gsharpf(x))
# u = np.concatenate((x,nu.flatten()))

# One guided development from u towards v, driven by noise scaled by .2
(ts,us,log_likelihood,log_varphi) = Brownian_development_guidedf(u,v,.2*dWsf(M.dim.eval()))[:4]
print("log likelihood: ", log_likelihood[-1], ", log varphi: ", log_varphi[-1])
# the first M.dim.eval() columns of us (the part that x occupies in u)
xs = us[:,:M.dim.eval()]

newfig()
M.plot()
M.plotFMx(us)
M.plotx(v,color='k')
plt.savefig('Guided_FM_' + surface + '.pdf')
plt.show()

In [None]:
# Hamiltonian MCMC, development version
# NOTE(review): this redefines the name `U` from the coordinate-version HMC
# cell; the later definition wins for every cell run after this one.
def U(w,u,v):
    """Potential used by the HMC sampler for the guided development process.

    Returns the negative of the sum of four terms:
    -.5 * M.dim.eval() * log(2*pi*Tend), minus M.determinant of the point and
    frame parts of u, minus Cuv/(2*Tend) with Cuv the sum of squares of
    phi(u, v), plus the product of the last log-likelihood and log-varphi
    values returned by Brownian_development_guided(u,v,w).

    Tend and phi are read from the notebook's global scope.
    """
    Cuv = T.sum(phi(u, v) ** 2)
    (_,_,log_likelihood,log_varphi) = Brownian_development_guided(u,v,w)[:4]
    return -(-.5 * M.dim.eval() * T.log(2. * np.pi * Tend)
             -M.determinant(u[0:M.dim],u[M.dim:].reshape((M.dim,-1)))             
             -Cuv/(2.*Tend)
             +log_likelihood[-1]*log_varphi[-1])

# numeric counterparts of the development-version U, obtained through get_df;
# u0 is passed both as parameter (thetas) and as extra input
thetas = (u0,)
ve = M.element()
(dU,dUf) = get_df(U,dWt,thetas,(u0,ve))

def Uf(*p):
    """First entry of dUf's output."""
    return dUf(*p)[0]

def dwUf(*p):
    """Second entry of dUf's output."""
    return dUf(*p)[1]

def dthetaUf(*p):
    """All entries of dUf's output from the third one on."""
    return dUf(*p)[2:]

In [None]:
# extract principal component from samples
def get_principal_component(ws,w_guideds,N_samplers=1):
    """Average HMC samples after discarding the first half of every chain.

    Parameters
    ----------
    ws, w_guideds : 3-d arrays whose first axis stacks the samples of
        N_samplers chains one after another (axis 0 is split into
        (N_samplers, samples per chain)).
    N_samplers : number of chains stacked along the first axis.

    Returns
    -------
    (w_guidedmean, w_guidedsmean, usmean) : the last row of the averaged
        guided noise, the averaged guided noise, and the second output of
        Brownian_development_guidedf evaluated on the averaged noise.

    Notes
    -----
    `u`, `v` and `Brownian_development_guidedf` are read from the notebook's
    global scope, not from the arguments.
    """
    # Samples per chain. The burn-in is half of one chain; the original divided
    # by N_samplers a second time, which shortened the burn-in whenever more
    # than one chain was passed (identical for N_samplers == 1).
    N_samples = w_guideds.shape[0]//N_samplers
    start = N_samples//2

    def _drop_burn_in(samples):
        # split into chains, cut the burn-in of each chain, and stack again
        chains = samples.reshape((N_samplers,-1,samples.shape[1],samples.shape[2]))
        return chains[:,start:,:,:].reshape((-1,samples.shape[1],samples.shape[2]))

    wsmean = np.mean(_drop_burn_in(ws),axis=0)
    w_guidedsmean = np.mean(_drop_burn_in(w_guideds),axis=0)
    w_guidedmean = w_guidedsmean[-1]
    (ts,usmean) = Brownian_development_guidedf(u,v,wsmean)[:2]
    return (w_guidedmean,w_guidedsmean,usmean)

In [None]:
# guess for initial path
def MPP(ws,u,v):
    """Symbolic objective for the initial path guess.

    Sum of squares of the noise ws divided by dt, plus 2*1e1 times the squared
    difference between the first M.dim entries of the last state of
    M.stochastic_development(u,ws) and the target v. `dt` is read from the
    global scope.

    NOTE(review): the name MPP is redefined further down in this cell as a
    numeric function of (u,v); this symbolic version is only reachable through
    the functions compiled from it by get_df.
    """
    (ts,us) = M.stochastic_development(u,ws)
    return T.square(ws).sum()/dt+2*1e1*T.square(us[-1][0:M.dim]-v).sum()

# numeric counterparts of the symbolic MPP objective, obtained through get_df
# (note: `ws` is rebound to a symbolic matrix here)
ws = T.matrix()
ue = M.FM_element()
ve = M.element()
(dMPP,dMPPf) = get_df(MPP,ws,(),(ue,ve))

def MPPf(*p):
    """First entry of dMPPf's output."""
    return dMPPf(*p)[0]

def dwMPPf(*p):
    """Second entry of dMPPf's output."""
    return dMPPf(*p)[1]

def MPP(u,v,n_iter=150,step=.1*1e-4):
    """Numeric initial guess for the noise of a path from u towards v.

    Runs plain gradient descent on the noise, starting from zeros, using the
    gradient function dwMPPf read from the global scope.

    Parameters
    ----------
    u, v : forwarded unchanged to dwMPPf.
    n_iter : number of gradient steps (default: the original 150).
    step : step size (default: the original .1*1e-4).

    Returns
    -------
    The noise array after the last step; it has the shape of
    dWsf(M.dim.eval()).

    Notes
    -----
    This definition shadows the symbolic MPP(ws,u,v) defined earlier in the
    same cell. The unused helper for a BFGS-based alternative (dead code in
    the original) has been removed.
    """
    # dWsf is only called to obtain the shape of the noise array
    w = np.zeros(dWsf(M.dim.eval()).shape)
    for _ in range(n_iter):
        w -= step*dwMPPf(w,u,v)
    return w

# initial noise guess for a path from u towards v, and the development it drives
w = MPP(u,v)
(ts,us) = M.stochastic_developmentf(u,w)
# p = M.Log_FM(u,v)
# us = M.Exp_Hamiltonian_FMtf(u,p).T
# print(us)

# plot result
newfig()
M.plot()
M.plotx(v,color='k')
# M.plotx(us[:,:M.dim.eval()])
M.plotFMx(us,N_vec=n_steps.eval()//4)
plt.show()

In [None]:
# # covariance
# nu = np.dot(np.linalg.cholesky(M.gsharpf(x)),np.diag((.4,1.)))
# print(nu)
# u = np.concatenate((x,nu.flatten()))

# initial point, good guess
# w = 0*dWsf(M.dim.eval())
# w = .5*ws
# print("norm at w=0: %f" % np.linalg.norm(dwUf(w,u,v)))
# for i in range(20):
#     grad = dwUf(w,u,v)
# #     print(Uf(w,u,v),np.linalg.norm(grad))
#     w -= .005*grad
# print("norm after grad descent: %f" % np.linalg.norm(dwUf(w,u,v)))
# (ts,us) = Brownian_development_guidedf(u,v,w)[:2]

# w0 = .5*MPP(u,v)
# # plot result
# newfig()
# M.plot()
# M.plotx(v,color='k')
# # M.plotx(us[:,:M.dim.eval()])
# M.plotFMx(us,N_vec=n_steps.eval()//4)
# plt.show()

# parallel HMC samplers
def lHMC(u,pars):
    """Run one HMC chain on the noise of the guided development process.

    Parameters
    ----------
    u : start value passed to MPP, HMC_step_numeric and
        Brownian_development_guidedf.
    pars : tuple (v, seed). v is the target; seed, when truthy, is passed to
        srng.seed (a seed of 0 or None leaves the generator untouched).

    Returns
    -------
    (ws, w_guideds, xss, vals, w_guidedmean, w_guidedsmean, usmean): the
    per-sample noise, guided noise, paths (first M.dim.eval() columns of the
    state) and products of the final log-likelihood and log-varphi values,
    followed by the three outputs of get_principal_component(ws, w_guideds).

    Notes
    -----
    get_principal_component reads `u` and `v` from the global scope, so
    `usmean` refers to the global target, not necessarily to this call's v.
    """
    (v,seed,) = pars
    if seed:
        srng.seed(seed)

    N_samples = 128
    L = 8
    epsilon = .05/L
    maxRetries = 0

    # start the chain from half of the initial path guess
    w = .5*MPP(u,v)
    ws = np.zeros((N_samples,) + w.shape)
    w_guideds = np.zeros((N_samples,) + w.shape)
    xss = np.zeros((N_samples,n_steps.eval(),M.dim.eval()))
    vals = np.zeros(N_samples)
    prev_w = w
    for k in range(N_samples):
        done = False
        i = 0
        while not done:
            try:
                w = HMC_step_numeric(Uf, dwUf, lambda: dWsf(M.dim.eval()), epsilon, L, w, (u, v))

                (ts,us,log_likelihood,log_varphi,w_guided) = Brownian_development_guidedf(u,v,w)[:5]
                ws[k] = w
                w_guideds[k] = w_guided
                xss[k] = us[:,:M.dim.eval()]
                vals[k] = log_likelihood[-1]*log_varphi[-1]

                prev_w = w
                done = True
            # public location of the exception; np.linalg.linalg is private
            except np.linalg.LinAlgError:
                i = i+1
                print("LinAlgError, retrying (%d of %d)" % (i,maxRetries))
                if i >= maxRetries:
                    print('backtracking')
                    w = prev_w
                    # The chain stays at the previous state, so sample k
                    # repeats sample k-1. The original left row k at its
                    # all-zero initial value, which leaked into the averages.
                    # For k == 0 there is no previous sample to repeat.
                    if k > 0:
                        ws[k] = ws[k-1]
                        w_guideds[k] = w_guideds[k-1]
                        xss[k] = xss[k-1]
                        vals[k] = vals[k-1]
                    done = True
#                     raise
    (w_guidedmean,w_guidedsmean,usmean) = get_principal_component(ws,w_guideds)

    return (ws,w_guideds,xss,vals,w_guidedmean,w_guidedsmean,usmean)

# run HMC
# N_samplers chains are run through the mpu pool, all with the same target v
# and each with its own random seed. Per-sample outputs are concatenated
# across chains; the three per-chain averages are averaged across chains.
N_samplers = 8
print("Running HMC with %d sampler(s)" % N_samplers)
try:
    mpu.openPool()
    sol = mpu.pool.imap(partial(lHMC,u),mpu.inputArgs(itertools.cycle((v,)),np.random.randint(1000,size=N_samplers)))
    res = list(sol)
    ws = mpu.getRes(res,0).reshape((-1,n_steps.eval(),M.dim.eval()))
    w_guideds = mpu.getRes(res,1).reshape((-1,n_steps.eval(),M.dim.eval()))
    xss = mpu.getRes(res,2).reshape((-1,n_steps.eval(),M.dim.eval()))
    vals = mpu.getRes(res,3).flatten()
    w_guidedmean = np.mean(mpu.getRes(res,4),axis=0)
    w_guidedsmean = np.mean(mpu.getRes(res,5),axis=0)
    usmean = np.mean(mpu.getRes(res,6),axis=0)
finally:
    # close the pool on success and on failure alike
    mpu.closePool()
print("HMC done, %d samples total" % vals.size)

# colors
# A matplotlib colormap called with floats expects values in [0, 1]. The
# original multiplied the normalised values by 255, which sends everything but
# the minimum past 1 and therefore to the same top colour.
cmap = cm.jet
vals_range = np.max(vals)-np.min(vals)
cs = cmap((vals-np.min(vals))/vals_range if vals_range > 0 else np.zeros_like(vals))

# # plot result
# newfig()
# M.plot()
# M.plotx(v,color='k')
# skip = np.max((1,K//K))
# for k in range(0,K,skip):
#     M.plotx(xss[k],color=cs[k])
# plt.show()

# # noise
# plt.figure()
# for k in range(0,K,skip):
#     plt.plot(w_guideds[k][:,0],w_guideds[k][:,1],'.-',color=cs[k])
# plt.show()

In [None]:
# # mean path (using development)
# wsmean = np.mean(ws,axis=0)
# (ts,usmean) = Brownian_development_guidedf(u,v,wsmean)[:2]
# w_guidedsmean = np.mean(w_guideds,axis=0)

# plot at most about 40 of the samples, evenly spaced from `start`
start = 0
skip = np.max((1,(vals.size-start)//40))

# plot result
newfig()
M.plot()
for k in range(start,vals.size,skip):
    M.plotx(xss[k],color=cs[k])
# M.plotFMx(usmean,N_vec=n_steps.eval()//4)
M.plotx(usmean[:,0:M.dim.eval()],color='blue',linewidth=2)
M.plotFMx(u)    
# output of M.Exptf for the original vector vv (saved in the geodesic cell), in red
M.plotx(M.Exptf(x,vv),linewidth = 1.5, color='r')
M.plotx(v,color='r',s=150)
plt.savefig('Conditioned_samples_' + surface + '.pdf')
plt.show()

# vvu solves nu @ vvu = vv, i.e. vv expressed in the frame nu
vvu = np.linalg.solve(nu,vv)

# range
# bounding box of all guided-noise points and vvu, padded by 1 on each side
(xmin,ymin) = np.min(np.vstack((w_guideds.reshape(-1,M.dim.eval()),vvu)),axis=0)-1.
(xmax,ymax) = np.max(np.vstack((w_guideds.reshape(-1,M.dim.eval()),vvu)),axis=0)+1.

# noise
# guided noise of the selected samples with their end points marked, the
# averaged guided noise in blue, and the segment from the origin to vvu in red
plt.figure()
for k in range(start,vals.size,skip):
    plt.plot(w_guideds[k][:,0],w_guideds[k][:,1],color=cs[k])
    plt.plot(w_guideds[k][-1,0],w_guideds[k][-1,1],'o',color='k',markersize=10)
plt.plot(w_guidedsmean[:,0],w_guidedsmean[:,1],'-',color='b',linewidth=4)
plt.plot([0.,vvu[0]],[0.,vvu[1]],'r-',linewidth=2)
# plt.axis((-.5,4.2,-2.5,.5))
plt.savefig('Conditioned_antidevelopment_' + surface + '.pdf')
plt.show()

# noise density
# NOTE(review): the reshape below hard-codes 2 columns, whereas the range
# computation above uses M.dim.eval().
plt.figure()
plot_Euclidean_density_estimate(w_guideds.reshape((-1,2)),limits=(xmin,xmax,ymin,ymax))
plt.plot(w_guidedsmean[:,0],w_guidedsmean[:,1],'-',color='b',linewidth=1)
plt.plot([0.,vvu[0]],[0.,vvu[1]],'r-',linewidth=2)
# plt.axis((-.5,4.2,-2.5,.5))
plt.savefig('Conditioned_antidevelopment_density_' + surface + '.pdf')
plt.show()

newfig()
plot_Euclidean_density_estimate(w_guideds.reshape((-1,2)),view='3D',border=1.,alpha=.8)
plt.show()

# Euclideanization

In [None]:
# data plot
newfig()
M.plot()
plot_density_estimate(M,obss,limits=[-np.pi,np.pi,0,np.pi],pts=100,alpha=.2,bandwidth=.15) # general ellipsoidal coordinates (note: very long computation time)

# plot samples
M.plotFMx(u)
# K is rebound to the number of observations
K = obss.shape[0]
for i in range(K):
    M.plotx(obss[i],color='k')
M.plotx(v)

# plot geodesic and mean curve
# NOTE(review): M.plotFMx(u) and M.plotx(v, ...) are each called twice in this
# cell (once above, once below).
M.plotx(usmean[:,0:M.dim.eval()],color='k',linewidth=4)
M.plotFMx(u)    
M.plotx(M.Exptf(x,vv), color='r', linewidth=4)
M.plotx(v,color='b',s=50,linewidth=5)

plt.savefig('Data_geodesic_component_' + surface + '.pdf')
plt.show()

In [None]:
# compute anisotropic PCA view
# run HMC
# One HMC chain per observation, processed in `batches` consecutive slices of
# K observations. Only the fifth output of lHMC (w_guidedmean) is kept.
N_samplers = 1
K = 128 #obss.shape[0]
batches = 2
print("Running HMC, %d observations per batch of %d, %d samplers" % (K,batches,N_samplers))
batch_means = []
for J in range(batches):
    print("batch %d of %d" % (J,batches))
    try:
        mpu.openPool()
        sol = mpu.pool.imap(partial(lHMC,u),mpu.inputArgs(obss[J*K:(J+1)*K],np.random.randint(1000,size=K)))
        res = list(sol)
        batch_means.append(mpu.getRes(res,4).reshape((-1,M.dim.eval())))
    finally:
        # close the pool on success and on failure alike
        mpu.closePool()
# stack the batches once instead of growing the array inside the loop
w_guidedmean = np.vstack(batch_means)
# report the number of observations actually processed over all batches
# (the original printed the per-batch size K)
print("HMC done, %d observations" % w_guidedmean.shape[0])
print(w_guidedmean.shape)

In [None]:
# display in nu coordinates
# Every quantity below is obtained by solving against M.orthFramef(x).
# tpca_comps: the tangent-PCA scores (pca.transformed_Logs).
# pca_comps, tvvu, tw_guidedsmean: the per-observation mean guided noise, vvu
# and the averaged guided noise, each multiplied by the frame nu first.
tpca_comps = np.linalg.solve(M.orthFramef(x),pca.transformed_Logs.T).T
pca_comps = np.linalg.solve(M.orthFramef(x),np.tensordot(nu,w_guidedmean,(1,1))).T
tvvu = np.linalg.solve(M.orthFramef(x),np.tensordot(nu,vvu,(1,0)))
tw_guidedsmean = np.linalg.solve(M.orthFramef(x),np.tensordot(nu,w_guidedsmean,(1,1))).T
# red crosses: tangent PCA; black dots: model-based components
plt.scatter(tpca_comps[:, 0], tpca_comps[:, 1],color='r',marker='x')
plt.scatter(pca_comps[:, 0],pca_comps[:, 1],color='k')
plt.plot([0.,tvvu[0]],[0.,tvvu[1]],'r-',linewidth=5)
plt.scatter(tvvu[0], tvvu[1],color='r',marker='x')
plt.plot(tw_guidedsmean[:,0],tw_guidedsmean[:,1],'-',color='k',linewidth=5)
plt.scatter(tw_guidedsmean[-1,0],tw_guidedsmean[-1,1],color='k')
# fixed axis limits
plt.ylim((-2.2,2.2))
plt.xlim((-3.5,3.5))
# plt.axis('equal')
plt.savefig('Tangent_vs_model_PCA_' + surface + '.pdf')
plt.show()

## MLE

In [None]:
# covariance
# nu: Cholesky factor of M.gsharpf(x) with its rows scaled by (.075, .4).
# NOTE(review): the diagonal matrix multiplies from the left here, whereas the
# earlier covariance cells multiply from the right -- confirm this is intended.
nu = np.dot(np.diag((.075,.4)),np.linalg.cholesky(M.gsharpf(x)))
print(nu)
u = np.concatenate((x,nu.flatten()))

# parameters that generated the data, kept for the final comparison plot
thetas_true = (u,)

n_steps.set_value(100)

# sample data
# srng.seed(7163)
K = 4*22
obss = np.zeros((K,M.dim.eval()))
# srng.seed(422)
i = 0
while i < K:
    try:
        (ts,us) = M.stochastic_developmentf(u,dWsf(M.dim.eval()))
        obss[i] = us[-1][0:M.dim.eval()]
        i += 1
    # A draw that fails with a linear-algebra error is discarded and redrawn.
    # LinAlgError is referenced through its public location; np.linalg.linalg
    # is a private module path.
    except np.linalg.LinAlgError:
        pass

# plot samples
newfig()
M.plot()
M.plotFMx(u)
for i in range(K):
    M.plotx(obss[i])
plt.savefig('MLE_samples_' + surface + '.pdf')
plt.show()

In [None]:
# settings handed to iterative_mle in the optimization cell
options = {
    'samples_per_obs': 1,
    'epochs': 15,
    'learning_rate': .97e-2,
    'verbose': True,
    # initial parameters: the point x followed by a flattened .5*identity frame
    # alternatives: [np.concatenate((x,np.diag((.1,.25)).flatten()))] # [np.concatenate((x,.5*np.eye(M.dim.eval()).flatten()))] # [.5*thetas_true[0]]
    'initial': [np.concatenate((x,.5*np.eye(M.dim.eval()).flatten()))],
    'chain_sampler': True,
    # shape of one noise array as returned by dWsf
    'chain_size': dWsf(M.dim.eval()).shape,
}

In [None]:
%%time
# optimization
from src.statistics.mle import *

# np.random.seed(345)

def llog_p_T(thetas,pars):
    """Advance one chain by HMC and evaluate the potential and its derivative.

    Parameters
    ----------
    thetas : tuple whose first entry is the parameter u.
    pars : tuple (v, seed, w): target, optional seed for srng (used only when
        truthy) and the current noise of the chain.

    Returns
    -------
    (res[0], res[2], w) where res = dUf(w,u,v) and w is the updated noise.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the step keeps failing after 2*maxRetries attempts, or if the
        initial unguarded step fails.

    Notes
    -----
    `epsilon` and `L` are read from the notebook's global scope (they are set
    in the coordinate-version "run HMC" cell).
    """
    (v,seed,w) = pars
    if seed:
        srng.seed(seed)
    u = thetas[0]
    w = HMC_step_numeric(Uf, dwUf, lambda: dWsf(M.dim.eval()), epsilon, L, w, (u, v))

    maxRetries = 10
    i = 0
    while True:
        try:
            w = HMC_step_numeric(Uf, dwUf, lambda: dWsf(M.dim.eval()), epsilon, L, w, (u, v))
            res = dUf(w,u,v)
            break
        # public location of the exception; np.linalg.linalg is private
        except np.linalg.LinAlgError:
            i = i+1
            print("LinAlgError, retrying (%d of %d)" % (i,maxRetries))
            # The original never left the loop once the limit was reached and
            # replaced the noise by the scalar 0. Now the noise is reset to
            # zeros of the same shape once, and the error is propagated if a
            # second round of retries fails as well.
            if i >= 2*maxRetries:
                raise
            if i == maxRetries:
                print('maxRetries reached')
                w = np.zeros_like(w)

    return (res[0],res[2],w)

def update_thetas(thetas, dthetas):
    """Take one gradient-descent step on the parameters.

    Parameters
    ----------
    thetas : tuple whose first entry is the current parameter array u.
    dthetas : tuple whose first entry is the gradient with respect to u.

    Returns
    -------
    A one-element tuple with the updated parameter array. The step size is
    options['learning_rate'], read from the global `options` dict.
    """
    # Work on a copy: the original updated thetas[0] in place, which silently
    # changed the caller's array (and anything else referencing it, such as a
    # stored initial value or a history of iterates).
    u = thetas[0].copy()

    u -= options['learning_rate']*dthetas[0]
#     u[M.dim.eval():] -= options['learning_rate']*dthetas[0][M.dim.eval():] # only diagonal covariance
#     u[M.dim.eval():] -= options['learning_rate']*(np.eye(M.dim.eval())*dthetas[0][M.dim.eval():].reshape((M.dim.eval(),-1))).flatten() # only covariance
#     u[M.dim.eval()+0] -= 2.5*options['learning_rate']*dthetas[0][M.dim.eval()+0]
#     u[M.dim.eval()+3] -= .25*options['learning_rate']*dthetas[0][M.dim.eval()+3]

    return (u,)

# run MLE
# iterative_mle receives the observations, the per-observation function
# llog_p_T, the parameter update rule and the options dict. Of its four
# outputs, log_likelihoods and thetass are used by the plotting cell below.
(thetas, log_likelihood, log_likelihoods, thetass) = iterative_mle(obss,llog_p_T,update_thetas,options)

In [None]:
# plot
# Cholesky factor of M.gf(x); it does not change between iterates, so it is
# computed once instead of once per conversion.
_chol_g = np.linalg.cholesky(M.gf(x))

def _frame_diagonal(theta):
    """Diagonal of chol(M.gf(x)) times the frame part of a parameter vector.

    The frame part is everything after the first M.dim.eval() entries,
    reshaped to M.dim.eval() rows.
    """
    frame = theta[M.dim.eval():].reshape((M.dim.eval(),-1))
    return np.diag(np.tensordot(_chol_g,frame,(1,0)))

# true value, initial value and every recorded iterate, all converted the same way
plot_thetas_true = _frame_diagonal(thetas_true[0])
plot_thetass = np.zeros((thetass[0].shape[0]+1,M.dim.eval()))
plot_thetass[0] = _frame_diagonal(options['initial'][0])
for i in range(thetass[0].shape[0]):
    plot_thetass[i+1] = _frame_diagonal(thetass[0][i])

# log-likelihood per iteration
plt.plot(range(log_likelihoods.shape[0]),log_likelihoods)
plt.savefig('MLE_likelihood_' + surface + '.pdf')
plt.show()

# converted parameters per iteration, with the true values as red lines
plt.plot(range(log_likelihoods.shape[0]+1),plot_thetass.reshape((thetass[0].shape[0]+1,-1)))
plt.hlines(plot_thetas_true.flatten(),plt.xlim()[0],plt.xlim()[1],color='r')
# let the vertical axis start at zero
lims = plt.axis()
plt.axis((lims[0],lims[1],0.,lims[3]))
plt.savefig('MLE_variance_' + surface + '.pdf')
plt.show()
None