Boston Housing Dataset Test
===========================

In this notebook we test revrand's ARD (automatic relevance determination, i.e. one length scale per input dimension) basis functions on the Boston housing dataset.

In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.cross_validation import KFold

import revrand.basis_functions as bf
from revrand.metrics import smse, msll
from revrand.slm import learn, predict
from revrand.btypes import Parameter, Positive, Bound

import revrand.legacygp as gp
import revrand.legacygp.kernels as kern

# Log output to the terminal attached to this notebook
import logging
# getLogger() with no name returns the root logger; setting it to DEBUG means
# records sent through it are not dropped for being below the level threshold.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Emit one DEBUG record as a check that logging is working. Per the stdlib
# logging docs, the module-level logging.debug() also calls basicConfig() when
# the root logger has no handlers yet, so this line may be what installs the
# output handler -- TODO confirm before removing it.
logging.debug("test")


In [2]:
# Load the Boston housing data; the targets are shifted to have zero mean
boston = load_boston()
X, y = boston.data, boston.target - boston.target.mean()
N, D = X.shape

# Settings for the basis functions: how many bases to use, and one initial
# length scale per column of X (every entry starts at the same value)
nbases = 200
lenscale = 10.
lenARD = np.full(D, lenscale)


In [3]:
# Build the basis used by the regressor: a random Matern 3/2 basis whose
# initial length scales are lenARD wrapped in Parameter(..., Positive()),
# combined via `+` with a linear basis.
base = (
    bf.RandomMatern32(
        Xdim=D,
        nbases=nbases,
        lenscale_init=Parameter(lenARD, Positive()),
    )
    + bf.LinearBasis(onescol=True)
)

# Alternative bases that can be swapped in for the random Matern 3/2 one:
#base = (
#    bf.SigmoidalBasis(
#        centres=X[np.random.choice(N, 10), :],
#        lenscale_init=Parameter(lenARD, Positive()),
#    )
#    + bf.LinearBasis(onescol=True)
#)
#base = (
#    bf.FastFoodRBF(
#        Xdim=D,
#        nbases=nbases,
#        lenscale_init=Parameter(lenARD, Positive()),
#    )
#    + bf.LinearBasis(onescol=True)
#)
    

In [4]:
# GP kernel definition used as a comparison for the basis-function model
def kdef(h, k):
    """Compose the comparison kernel from the two supplied callables.

    `h` is called with three numbers each time (1e-5, 10., 0.5 for the
    scale factor; 1e-5, 1e5 and an entry of lenARD for each length scale;
    -4, 1, 0 for the noise term) -- presumably (lower, upper, initial) for a
    hyperparameter; verify against revrand.legacygp. `k` is called with a
    kernel function from `kern` and that kernel's argument(s).

    Returns the scaled Matern 3/2 term plus the log-noise term.
    """
    # The h() calls are kept in the same order as a single-expression form
    # would evaluate them, in case h records hyperparameters by call order.
    amplitude = h(1e-5, 10., 0.5)
    lenscales = [h(1e-5, 1e5, l) for l in lenARD]
    signal_term = amplitude * k(kern.matern3on2, lenscales)
    noise_term = k(kern.lognoise, h(-4, 1, 0))
    return signal_term + noise_term


In [5]:
# Cross val: fit on each training split, score on the held-out split, and
# average the SMSE / MSLL scores over all folds.
folds = 5
# Seed for the fold shuffle, so the train/test partition is the same on every
# run of this cell. Other sources of randomness (e.g. the random basis or the
# optimiser) are not controlled here.
fold_seed = 0
av_smse, av_msll = 0., 0.
av_smse_gp, av_msll_gp = 0., 0.

cv = KFold(len(y), n_folds=folds, shuffle=True, random_state=fold_seed)
for i, (tr_ind, ts_ind) in enumerate(cv):

    # Training
    params = learn(X[tr_ind], y[tr_ind], base)

    #hyper_params = gp.learn(X[tr_ind], y[tr_ind], kdef, verbose=False, ftol=1e-15)
    #regressor = gp.condition(X[tr_ind], y[tr_ind], kdef, hyper_params)

    # Prediction (Ey and Vy feed the metrics below; Vf is not used in this cell)
    Ey, Vf, Vy = predict(X[ts_ind], base, *params)

    #query = gp.query(regressor, X[ts_ind])
    #Ey_gp = gp.mean(query)
    #Vy_gp = gp.variance(query, noise=True)

    # Validation: msll additionally receives the training targets
    f_smse, f_msll = smse(y[ts_ind], Ey), msll(y[ts_ind], Ey, Vy, y[tr_ind])
    #f_smse_gp, f_msll_gp = smse(y[ts_ind], Ey_gp), msll(y[ts_ind], Ey_gp, Vy_gp, y[tr_ind])

    av_smse += f_smse
    av_msll += f_msll
    #av_smse_gp += f_smse_gp
    #av_msll_gp += f_msll_gp

    print("Fold: {},\n\trevrand: SMSE = {}, MSLL = {}"
          #"\n\tGP: SMSE = {}, MSLL = {}"
          .format(i, f_smse, f_msll))#, f_smse_gp, f_msll_gp))

# Turn the accumulated sums into per-fold averages
av_smse /= folds
av_msll /= folds
#av_smse_gp /= folds
#av_msll_gp /= folds

Fold: 0,
	revrand: SMSE = 0.28239808889711826, MSLL = -0.6501849375034063
Fold: 1,
	revrand: SMSE = 0.33226267948199495, MSLL = -0.39085857963926146
Fold: 2,
	revrand: SMSE = 0.3300863661903386, MSLL = -0.4351833254704734
Fold: 3,
	revrand: SMSE = 0.2525967498573402, MSLL = -0.6634710722416051


  for xi, pos in zip(x, ispos)])


Fold: 4,
	revrand: SMSE = 0.17945283234099624, MSLL = -0.8270982530420484


  for lxi, gi, pos in zip(logx, g, ispos)])
  for lxi, gi, pos in zip(logx, g, ispos)])


In [6]:
# Report the scores averaged over all folds
final_report = "Final:\n\trevrand: SMSE = {}, MSLL = {}"
# GP comparison (disabled): append "\n\tGP: SMSE = {}, MSLL = {}" to the
# template and also pass av_smse_gp, av_msll_gp to format().
print(final_report.format(av_smse, av_msll))


Final:
	revrand: SMSE = 0.27535934335355766, MSLL = -0.5933592335793589
