Boston Housing Dataset Test
===========================

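In this notebook we test revrand's automatic relevance determination (ARD) basis functions, which use a separate length scale for each input dimension, on the Boston housing dataset. Performance is estimated with 5-fold cross-validation and reported as standardised mean squared error (SMSE) and mean standardised log loss (MSLL).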

In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.cross_validation import KFold

import revrand.basis_functions as bf
from revrand.metrics import smse, msll
from revrand.slm import learn, predict
from revrand.btypes import Parameter, Positive, Bound

# Send log records to the terminal attached to this notebook.
import logging

# Take the root logger and lower its threshold so DEBUG records pass through.
logger = logging.getLogger()
logger.setLevel(level=logging.DEBUG)

# Emit a single record as a smoke test. The module-level helper also installs
# a default handler on the root logger when none has been configured yet.
logging.debug("test")


In [2]:
# Load the data: features are used as provided, targets are shifted to zero mean
boston = load_boston()
X = boston.data
y = boston.target - np.mean(boston.target)

# Problem size, read off the feature matrix (rows, columns)
N, D = X.shape

# Basis-function settings shared by the cells below
nbases = 200
lenscale = 10.
# One initial length scale per input column, all starting at `lenscale`
lenARD = np.full(D, lenscale)


In [3]:
# Construct basis functions: a random Matern 3/2 basis initialised with one
# positive-constrained length scale per input dimension (lenARD), combined via
# `+` with a linear basis.
# NOTE(review): onescol=True presumably appends a bias column -- confirm
# against revrand.basis_functions.LinearBasis.
base = (
    bf.RandomMatern32(
        Xdim=D,
        nbases=nbases,
        lenscale_init=Parameter(lenARD, Positive()),
    )
    + bf.LinearBasis(onescol=True)
)

# Alternative bases, kept for experimentation (uncomment exactly one to swap it in):
#base = bf.spectralmixture(Xdim=D, bases_per_component=5, lenscales_init=[Parameter(l, Positive()) for l in lenARD]) + \
#    bf.LinearBasis(onescol=True)
#base = bf.SigmoidalBasis(centres=X[np.random.choice(N, 10), :], lenscale_init=Parameter(lenARD, Positive())) + \
#    bf.LinearBasis(onescol=True)
#base = bf.FastFoodRBF(Xdim=D, nbases=nbases, lenscale_init=Parameter(lenARD, Positive())) + \
#    bf.LinearBasis(onescol=True)
    

In [4]:
# Cross val: fit the model on each training split, score the held-out split
# with SMSE and MSLL, and average both metrics over all folds.
folds = 5
av_smse, av_msll = 0., 0.

# Fix the shuffle seed so every run produces the same train/test partition;
# with shuffle=True and no random_state the fold membership (and therefore the
# per-fold scores) changes on each execution.
kfold = KFold(len(y), n_folds=folds, shuffle=True, random_state=0)

for i, (tr_ind, ts_ind) in enumerate(kfold):

    # Training
    params = learn(X[tr_ind], y[tr_ind], base)

    # Prediction
    # NOTE(review): Ey / Vf / Vy are presumably the predictive mean, the
    # latent-function variance and the target variance -- confirm against
    # revrand.slm.predict. Vf is not used below.
    Ey, Vf, Vy = predict(X[ts_ind], base, *params)

    # Validation: MSLL additionally receives the training targets
    f_smse = smse(y[ts_ind], Ey)
    f_msll = msll(y[ts_ind], Ey, Vy, y[tr_ind])

    # Accumulate the running sums; they are turned into means after the loop
    av_smse += f_smse
    av_msll += f_msll

    print("Fold: {},\n\trevrand: SMSE = {}, MSLL = {}".format(i, f_smse, f_msll))

# Convert the accumulated sums into per-fold averages
av_smse /= folds
av_msll /= folds


Fold: 0,
	revrand: SMSE = 0.18841881037678757, MSLL = -0.7647662838713496
Fold: 1,
	revrand: SMSE = 0.30787083891819134, MSLL = -0.48176210061086716
Fold: 2,
	revrand: SMSE = 0.2538624993036456, MSLL = -0.7362161127359302


  for xi, pos in zip(x, ispos)])
  for lxi, gi, pos in zip(logx, g, ispos)])
  for lxi, gi, pos in zip(logx, g, ispos)])


Fold: 3,
	revrand: SMSE = 0.13717082494172678, MSLL = -1.0092322929288136


  dWX = np.outer(X[:, i], - self.W[i, :] / l**2)


Fold: 4,
	revrand: SMSE = 0.33261668579433284, MSLL = -0.36967354439035344


In [7]:
# Report the cross-validation averages of both metrics
summary_template = "Final:\n\trevrand: SMSE = {}, MSLL = {}"
print(summary_template.format(av_smse, av_msll))


Final:
	revrand: SMSE = 0.24398793186693682, MSLL = -0.6723300669074628
