Boston Housing Dataset Test
===========================

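In this notebook we test revrand's ARD (automatic relevance determination) basis functions, which use one length scale per input dimension, on the Boston housing dataset. A `StandardLinearModel` is scored with 5-fold cross-validation using R2, SMSE and MSLL.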

In [1]:
import numpy as np
from scipy.stats import gamma

from sklearn.datasets import load_boston
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score

from revrand import StandardLinearModel
import revrand.basis_functions as bf
from revrand.metrics import smse, msll
from revrand.btypes import Parameter, Positive, Bound

# Log output to the terminal attached to this notebook.
# basicConfig() installs a stream handler on the root logger when it has none
# (and does nothing otherwise). This replaces a throwaway debug("test") call
# whose only lasting effect was to trigger that same implicit configuration.
import logging
logging.basicConfig()
logger = logging.getLogger()
# Set the level explicitly: basicConfig() leaves the level untouched when the
# root logger already has handlers.
logger.setLevel(logging.DEBUG)


In [2]:
# Load the Boston housing data and centre the targets on zero.
# NOTE(review): load_boston is deprecated/removed in recent scikit-learn
# releases -- confirm the pinned version before re-running this notebook.
boston = load_boston()
X = boston.data
y = boston.target - np.mean(boston.target)

# N samples, D input features
N, D = np.shape(X)

# Number of random bases used by the basis functions
nbases = 50

# ARD length-scale initialiser: a frozen gamma distribution rather than the
# earlier fixed start (lenscale = 10., lenARD = lenscale * np.ones(D)).
lenARD = gamma(10, scale=10)

In [3]:
# Basis functions: random Matern 3/2 features with one length scale per input
# dimension (shape=(D,)), concatenated with a linear basis that includes a
# column of ones.
base = (
    bf.RandomMatern32(
        Xdim=D,
        nbases=nbases,
        lenscale_init=Parameter(lenARD, Positive(), shape=(D,)),
    )
    + bf.LinearBasis(onescol=True)
)

# Alternative bases, left disabled. NOTE(review): these were written when
# lenARD was an array (the spectral mixture variant iterates over it), so they
# may need adapting now that lenARD is a gamma distribution.
#base = bf.spectralmixture(Xdim=D, bases_per_component=5, lenscales_init=[Parameter(l, Positive()) for l in lenARD]) + \
#    bf.LinearBasis(onescol=True)
#base = bf.SigmoidalBasis(centres=X[np.random.choice(N, 10), :], lenscale_init=Parameter(lenARD, Positive())) + \
#    bf.LinearBasis(onescol=True)
#base = bf.FastFoodRBF(Xdim=D, nbases=nbases, lenscale_init=Parameter(lenARD, Positive())) + \
#    bf.LinearBasis(onescol=True)
    

In [4]:
# Cross-validation: fit on each training split, score on the held-out fold,
# and average the three metrics over all folds.
folds = 5
cv_seed = 0  # fixes the shuffled fold assignment so re-runs use the same splits
av_smse, av_msll, av_r2 = 0., 0., 0.

slm = StandardLinearModel(base)
# NOTE(review): only the fold split is seeded here; any randomness inside the
# basis functions or the model fit is not controlled by this cell.
foldgen = KFold(n_splits=folds, shuffle=True, random_state=cv_seed)

for i, (tr_ind, ts_ind) in enumerate(foldgen.split(X)):

    # Training (the same model object is re-fitted on every fold)
    slm.fit(X[tr_ind], y[tr_ind])

    # Prediction on the held-out fold
    # (Ey, Vy are presumably the predictive mean and variance -- confirm)
    Ey, Vy = slm.predict_moments(X[ts_ind])

    # Validation: MSLL also receives the training targets
    f_smse = smse(y[ts_ind], Ey)
    f_msll = msll(y[ts_ind], Ey, Vy, y[tr_ind])
    f_r2 = r2_score(y[ts_ind], Ey)

    # Running totals; turned into averages after the loop
    av_smse += f_smse
    av_msll += f_msll
    av_r2 += f_r2

    print("Fold: {},\n\trevrand: R2 = {}, SMSE = {}, MSLL = {}".format(i, f_r2, f_smse, f_msll))

# KFold yields exactly `folds` splits, so dividing by `folds` gives the mean
av_smse /= folds
av_msll /= folds
av_r2 /= folds

Fold: 0,
	revrand: R2 = 0.8604037586778012, SMSE = 0.13959624132219886, MSLL = -1.113792368265064
Fold: 1,
	revrand: R2 = 0.8479839492556478, SMSE = 0.15201605074435215, MSLL = -1.0179448783492322
Fold: 2,
	revrand: R2 = 0.8556414761339752, SMSE = 0.14435852386602477, MSLL = -0.9970271556253443
Fold: 3,
	revrand: R2 = 0.7527360305165767, SMSE = 0.24726396948342333, MSLL = -0.7163839307922802
Fold: 4,
	revrand: R2 = 0.7594975366690915, SMSE = 0.24050246333090855, MSLL = -0.2735388509449056


In [5]:
# Report the metrics averaged over all cross-validation folds
print(
    "Final:\n\trevrand: R2 = {}, SMSE = {}, MSLL = {}".format(
        av_r2, av_smse, av_msll
    )
)


Final:
	revrand: R2 = 0.8152525502506183, SMSE = 0.18474744974938154, MSLL = -0.8237374367953653
