Basic setup

In [27]:
import numpy as np 
from scipy.optimize import fmin_l_bfgs_b
from sklearn.model_selection import train_test_split

In [30]:
# Load the dataset from two plain-text files, resolved relative to the working directory.
# NOTE(review): presumably X holds one row of input features per data point and
# y the matching 0/1 class labels — confirm against the data files.
X = np.loadtxt('x.txt')
y = np.loadtxt('y.txt')

The following functions are borrowed from the Jupyter notebook provided for the 3F8 short lab.

In [35]:
# Logistic (sigmoid) function, applied element-wise: 1 / (1 + exp(-x)).

def logistic(x):
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

In [28]:
##
# Function that prepends a constant column of ones to a matrix of input features.
#
# Input:
#
# X: 2d array of input features, one row per data point.
#
# Output: 2d array x_tilde whose first column is all ones, followed by the columns of X.
#
def get_x_tilde(X):
    bias_column = np.ones((X.shape[0], 1))
    return np.concatenate((bias_column, X), axis=1)


In [26]:
##
# Function that replaces the initial input features by the evaluations of
# Gaussian (RBF) basis functions centred on a set of locations.
#
# Inputs (in signature order):
#
# l: hyper-parameter for the width of the Gaussian basis functions
# X: 2d array of points at which to evaluate the basis functions, one per row
# Z: 2d array with the locations of the Gaussian basis functions, one per row
#
# Output: Feature matrix with one row per row of X and one column per row of Z,
# whose entry (i, j) is exp(-0.5 * ||X[i] - Z[j]||^2 / l^2).
#

def evaluate_basis_functions(l, X, Z):
    # Expand ||x - z||^2 = ||x||^2 - 2 x.z + ||z||^2 so that all pairwise squared
    # distances come from one matrix product plus broadcasting.
    sq_norm_X = np.sum(X**2, 1)
    sq_norm_Z = np.sum(Z**2, 1)
    cross_terms = np.dot(X, Z.T)
    sq_dist = sq_norm_X[:, np.newaxis] - 2 * cross_terms + sq_norm_Z[np.newaxis, :]
    return np.exp(-0.5 / l**2 * sq_dist)

In [36]:
##
# Function that makes predictions with a logistic classifier
#
# Input:
#
# X_tilde: matrix of input features (with a constant 1 appended to the left)
#          for which to make predictions
# w: vector of model parameters
#
# Output: The predictions of the logistic classifier, i.e. the logistic
#         function applied to the linear activations X_tilde . w
#

def predict(X_tilde, w):
    activations = np.dot(X_tilde, w)
    return logistic(activations)

Code implementation of Bayesian logistic regression with Laplace approximation:

In [1]:
##
# Computes the negative log of the unnormalised posterior over the weights,
# i.e. -(log-likelihood + log-prior), where the Gaussian prior's normalising
# constant is dropped (it does not depend on w).
#
# Input:
#
# w: Vector of model parameters w
# X_tilde: 2d array with input features (augmented)
# y: 1d array with output class labels (0 or 1)
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior
#
# Output: Scalar value of the negative unnormalised log posterior at w
#
def neg_log_posterior(w, X_tilde, y, S, m0):
    activations = np.dot(X_tilde, w)

    # Work with the log-probabilities directly instead of taking log(sigmoid(a)):
    #   log sigmoid(a)       = -log(1 + exp(-a)) = -logaddexp(0, -a)
    #   log (1 - sigmoid(a)) = -log(1 + exp(a))  = -logaddexp(0, a)
    # This stays finite when the sigmoid saturates, where log(0) would give
    # -inf and 0 * -inf would turn the whole objective into nan.
    log_k = -np.logaddexp(0.0, -activations)
    log_one_minus_k = -np.logaddexp(0.0, activations)
    ll = np.sum(y * log_k + (1 - y) * log_one_minus_k)

    # Quadratic form (w - m0)^T S^{-1} (w - m0), solved without forming S^{-1}.
    diff = w - m0
    log_prior = -0.5 * np.dot(diff, np.linalg.solve(S, diff))

    return -(ll + log_prior)

In [43]:
# Computes the gradient of the negative unnormalised log posterior with respect
# to w, needed for L_BFGS_B optimisation
#
# Input:
#
# w: Vector of model parameters w
# X_tilde: 2d array with input features (augmented)
# y: 1d array with output class labels (0 or 1)
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior
#
# Output: Vector (same length as w) with the gradient of the negative
#         unnormalised log posterior

def grad_neg_log_posterior(w, X_tilde, y, S, m0):
    k = predict(X_tilde, w)
    # Gradient of the log-prior is -S^{-1} (w - m0). The prior mean is taken
    # from the m0 argument; solve() avoids forming the inverse of S.
    log_prior_grad = -np.linalg.solve(S, w - m0)
    # Gradient of the Bernoulli log-likelihood: X^T (y - sigmoid(X w)).
    ll_grad = X_tilde.T @ (y - k)

    return -(log_prior_grad + ll_grad)


In [61]:
# Computes the Hessian matrix (not its inverse) of the negative log posterior,
# needed for defining the Gaussian approximation. It is the sum of the prior
# precision S^{-1} and the curvature of the negative log-likelihood,
# X^T diag(k * (1 - k)) X, with k the predicted probabilities.
#
# Input:
# w: Vector of model parameters w
# X_tilde: 2d array with input features (augmented)
# y: 1d array with output class labels (0 or 1); not used by the computation,
#    kept so the signature matches the other posterior functions
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior; not used by the computation
#
# Output: Hessian matrix of the negative log posterior (D x D for D weights)

def hessian(w, X_tilde, y, S, m0):
    k = predict(X_tilde, w)
    # Per-data-point curvature of the logistic log-likelihood.
    curvature = k * (1 - k)
    # X^T diag(curvature) X in one product: scaling the columns of X^T replaces
    # the sum of per-row outer products.
    return np.linalg.inv(S) + (X_tilde.T * curvature) @ X_tilde

In [54]:
# Finds the MAP estimate of the weights by minimising neg_log_posterior with
# L-BFGS-B, using grad_neg_log_posterior as the gradient and starting from the
# zero vector.
#
# Input:
# X_tilde: 2d array with input features (augmented)
# y: 1d array with output class labels (0 or 1)
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior
#
# Output: MAP solution for model weights

def calculate_w_map(X_tilde, y, S, m0):
    n_weights = X_tilde.shape[1]
    result = fmin_l_bfgs_b(
        neg_log_posterior,
        np.zeros(n_weights),
        fprime=grad_neg_log_posterior,
        args=(X_tilde, y, S, m0),
    )
    # fmin_l_bfgs_b returns (minimiser, minimum value, info dict); only the
    # minimiser is needed here.
    return result[0]

In [53]:
# Function that performs Laplace approximation for Bayesian logistic regression
#
# Input:
# X_tilde: 2d array with input features (augmented)
# y: 1d array with output class labels (0 or 1)
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior
#
# Output (returned as a tuple, in this order):
# w_map: MAP solution for model weights
# S_N: Covariance matrix of the Gaussian (Laplace) approximation to the posterior
# log_evidence: Log model evidence using Laplace approximation

def laplace_approx(X_tilde, y, S, m0):
    # Mode of the posterior; the optimisation depends only on the arguments
    # passed in, not on notebook-level variables.
    w_map = calculate_w_map(X_tilde, y, S, m0)

    H = hessian(w_map, X_tilde, y, S, m0)
    S_N = np.linalg.inv(H) # Covariance matrix of Laplace approximation

    # Compute model evidence using approximation:
    #   log p(y) ~= log p(y | w_map) + log p(w_map) + (D / 2) log(2 pi) - 0.5 log|H|
    # neg_log_posterior omits the prior's normaliser
    #   -(D / 2) log(2 pi) - 0.5 log|S|,
    # so the (D / 2) log(2 pi) terms cancel and only -0.5 log|S| remains.
    log_det_H = np.linalg.slogdet(H)[1]
    log_det_S = np.linalg.slogdet(S)[1]
    log_joint = -neg_log_posterior(w_map, X_tilde, y, S, m0)
    log_evidence = log_joint - 0.5 * log_det_S - 0.5 * log_det_H

    return w_map, S_N, log_evidence

In [17]:
##
# Computes the scaling factor sqrt(1 + pi * var / 8) for a given variance
#
# Input:
# var: Variance (scalar or array)
#
# Output: kappa value(s), same shape as var

def kappa(var):
    scaled_var = np.pi * var / 8
    return (1 + scaled_var) ** 0.5

In [20]:
##
# Bayesian classifier using Laplace approximation
#
# Input:
# X_train: 2d array with training input features (augmented)
# X_test: 2d array with test input features (augmented)
# y_train: 1d array with training output class labels (0 or 1)
# S: Covariance matrix of Gaussian prior
# m0: mean of Gaussian prior
#
# Output (returned as a tuple, in this order):
# Predicted probabilities for the test set
# log_evidence: Log model evidence using Laplace approximation

def bayesian_classifier(X_train, X_test, y_train, S, m0):
    w_map, S_N, log_evidence = laplace_approx(X_train, y_train, S, m0)
    # Mean and variance of the linear activation under the Gaussian posterior.
    pred_mean = X_test @ w_map
    # Row-wise quadratic form x^T S_N x. This equals diag(X_test S_N X_test^T)
    # without building the full (n_test x n_test) matrix.
    pred_var = np.sum((X_test @ S_N) * X_test, axis=1)
    # Shrink the mean activation by kappa before squashing, which accounts for
    # the posterior uncertainty in the activation.
    return logistic(pred_mean / kappa(pred_var)), log_evidence

Example usage of the Bayesian classifier defined above:

In [49]:
# Split data into training and test set (test_size=0.2 holds out 20% for testing)
# NOTE(review): no random_state is passed, so presumably the split differs
# between runs — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# RBF
l=0.1 # Set RBF lengthscale
# The basis functions are located at the training points for both the training
# and the test features, so both matrices have the same number of columns.
X_tilde_train = get_x_tilde(evaluate_basis_functions(l, X_train, X_train))
X_tilde_test = get_x_tilde(evaluate_basis_functions(l, X_test, X_train))

# Set prior parameters
# Zero prior mean, one entry per column of the augmented training features.
m_0 = np.zeros(X_tilde_train.shape[1])
sigma_0 = 1
# NOTE(review): sigma_0 multiplies the identity directly, so it acts as the
# prior variance rather than a standard deviation — confirm the naming.
S = sigma_0 * np.eye(X_tilde_train.shape[1])

In [64]:
# Implement Bayesian classifier
# Fits on the training features and returns the predictions for the test
# features together with the log model evidence.
pred, log_evidence = bayesian_classifier(X_tilde_train, X_tilde_test, y_train, S, m_0)