# **This example demonstrates how to perform Bayesian logistic regression on the Iris flower dataset.**
1. Perform one round of regular logistic regression to generate prior
2. Perform Bayesian logistic regression

# **Load data and prepare training dataset X and y**




In [17]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from matplotlib.colors import ListedColormap

iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# Keep only the samples with class labels 0 or 1 so the problem is binary.
# X must be filtered before y, because the mask is built from the unfiltered y.
X = X[y < 2, :2]  # X should be 100X2
y = y[y < 2]

# Prepare the input for testing or application data: a dense grid that covers
# the training inputs with a 0.5 margin on every side.
h = 0.02  # step size in the mesh
x1_min, x1_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
x2_min, x2_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, h), np.arange(x2_min, x2_max, h))

# Prepare the design matrix for input of testing data. 31635X3, each row in Phi_test is phi.T in the slide:
# a leading 1 for the intercept w0 followed by the two grid coordinates.
# ravel() flattens in the same (C) order that reshape(xx1.shape) later restores.
Phi_test = np.column_stack([np.ones(xx1.size), xx1.ravel(), xx2.ravel()])

# **Perform logistic regression with L2 regularization to obtain the MAP estimate m_N (Slide 63).**

In [18]:
# Logistic Regression with L2 penalty is equivalent to MAP on omega with m0 = 0, S0 = 1/alpha * I. See why in Slide 63
# Prior for the w coefficients: w ~ N(m0, S0)
alpha = 2
S0 = np.eye(3) / alpha  # prior covariance (1/alpha) * I, 3X3. We have w0, w1, w2 three parameters.
# m0 is usually zero mean. But we do not need it in the code!
# We just need to perform a logistic regression with L2 regularization to obtain
# the MAP estimate m_N used by the Bayesian estimation below.
# scikit-learn minimises 0.5 * w.T w + C * sum(log-loss), so a prior precision of
# alpha corresponds to C = 1 / alpha (the default penalty is already L2).
# NOTE(review): scikit-learn's default solver does not appear to penalise the
# intercept, so w0 is only approximately the MAP value under this prior - confirm.
classifier = LogisticRegression(C=1.0 / alpha).fit(X, y)  # if alpha=1, it is default logistic regression

# get the MAP of omega
w0 = classifier.intercept_  # intercept from the previous results fitted, shape (1,)
w1 = classifier.coef_.ravel()  # weights of the two input features, shape (2,)

m_N = np.concatenate([w0, w1])  # shape (3,)




# **Perform Bayesian Logistic Regression based on Slides 66-67, 70**


First, we need to run Laplace approximation to estimate the posterior of w ~ N(m_N, A^-1)

In [None]:
def logistic_function(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    Parameters
    ----------
    z : float or array_like
        Input value(s).

    Returns
    -------
    float or numpy.ndarray
        Sigmoid of ``z``, with the same shape as ``z``.
    """
    # sigma(z) = exp(-log(1 + exp(-z))). logaddexp evaluates the inner log
    # without ever forming exp(-z), so large negative z cannot overflow.
    return np.exp(-np.logaddexp(0.0, -np.asarray(z, dtype=float)))

# Design matrix Phi - based on training data. The matrix of input features with an extra
# leading column of ones to account for the intercept term. Each row in Phi is phi.T in the slide.
# All training rows (both classes) are kept: the Hessian below sums over every sample n,
# so restricting Phi to a single class would drop half of the terms.
Phi = np.column_stack([np.ones(X.shape[0]), X])  # 100X3 matrix

# Compute the scalar value y_n * (1 - y_n) for each row in Phi, where
# y_n = sigma(m_N.T phi_n) is the predicted probability at the MAP estimate.
y_hat = logistic_function(Phi @ m_N)
R_nn = y_hat * (1.0 - y_hat)  # shape (100,)

# Since R is a 1D array, we reshape it to a 2D column vector for the following operations
R_nn = R_nn.reshape(-1, 1)  # 100X1 shape

# Compute the Hessian of the negative log posterior: A = S0^-1 + Phi.T R Phi
# np.diagflat (not np.diag) is required here: np.diag on a 2D column vector
# would extract its diagonal instead of building a diagonal matrix.
R = np.diagflat(R_nn)  # 100X100 diagonal matrix built from R_nn
A = np.linalg.inv(S0) + Phi.T @ R @ Phi

# Invert the Hessian to get the covariance matrix of the Laplace approximation
S_N = np.linalg.inv(A)

print("m_N (MAP estimate):", m_N)
print("S_N (Covariance matrix of the Laplace approximation):", S_N)




Next, we use the posterior of w estimated by the Laplace approximation to estimate the predictive distribution of the class for the testing/application inputs xx1, xx2.

In [None]:
from scipy.stats import norm

# Compute the predictive probabilities for the test set using slide 71
# a is the mean of the activation w.T phi under the Laplace posterior, one value per grid point.
a_values = Phi_test @ m_N  # one element per row of Phi_test
# Compute b. Note phi is one row in Phi_test, which is the transpose version of the phi in the slide
# b = phi.T S_N phi is the variance of the activation; the row-wise sum avoids
# forming the full (n_test x n_test) matrix Phi_test S_N Phi_test.T.
b_values = np.sum((Phi_test @ S_N) * Phi_test, axis=1)  # one element per row of Phi_test

# Moderated sigmoid: p(C1 | phi) ~= sigma(kappa(b) * a), kappa(b) = (1 + pi * b / 8)^(-1/2)
kappa = 1.0 / np.sqrt(1.0 + np.pi * b_values / 8.0)
Z_values = logistic_function(kappa * a_values)

# Reshape Z to the shape of the meshgrid for contour plotting
Z = Z_values.reshape(xx1.shape)

# Plot
cm = plt.cm.RdBu
ax = plt.subplot(1, 1, 1)
ax.set_xlim(xx1.min(), xx1.max())
ax.set_ylim(xx2.min(), xx2.max())
ax.set_xticks(())
ax.set_yticks(())
# Z is the probability of class 1, so low values are drawn red and high values blue,
# matching the colours of the scatter points below.
ax.contourf(xx1, xx2, Z, cmap=cm, alpha=0.8)

ax.scatter(X[y == 0, 0], X[y == 0, 1], c='tab:red', edgecolor='k', label=iris.target_names[0])
ax.scatter(X[y == 1, 0], X[y == 1, 1], c='tab:blue', edgecolor='k', label=iris.target_names[1])

plt.xlabel("Sepal length")
plt.ylabel("Sepal width")
plt.title('Bayesian Logistic Regression')
plt.legend()
plt.show()