In [219]:
import numpy as np
import matplotlib.pyplot as plt


In [220]:
# Constants
# Hyperparameters for the two experiments launched from main(); each pair is
# passed to run_experiment as (sigma_squared, epsilon).
SIGMA_SQUARED_01 = 1.0    # Prior variance used for the 0-vs-1 experiment
EPSILON_01 = 0.01         # Gradient-descent step size used for the 0-vs-1 experiment

SIGMA_SQUARED_68 = 1.0     # Prior variance used for the 6-vs-8 experiment
EPSILON_68 = 0.01          # Gradient-descent step size used for the 6-vs-8 experiment

In [221]:
def load_mnist_data(folder, digits):
    """
    Load MNIST data from text files and filter for specific digits.

    The folder must contain the comma-delimited files ``trainX.txt``,
    ``trainY.txt``, ``testX.txt`` and ``testY.txt``. Only samples whose label
    is in ``digits`` are kept. Labels are then recoded: ``digits[0]`` becomes 0
    and every other kept label becomes 1. A leading column of ones is added to
    the features as the bias term.

    Parameters:
    - folder (str): Path to the folder containing MNIST data files.
    - digits (list): List of digits to filter (e.g., [0, 1]).

    Returns:
    - X_train (np.ndarray): Training features, shape (n_train, n_features + 1).
    - y_train (np.ndarray): Training labels in {0, 1}, shape (n_train,).
    - X_test (np.ndarray): Test features, shape (n_test, n_features + 1).
    - y_test (np.ndarray): Test labels in {0, 1}, shape (n_test,).
    """
    # Construct file paths
    trainX_path = f'{folder}/trainX.txt'
    trainY_path = f'{folder}/trainY.txt'
    testX_path = f'{folder}/testX.txt'
    testY_path = f'{folder}/testY.txt'

    # ndmin pins the dimensionality: without it loadtxt squeezes a
    # single-column feature file to 1-D, which breaks the hstack below.
    X_train = np.loadtxt(trainX_path, delimiter=',', ndmin=2)
    y_train = np.loadtxt(trainY_path, delimiter=',', ndmin=1).astype(int)
    X_test = np.loadtxt(testX_path, delimiter=',', ndmin=2)
    y_test = np.loadtxt(testY_path, delimiter=',', ndmin=1).astype(int)

    # Filter for specified digits
    train_filter = np.isin(y_train, digits)
    test_filter = np.isin(y_test, digits)
    X_train = X_train[train_filter]
    y_train = y_train[train_filter]
    X_test = X_test[test_filter]
    y_test = y_test[test_filter]

    # Recode labels: digits[0] -> 0, any other kept digit -> 1
    y_train = np.where(y_train == digits[0], 0, 1)
    y_test = np.where(y_test == digits[0], 0, 1)

    # Add bias term (column of ones)
    X_train = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
    X_test = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

    return X_train, y_train, X_test, y_test


In [222]:
def sigmoid(z):
    """
    Compute the sigmoid function 1 / (1 + exp(-z)) in a numerically stable way.

    Only exp(-|z|) is ever evaluated, so large-magnitude inputs cannot
    overflow: for z >= 0 the result is 1 / (1 + exp(-z)), and for z < 0 the
    algebraically equal form exp(z) / (1 + exp(z)) is used.

    Parameters:
    - z (np.ndarray or scalar): Input value(s).

    Returns:
    - np.ndarray: Sigmoid of the input, same shape as z (a NumPy scalar for
      scalar input).
    """
    z = np.asarray(z)
    exp_neg_abs = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    denom = 1.0 + exp_neg_abs
    result = np.where(z >= 0, 1.0 / denom, exp_neg_abs / denom)
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as is.
    return result[()]


In [223]:
def compute_gradient(X, y, beta, sigma_squared):
    """
    Gradient of U(beta) with respect to beta.

    The result is X^T (sigmoid(X beta) - y) + beta / sigma_squared, i.e. the
    data term plus the contribution of the prior.

    Parameters:
    - X (np.ndarray): Feature matrix.
    - y (np.ndarray): Labels vector.
    - beta (np.ndarray): Coefficient vector.
    - sigma_squared (float): Variance of the prior distribution.

    Returns:
    - np.ndarray: Gradient vector.
    """
    residual = sigmoid(X @ beta) - y
    data_term = X.T @ residual
    prior_term = beta / sigma_squared
    return data_term + prior_term


In [224]:
def compute_loss(X, y, beta, sigma_squared):
    """
    Compute the loss function U(beta).

    U(beta) is the logistic negative log-likelihood plus the prior penalty
    sum(beta^2) / (2 * sigma_squared). With z = X beta, the per-sample negative
    log-likelihood equals log(1 + exp(z)) - y * z. It is evaluated with
    np.logaddexp so it stays exact for large |z|, instead of being clipped by
    an additive constant inside the logarithm.

    Parameters:
    - X (np.ndarray): Feature matrix.
    - y (np.ndarray): Labels vector (entries 0 or 1).
    - beta (np.ndarray): Coefficient vector.
    - sigma_squared (float): Variance of the prior distribution.

    Returns:
    - float: Loss value.
    """
    z = X @ beta
    # log(1 + exp(z)) computed without overflow as logaddexp(0, z)
    likelihood = np.sum(np.logaddexp(0.0, z) - y * z)
    prior = (1 / (2 * sigma_squared)) * np.sum(beta ** 2)
    return likelihood + prior


In [225]:
def map_logistic_regression(X, y, sigma_squared, epsilon, max_iter=5000, tol=1e-6):
    """
    Perform MAP estimation for logistic regression using gradient descent.

    Starting from beta = 0, repeatedly applies
    beta <- beta - epsilon * grad U(beta) until the L1 norm of the update
    falls below tol or max_iter updates have been made. The outcome
    (converged or not) and the final loss are printed.

    Parameters:
    - X (np.ndarray): Feature matrix.
    - y (np.ndarray): Labels vector.
    - sigma_squared (float): Variance of the prior distribution.
    - epsilon (float): Learning rate (step size).
    - max_iter (int): Maximum number of iterations.
    - tol (float): Tolerance for convergence.

    Returns:
    - beta (np.ndarray): Estimated coefficients after the last update.
    """
    beta = np.zeros(X.shape[1])
    for iteration in range(max_iter):
        gradient = compute_gradient(X, y, beta, sigma_squared)
        beta_new = beta - epsilon * gradient
        step_norm = np.linalg.norm(beta_new - beta, ord=1)

        # Keep the update before testing convergence so the final step is
        # not thrown away and the reported loss matches the returned beta.
        beta = beta_new

        # Check for convergence
        if step_norm < tol:
            loss = compute_loss(X, y, beta, sigma_squared)
            print(f'Converged after {iteration + 1} iterations, Loss: {loss:.4f}.')
            break
    else:
        # Loop ran out of iterations without meeting the tolerance; say so
        # instead of returning silently.
        loss = compute_loss(X, y, beta, sigma_squared)
        print(f'Did not converge within {max_iter} iterations, Loss: {loss:.4f}.')

    return beta


In [226]:
def predict(X, beta):
    """
    Classify each row of X with the given coefficients.

    A row is assigned label 1 when its sigmoid probability is at least 0.5,
    and label 0 otherwise.

    Parameters:
    - X (np.ndarray): Feature matrix.
    - beta (np.ndarray): Coefficient vector.

    Returns:
    - np.ndarray: Predicted labels (0 or 1).
    """
    is_positive = sigmoid(X @ beta) >= 0.5
    return is_positive.astype(int)


In [227]:
def evaluate_model(y_true, y_pred):
    """
    Evaluate the model using zero-one loss (average error rate).

    The error rate is the fraction of positions where the two label arrays
    differ. Labels are compared for equality rather than subtracted, so the
    result is correct for boolean and unsigned-integer label arrays and for
    label values other than 0/1.

    Parameters:
    - y_true (np.ndarray): True labels.
    - y_pred (np.ndarray): Predicted labels.

    Returns:
    - float: Average error rate.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true != y_pred)


In [228]:
def run_experiment(folder, digits, sigma_squared, epsilon):
    """
    Train and evaluate a MAP logistic regression classifier for two digits.

    Loads the data, fits the coefficients on the training split, predicts on
    the test split, and prints the error rate along with the hyperparameters.
    Nothing is returned.

    Parameters:
    - folder (str): Path to the folder containing MNIST data files.
    - digits (list): Digits to classify (e.g., [0, 1]).
    - sigma_squared (float): Variance of the prior distribution.
    - epsilon (float): Learning rate (step size).
    """
    print(f'\nRunning experiment for digits {digits[0]} and {digits[1]}')

    # Data -> fit -> score, each stage feeding the next
    train_features, train_labels, test_features, test_labels = load_mnist_data(folder, digits)
    coefficients = map_logistic_regression(train_features, train_labels, sigma_squared, epsilon)
    error_rate = evaluate_model(test_labels, predict(test_features, coefficients))

    print(f'Average Error Rate (Zero-One Loss): {error_rate:.4f}')
    print(f'Parameters used: sigma_squared={sigma_squared}, epsilon={epsilon}')


In [229]:
def main():
    """Run the 0-vs-1 and 6-vs-8 experiments, in that order."""
    # Path to the data folder
    data_folder = 'data-mnist'

    # One row per experiment: (digits, prior variance, learning rate)
    experiments = (
        ([0, 1], SIGMA_SQUARED_01, EPSILON_01),
        ([6, 8], SIGMA_SQUARED_68, EPSILON_68),
    )
    for digit_pair, prior_variance, learning_rate in experiments:
        run_experiment(folder=data_folder, digits=digit_pair,
                       sigma_squared=prior_variance, epsilon=learning_rate)

# Entry point: call main() only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Running experiment for digits 0 and 1
Converged after 935 iterations, Loss: 2.0343.
Average Error Rate (Zero-One Loss): 0.0000
Parameters used: sigma_squared=1.0, epsilon=0.01

Running experiment for digits 6 and 8
Converged after 992 iterations, Loss: 5.5752.
Average Error Rate (Zero-One Loss): 0.0179
Parameters used: sigma_squared=1.0, epsilon=0.01
