In [1]:
import os
import sys
sys.path.insert(0, os.path.join(os.getcwd(), '..', '..'))
import plda
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Load Data.

In [2]:
# To minimize the size of this repository, I only provide 200 training and 100 testing images.
X_train = np.load('mnist_data/mnist_train_images.npy')
Y_train = np.load('mnist_data/mnist_train_labels.npy')
X_test = np.load('mnist_data/mnist_test_images.npy')
Y_test = np.load('/mnist_data/mnist_test_labels.npy')

FileNotFoundError: [Errno 2] No such file or directory: '/mnist_datamnist_test_labels.npy'

In [None]:
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

In [None]:
n_examples = 5
fig, ax_arr = plt.subplots(1, n_examples, figsize=(10, 2))

for x in range(n_examples):
    ax_arr[x].imshow(X_train[x].reshape(28, 28), cmap='gray')
    ax_arr[x].set_xticks([])
    ax_arr[x].set_yticks([])
plt.show()

# Optional control over Preprocessing with Principal Components Analysis

In [None]:
classifier = plda.Classifier()

# Use the 5 top principal components to reduce overfitting.
# This will preprocess training data from shape (200, 784) to (200, 5).
classifier.fit_model(X_train, Y_train, n_principal_components=5)

predictions, log_p_predictions = classifier.predict(X_test)

In [None]:
print('Accuracy: {}'.format((Y_test == predictions).mean()))

In [None]:
n_examples = 10
fig, ax_arr = plt.subplots(1, n_examples, figsize=(20, 2))

for x in range(n_examples):
    ax_arr[x].imshow(X_test[x].reshape(28, 28), cmap='gray')
    ax_arr[x].set_xticks([])
    ax_arr[x].set_yticks([])
    title = 'Prediction: {}'
    xlabel = 'Truth: {}'
    ax_arr[x].set_title(title.format(predictions[x]))
    ax_arr[x].set_xlabel(xlabel.format(Y_test[x]))
plt.show()

# Default setting uses as many Principal Components as possible.

In [None]:
classifier = plda.Classifier()

# Use as many principal components as possible.
classifier.fit_model(X_train, Y_train)

predictions, log_p_predictions = classifier.predict(X_test)

In [None]:
# Overfit due to curse of dimensionality:
#  ratio of avg. sample size to data dimension is very small
#  (about ~20 / 200 = ~.2).
print('Accuracy: {}'.format((Y_test == predictions).mean()))

In [None]:
n_examples = 10
fig, ax_arr = plt.subplots(1, n_examples, figsize=(20, 2))

for x in range(n_examples):
    ax_arr[x].imshow(X_test[x].reshape(28, 28), cmap='gray')
    ax_arr[x].set_xticks([])
    ax_arr[x].set_yticks([])
    title = 'Prediction: {}'
    xlabel = 'Truth: {}'
    ax_arr[x].set_title(title.format(predictions[x]))
    ax_arr[x].set_xlabel(xlabel.format(Y_test[x]))
plt.show()

# Getting PCA Preprocessing information

In [None]:
type(classifier.model.pca)

In [None]:
classifier.model.pca

In [None]:
classifier.model.pca.n_features_  # Original dimensionality.

In [None]:
classifier.model.pca.n_components  # Preprocessed dimensionality

# Parameters fitted via Maximum Likelihood of the Data

In [None]:
Psi = classifier.model.Psi
A = classifier.model.A
inv_A = classifier.model.inv_A
m = classifier.model.m

# Indices of the subspace used for classification.
relevant_U_dims = classifier.model.relevant_U_dims

# Prior Gaussian Parameters

In [None]:
classifier.model.prior_params.keys()

# Posterior Gaussian Parameters

In [None]:
# Categories in the training data.
classifier.model.posterior_params.keys()

In [None]:
# Parameters for a category.
classifier.model.posterior_params[0].keys()

# Posterior Predictive Gaussian Parameters

In [None]:
# Categories in the training data.
classifier.model.posterior_predictive_params.keys()

In [None]:
# Parameters for a category.
classifier.model.posterior_predictive_params[0].keys()