In [27]:
import pickle
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [28]:
# Read the pickled MNIST digits data from disk into `data`.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
with open("data/mnist_small.pkl", mode="rb") as pickle_file:
    data = pickle.load(pickle_file)

In [29]:
# Pull the feature matrix and the labels out of the loaded dictionary.
features = data["X"]
labels = data["Y"]

# Colour palette for the digits: the value read from a label is used as the
# index into this list.
colors = ["violet", "indigo", "blue", "cyan", "green", "yellow", "orange", "red", "pink", "gray"]
# Each label is indexed at position 0 to get that palette index
# (presumably every label is a length-1 sequence holding the digit - TODO confirm).
label_colors = [colors[label[0]] for label in labels]

In [30]:
# Perform the Principal Component Analysis, keeping the first two components.
# random_state is fixed (same seed value the notebook uses for tSNE) so the
# projection is reproducible if scikit-learn selects its randomized SVD solver;
# the argument is ignored by the deterministic solvers.
pca_output = PCA(n_components=2, random_state=100).fit_transform(features)

In [31]:
# Project the features down to two dimensions with tSNE, using a fixed seed.
tsne_model = TSNE(n_components=2, random_state=100)
tsne_output = tsne_model.fit_transform(features)

In [32]:
# Helper function for visualising the plots.
def plot(data, path, label_colors):
    """Save a scatter plot of 2-D projected points to an image file.

    The plot is drawn on its own figure, written to ``path`` and the figure is
    closed afterwards, so nothing is left open for a later call to draw on.

    Args:
        data: Iterable of two-element points (first component, second component).
        path: Destination handed to ``savefig``.
        label_colors: Per-point colours, passed as ``c`` to ``scatter``.

    Raises:
        ValueError: If ``data`` does not unpack into exactly two coordinate
            sequences (for example when it is empty).
    """
    # Unpack before creating the figure so bad input leaves no figure behind.
    x, y = zip(*data)
    # Use a dedicated figure instead of pyplot's implicit "current" figure, so
    # a stale figure from earlier code can never be drawn over or closed here.
    fig, ax = plt.subplots()
    try:
        ax.scatter(x, y, c=label_colors, marker="x")
        ax.set_xlabel("First component of the projected features in 2-Dimensions")
        ax.set_ylabel("Second component of the projected features in 2-Dimensions")
        ax.set_title("Scatter Plot for MNIST digits based on dimensionality reduction")
        fig.savefig(path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

# Save one scatter plot per projection: the PCA result first, then the tSNE result.
projection_outputs = (
    (pca_output, "PCA_Output.png"),
    (tsne_output, "tSNE_Output.png"),
)
for projected_points, image_path in projection_outputs:
    plot(projected_points, image_path, label_colors)