In [15]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from mnist import MNIST
from matplotlib import offsetbox
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


In [16]:
# Reader for the MNIST files stored under the 'Samples' directory.
mndata = MNIST('Samples')

# Each loader call is unpacked into an (images, labels) pair; both members
# are converted to NumPy arrays so they can be indexed and reshaped later.
Tr_images, Tr_labels = map(np.array, mndata.load_training())
Te_images, Te_labels = map(np.array, mndata.load_testing())

In [None]:
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None, labels=None, images=None):
    """Plot a 2-D embedding as coloured label text plus image thumbnails.

    Every column of ``X`` is min-max scaled to [0, 1]. Each point is drawn as
    its label, and points that are not too close to an already decorated
    point additionally get a thumbnail of the corresponding image.

    Parameters
    ----------
    X : array-like of shape (n_points, 2)
        Embedded points; column 0 is the x position, column 1 the y position.
    title : str, optional
        Figure title; no title is set when ``None``.
    labels : sequence, optional
        One label per row of ``X``. Labels are divided by 10 to pick a colour,
        so they must be numeric (presumably digits 0-9 -- confirm for other
        data). Defaults to the notebook-level ``Tr_labels``.
    images : sequence, optional
        One image per row of ``X``, each reshapeable to 28x28. Defaults to the
        notebook-level ``Tr_images``.

    Raises
    ------
    ValueError
        If ``labels`` or ``images`` does not have exactly one entry per row
        of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Fall back to the training arrays so existing two-argument calls keep working.
    labels = Tr_labels if labels is None else labels
    images = Tr_images if images is None else images

    n_points = X.shape[0]
    if len(labels) != n_points or len(images) != n_points:
        raise ValueError(
            "X, labels and images must have the same length "
            "(got %d, %d and %d)" % (n_points, len(labels), len(images)))

    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # A constant column would give 0/0 = NaN; map it to 0 instead.
    span = np.where(span == 0, 1.0, span)
    X = (X - x_min) / span

    fig = plt.figure(figsize=(28, 28))
    ax = fig.add_subplot(1, 1, 1)
    for point, label in zip(X, labels):
        ax.text(point[0], point[1], str(label),
                color=plt.cm.Set1(label / 10.),
                fontdict={'weight': 'bold', 'size': 9})

    if hasattr(offsetbox, 'AnnotationBbox'):
        # only print thumbnails with matplotlib > 1.0
        # Seed with the top-right corner so the first point has a reference.
        shown_images = np.array([[1., 1.]])
        for i in range(n_points):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # don't show points that are too close (squared distance)
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(np.reshape(images[i], (28, 28)),
                                      cmap=plt.cm.gray_r),
                X[i])
            ax.add_artist(imagebox)

    ax.set_xticks([])
    ax.set_yticks([])
    if title is not None:
        ax.set_title(title)
    plt.show()


# Projection onto the first 2 principal components.
# The projection is learned from the training images only and then applied
# unchanged to the test images, so train and test features share one basis.

print("Computing PCA projection")
pca = PCA(n_components=2)
pca.fit(Tr_images)
Train_pca = pca.transform(Tr_images)
Test_pca = pca.transform(Te_images)
plot_embedding(Train_pca,
               "Principal Components projection of the digits")


# Same pattern for LDA: fit on the training images and labels only, so the
# test labels never influence the projection used at prediction time.
print("Computing Linear Discriminant Analysis projection")
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(Tr_images, Tr_labels)
Train_lda = lda.transform(Tr_images)
Test_lda = lda.transform(Te_images)
plot_embedding(Train_lda,
               "Linear Discriminant projection of the digits")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Three identically configured forests, one per feature representation.
# Each is fitted on the training features and then predicts the test set.

# Raw pixel vectors
classifier = RandomForestClassifier(
    n_estimators=200, criterion='entropy', random_state=0
).fit(Tr_images, Tr_labels)
pred = classifier.predict(Te_images)

# 2-component PCA features
classifierp = RandomForestClassifier(
    n_estimators=200, criterion='entropy', random_state=0
).fit(Train_pca, Tr_labels)
pca_pre = classifierp.predict(Test_pca)

# 2-component LDA features
classifierl = RandomForestClassifier(
    n_estimators=200, criterion='entropy', random_state=0
).fit(Train_lda, Tr_labels)
lda_pre = classifierl.predict(Test_lda)



In [None]:
from sklearn.metrics import accuracy_score

# Score each model's test predictions against the true test labels:
# raw pixels, PCA features, LDA features (in that order).
acc, accp, accl = (
    accuracy_score(Te_labels, predicted)
    for predicted in (pred, pca_pre, lda_pre)
)

In [None]:
from sklearn.metrics import confusion_matrix

# One confusion matrix per model, true test labels vs. predicted labels.
cm = confusion_matrix(y_true=Te_labels, y_pred=pred)          # raw pixels
cmp = confusion_matrix(y_true=Te_labels, y_pred=pca_pre)      # PCA features
cml = confusion_matrix(y_true=Te_labels, y_pred=lda_pre)      # LDA features

In [None]:
# Test-set accuracy of the random forest trained on the raw pixel vectors.
acc

In [None]:
# Test-set accuracy of the random forest trained on the 2-component PCA features.
accp

In [None]:
# Test-set accuracy of the random forest trained on the 2-component LDA features.
accl