Using PCA in Image classification

In [None]:
from keras.datasets import fashion_mnist
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
import time

In [None]:
# Load the Fashion-MNIST dataset; load_data() hands back a (train, test) pair,
# each of which is itself an (images, labels) pair.
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Flatten every image into one row so that each pixel value becomes a feature
# column. Passing -1 lets NumPy infer the number of pixels per image, and,
# unlike unpacking three values from `.shape`, this keeps the cell idempotent:
# re-running it on already-flattened data is a no-op instead of an error.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [None]:
# Baseline: classification model trained directly on the raw pixel features.
# The timer spans both fitting and predicting. perf_counter() is used because
# it is a monotonic, high-resolution clock meant for measuring intervals,
# whereas time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
grad_boost_model = XGBClassifier()
# `hist` keeps whatever fit() returns; the name is retained for compatibility.
hist = grad_boost_model.fit(X_train, y_train)
pred = grad_boost_model.predict(X_test)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

--- 972.5711069107056 seconds ---


In [None]:
# Confusion matrix of the pixel-feature model on the test set
# (first argument: true labels, second argument: predicted labels).
confusion_matrix(y_true=y_test, y_pred=pred)

array([[831,   0,   9,  43,   8,   1,  94,   0,  14,   0],
       [  5, 958,   4,  24,   3,   0,   4,   0,   2,   0],
       [ 15,   1, 761,   9, 138,   0,  70,   0,   6,   0],
       [ 25,   6,  12, 894,  28,   0,  33,   0,   2,   0],
       [  0,   1, 110,  37, 780,   0,  70,   0,   2,   0],
       [  0,   0,   0,   1,   0, 940,   0,  37,   2,  20],
       [157,   1, 124,  33,  92,   0, 575,   0,  18,   0],
       [  0,   0,   0,   0,   0,  14,   0, 935,   0,  51],
       [  0,   1,   9,   5,   6,   2,  11,   4, 960,   2],
       [  0,   0,   0,   1,   0,   7,   1,  42,   1, 948]])

In [None]:
# Test-set accuracy of the model trained on all pixel features
accuracy_score(y_true=y_test, y_pred=pred)

0.8582

In [None]:
# Repeat the classification on a PCA-reduced feature set and time the whole
# pipeline (PCA fit + projection, model fit, prediction).
N_COMPONENTS = 50  # number of principal components kept as features
PCA_SEED = 0       # makes PCA reproducible if it uses a randomized SVD solver

# perf_counter() is a monotonic clock intended for measuring elapsed time.
start_time = time.perf_counter()
princ_comp_model = PCA(n_components=N_COMPONENTS, random_state=PCA_SEED)
# Learn the components from the training data and project it in a single step,
# giving a dataset with N_COMPONENTS columns (one per component).
X_train2 = princ_comp_model.fit_transform(X_train)
# Fraction of the total variance retained by the kept components.
print(princ_comp_model.explained_variance_ratio_.sum())
# Build the classifier on the reduced-dimension training data.
grad_boost_model2 = XGBClassifier()
# `hist2` keeps whatever fit() returns; the name is retained for compatibility.
hist2 = grad_boost_model2.fit(X_train2, y_train)
# Project the test data with the components learned from the training data
# only (the PCA is never re-fitted on the test set).
X_test2 = princ_comp_model.transform(X_test)
pred2 = grad_boost_model2.predict(X_test2)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

0.8626283168567077
--- 219.01335406303406 seconds ---


In [None]:
# Confusion matrix of the PCA-feature model on the test set
# (first argument: true labels, second argument: predicted labels).
confusion_matrix(y_true=y_test, y_pred=pred2)

array([[815,   1,  16,  55,  11,   4,  82,   0,  16,   0],
       [  7, 948,   8,  32,   2,   0,   1,   0,   2,   0],
       [ 18,   1, 725,   9, 132,   2, 102,   0,  11,   0],
       [ 36,  10,   7, 867,  32,   1,  43,   0,   3,   1],
       [  1,   2, 103,  43, 751,   3,  88,   0,   9,   0],
       [  0,   0,   0,   2,   0, 891,   0,  64,   3,  40],
       [207,   1, 134,  38, 107,   1, 486,   0,  26,   0],
       [  0,   0,   0,   0,   0,  40,   0, 894,   1,  65],
       [  4,   0,  13,   9,   3,  12,  22,   8, 926,   3],
       [  0,   0,   0,   0,   0,  21,   0,  36,   1, 942]])

In [None]:
# Test-set accuracy of the model trained on the PCA-reduced features
accuracy_score(y_true=y_test, y_pred=pred2)

0.8245

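Reasonable accuracy was achieved while saving considerable time by using PCA on the image features: reducing the pixel features to 50 principal components (which retain about 86% of the variance) cut the total run time from roughly 972 seconds to roughly 219 seconds, while test accuracy dropped only from 0.8582 to 0.8245.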