In [None]:
import tensorflow as tf
from tensorflow import keras

from keras.datasets import fashion_mnist
from keras.utils.np_utils import to_categorical

# Load the Fashion-MNIST train/test splits at module level; the stratified
# splitting further down reads trainX and trainY directly.
(trainX, trainY), (testX, testY) = fashion_mnist.load_data()
#load train and test dataset
def load_dataset():
    """Load Fashion-MNIST and prepare it for a single-channel image model.

    Returns:
        tuple: ``(trainX, trainY, testX, testY)``. The image arrays are
        reshaped to ``(n_samples, 28, 28, 1)`` and the label arrays are
        passed through ``to_categorical``.
    """
    (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
    # Append an explicit channel axis of size 1 to every image.
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    # Fixed: this line previously indexed the undefined name ``test``, which
    # raised NameError on every call.
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
    

seed = 9

from sklearn.model_selection import StratifiedShuffleSplit

# Stage 1: stratified 50/50 partition of the full training set.  n_splits is
# left at the splitter's default, so several candidate partitions are
# generated; exactly as before, only the last one produced is kept.
# NOTE(review): the "_92"/"_8" suffixes do not match test_size=0.5 - confirm
# which proportion was intended.
data_split = StratifiedShuffleSplit(test_size=0.5, random_state=seed)
train_index, test_index = list(data_split.split(trainX, trainY))[-1]
split_data_92, split_label_92 = trainX[train_index], trainY[train_index]
split_data_8, split_label_8 = trainX[test_index], trainY[test_index]

# Stage 2: stratified 70/30 train/test partition of the held-out half.
# NOTE(review): the name train_test_split is also the name of a scikit-learn
# helper function; here it is bound to a splitter instance.
train_test_split = StratifiedShuffleSplit(test_size=0.3, random_state=seed)
train_index, test_index = list(train_test_split.split(split_data_8, split_label_8))[-1]
train_data_70, train_label_70 = split_data_8[train_index], split_label_8[train_index]
test_data_30, test_label_30 = split_data_8[test_index], split_label_8[test_index]

# Working names used by the preprocessing and classifier cells.
train_data, train_labels = train_data_70, train_label_70
test_data, test_labels = test_data_30, test_label_30

print('train_data : ', train_data.shape)
print('train_labels : ', train_labels.shape)
print('test_data : ', test_data.shape)
print('test_labels : ', test_labels.shape)

import numpy as np

#data preprocessing
#PREPROCESSING WITH NORMALIZATION FUNCTION
def normalize(data, eps=1e-8):
    """Standardise ``data`` to zero mean and unit sample standard deviation.

    Statistics are reduced over axes ``(0, 1, 2)``, so the input needs at
    least three dimensions; any further axes are standardised independently.

    Args:
        data: Array-like of numbers. It is not modified. Non-floating input
            (for example raw uint8 images) is converted to float64 first;
            floating input keeps its dtype.
        eps: Standard deviations below this threshold are replaced by 1 so
            that constant data does not cause a division by zero.

    Returns:
        numpy.ndarray: a new array with the same shape as ``data``.
    """
    arr = np.asarray(data)
    if not np.issubdtype(arr.dtype, np.floating):
        # In-place arithmetic on integer arrays cannot hold the float result,
        # so promote up front.
        arr = arr.astype(np.float64)
    # Subtracting out-of-place creates a fresh array and leaves the caller's
    # data untouched.
    centered = arr - arr.mean(axis=(0, 1, 2), keepdims=True)
    std = np.sqrt(centered.var(axis=(0, 1, 2), ddof=1, keepdims=True))
    std[std < eps] = 1
    return centered / std
# Cast each split to float64 and standardise it with normalize().
# NOTE(review): the test split is standardised with its own mean/std rather
# than the statistics of the training split - confirm this is intended.
train_data = normalize(train_data.astype('float64'))
test_data = normalize(test_data.astype('float64'))
# Report the shape of the train and test data after normalisation.
print('train_data: ', train_data.shape)
print('test_data: ', test_data.shape)


#PREPROCESSING WITH PCA
# Flatten every image into one feature vector per sample.
train_data_flat = train_data.reshape(train_data.shape[0], -1)
test_data_flat = test_data.reshape(test_data.shape[0], -1)
print('train_data_flat: ',train_data_flat.shape)
print('test_data_flat: ',test_data_flat.shape)

# Aliases of the flattened, normalised pixels (no copy is made).
train_data_flat_t = train_data_flat
test_data_flat_t = test_data_flat

from sklearn.decomposition import PCA

# Fixed: a second, independent PCA used to be fitted on the test split, so the
# train and test features were expressed in different bases and column k of
# one did not correspond to column k of the other.  The projection is now
# learned from the training split only and re-used to transform the test split.
pca = PCA(n_components=min(train_data_flat.shape))
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)
print(train_data_pca.shape)
print(test_data_pca.shape)


#PREPROCESSING WITH SVD
from skimage import color

def svdFeatures(input_data, n_components=28):
    """Describe every image by its leading singular values.

    Args:
        input_data: Array of images indexed along the first axis. Each image
            may be 2-D ``(H, W)``, single-channel ``(H, W, 1)``, or have a
            trailing colour axis, in which case it is converted with
            ``color.rgb2gray`` first.
        n_components: Maximum number of singular values kept per image,
            largest first. Defaults to 28, the count previously hard-coded.

    Returns:
        numpy.matrix: one row per image holding up to ``n_components``
        singular values. Empty input yields a matrix with zero rows.
    """
    rows = []
    for img in np.asarray(input_data):
        if img.ndim == 3 and img.shape[-1] == 1:
            # Drop a singleton channel axis.
            img = img[..., 0]
        elif img.ndim != 2:
            img = color.rgb2gray(img)
        # 2-D images are used as they are: there is nothing to convert, and
        # rgb2gray may not accept input without a channel axis.
        # Only the singular values are needed, so U and V are not computed.
        singular_values = np.linalg.svd(img, compute_uv=False)
        # Slicing (rather than indexing 0..27) tolerates images whose smaller
        # side is shorter than n_components.
        rows.append(singular_values[:n_components])
    if not rows:
        return np.matrix(np.empty((0, n_components)))
    # Build the matrix once, after the loop, instead of on every iteration.
    return np.matrix(np.vstack(rows))
# Extract singular-value features for the train split, then the test split.
train_data_svd, test_data_svd = (
    svdFeatures(split) for split in (train_data, test_data)
)
print(train_data_svd.shape)
print(test_data_svd.shape)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
train_data :  (21000, 28, 28)
train_labels :  (21000,)
test_data :  (9000, 28, 28)
test_labels :  (9000,)
train_data:  (21000, 28, 28)
test_data:  (9000, 28, 28)
train_data_flat:  (21000, 784)
test_data_flat:  (9000, 784)
(21000, 784)
(9000, 784)
(21000, 28)
(9000, 28)


In [None]:
#By using data preprocessed with NORMALIZATION FUNCTION

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
# Gaussian Naive Bayes on the flattened, normalised pixels.
gnb1 = GaussianNB()
train = gnb1.fit(X=train_data_flat_t, y=train_labels)

# Predict the held-out split and report the score.
y_pred1 = gnb1.predict(X=test_data_flat_t)
score1 = gnb1.score(X=test_data_flat_t, y=test_labels)
print("score", score1)

# Rows are true classes, columns are predicted classes.
# NOTE(review): in the recorded run almost every prediction falls into two
# classes - worth investigating before relying on this score.
Confusion_Matrix1 = metrics.confusion_matrix(y_true=test_labels, y_pred=y_pred1)
print("Confusion Matrix", Confusion_Matrix1)

score 0.18744444444444444
Confusion Matrix [[  3   0   0   0   0   0 862   0  35   0]
 [  0   0   0   0   0   0 898   0   2   0]
 [  0   0   0   0   0   0 881   0  19   0]
 [  0   0   0   0   0   0 895   0   5   0]
 [  0   0   0   0   0   0 893   0   7   0]
 [  0   0   0   0   0   0 222   0 677   1]
 [  2   0   0   0   0   0 852   0  46   0]
 [  0   0   0   0   0   0  27   0 873   0]
 [  0   0   0   0   0   0 221   0 679   0]
 [  1   0   0   0   0   0  42   0 704 153]]


In [None]:
#By using data preprocessed with PCA 

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
# Gaussian Naive Bayes on the PCA-projected features.
gnb2 = GaussianNB()
train = gnb2.fit(X=train_data_pca, y=train_labels)

# Predict the held-out split and report the score.
y_pred2 = gnb2.predict(X=test_data_pca)
score2 = gnb2.score(X=test_data_pca, y=test_labels)
print("score", score2)

# Rows are true classes, columns are predicted classes.
Confusion_Matrix2 = metrics.confusion_matrix(y_true=test_labels, y_pred=y_pred2)
print("Confusion Matrix", Confusion_Matrix2)

score 0.37733333333333335
Confusion Matrix [[ 80 179 206 113  48  52 110   8  97   7]
 [  6 725  14 117   4  16   2   0  10   6]
 [ 21 248 180  70 209  34  72   2  51  13]
 [ 14 323   7 344  70  93  21   0  27   1]
 [  5 120 116 105 419  31  52   0  33  19]
 [  3  11  11  40  15 423  14 263 112   8]
 [ 29 215 116  60 154  65 104   1 137  19]
 [  0  88  51   5  33 148  20 542   0  13]
 [  3  11  42  22  84 121  29   1 511  76]
 [  6   3 153   0  52 171 154  98 195  68]]


In [None]:
#By using data preprocessed with SVD

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
# Gaussian Naive Bayes on the per-image singular-value features.
gnb3 = GaussianNB()
train = gnb3.fit(X=train_data_svd, y=train_labels)

# Predict the held-out split and report the score.
y_pred3 = gnb3.predict(X=test_data_svd)
score3 = gnb3.score(X=test_data_svd, y=test_labels)
print("score", score3)

# Rows are true classes, columns are predicted classes.
Confusion_Matrix3 = metrics.confusion_matrix(y_true=test_labels, y_pred=y_pred3)
print("Confusion Matrix", Confusion_Matrix3)

score 0.3194444444444444
Confusion Matrix [[ 99 217  17 148  97  13  15 186  27  81]
 [  2 836   0   2   2   1   1  51   1   4]
 [ 26 101 123 206 151  16  13 186  22  56]
 [  5 552   2  46  24  15   4 215   3  34]
 [ 15  85  49 145 336  16  10 159  18  67]
 [  2 208   0   0   7 175   2 332  11 163]
 [ 59  98  66 156 182  53  20 129  44  93]
 [  0 277   0   1   1   3   0 545   0  73]
 [ 28 136  23  64  97  40   9 199 141 163]
 [ 18   4   0   2  13  24   2 264  19 554]]
