In [1]:
import sklearn
import skimage
import tensorflow as tf
import keras
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from tensorflow.keras.utils import to_categorical

from keras.datasets import fashion_mnist

#Loading dataset
(trainX,trainy),(testX,testy) = fashion_mnist.load_data()

def load_dataset():
    #Load dataset
    (trainX,trainy),(testX,testY) = fashion_mnist.load_data()
    #reshape dataset to have a single channel
    trainX=trainX.reshape((trainX.shape[0],28,28,1))
    testX=testX.reshape((testX.shape[0],28,28,1))
    #one hot encode target values
    trainy = to_categorical(trainy)
    testY = to_categorical(testY)
    return trainX, trainy, testX, testY

## Subset Generation

In [3]:
seed = 9
from sklearn.model_selection import StratifiedShuffleSplit
data_split = StratifiedShuffleSplit(test_size=0.08,random_state=seed)
for train_index,test_index in data_split.split(trainX,trainy):
    split_data_92, split_data_8 = trainX[train_index], trainX[test_index]
    split_label_92, split_label_8 = trainy[train_index], trainy[test_index]
    train_test_split = StratifiedShuffleSplit(test_size=0.3,random_state=seed)

## Data Splitting

In [4]:
for train_index, test_index in train_test_split.split(split_data_8,split_label_8):
    
    train_data_70, test_data_30 = split_data_8[train_index],split_data_8[test_index]
    
    train_label_70, test_label_30 = split_label_8[train_index],split_label_8[test_index]
train_data = train_data_70
train_labels = train_label_70
test_data = test_data_30
test_labels = test_label_30

print('train_data: ',train_data.shape)
print('train_labels: ',train_labels.shape)
print('test_data: ',test_data.shape)
print('test_labels: ',test_labels.shape)

train_data:  (3360, 28, 28)
train_labels:  (3360,)
test_data:  (1440, 28, 28)
test_labels:  (1440,)


## Data Preprocessing

   ### Normalization

In [5]:
def normalize(data,eps=1e-8):
    
    data-=data.mean(axis=(0,1,2),keepdims=True)
    
    std = np.sqrt(data.var(axis=(0,1,2), ddof=1, keepdims=True))
    
    std[std<eps]=1
    
    data/=std
    
    return data

train_data = train_data.astype('float64')
test_data = test_data.astype('float64')

train_data = normalize(train_data)
test_data = normalize(test_data)

print('train_data: ',train_data.shape)
print('test_data: ',test_data.shape)

train_data:  (3360, 28, 28)
test_data:  (1440, 28, 28)


### ZCA Whitening

In [6]:
train_data_flat = train_data.reshape(train_data.shape[0],-1).T
test_data_flat = test_data.reshape(test_data.shape[0],-1).T

print('train_data_flat: ',train_data_flat.shape)
print('test_data_flat: ',test_data_flat.shape)

train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T

print('train_data_flat_t: ',train_data_flat.shape)
print('test_data_flat_t: ',test_data_flat.shape)

train_data_flat:  (784, 3360)
test_data_flat:  (784, 1440)
train_data_flat_t:  (784, 3360)
test_data_flat_t:  (784, 1440)


### PCA

In [7]:
from sklearn.decomposition import PCA

train_data_pca = PCA(n_components=train_data_flat.shape[0]).fit_transform(train_data_flat) #changed from shape[1] to shape[0]
test_data_pca = PCA(n_components=test_data_flat.shape[0]).fit_transform(test_data_flat) #same as above

print(train_data_pca.shape)
print(test_data_pca.shape)

train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T

(784, 784)
(784, 784)
