In [1]:
# Data manipulation libs
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split

# DL libs
import tensorflow
import keras

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


ValueError: Invalid backend. Missing required entry : placeholder

# Prepare Fashion-MNIST Dataset

In [2]:
# Load the Fashion-MNIST train and test tables from the zipped CSV files
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'fasion_mnist_data/fashion-mnist_train.zip',
        'fasion_mnist_data/fashion-mnist_test.zip',
    )
)

# Alternative using keras
# from keras.datasets import fashion_mnist
# ((train_X, train_Y), (test_X, test_Y)) = fashion_mnist.load_data()

In [3]:
# Peek at the first five rows to sanity-check the parsed columns
train_data.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Check data shape: (60000, 785) for train and (10000, 785) for test
for frame in (train_data, test_data):
    print(frame.shape)

(60000, 785)
(10000, 785)


In [15]:
# Split into X and Y
def split_x_y(train_data, test_data, y_label):
    """Split the train and test frames into feature matrices and label vectors.

    Feature columns keep the order they have in the input frame. Selecting
    them with ``columns.difference([y_label])`` would return the column names
    sorted (``pixel1, pixel10, pixel100, ...``) and thereby scramble the
    feature order, so a boolean column mask is used instead.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame
        Frames holding the ``y_label`` column plus one column per feature.
    y_label : str
        Name of the label column.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_X, test_X, train_Y, test_Y)``. The X arrays hold every column
        except ``y_label`` in original order; the Y arrays are 1-D.

    Raises
    ------
    KeyError
        If ``y_label`` is not a column of one of the frames.
    """
    def _features(frame):
        # A boolean mask over the columns preserves their original order
        return np.array(frame.loc[:, frame.columns != y_label])

    def _labels(frame):
        return np.array(frame.loc[:, y_label])

    train_X, test_X = _features(train_data), _features(test_data)
    train_Y, test_Y = _labels(train_data), _labels(test_data)

    return train_X, test_X, train_Y, test_Y

# Test: split both frames on the 'label' column
train_X, test_X, train_Y, test_Y = split_x_y(
    train_data=train_data,
    test_data=test_data,
    y_label='label',
)

In [16]:
# Report the shape of every array produced by the split, one per line
for arr in (train_X, test_X, train_Y, test_Y):
    print(arr.shape)

(60000, 784)
(10000, 784)
(60000,)
(10000,)


In [9]:
# Transform classes
from keras.utils import np_utils

# Number of distinct classes found in the training labels
# NOTE(review): assumes every class id 0..num_classes-1 occurs in train_Y;
# otherwise the one-hot width below would be too small — confirm.
num_classes = len(np.unique(train_Y))

# One-hot encode the training and testing labels; the width comes from
# num_classes rather than a hard-coded 10 so the two cannot drift apart
train_Y_one_hot = np_utils.to_categorical(train_Y, num_classes)
test_Y_one_hot = np_utils.to_categorical(test_Y, num_classes)

# initialize the label names
classes = ["top", "trouser", "pullover", "dress", "coat", "sandal", "shirt", "sneaker", "bag", "ankle boot"]

In [10]:
# Normalise the data to the range of [0, 1] and change data type
def normalize_X(data_X):
    """Divide the data by 255 and return the result cast to float32.

    data_X: array supporting `/` and `.astype` (e.g. a numpy array)
    returns: a new float32 array; the input is not modified
    """
    scaled = data_X / 255.
    return scaled.astype('float32')

# Test: rescale both feature sets
train_X, test_X = normalize_X(train_X), normalize_X(test_X)

In [17]:
# Convert the images into 3 channels using depth-wise stack
def convert_3chan(data_X):
    """Stack three copies of the input along the third axis with ``np.dstack``.

    For a 2-D input of shape (n, p) the result has shape (n, p, 3) and all
    three channels are identical.
    """
    return np.dstack((data_X, data_X, data_X))

# Test: expand both feature sets to three channels
train_X, test_X = convert_3chan(train_X), convert_3chan(test_X)

train_X.shape, test_X.shape

((60000, 784, 3), (10000, 784, 3))

In [19]:
# Reshape images as per the tensor format required by tensorflow (channels_last)
from keras import backend as K

def reshape_img(data_X, dim, depth, K):
    """Reshape stacked image data into the 4-D layout the backend expects.

    ``data_X`` is expected to carry the channel as its last (fastest-varying)
    axis, e.g. shape ``(num_samples, dim * dim, depth)`` as produced by
    ``convert_3chan``.

    Parameters
    ----------
    data_X : numpy.ndarray
        Image data with ``dim * dim * depth`` elements per sample.
    dim : int
        Height and width of each (square) image.
    depth : int
        Number of channels.
    K : object
        Backend exposing ``image_data_format()``.

    Returns
    -------
    numpy.ndarray
        Shape ``(num_samples, depth, dim, dim)`` when the backend reports
        ``"channels_first"``, otherwise ``(num_samples, dim, dim, depth)``.

    Raises
    ------
    ValueError
        If the number of elements per sample is not ``dim * dim * depth``.
    """
    # The channel values of one pixel are adjacent in the input, so the only
    # direct reshape that keeps pixels intact is rows x columns x depth.
    data_X = data_X.reshape((data_X.shape[0], dim, dim, depth))

    if K.image_data_format() == "channels_first":
        # Move the channel axis forward. Reshaping straight to
        # (num_samples, depth, dim, dim) would mix pixels and channels.
        data_X = data_X.transpose(0, 3, 1, 2)

    return data_X

# Bring both feature sets into the backend's 4-D layout (28x28 pixels, 3 channels)
train_X = reshape_img(train_X, dim=28, depth=3, K=K)
test_X = reshape_img(test_X, dim=28, depth=3, K=K)

train_X.shape, test_X.shape

((60000, 28, 28, 3), (10000, 28, 28, 3))

In [20]:
# Resize the images 48*48 (only required by VGG16)
from keras.preprocessing.image import img_to_array, array_to_img

def resize_img(data_X, img_size):
    """Resize every image in ``data_X`` to ``img_size`` x ``img_size``.

    Each image is converted to a PIL image, resized, and converted back to an
    array; the resized images are returned stacked in one numpy array.

    NOTE(review): ``array_to_img(..., scale=False)`` appears to cast pixel
    values to uint8 without rescaling, so inputs already scaled to [0, 1]
    (e.g. by ``normalize_X``) would collapse to 0/1 — confirm the expected
    input range.
    """
    target_size = (img_size, img_size)
    resized = []
    for im in data_X:
        pil_im = array_to_img(im, scale=False).resize(target_size)
        resized.append(img_to_array(pil_im))

    return np.asarray(resized)

# Build the 48x48 inputs for VGG16 from both feature sets
train_X_vgg16, test_X_vgg16 = resize_img(train_X, 48), resize_img(test_X, 48)

train_X_vgg16.shape, test_X_vgg16.shape

((60000, 48, 48, 3), (10000, 48, 48, 3))

In [21]:
# Splitting train data as train and validation data
# Both splits hold out 20% of the rows and use the same seed
split_opts = {'test_size': 0.2, 'random_state': 8}

train_X, valid_X, train_label, valid_label = train_test_split(
    train_X, train_Y, **split_opts
)

train_X_vgg16, valid_X_vgg16, train_label_vgg16, valid_label_vgg16 = train_test_split(
    train_X_vgg16, train_Y, **split_opts
)

In [22]:
# Check the data size whether it is as per tensorflow (or VGG16) requirements
# One line per input size: train X, validation X, train labels, validation labels
print(*(arr.shape for arr in (train_X, valid_X, train_label, valid_label)))
print(*(arr.shape for arr in (train_X_vgg16, valid_X_vgg16, train_label_vgg16, valid_label_vgg16)))

(48000, 28, 28, 3) (12000, 28, 28, 3) (48000,) (12000,)
(48000, 48, 48, 3) (12000, 48, 48, 3) (48000,) (12000,)


In [23]:
# Plot the loss and accuracy of model (TF)
def plot_loss_acc(trained_model):
    """Plot training/validation accuracy and loss curves.

    The accuracy curves are drawn on the current figure and the loss curves on
    a new one; both are then shown.

    Parameters
    ----------
    trained_model : object
        Object whose ``history`` attribute is a dict of per-epoch metric lists
        (presumably the ``History`` returned by ``model.fit`` — confirm). It
        must contain ``'loss'`` and ``'val_loss'`` plus the accuracy under
        either ``'acc'``/``'val_acc'`` or ``'accuracy'``/``'val_accuracy'``.

    Raises
    ------
    KeyError
        If a required metric is missing from ``history``.
    """
    # NOTE(review): relies on a module-level ``plt`` (matplotlib.pyplot); no
    # import for it is visible in this part of the notebook — confirm.
    history = trained_model.history

    # The accuracy metric is stored as 'acc' by some Keras versions and as
    # 'accuracy' by others; accept either spelling.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    acc = history[acc_key]
    val_acc = history['val_' + acc_key]
    loss = history['loss']
    val_loss = history['val_loss']
    epochs = range(1, len(acc) + 1)

    def _draw_curves(title, metric, train_values, val_values):
        # One panel: training curve in red, validation curve in blue
        plt.title(title)
        plt.plot(epochs, train_values, 'red', label='Training ' + metric)
        plt.plot(epochs, val_values, 'blue', label='Validation ' + metric)
        plt.xlabel('Epoch')
        plt.legend()

    _draw_curves('Training and validation accuracy', 'acc', acc, val_acc)

    # The loss curves go on a separate figure
    plt.figure()
    _draw_curves('Training and validation loss', 'loss', loss, val_loss)

    plt.show()