In [0]:
# fashion mnist dataset - composed of 70,000 images of size 28 X 28 of different clothing types - 10 target classes
# 60,000 in training set and 10,000 in testing set

In [0]:
# input of ann - of 1 dimension - will have to flatten the input pixels in to 1d array of 784 pixels.

In [0]:
# uninstall tf 1 and install tf 2

In [0]:
import tensorflow as tf
import numpy as np # tf 2.0 works really well with numpy
import datetime
from tensorflow.keras.datasets import fashion_mnist

In [4]:
tf.__version__

'2.0.0'

#DATA PREPROCESSING

In [0]:
# loading the dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
# this function returns the images in training and testing datasets
# X_train contains 60,000 2d arrays corresponding to pixels of images
# X_test contains 10,000 2d arrays corresponding to pixels of images
# y_train contains classes/targets of training set
# y_test contains classes/targets of testing set

In [0]:
# normalize the images - we divide each pixel of the training and testing images by the maximum value of a pixel, 255. In this way, each
# pixel will be in the range [0,1].
# By normalizing, we make sure our ann model trains faster - recommended to always normalize images while working with ann
X_train = X_train/255.0
X_test = X_test/255.0

In [0]:
# Reshaping the dataset - since we are building a fully connected layer, we reshape the training and testing set to be in vector format.
# since dim of each image is 28 X 28, we reshape the full dataset to [-1, height*width] -> -1 takes all the images in the first dimension
# of the original array.
X_train = X_train.reshape(-1,28*28)
X_test = X_test.reshape(-1, 28*28)

In [17]:
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

#BUILDING THE ANN

DEFINE THE ARCHITECTURE

In [0]:
# defining the model - simply define an object of the sequential model
model = tf.keras.models.Sequential()
# we are building a fully connected layer which is a sequence of layers, as opposed to a being computational graph. therefore, we use 
# sequential class

In [0]:
# add first fully connected layer
# layer hyperparameters :-
# 1. no of units/ neurons = 128
# 2. activation fn = ReLU - ReLU breaks linearity
# 3. input_shape = (784,)
model.add(tf.keras.layers.Dense(units=128, activation='relu',input_shape=(784,)))
# input_shape necessary for first layer

In [0]:
# add 2nd fully connected layer with dropout
# Dropout is a regularization technique where we randomly set neurons in a layer to zero. That way, while training, those neurons wont be
# updated. Because some percentage of neurons wont be updated, the whole training process is long and we have less chance of overfitting.
model.add(tf.keras.layers.Dropout(0.2))

In [0]:
# add the output layer 
# units = no of classes in output layer - 10
# activation = softmax
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

COMPILING THE MODEL

In [0]:
# compile the model - connect it to an optimizer and choose a loss
# optimizer - adam - tool that will update the weights dusing sgd
# loss = sparse softmax(categorical) cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['sparse_categorical_accuracy'])

In [23]:
# get summary of model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [0]:
# non trainable parameters are the hyperparameters that are set and tuned by us.
# trainable parameters are those which are tuned by the ann, ie, the weights.

TRAINING THE MODEL

In [25]:
model.fit(X_train, y_train, epochs = 5) # can add batch_size also

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fed01f2c940>

#MODEL EVALUATION AND PREDICTION

In [0]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)

In [28]:
test_loss, test_accuracy

(0.36567544990777967, 0.8653)

In [0]:
# accuracy of training and testing sets are almost similar
# training accuracy = 87.80 %
# testing accuracy = 86.53 %

# HOMEWORK

TRY 1

In [30]:
model2 = tf.keras.models.Sequential()
model2.add(tf.keras.layers.Dense(units=128, activation='relu', input_shape=(784,)))
model2.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['sparse_categorical_accuracy'])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [36]:
model2.fit(X_train, y_train, epochs = 10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fecf652df28>

In [37]:
test_loss, test_accuracy = model2.evaluate(X_test, y_test)



In [38]:
test_loss, test_accuracy

(0.32623945214748384, 0.887)

In [0]:
# training accuracy = 91.02 %
# testing accuracy = 88.7 %

TRY 2

In [35]:
model3 = tf.keras.models.Sequential()
model3.add(tf.keras.layers.Dense(units=256, activation='relu', input_shape=(784,)))
model3.add(tf.keras.layers.Dense(units=128, activation='relu'))
model3.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model3.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['sparse_categorical_accuracy'])
model3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_5 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_6 (Dense)              (None, 10)                1290      
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________


In [39]:
model3.fit(X_train , y_train, epochs = 10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fecf5901940>

In [40]:
test_loss, test_accuracy = model3.evaluate(X_test, y_test)



In [41]:
test_loss, test_accuracy

(0.3329423331797123, 0.888)

In [0]:
# training accuracy =  91.46%
# testing accuracy =  88.8%

#SAVING THE MODEL

In [0]:
# save the architecture/topology of the network
model_json = model3.to_json()
with open("fashion_model.json", "w") as json_file:
    json_file.write(model_json)

In [0]:
# save the network weights
model3.save_weights("fashion_model.h5")