In [1]:
import tensorflow as tf
from keras.utils import np_utils
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt


In [2]:


fashion_mnist = tf.keras.datasets.fashion_mnist

# load the training and test data    
(tr_x, tr_y), (te_x, te_y) = fashion_mnist.load_data()

# reshape the feature data
tr_x = tr_x.reshape(tr_x.shape[0], 784)
te_x = te_x.reshape(te_x.shape[0], 784)

# noramlise feature data
tr_x = tr_x / 255.0
te_x = te_x / 255.0

print( "Shape of training features ", tr_x.shape)
print( "Shape of test features ", te_x.shape)


# one hot encode the training labels and get the transpose
tr_y = np_utils.to_categorical(tr_y,10)
tr_y = tr_y.T
print ("Shape of training labels ", tr_y.shape)

# one hot encode the test labels and get the transpose
te_y = np_utils.to_categorical(te_y,10)
te_y = te_y.T
print ("Shape of testing labels ", te_y.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
Shape of training features  (60000, 784)
Shape of test features  (10000, 784)
Shape of training labels  (10, 60000)
Shape of testing labels  (10, 10000)


In [3]:
def softmax_activation(X):
  expo = tf.exp(X - tf.reduce_max(X, axis=0))
  s = tf.reduce_sum(expo, axis=0, keepdims=True)
  output =  expo / s
  return output

In [4]:
def forward_pass( X , W1 , b1, W2 , b2 , W3 , b3,probThreshold):
  #push all the weights and data along with the bias through the first layer
  hypo_1 = (W1@ X) + b1
  relu_act1 = tf.nn.relu(hypo_1)

  neuronsSize = relu_act1.shape[0]
  trainingSize = relu_act1.shape[1]

  drop_out_matrix = tf.cast(tf.random.normal(shape=(neuronsSize , trainingSize), dtype=tf.float64) * 0.01 < probThreshold,tf.float64)
  
  drop_out_layer = relu_act1 * drop_out_matrix
  drop_out_layer  = relu_act1 / probThreshold

  hypo_2 = (W2 @ drop_out_layer ) + b2
  relu_act2 = tf.nn.relu(hypo_2)

  hypo_3 = (W3 @ relu_act2 ) + b3
  pred_output = softmax_activation(hypo_3)

  return pred_output 

In [5]:
def cross_entropy(tr_y , predictedYProb):
  reduce_sum = -tf.reduce_sum(tr_y * tf.math.log(predictedYProb),axis = 0)
  loss =  tf.cast(tf.reduce_mean(reduce_sum), tf.float64)
  return loss

In [6]:
def calculate_accuracy(Y , predictedYProb):
  predictedYValues = tf.round(predictedYProb)  
  #we need only those values whose actualy value being 1 matches the predicted value being 1
  pred_correction = tf.cast(tf.equal(np.argmax(predictedYValues, axis = 0),np.argmax(Y, axis = 0)),tf.float64)  
  accuracy_sum = tf.reduce_sum(pred_correction)
  acc_score = accuracy_sum / Y.shape[1]
  return acc_score
  

In [7]:
def gradient_descent(Xtrain,Ytrain, Xtest, Ytest, W1 , b1 , W2 , b2, W3 , b3):
  learning_rate = 0.001
  adam_optimizer = tf.keras.optimizers.Adam(learning_rate)

  train_loss_list =[]
  train_acc_list = []

  test_loss_list = []
  test_acc_list = []

  no_of_iter = 500
  dropOutprob = 0.3
  probThreshold = 1 - 0.3

  for i in range(no_of_iter + 1):
    #GradientTape will record all the operation till it encounters gradient
    with tf.GradientTape() as tape:

      train_predictedYProb  = forward_pass(X_train,W1 , b1 , W2 , b2, W3 , b3 ,probThreshold)
      train_loss = cross_entropy(Ytrain ,train_predictedYProb)

    gradients = tape.gradient(train_loss,[W1 , b1 , W2 , b2, W3 , b3])
    train_accuracy  = calculate_accuracy(Ytrain ,train_predictedYProb)

    test_predictedYProb  = forward_pass(X_test , W1 , b1 , W2 , b2, W3 , b3 ,probThreshold)
    test_loss = cross_entropy(Ytest ,test_predictedYProb)
    test_accuracy = calculate_accuracy(Ytest ,test_predictedYProb)
    
    train_loss_list.append(train_loss.numpy())
    train_acc_list.append(train_accuracy)
    test_loss_list.append(test_loss.numpy())
    test_acc_list.append(test_accuracy)

    adam_optimizer.apply_gradients(zip(gradients , [W1 , b1 , W2 , b2, W3 , b3]))

    print("Train Iteration :", i , "Training Loss :",train_loss.numpy() , "Training Accuracy",train_accuracy.numpy())
    print("Test Iteration :", i , "Test Loss :",test_loss.numpy() , "test Accuracy",test_accuracy.numpy())

  return train_loss_list , train_acc_list , test_loss_list, test_acc_list

In [None]:
# # for easy multiplication now the shape of X_train(784,60000) and X_test(784,10000)
X_train = tf.convert_to_tensor(tr_x.T, dtype=tf.float64)
X_test  = tf.convert_to_tensor(te_x.T,dtype= tf.float64)
tr_y =    tf.convert_to_tensor(tr_y ,dtype= tf.float64)
te_y =    tf.convert_to_tensor(te_y ,dtype= tf.float64)

no_of_neurons_layer1 = 300
no_of_neurons_layer2 = 100
#describes the number of types of clothes(10)
no_of_output_units  = 10
no_of_features = X_train.shape[0]


# define the weight whos shape is as the no_of_neurons ; but the weights cannot be same . because if we give same weights then the layers won't learn properly to produce different outputs
# Multiply with small factor , its just a way to initialize the weights, we can experiment keeping (0.01) or removing (0.01)
W1 = tf.Variable(tf.random.normal(shape=(no_of_neurons_layer1, no_of_features), dtype=tf.float64) * 0.01)
#define the bais as vector of zeros 
b1 = tf.Variable(tf.zeros((no_of_neurons_layer1, 1),dtype=tf.float64))

W2 = tf.Variable(tf.random.normal(shape=(no_of_neurons_layer2, no_of_neurons_layer1), dtype=tf.float64) * 0.01)
b2 = tf.Variable(tf.zeros((no_of_neurons_layer2, 1),dtype=tf.float64))

W3 = tf.Variable(tf.random.normal(shape=(no_of_output_units, no_of_neurons_layer2), dtype=tf.float64) * 0.01)
b3 =tf.Variable(tf.zeros((no_of_output_units, 1),dtype=tf.float64))

train_loss_list , train_acc_list , test_loss_list, test_acc_list = gradient_descent(X_train,tr_y, X_test,te_y, W1 , b1 , W2 , b2 , W3, b3)

Train Iteration : 0 Training Loss : 2.302612394240942 Training Accuracy 0.1
Test Iteration : 0 Test Loss : 2.3026213800671997 test Accuracy 0.1
Train Iteration : 1 Training Loss : 2.2976512022570383 Training Accuracy 0.1
Test Iteration : 1 Test Loss : 2.297676998705274 test Accuracy 0.1
Train Iteration : 2 Training Loss : 2.2887215532811265 Training Accuracy 0.1
Test Iteration : 2 Test Loss : 2.2887944731391734 test Accuracy 0.1
Train Iteration : 3 Training Loss : 2.2743779798793957 Training Accuracy 0.1
Test Iteration : 3 Test Loss : 2.2745502357880047 test Accuracy 0.1
Train Iteration : 4 Training Loss : 2.252564220526116 Training Accuracy 0.1
Test Iteration : 4 Test Loss : 2.252844599335378 test Accuracy 0.1
Train Iteration : 5 Training Loss : 2.221871511051411 Training Accuracy 0.1
Test Iteration : 5 Test Loss : 2.222313576417062 test Accuracy 0.1
Train Iteration : 6 Training Loss : 2.181491593968509 Training Accuracy 0.1
Test Iteration : 6 Test Loss : 2.1821495616741595 test Accur

In [None]:
plt.plot(train_loss_list)
plt.plot(test_loss_list)
plt.title("loss")
plt.xlabel('number of iterations')
plt.ylabel("loss")
plt.legend(['train', 'test'], loc='upper right')

In [None]:
plt.plot(train_acc_list)
plt.plot(test_acc_list)
plt.title("Accuracy")
plt.xlabel('number of iterations')
plt.ylabel("Accuracy")
plt.legend(['train', 'test'], loc='lower right')

In [None]:
plt.plot(np.log(train_acc_list))
plt.plot(np.log(test_acc_list))
plt.plot(np.log(train_loss_list))
plt.plot(np.log(test_loss_list))
plt.title("Accuracy/Loss")
plt.xlabel('number of iterations')
plt.ylabel("Accuracy/Loss")
plt.legend(['train_loss', 'test_loss','train_acc', 'test_acc'], loc='upper right')