In [1]:
#Importing libraries

from __future__ import absolute_import, division, print_function
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

import numpy as np 
import pandas as pd 
import io
import matplotlib.pyplot as plt

In [2]:
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert to float32.
x_train = np.array(x_train, np.float32)
x_test = np.array(x_test, np.float32)


# Flatten images to 1D vector of 784 features (28*28).
num_features=784
x_train = x_train.reshape(60000, num_features)
x_test = x_test.reshape(10000, num_features)


# Normalize images value from [0, 255] to [0, 1].
x_train = x_train / 255
x_test = x_test /255

x_train

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [3]:
x_test

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [4]:
x_train.shape

(60000, 784)

In [5]:
x_test.shape

(10000, 784)

In [6]:
y_train.shape

(60000,)

In [7]:
#setting up hyperparameters and data set parameters

#initialize model parameters
#num_class = number of outputs (10) (0 to 9 digits)
#num_features = number of input para (784)

# MNIST dataset parameters.

num_classes = 10 # 0 to 9 digits

num_features = 784 # 28*28

# Training parameters.

learning_rate = 0.01

training_steps = 1000

batch_size = 256

display_step = 50

In [8]:
# Use tf.data API to shuffle and batch data.
num_batches = int(x_train.shape[0] / batch_size)
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [9]:
#initializing weights and biases
#with ones and zeros

# Weight of shape [784, 10], the 28*28 image features, and a total number of classes.

W = tf.Variable(np.random.randn(784, 10).astype(np.float32))

# Bias of shape [10], the total number of classes.
B = tf.Variable(np.random.randn(10).astype(np.float32))

In [10]:
#defining logistic regression and cost function

#which converts the inputs into a probability distribution proportional to the exponents of the inputs using the softmax function. 
#The softmax function, which is implemented using the function tf.nn.softmax, also makes sure that the sum of all the inputs equals one.

# Logistic regression (Wx + b).

def logistic_regression(x):

    # Apply softmax to normalize the logits to a probability distribution.
    return tf.nn.softmax(tf.add(tf.matmul(x, W), B))
    

# Cross-Entropy loss function.

def cross_entropy(y_pred, y_true):

    # Encode label to a one hot vector.
    y_true = tf.one_hot(y_true, depth = num_classes)
    

    # Clip prediction values to avoid log(0) error.

    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)    

    # Compute cross-entropy.
    loss = tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))
    return loss

In [11]:
# Accuracy metric.

def accuracy(y_pred, y_true):

  # Predicted class is the index of the highest score in prediction vector (i.e. argmax).

  correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
  return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Optimization Process and Updating Weights and Biases

Now we define run_optimization() method where we update the weights of our model. We calculate the predictions using the logistic_regression(x) method by taking inputs and find out the loss generated by comparing the predicted value and the original value present in the data set. Next, we compute the gradients using and update the weights of the model with our stochastic gradient descent optimizer.

In [12]:
# Optimization process. 

optimizer = tf.optimizers.SGD(learning_rate)

def run_optimization(x, y):

# Wrap computation inside a GradientTape for automatic differentiation.

    with tf.GradientTape() as g:
        pred = logistic_regression(x)
        loss = cross_entropy(pred, y)

    # Compute gradients.

    gradients = g.gradient(loss, [W, B])
    # Stochastic gradient descent optimizer.
    # Update W and b following gradients.

    optimizer.apply_gradients(zip(gradients, [W, B]))

In [13]:
# Run training for the given number of steps.

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):

    # Run the optimization to update W and b values.

    run_optimization(batch_x, batch_y)
    if step % display_step == 0:

        #Obtain Predictions
        pred = logistic_regression(batch_x)
        #Ccompute loss
        loss = cross_entropy(pred, batch_y)
        #Compute Accuracy
        acc = accuracy(pred, batch_y)
        #print accuracy
        print(f"step: {step}, loss: {loss}, accuracy: {acc}")

step: 50, loss: 220.14715576171875, accuracy: 0.8203125
step: 100, loss: 178.2867431640625, accuracy: 0.84765625
step: 150, loss: 143.408203125, accuracy: 0.85546875
step: 200, loss: 139.97572326660156, accuracy: 0.87109375
step: 250, loss: 106.55428314208984, accuracy: 0.9140625
step: 300, loss: 132.14854431152344, accuracy: 0.85546875
step: 350, loss: 226.07354736328125, accuracy: 0.83984375
step: 400, loss: 62.558685302734375, accuracy: 0.93359375
step: 450, loss: 128.35804748535156, accuracy: 0.875
step: 500, loss: 80.87374877929688, accuracy: 0.91796875
step: 550, loss: 110.55818176269531, accuracy: 0.91015625
step: 600, loss: 142.197509765625, accuracy: 0.87890625
step: 650, loss: 64.973388671875, accuracy: 0.9375
step: 700, loss: 70.18213653564453, accuracy: 0.93359375
step: 750, loss: 105.34352111816406, accuracy: 0.890625
step: 800, loss: 105.02420806884766, accuracy: 0.90625
step: 850, loss: 121.63016510009766, accuracy: 0.89453125
step: 900, loss: 98.333251953125, accuracy: 

In [14]:
# Test model on validation set.
pred = logistic_regression(x_test)
a = accuracy(pred, y_test)
print(f"Test Accuracy: {a}")

Test Accuracy: 0.9034000039100647
