In [1]:
import tensorflow as tf
import numpy as np


# --- Problem dimensions ---
num_classes = 10  # one class per digit, 0-9
num_features = 784  # 28 x 28 (img dimension), flattened

# --- Training hyper-parameters ---
learning_rate = .1
training_steps = 1000
batch_size = 256
display_step = 50  # print metrics every `display_step` steps

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
# Iterating over the list (rather than indexing [0]) keeps the notebook
# runnable on CPU-only machines, where the list is empty, and applies the
# same setting to every GPU, which TensorFlow requires when several exist.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)



In [2]:
# Prepare datasets
from tensorflow.keras.datasets import mnist

# x_*: numpy arrays of 28 x 28 images, e.g. (60000, 28, 28) for the training split
# y_*: 1-D numpy arrays of integer labels, e.g. (60000,) for the training split
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Per split: cast to float32, flatten every image into a row of
# `num_features` values -> (n_samples, 784), then scale pixels by 1/255.
x_train = x_train.astype(np.float32).reshape((-1, num_features)) / 255
x_test = x_test.astype(np.float32).reshape((-1, num_features)) / 255

# Endless stream of shuffled mini-batches:
# repeat forever -> shuffle with a 5000-sample buffer -> batch -> prefetch 1 batch
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()
    .shuffle(5000)
    .batch(batch_size)
    .prefetch(1)
)


In [3]:
# Weight of shape [784, 10]: (num_features, num_classes), initialised to ones.
# `name` belongs to tf.Variable (as for `b` below); previously it was passed to
# tf.ones, so the variable itself was never named "weight".
W = tf.Variable(tf.ones(shape=(num_features, num_classes)), name="weight")
# Bias of shape [10]: one entry per class, initialised to zeros.
b = tf.Variable(tf.zeros(shape=num_classes), name="bias")
# Plain stochastic gradient descent with the configured learning rate.
optimizer = tf.optimizers.SGD(learning_rate)

In [4]:
# Function

def logistic_regression(x):
    """
    Multinomial logistic regression forward pass.

    :param x: batch of flattened images, e.g. (256, 784)
    :return: per-class probabilities, e.g. (256, 10)
    """
    # Linear scores: (256, 784) @ (784, 10) + (10,) -> (256, 10)
    logits = x @ W + b
    # Softmax turns the scores into a probability distribution per row
    return tf.nn.softmax(logits)


def cross_entropy(y_pred, y_true):
    """
    Mean cross-entropy between predicted probabilities and integer labels.

    :param y_pred: predicted class probabilities, e.g. (256, 10)
    :param y_true: integer class labels, e.g. (256,)
    :return: cross entropy loss averaged over the batch (scalar tensor)
    """
    # Integer labels -> one-hot rows of length num_classes
    targets = tf.one_hot(y_true, depth=num_classes)
    # Keep probabilities inside [1e-9, 1] so log() never sees 0
    probs = tf.clip_by_value(y_pred, 1e-9, 1)
    # (256, 10) * (256, 10) -> sum over classes -> (256,) per-sample loss
    per_sample_loss = -tf.reduce_sum(targets * tf.math.log(probs), axis=1)
    # Average the per-sample losses into a single scalar
    return tf.reduce_mean(per_sample_loss)


def accuracy(y_pred, y_true):
    """
    Fraction of samples whose most probable class equals the label.

    :param y_pred: predicted class probabilities, one row per sample
    :param y_true: true integer labels
    :return: mean accuracy as a float32 scalar tensor
    """
    # Most probable class per row (tf.argmax yields int64 by default)
    predicted_class = tf.argmax(y_pred, axis=1)
    # Bring the labels to the same dtype so they can be compared
    labels = tf.cast(y_true, dtype=tf.int64)
    # True/False per sample -> 1.0/0.0 -> mean is the accuracy
    hits = tf.cast(tf.equal(predicted_class, labels), dtype=tf.float32)
    return tf.reduce_mean(hits)


def run_optimization(x, y):
    """
    Run one SGD step on a single mini-batch, updating W and b in place.

    :param x: batch of flattened images
    :param y: batch of integer labels
    """
    trainable = [W, b]

    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        loss = cross_entropy(logistic_regression(x), y)

    # One gradient per variable, each shaped like its variable:
    # (784, 10) for W and (10,) for b
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))



In [5]:
# Training: consume `training_steps` mini-batches, one optimizer step each

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), start=1):
    run_optimization(batch_x, batch_y)

    # Only report every `display_step` steps
    if step % display_step != 0:
        continue

    # Metrics are computed on the batch just trained on, after the update
    pred = logistic_regression(batch_x)
    loss = cross_entropy(pred, batch_y)
    acc = accuracy(y_pred=pred, y_true=batch_y)
    print("step: %d, loss: %.3f, acc: %.3f" % (step, loss, acc))



step: 50, loss: 0.751, acc: 0.863
step: 100, loss: 0.617, acc: 0.875
step: 150, loss: 0.559, acc: 0.859
step: 200, loss: 0.466, acc: 0.883
step: 250, loss: 0.488, acc: 0.891
step: 300, loss: 0.414, acc: 0.883
step: 350, loss: 0.465, acc: 0.867
step: 400, loss: 0.500, acc: 0.859
step: 450, loss: 0.357, acc: 0.895
step: 500, loss: 0.386, acc: 0.887
step: 550, loss: 0.387, acc: 0.902
step: 600, loss: 0.395, acc: 0.887
step: 650, loss: 0.428, acc: 0.867
step: 700, loss: 0.316, acc: 0.922
step: 750, loss: 0.455, acc: 0.883
step: 800, loss: 0.387, acc: 0.895
step: 850, loss: 0.317, acc: 0.918
step: 900, loss: 0.359, acc: 0.887
step: 950, loss: 0.397, acc: 0.883
step: 1000, loss: 0.298, acc: 0.922


In [6]:
"""
    Test accuracy
"""
# Score the whole test split in a single forward pass
pred = logistic_regression(x_test)
test_accuracy = accuracy(pred, y_test)
print("Test Accuracy: %f" % test_accuracy)

Test Accuracy: 0.909500


In [7]:

"""
    Visualize
"""
import matplotlib.pyplot as plt

# Pick `n_images` consecutive test samples beginning at index `start`
start = 10
n_images = 5
test_images = x_test[start:start + n_images]
predictions = logistic_regression(test_images)

# Most probable class for each selected image, computed once up front
predicted_digits = np.argmax(predictions.numpy(), axis=1)

for i in range(n_images):
    # Restore the flat 784-vector to its 28 x 28 layout for display
    plt.imshow(test_images[i].reshape((28, 28)), cmap='gray')
    plt.show()
    print("Model Predictions: %d" % predicted_digits[i])

<Figure size 640x480 with 1 Axes>

Model Predictions: 0
Model Predictions: 6
Model Predictions: 9
Model Predictions: 0
Model Predictions: 1


<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>