# Implementing MNIST digit recognition in TensorFlow without using the Keras API

In [9]:
import tensorflow as tf
%colors nocolor



## A Simple Dense Class 

A Dense layer implements the following input transformation, where W and b are model parameters, and activation is an element-wise function (usually relu, but it would be softmax for the last layer):

       output = activation(dot(input, W) + b)

In [10]:
# implementing simple Python class, NaiveDense, 
# that creates two TensorFlow variables, W and b, 
# and exposes a __call__() method that applies the preceding transformation

class NaiveDense:
    """A minimal fully connected layer: ``activation(matmul(inputs, W) + b)``.

    Attributes:
        activation: Callable applied to the affine output.
        W: ``tf.Variable`` of shape ``(input_size, output_size)``.
        b: ``tf.Variable`` of shape ``(output_size,)``.
    """

    def __init__(self, input_size , output_size , activation ):
        self.activation = activation

        # Kernel: small random values drawn uniformly from [0, 0.1).
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )

        # Bias: one entry per output unit, starting at zero.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Run the forward pass on ``inputs`` and return the activated output."""
        affine = tf.matmul(inputs, self.W) + self.b
        return self.activation(affine)

    @property
    def weights(self):
        """The layer's trainable variables, as the list ``[W, b]``."""
        return [self.W, self.b]
    

## A simple Sequential Class
 
 Create a NaiveSequential class to chain these layers. It wraps a list of layers and exposes a __call__() method that simply calls the underlying layers on the inputs, in order. It also features a weights property to easily keep track of the layers’ parameters.

In [11]:
class NaiveSequential:
    """Chains layers: each layer is called on the previous layer's output."""

    def __init__(self,layers) :
        self.layers = layers

    def __call__(self,inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for current_layer in self.layers:
            activations = current_layer(activations)
        return activations

    @property
    def weights(self):
        """A new flat list with every layer's weights, in layer order."""
        return [weight for layer in self.layers for weight in layer.weights]
    


In [12]:
# Using this NaiveDense class and this NaiveSequential class, we can create a mock 
# Keras model:

model = NaiveSequential(
    layers=[
        # Hidden layer: flattened 28x28 image -> 512 units, relu activation.
        NaiveDense(input_size=28*28, output_size=512, activation=tf.nn.relu),
        # Output layer: 512 units -> 10 units, softmax activation.
        NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)

# Two layers, each contributing a W and a b variable.
assert len(model.weights) == 4


## Batch Generator

A way to iterate over the MNIST data in mini-batches

In [13]:
import math
class BatchGenerator:
    """Hands out consecutive mini-batches of ``images`` and ``labels``.

    ``num_batches`` is the number of ``next()`` calls needed to cover the data
    once; the last batch may be shorter than ``batch_size``.
    """

    def __init__(self, images, labels, batch_size = 128):
        assert len(images) == len(labels)
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        # Cursor into the data: start position of the next batch.
        self.index = 0
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        start = self.index
        stop = start + self.batch_size
        self.index = stop
        return self.images[start:stop], self.labels[start:stop]

## Running one training step

The “training step”: updating the weights of the model after running it on one batch of data. We need to
1.  Compute the predictions of the model for the images in the batch.
2.  Compute the loss value for these predictions, given the actual labels.
3.  Compute the gradient of the loss with regard to the model’s weights.
4.  Move the weights by a small amount in the direction opposite to the gradient.

In [14]:
# To compute the gradient, we use the TensorFlow GradientTape object

def one_training_step(model, images_batch, labels_batch):
    """Run one gradient-descent update on a single batch.

    Args:
        model: Callable model exposing a ``weights`` property.
        images_batch: Inputs for this batch, passed to ``model``.
        labels_batch: Target labels for this batch, passed to the loss.

    Returns:
        The mean loss over the batch, computed before the weights are updated.
    """
    # Forward pass, recorded by the tape so the loss can be differentiated.
    with tf.GradientTape() as grad_tape:
        batch_predictions = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, batch_predictions)
        )

    # One gradient per entry of model.weights, in the same order.
    weight_gradients = grad_tape.gradient(average_loss, model.weights)

    # Apply the update step to the model's variables in place.
    update_weights(weight_gradients, model.weights)
    return average_loss


In [15]:
# Updating the weights 

# Step size for each update; read by update_weights at call time.
learning_rate = 1e-3

def update_weights(gradients, weights ):
    """Subtract ``gradient * learning_rate`` from each weight, in place.

    ``gradients`` and ``weights`` are paired positionally via ``zip``.
    """
    for gradient, variable in zip(gradients, weights):
        step = gradient * learning_rate
        # assign_sub is the in-place "-=" for TensorFlow variables.
        variable.assign_sub(step)

## A full training loop 

An epoch of training simply consists of repeating the training step for each batch in the training data, and the full training loop is simply the repetition of that epoch for the requested number of epochs.

In [16]:
def fit(model, images, labels, epochs, batch_size = 128):
    """Train ``model`` by running ``one_training_step`` over mini-batches.

    Args:
        model: Model passed through to ``one_training_step``.
        images: Training inputs; sliced into batches by ``BatchGenerator``.
        labels: Training labels; must have the same length as ``images``.
        epochs: Number of full passes over the data.
        batch_size: Number of samples per mini-batch.

    Prints the epoch number at the start of each epoch and the batch loss
    every 100 batches.
    """
    for epoch_counter in range(epochs):
        print(F"Epoch {epoch_counter}")
        # Forward batch_size to the generator; previously it was dropped and
        # the generator always fell back to its own default.
        # A fresh generator per epoch restarts iteration from the first sample.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch , labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(F"loss at batch {batch_counter}: {loss: .2f}")
                
        

## TestDrive 

In [22]:
from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten each 28x28 image into a 784-vector and divide the values by 255.
train_images = train_images.reshape((60000, 28*28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28*28)).astype("float32") / 255

# Train the model defined earlier in the notebook.
fit(model, train_images, train_labels, epochs=10, batch_size=128)



Epoch 0
loss at batch 0:  6.95
loss at batch 100:  2.26
loss at batch 200:  2.22
loss at batch 300:  2.12
loss at batch 400:  2.26
Epoch 1
loss at batch 0:  1.94
loss at batch 100:  1.90
loss at batch 200:  1.84
loss at batch 300:  1.74
loss at batch 400:  1.86
Epoch 2
loss at batch 0:  1.61
loss at batch 100:  1.59
loss at batch 200:  1.51
loss at batch 300:  1.45
loss at batch 400:  1.52
Epoch 3
loss at batch 0:  1.35
loss at batch 100:  1.35
loss at batch 200:  1.25
loss at batch 300:  1.23
loss at batch 400:  1.28
Epoch 4
loss at batch 0:  1.15
loss at batch 100:  1.16
loss at batch 200:  1.05
loss at batch 300:  1.06
loss at batch 400:  1.11
Epoch 5
loss at batch 0:  0.99
loss at batch 100:  1.02
loss at batch 200:  0.91
loss at batch 300:  0.94
loss at batch 400:  0.98
Epoch 6
loss at batch 0:  0.88
loss at batch 100:  0.91
loss at batch 200:  0.80
loss at batch 300:  0.84
loss at batch 400:  0.90
Epoch 7
loss at batch 0:  0.80
loss at batch 100:  0.82
loss at batch 200:  0.72
lo

In [21]:
# Quick look at the dataset size. Only the last expression in a cell is
# displayed, so len(train_labels) is evaluated but its value is not shown.
# NOTE(review): the recorded output below (10000) does not match the 60000 rows
# that the reshape in the TestDrive cell requires for train_images, and this
# cell's execution count (21) precedes that cell's (22) — the output is likely
# stale; re-run to confirm.
len(train_labels)
len(train_images)

10000

## Model Evaluation

Evaluate the model by taking the argmax of its predictions over the test images, and comparing it to the expected labels:

In [23]:
import numpy as np
# Run the model on the test images; .numpy() converts the TensorFlow tensor to a NumPy array.
predictions = model(test_images).numpy()
# The predicted class is the index of the largest value in each row.
predicted_labels = predictions.argmax(axis=1)
# Boolean array: True where the prediction equals the expected label.
matches = predicted_labels == test_labels
print(F"accuracy : {matches.mean(): .3f}")

accuracy :  0.815
