In [1]:
import tensorflow as tf

In [4]:
class NaiveDense:
  """A minimal fully connected layer computing activation(inputs @ W + b).

  Attributes:
    activation: callable applied to the affine output in __call__.
    W: tf.Variable of shape (input_size, output_size) holding the weights.
    b: tf.Variable of shape (output_size,) holding the biases.
  """

  def __init__(self, input_size, output_size, activation):
    """Create the layer's variables.

    Args:
      input_size: number of features in each input row.
      output_size: number of units (columns of W, entries of b).
      activation: callable applied to the result of matmul(inputs, W) + b.
    """
    self.activation = activation

    # The (input_size, output_size) layout is what lets matmul(inputs, W)
    # work. Values are sampled uniformly between 0 and 0.1 to keep the
    # starting weights small, and wrapped in a tf.Variable so they can be
    # updated in place during training.
    self.W = tf.Variable(
        tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
    )

    # One bias per unit, all starting at zero; also a tf.Variable so that
    # training can update it.
    self.b = tf.Variable(tf.zeros((output_size,)))

  def __call__(self, inputs):
    """Forward pass: affine transform of `inputs` followed by the activation."""
    affine = tf.matmul(inputs, self.W) + self.b
    return self.activation(affine)

  @property
  def weights(self):
    """The layer's parameters as the list [W, b] (read like an attribute)."""
    return [self.W, self.b]


In [5]:
class NaiveSequential:
  """Chains layers so that the output of each one becomes the input of the next."""

  def __init__(self, layers):
    # Ordered collection of layers; both __call__ and weights iterate over it.
    self.layers = layers

  def __call__(self, inputs):
    """Run `inputs` through every layer in order and return the final output.

    Each layer is invoked as a callable on the previous layer's result; with
    no layers, `inputs` is returned unchanged.
    """
    activations = inputs
    for current_layer in self.layers:
      activations = current_layer(activations)
    return activations

  @property
  def weights(self):
    """Flat list of every layer's weights, in layer order.

    Each layer exposes its own `weights` sequence; the entries are
    concatenated into a single new list.
    """
    return [w for layer in self.layers for w in layer.weights]

In [6]:
# Build the two-layer model. Constructing each NaiveDense creates its weight
# and bias variables, so all parameters exist once this statement finishes.
model = NaiveSequential(layers=[
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
])

# Two layers, each contributing one weight matrix and one bias vector.
assert len(model.weights) == 4

In [7]:
import math

'''
The BatchGenerator class splits the dataset into smaller, manageable batches
for training. Batching is a standard step in machine learning: each batch is
small enough to process efficiently, and the model's weights can be updated
once per batch, giving many updates per epoch instead of a single one.
'''

class BatchGenerator:
  """Hands out consecutive (images, labels) slices of a fixed batch size.

  Attributes:
    index: position in the data where the next batch starts.
    batch_num: number of batches needed to cover the data once
      (the last batch may be shorter than batch_size).
  """

  def __init__(self, images, labels, batch_size=128):
    # Images and labels are sliced with the same bounds, so they must line up.
    assert len(images) == len(labels)
    self.images     = images
    self.labels     = labels
    self.batch_size = batch_size
    self.index      = 0
    # Round up so a trailing partial batch is still counted.
    self.batch_num  = math.ceil(len(images) / self.batch_size)

  def next(self):
    """Return the next (images, labels) batch and advance the cursor."""
    start = self.index
    stop  = start + self.batch_size
    batch = (self.images[start:stop], self.labels[start:stop])
    # Advance only after both slices have been taken.
    self.index = stop
    return batch

In [17]:
'''
The `one_training_step` function performs a single forward and backward pass:

1. It uses the model to make predictions on a batch of input images.
2. It computes the per-sample loss between the predicted and true labels using
   sparse categorical crossentropy.
3. It averages these losses to get a single scalar loss value representing
   how well the model performed on the entire batch.
4. It then calculates the gradients (partial derivatives) of this average loss
   with respect to each of the model's weights.

These gradients indicate how much each weight contributed to the loss and
are used for updating the model.

The `update_weights` function (defined elsewhere) applies these gradients
to the model's weights using an optimization algorithm such as gradient descent.
'''

def one_training_step(model, images_batch, labels_batch):
  """Run one forward/backward pass on a batch and update the model's weights.

  Args:
    model: callable producing predictions for a batch; must expose `weights`.
    images_batch: inputs passed to the model.
    labels_batch: true labels passed to sparse categorical crossentropy.

  Returns:
    The mean loss over the batch, computed before the weights are updated.
  """
  # The forward pass and loss run inside the tape so they can be
  # differentiated afterwards.
  with tf.GradientTape() as recorder:
    batch_predictions = model(images_batch)
    sample_losses = tf.keras.losses.sparse_categorical_crossentropy(
        labels_batch, batch_predictions)
    mean_loss = tf.reduce_mean(sample_losses)

  # Gradient of the mean loss with respect to each weight, then one update.
  grads = recorder.gradient(mean_loss, model.weights)
  update_weights(grads, model.weights)
  return mean_loss

In [12]:
'''
The update_weights function performs the step that follows backpropagation.
It applies the gradient descent update rule, adjusting each model weight
in the direction that reduces the loss. This is done by subtracting the product of the
gradient and the learning rate from each corresponding weight.
'''

# Default step size used by update_weights when no explicit `lr` is passed.
learning_rate = 1e-3

def update_weights(gradients, weights, lr=None):
  """Apply one gradient descent step to `weights`, in place.

  Each weight is decreased by its gradient multiplied by the step size.

  Args:
    gradients: iterable of gradients, paired positionally with `weights`.
    weights: iterable of objects exposing `assign_sub` (e.g. tf.Variable).
    lr: optional step size for this call. When None (the default), the
      module-level `learning_rate` is used, so existing two-argument calls
      behave as before.
  """
  # Resolve the fallback at call time so later changes to the module-level
  # learning_rate are still picked up.
  step_size = learning_rate if lr is None else lr
  for g, w in zip(gradients, weights):
    w.assign_sub(g * step_size)

In [15]:
'''
The fit function performs the training loop. It iterates over the specified number
of epochs, and within each epoch, it loops through the dataset in batches.
For each batch, it trains the model and prints the loss at regular intervals.
'''


def fit(model, images, labels, epochs, batch_size=128):
  """Train `model` for `epochs` passes over the data, one batch at a time.

  Args:
    model: model passed through to one_training_step for every batch.
    images: training inputs, sliceable along the first axis.
    labels: training labels, same length as `images`.
    epochs: number of full passes over the data.
    batch_size: number of samples per batch (the last batch may be smaller).

  Prints the epoch number at the start of each epoch and the loss of every
  100th batch. Returns None.
  """
  for epoch_counter in range(epochs):
    print(f'Epoch no. {epoch_counter}')

    # A fresh generator per epoch restarts batching from the first sample.
    # batch_size must be forwarded explicitly; otherwise BatchGenerator falls
    # back to its own default and the caller's value is silently ignored.
    batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
    for batch_counter in range(batch_generator.batch_num):
      batch_images, batch_labels = batch_generator.next()
      loss = one_training_step(model, batch_images, batch_labels)
      if batch_counter % 100 == 0:
        print(f'loss at batch {batch_counter} is {loss:.2f}')

In [18]:
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-long row, cast to float32, and divide
# by 255 so the model receives small floating-point inputs.
train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# Train the model defined above on the preprocessed training set.
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch no. 0
loss at batch 0 is 3.87
loss at batch 100 is 2.22
loss at batch 200 is 2.17
loss at batch 300 is 2.08
loss at batch 400 is 2.23
Epoch no. 1
loss at batch 0 is 1.90
loss at batch 100 is 1.87
loss at batch 200 is 1.79
loss at batch 300 is 1.70
loss at batch 400 is 1.84
Epoch no. 2
loss at batch 0 is 1.57
loss at batch 100 is 1.57
loss at batch 200 is 1.47
loss at batch 300 is 1.42
loss at batch 400 is 1.52
Epoch no. 3
loss at batch 0 is 1.31
loss at batch 100 is 1.33
loss at batch 200 is 1.21
loss at batch 300 is 1.20
loss at batch 400 is 1.28
Epoch no. 4
loss at batch 0 is 1.11
loss at batch 100 is 1.15
loss at batch 200 is 1.02
loss at batch 300 is 1.04
loss at batch 400 is 1.11
Epoch no. 5
loss at batch 0 is 0.96
loss at batch 100 is 1.01
loss at batch 200 is 0.89
loss at batch 300 is 0.92
loss at batch 400 is 0.99
Epoch no. 6
loss at batch 0 is 0.86
loss at batch 100 is 0.91
loss at batch 200 is 0.79
loss at batch 300 is 0.83
loss at batch 400 is 0.90
Epoch no. 7
loss at 

In [19]:
import numpy as np

# Forward pass over the test set, converted from a tensor to a NumPy array.
predictions = model(test_images).numpy()

# The predicted class for each row is the index of its largest score.
predicted_labels = np.argmax(predictions, axis=1)

# Boolean array marking correct predictions; its mean is the accuracy.
matches = test_labels == predicted_labels
print(f"accuracy: {np.mean(matches):.2f}")

accuracy: 0.82
