In [35]:
import math
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import optimizers

In [3]:
class Dense:
    """Fully connected layer that returns ``activation(inputs @ W + b)``.

    Args:
        input_size: Number of features in each input row.
        output_size: Number of units (columns of ``W``, length of ``b``).
        activation: Callable applied to the affine output.
    """

    def __init__(self, input_size, output_size, activation):
        # Weights start as small uniform draws between 0 and 0.1; wrapping
        # them in tf.Variable makes them updatable by the optimizer.
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )
        # Biases start at zero, one per output unit.
        self.b = tf.Variable(tf.zeros((output_size,)))
        self.activation = activation

    def __call__(self, inputs):
        """Apply the affine transform followed by the activation."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables, kernel first and bias second."""
        return [self.W, self.b]

In [21]:
class Sequential:
    """Chain of layers applied one after another.

    Args:
        layers: Iterable of callables; each must also expose a ``weights``
            attribute for the ``weights`` property to work.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for apply_layer in self.layers:
            activations = apply_layer(activations)
        return activations

    @property
    def weights(self):
        """Flat list of every layer's weights, in layer order."""
        return [
            variable
            for layer in self.layers
            for variable in layer.weights
        ]

In [18]:
class BatchGenerator:
    """Serve consecutive mini-batches from paired ``images`` and ``labels``.

    Batches are taken in order, without shuffling. ``num_batches`` is rounded
    up, so the final batch may hold fewer than ``batch_size`` samples.

    Args:
        images: Sliceable sequence of samples (must support ``len``).
        labels: Sliceable sequence of targets, same length as ``images``.
        batch_size: Number of samples per batch; must be positive.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if ``images`` and
            ``labels`` differ in length.
    """

    def __init__(self, images, labels, batch_size=128):
        # Fail early with a clear message instead of a ZeroDivisionError
        # (batch_size == 0) or a silently negative num_batches (batch_size < 0).
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        # Mismatched lengths would silently pair samples with wrong targets.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor.

        Calling this after the data is exhausted returns whatever slicing
        past the end yields (empty slices for lists and NumPy arrays).
        """
        start = self.index
        stop = start + self.batch_size
        self.index = stop
        return self.images[start:stop], self.labels[start:stop]

    def __iter__(self):
        """Yield the remaining batches, starting from the current cursor."""
        while self.index < len(self.images):
            yield self.next()

In [10]:
# Single SGD optimizer instance shared by every call to update_weights.
optimizer = optimizers.SGD(learning_rate=1e-3)


def update_weights(gradients, weights):
    """Apply one optimizer step, pairing each gradient with its weight.

    Args:
        gradients: Gradients, in the same order as ``weights``.
        weights: Variables to update in place.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

In [32]:
def training_step(model, images_batch, labels_batch):
    """Run one forward/backward pass on a batch and update the model weights.

    Args:
        model: Callable exposing a ``weights`` list of variables.
        images_batch: Inputs passed to ``model``.
        labels_batch: Targets in the form expected by
            ``sparse_categorical_crossentropy``.

    Returns:
        The mean loss over the batch, computed before the weight update.
    """
    # Record the forward pass so gradients can be taken afterwards.
    with tf.GradientTape() as recorder:
        per_sample_loss = tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, model(images_batch)
        )
        avg_loss = tf.reduce_mean(per_sample_loss)

    variables = model.weights
    update_weights(recorder.gradient(avg_loss, variables), variables)
    return avg_loss

In [33]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` for ``epochs`` full passes over the data.

    Prints the epoch number at the start of each pass and the batch loss
    on every 100th batch.

    Args:
        model: Model handed to ``training_step``.
        images: Training inputs.
        labels: Training targets, aligned with ``images``.
        epochs: Number of passes over the data.
        batch_size: Samples per training step.
    """
    for epoch in range(epochs):
        print(f"Epoch {epoch}")
        # A fresh generator per epoch restarts from the first sample.
        batches = BatchGenerator(images, labels, batch_size=batch_size)
        for batch in range(batches.num_batches):
            loss = training_step(model, *batches.next())
            is_logging_step = batch % 100 == 0
            if is_logging_step:
                print(f"Batch {batch}, loss {loss:.2f}")

In [27]:
# Load the MNIST train and test splits as (images, labels) pairs.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [28]:
# Number of training samples (size of the first axis).
len(train_images)

60000

In [29]:
# Flatten every 28x28 image into a 784-element row, cast to float32, then
# divide by 255 (presumably 8-bit pixel values, giving inputs in [0, 1]).
train_images = train_images.reshape(len(train_images), 28 * 28).astype("float32") / 255
test_images = test_images.reshape(len(test_images), 28 * 28).astype("float32") / 255

In [30]:
# Two-layer network: a 512-unit ReLU hidden layer over the 784 flattened
# pixels, followed by a 10-unit softmax output layer.
model = Sequential([
    Dense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    Dense(input_size=512, output_size=10, activation=tf.nn.softmax),
])

In [23]:
# Each Dense layer contributes W and b, so two layers give four variables.
len(model.weights)

4

In [36]:
# Train for 50 epochs on the training set with mini-batches of 128 samples.
fit(model, train_images, train_labels, epochs=50, batch_size=128)

Epoch 0
Batch 0, loss 0.64
Batch 100, loss 0.66
Batch 200, loss 0.57
Batch 300, loss 0.63
Batch 400, loss 0.71
Epoch 1
Batch 0, loss 0.61
Batch 100, loss 0.62
Batch 200, loss 0.54
Batch 300, loss 0.60
Batch 400, loss 0.68
Epoch 2
Batch 0, loss 0.58
Batch 100, loss 0.59
Batch 200, loss 0.51
Batch 300, loss 0.57
Batch 400, loss 0.66
Epoch 3
Batch 0, loss 0.56
Batch 100, loss 0.56
Batch 200, loss 0.48
Batch 300, loss 0.55
Batch 400, loss 0.64
Epoch 4
Batch 0, loss 0.53
Batch 100, loss 0.53
Batch 200, loss 0.46
Batch 300, loss 0.53
Batch 400, loss 0.62
Epoch 5
Batch 0, loss 0.52
Batch 100, loss 0.51
Batch 200, loss 0.45
Batch 300, loss 0.51
Batch 400, loss 0.61
Epoch 6
Batch 0, loss 0.50
Batch 100, loss 0.49
Batch 200, loss 0.43
Batch 300, loss 0.50
Batch 400, loss 0.59
Epoch 7
Batch 0, loss 0.49
Batch 100, loss 0.48
Batch 200, loss 0.42
Batch 300, loss 0.48
Batch 400, loss 0.58
Epoch 8
Batch 0, loss 0.47
Batch 100, loss 0.46
Batch 200, loss 0.40
Batch 300, loss 0.47
Batch 400, loss 0.57
E

In [39]:
# Forward pass over the whole test set, converted to a NumPy array.
predictions = model(test_images).numpy()
# The predicted class is the index of the largest output in each row.
predicted_labels = predictions.argmax(axis=1)
# Fraction of test samples whose predicted class equals the true label.
matches = predicted_labels == test_labels
print(f"Accuracy: {matches.mean():.2f}")

Accuracy: 0.90
