In [2]:
import tensorflow as tf

In [3]:
# Minimal automatic-differentiation demo: the tape records the operations run
# inside the `with` block so that `tape.gradient` can differentiate y = 2*x + 3
# with respect to the variable x.
x = tf.Variable(initial_value=0.0)
with tf.GradientTape() as tape:
    y = 2 * x + 3
gradient_y_wrt_x = tape.gradient(target=y, sources=x)

In [4]:
# Display the gradient computed in the previous cell (dy/dx for y = 2*x + 3).
gradient_y_wrt_x

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [5]:
class Naive_Dense:
    """Minimal fully connected layer computing ``activation(inputs @ W + b)``."""

    def __init__(self, input_size, output_size, activation):
        """Create the layer's two variables.

        Args:
            input_size: number of rows of the weight matrix ``W``.
            output_size: number of columns of ``W`` and length of the bias ``b``.
            activation: callable applied to the affine output in ``__call__``.
        """
        self.activation = activation
        # W starts from uniform random values drawn between 0 and 0.1.
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )
        # b starts at zero, one entry per output unit.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Apply the affine transform followed by the activation."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]
        
        
        



In [6]:
class NaiveSequential:
    """Chains layers so that the output of each one is the input of the next."""

    def __init__(self, layers):
        """Store the ordered collection of callable layers."""
        self.layers = layers

    def __call__(self, inputs):
        """Run ``inputs`` through every layer in order and return the result."""
        output = inputs
        for layer in self.layers:
            output = layer(output)
        return output

    # Read-only property: `model.weights` is recomputed on each access and no
    # setter is defined.
    @property
    def weights(self):
        """Flat list of every layer's weights, in layer order."""
        return [weight for layer in self.layers for weight in layer.weights]

In [25]:
# Two stacked dense layers: 28*28 inputs -> 512 units (ReLU) -> 10 units (softmax).
model = NaiveSequential(
    [
        Naive_Dense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        Naive_Dense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)
# Each dense layer exposes [W, b], so two layers must yield four variables.
assert len(model.weights) == 4

In [17]:
#a batch generator
import math
class Batch_Generator:
    """Hands out consecutive slices of paired images and labels."""

    def __init__(self, images, labels, batch_size=128):
        """Remember the data and work out how many batches cover it.

        Args:
            images: sliceable sequence of samples.
            labels: sliceable sequence with the same length as ``images``.
            batch_size: number of samples per slice.
        """
        assert len(images) == len(labels)
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.index = 0
        # Rounded up so that a final, shorter batch is counted as well.
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = self.images[window], self.labels[window]
        # Advance only after both slices have been taken.
        self.index = window.stop
        return batch

In [19]:
def one_training_step(model, images_batch, labels_batch):
    """Run one gradient-descent step on a single batch and return its mean loss.

    Args:
        model: callable producing predictions; must expose a ``weights`` list.
        images_batch: inputs passed to ``model``.
        labels_batch: targets passed as ``y_true`` to
            ``sparse_categorical_crossentropy``.

    Returns:
        The batch's average loss, as computed before the weight update.
    """
    # The forward pass and the loss must run inside the tape to be differentiable.
    with tf.GradientTape() as tape:
        batch_predictions = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                labels_batch, batch_predictions
            )
        )
    model_weights = model.weights
    gradients = tape.gradient(average_loss, model_weights)
    update_weights(gradients, model_weights)
    return average_loss

In [20]:
# Step size used by update_weights; read from module scope at call time.
learning_rate = 1e-3


def update_weights(gradients, weights):
    """Move each weight against its gradient, scaled by ``learning_rate``.

    Args:
        gradients: iterable of gradients, paired positionally with ``weights``.
        weights: iterable of objects exposing ``assign_sub``; updated in place.
    """
    for gradient, weight in zip(gradients, weights):
        step = gradient * learning_rate
        weight.assign_sub(step)

In [21]:
#full training loop: runs `epochs` passes over the data, one batch at a time
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` for ``epochs`` passes over ``images`` and ``labels``.

    Args:
        model: model handed to ``one_training_step`` for every batch.
        images: full set of training inputs.
        labels: full set of training targets, aligned with ``images``.
        epochs: number of complete passes over the data.
        batch_size: number of samples per training step.
    """
    for _ in range(epochs):
        # A new generator per epoch restarts batching from the first sample.
        batch_generator = Batch_Generator(images, labels, batch_size)
        for batch_counter in range(batch_generator.num_batches):
            loss = one_training_step(model, *batch_generator.next())
            # Report progress on every 100th batch, starting with batch 0.
            if not batch_counter % 100:
                print(f"at batch number {batch_counter}: loss = {loss}")
                
                

In [22]:
from tensorflow.keras.datasets import mnist
# Load MNIST as (images, labels) pairs for the training split and the test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [23]:
# Flatten every 28x28 image into a 784-element row and rescale the pixel values
# by 1/255 as float32.
# - The sample count comes from len(...) instead of the hard-coded 60000/10000,
#   so the cell also works on a subset of the data.
# - The dtype guard keeps the cell idempotent: without it, re-running the cell
#   would divide the already-scaled float32 data by 255 a second time.
train_images = train_images.reshape((len(train_images), 28 * 28))
if train_images.dtype != "float32":
    train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((len(test_images), 28 * 28))
if test_images.dtype != "float32":
    test_images = test_images.astype("float32") / 255

In [26]:
# Train for 10 epochs with batches of 128 samples; fit prints the loss every 100 batches.
fit(model, train_images, train_labels, epochs=10, batch_size=128)

at batch number 0: loss = 8.990598678588867
at batch number 100: loss = 2.2354736328125
at batch number 200: loss = 2.212531566619873
at batch number 300: loss = 2.070934295654297
at batch number 400: loss = 2.2353968620300293
at batch number 0: loss = 1.8969990015029907
at batch number 100: loss = 1.874892234802246
at batch number 200: loss = 1.827032208442688
at batch number 300: loss = 1.6866568326950073
at batch number 400: loss = 1.8382327556610107
at batch number 0: loss = 1.5721595287322998
at batch number 100: loss = 1.574385166168213
at batch number 200: loss = 1.5014338493347168
at batch number 300: loss = 1.4053261280059814
at batch number 400: loss = 1.5099635124206543
at batch number 0: loss = 1.3175833225250244
at batch number 100: loss = 1.336580514907837
at batch number 200: loss = 1.239443302154541
at batch number 300: loss = 1.1913337707519531
at batch number 400: loss = 1.2690908908843994
at batch number 0: loss = 1.1200611591339111
at batch number 100: loss = 1.1558

In [27]:
import numpy as np
# Score the trained model on the test split: for each row of the model output,
# take the index of the largest value as the predicted label, then report the
# fraction of predictions that equal the true labels.
predictions = model(test_images).numpy()
predicted_labels = predictions.argmax(axis=1)
matches = test_labels == predicted_labels
print(f"accuracy : {matches.mean(): .2f}")

accuracy :  0.82
