In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [3]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers.legacy import Adam

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train_div = x_train / 255
x_test_div = x_test / 255

y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

In [5]:
class Conv2D_(tf.Module):
    def __init__(self, kernel=(3, 3), filters=1, strides=(1, 1), padding='VALID', activation='relu'):
        super().__init__()
        self.kernel = kernel
        self.filters = filters
        self.strides = strides
        self.padding = padding
        self.activation = activation
        self.fl_init = False
    def __call__(self, x):
        if not self.fl_init:
            self.w = tf.random.truncated_normal(shape=(*self.kernel, x.shape[-1], self.filters),
                                                mean=0.0, stddev=0.5, dtype=tf.float32, name='w')
            self.b = tf.zeros(shape=(self.filters,), dtype=tf.float32, name='b')
            
            self.w = tf.Variable(self.w)
            self.b = tf.Variable(self.b)
            self.fl_init = True
        
        y = tf.nn.conv2d(x, self.w, strides=(1, *self.strides, 1), padding=self.padding) + self.b
        
        if self.activation == 'relu':
            return tf.nn.relu(y)
        elif self.activation == 'softmax':
            return tf.nn.softmax(y)
        return y

In [7]:
class Dense_(tf.Module):
    def __init__(self, outputs, activate="relu"):
        super().__init__()
        self.outputs = outputs
        self.activate = activate
        self.fl_init = False

    def __call__(self, x):
        if not self.fl_init:
            self.w = tf.random.truncated_normal((x.shape[-1], self.outputs), stddev=0.1, dtype=tf.float32, name="w")
            self.b = tf.zeros([self.outputs], dtype=tf.float32, name="b")

            self.w = tf.Variable(self.w)
            self.b = tf.Variable(self.b)

            self.fl_init = True

        y = x @ self.w + self.b

        if self.activate == "relu":
            return tf.nn.relu(y)
        elif self.activate == "softmax":
            return tf.nn.softmax(y)

        return y

In [8]:
class Flatten_(tf.Module):
    def __init__(self):
        super().__init__()
    def __call__(self, inputs):
        return tf.reshape(inputs, shape=(inputs.shape[0], -1))

In [9]:
class MaxPooling_(tf.Module):
    def __init__(self, pool_size=(2, 2), strides=(1, 1), padding='VALID'):
        super().__init__()
        self.pool_size = pool_size
        self.strides = strides
        self.padding = padding
    def __call__(self, inputs):
        y = tf.nn.max_pool2d(input=inputs, 
                             ksize=self.pool_size, 
                             strides=self.strides, 
                             padding=self.padding)
        return y

In [10]:
class Model_(tf.keras.Model):
    def __init__(self, name='T-Rex'):
        super().__init__(name=name)
        self.layer1 = Conv2D_((3, 3), 32, (1, 1), 'SAME')
        self.layer2 = Conv2D_((3, 3), 32, (1, 1), 'SAME')
        self.layer3 = MaxPooling_(pool_size=(2, 2), strides=(1, 1), padding='SAME')
        self.layer4 = Conv2D_((3, 3), 64, (1, 1), 'SAME')
        self.layer5 = Conv2D_((3, 3), 64, (1, 1), 'SAME')
        self.layer6 = MaxPooling_(pool_size=(2, 2), strides=(1, 1), padding='SAME')
        self.layer7 = Flatten_()
        self.layer8 = Dense_(10, 'softmax')
        
    def call(self, inputs):
        inputs = tf.expand_dims(inputs, axis=-1)
        y = self.layer1(inputs)
        y = self.layer2(y)
        y = self.layer3(y)
        y = self.layer4(y)
        y = self.layer5(y)
        y = self.layer6(y)
        y = self.layer7(y)
        y = self.layer8(y)
        return y

In [11]:
EPOCHS = 10
TOTAL = x_train_div.shape[0]
BATCH_SIZE = 32
BATCH_EPOCHS = TOTAL // BATCH_SIZE

batch_dataset = tf.data.Dataset.from_tensor_slices((x_train_div, y_train_cat))
batch_dataset = batch_dataset.shuffle(buffer_size=1024).batch(BATCH_SIZE)

opt = Adam(learning_rate=0.0018)
cross_entropy = lambda y_true, y_pred: tf.reduce_mean(tf.losses.categorical_crossentropy(y_true, y_pred))

model = Model_()

@tf.function
def f_loss(x_batch, y_batch):
    with tf.GradientTape(watch_accessed_variables=True, persistent=False) as tape:
        loss = cross_entropy(y_batch, model(x_batch))
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    return loss

In [13]:
for n in range(EPOCHS):
    loss = 0
    cat_accuracy = 0
    passed = 0
    for x_batch, y_batch in batch_dataset:
        loss += f_loss(x_batch, y_batch)
        ac = tf.keras.metrics.CategoricalAccuracy()
        ac.update_state(y_batch, model(x_batch))
        cat_accuracy = ac.result().numpy()
        passed += 1
        print(f'Epoch: {n+1} |{passed}/{BATCH_EPOCHS}| loss: {loss}, bacategorical_accuracy: {cat_accuracy}',\
              end='\r')
    print(f'Epoch: {n+1}| epoch_loss: {loss}, epoch_categorical_accuracy: {cat_accuracy};                               ')

Epoch: 1| epoch_loss: 2048.406005859375, epoch_categorical_accuracy: 0.96875;                               
Epoch: 2| epoch_loss: 1289.505615234375, epoch_categorical_accuracy: 1.0;                               
Epoch: 3| epoch_loss: 906.9053344726562, epoch_categorical_accuracy: 1.0;                               
Epoch: 4| epoch_loss: 597.3038330078125, epoch_categorical_accuracy: 1.0;                               
Epoch: 5| epoch_loss: 408.99932861328125, epoch_categorical_accuracy: 1.0;                               
Epoch: 6| epoch_loss: 309.1285400390625, epoch_categorical_accuracy: 1.0;                               
Epoch: 7| epoch_loss: 255.37704467773438, epoch_categorical_accuracy: 1.0;                               
Epoch: 8| epoch_loss: 179.13046264648438, epoch_categorical_accuracy: 1.0;                               
Epoch: 9| epoch_loss: 164.32870483398438, epoch_categorical_accuracy: 1.0;                               
Epoch: 10| epoch_loss: 124.21366119384766, epoc

In [14]:
ac = tf.keras.metrics.CategoricalAccuracy()
ac.update_state(y_test_cat, model(x_test_div))
test_categorical_accuracy = ac.result().numpy()

mean_test_loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_test_cat, model(x_test_div)))

print(f'Mean_test_loss: {mean_test_loss}, test_categorical_accuracy: {test_categorical_accuracy}')

Mean_test_loss: 0.13927502930164337, test_categorical_accuracy: 0.9818000197410583
