In [None]:
#import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
# Data prep: build the MNIST training input pipeline.
ds = tfds.load('mnist', split='train')

# tfds yields a feature dict per example; turn it into an (image, label) tuple.
# The parentheses matter: without them the lambda would end at the comma and
# the second expression would be passed to `map` as a separate argument.
ds = ds.map(lambda feature_dict: (feature_dict['image'], feature_dict['label']))
# Flatten each image into a 1-D vector. The shape must be a 1-D list/tuple:
# `(-1)` is just the integer -1, so spell it as `[-1]`.
ds = ds.map(lambda image, label: (tf.reshape(image, [-1]), label))
# Cast pixels to float32 and rescale with x / 128 - 1.
ds = ds.map(lambda image, label: ((tf.cast(image, tf.float32) / 128) - 1, label))
# One-hot encode the label into 10 classes to match the softmax output layer.
ds = ds.map(lambda image, label: (image, tf.one_hot(label, depth=10)))
ds = ds.shuffle(1024).batch(128)
ds = ds.prefetch(4)

# Sanity check: inspect one batch. Print shapes rather than dumping the full
# tensors into the notebook output.
for x, y in ds.take(1):
    print(x.shape, y.shape)

In [None]:
#Model Creation via Subclassing from tf.keras.Model
class MLP_Model(tf.keras.Model):
    """Multi-layer perceptron: a stack of sigmoid Dense layers plus a softmax output.

    Args:
        layer_sizes: iterable with the number of units of each hidden layer,
            e.g. ``[256, 256]``. One Dense layer is created per entry.
        output_size: number of units of the softmax output layer (default 10).
    """

    def __init__(self, layer_sizes, output_size=10):
        super().__init__()
        # Hidden layers, applied in the order given by `layer_sizes`.
        self.mlp_layers = [
            tf.keras.layers.Dense(units=layer_size, activation='sigmoid')
            for layer_size in layer_sizes
        ]
        # `tf.keras.layers` (plural) is the module that provides Dense.
        self.output_layer = tf.keras.layers.Dense(units=output_size, activation='softmax')

    def call(self, x):
        """Run `x` through every hidden layer, then return the output layer's result."""
        for layer in self.mlp_layers:
            x = layer(x)
        return self.output_layer(x)


In [None]:
EPOCHS = 10

# Training: custom loop using tf.GradientTape.
model = MLP_Model(layer_sizes=(256, 256))
cce = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.02)

for epoch in range(EPOCHS):
    # Collect per-batch losses for the whole epoch; this must be reset once per
    # epoch (not once per batch), otherwise the mean below covers a single batch.
    losses = []
    for x, target in ds:
        # Record the forward pass so the tape can differentiate the loss.
        with tf.GradientTape() as tape:
            # Call the model object rather than `model.call` so Keras'
            # `__call__` machinery runs.
            pred = model(x)
            loss = cce(target, pred)

        # Differentiate the loss w.r.t. the trainable weights and apply one SGD step.
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        losses.append(loss.numpy())
    # Mean training loss over all batches of this epoch.
    print(np.mean(losses))
