In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
from collections import namedtuple
import numpy as np

2024-05-31 23:04:25.046668: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
def residual_block(inputs, num_filters, bottleneck_size, activation=tf.nn.relu):
    """Bottleneck residual unit with a projected skip connection.

    Main path: 1x1 convolution down to ``bottleneck_size`` channels, a 3x3
    convolution at that width, then a linear 1x1 convolution up to
    ``num_filters`` channels. Skip path: a linear 1x1 convolution of
    ``inputs`` to ``num_filters`` channels. The two paths are summed and
    ``activation`` is applied to the sum.

    Args:
        inputs: Keras tensor fed to both the main and the skip path.
        num_filters: Number of output channels of the block.
        bottleneck_size: Channel width used inside the main path.
        activation: Activation for the two inner convolutions and for the
            merged output.

    Returns:
        The activated sum of the main path and the skip projection.
    """
    pointwise = (1, 1)

    # The skip projection is instantiated before the main-path layers.
    skip = layers.Conv2D(
        filters=num_filters,
        kernel_size=pointwise,
        activation=None,
        padding='valid',
    )(inputs)

    # Squeeze -> spatial 3x3 -> expand.
    squeezed = layers.Conv2D(
        filters=bottleneck_size,
        kernel_size=pointwise,
        activation=activation,
        padding='valid',
    )(inputs)
    mixed = layers.Conv2D(
        filters=bottleneck_size,
        kernel_size=(3, 3),
        activation=activation,
        padding='same',
    )(squeezed)
    expanded = layers.Conv2D(
        filters=num_filters,
        kernel_size=pointwise,
        activation=None,
        padding='valid',
    )(mixed)

    merged = layers.add([expanded, skip])
    return layers.Activation(activation)(merged)

In [8]:
def residual_network(input_shape, n_outputs, activation=tf.nn.relu, blocks=None):
    """Build a bottleneck residual classifier as a Keras functional model.

    Layout: a 7x7 stride-2 convolution and a 3x3 stride-2 max-pool (the
    stem), followed by a sequence of stages. Each stage stacks
    ``num_repeats`` residual blocks; between consecutive stages a linear 1x1
    stride-2 convolution halves the spatial size and switches to the next
    stage's channel count. The result is global-average-pooled and fed to a
    softmax ``Dense`` layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        n_outputs: Number of units in the softmax output layer.
        activation: Activation used by the stem convolution and passed to
            every residual block.
        blocks: Optional iterable of
            ``(num_repeats, num_filters, bottleneck_size)`` triples, one per
            stage. ``None`` selects the default four-stage configuration
            (3 repeats each; 128/256/512/1024 filters with bottlenecks of
            32/64/128/256).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping inputs to class
        probabilities.
    """
    LayerBlock = namedtuple('LayerBlock', ['num_repeats', 'num_filters', 'bottleneck_size'])
    if blocks is None:
        blocks = [(3, 128, 32),
                  (3, 256, 64),
                  (3, 512, 128),
                  (3, 1024, 256)]
    # Normalise to named tuples so callers may pass plain triples.
    stages = [LayerBlock(*spec) for spec in blocks]

    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Conv2D(64, (7, 7), activation=activation, padding='same', strides=(2, 2))(inputs)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    last_stage_i = len(stages) - 1
    for stage_i, stage in enumerate(stages):
        for _ in range(stage.num_repeats):
            x = residual_block(x, stage.num_filters, stage.bottleneck_size, activation)
        # Downsample between stages only; the last stage feeds the pooling head directly.
        if stage_i < last_stage_i:
            x = layers.Conv2D(stages[stage_i + 1].num_filters, (1, 1), padding='same', strides=(2, 2), activation=None)(x)

    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(n_outputs, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [9]:
def preprocess_data(x, y, num_classes=10, image_shape=(28, 28, 1)):
    """Scale images to [0, 1] floats and one-hot encode the labels.

    Args:
        x: Array-like of image pixel values; it is reshaped to
            ``(-1,) + image_shape`` and divided by 255, so 0-255 pixel
            values are assumed — TODO confirm for non-MNIST callers.
        y: Array-like of integer class labels.
        num_classes: Width of the one-hot encoding. Defaults to 10.
        image_shape: Per-sample shape after reshaping. Defaults to
            ``(28, 28, 1)``.

    Returns:
        Tuple ``(x, y)``: ``x`` as a float32 array of shape
        ``(-1,) + image_shape``, and ``y`` as returned by
        ``tf.keras.utils.to_categorical``.
    """
    # np.asarray lets plain sequences through; ndarrays are passed as-is.
    x = np.asarray(x).reshape((-1,) + tuple(image_shape)).astype('float32') / 255.0
    y = tf.keras.utils.to_categorical(y, num_classes)
    return x, y

In [10]:
def test_mnist(epochs=5, batch_size=50, seed=None):
    """Train the residual network on MNIST and evaluate it.

    Args:
        epochs: Number of training epochs. Defaults to 5.
        batch_size: Mini-batch size used for training. Defaults to 50.
        seed: Optional integer passed to ``tf.keras.utils.set_random_seed``
            before the model is built. ``None`` (the default) leaves the
            random state untouched.

    Returns:
        Tuple ``(model, history, scores)``: the trained model, the object
        returned by ``model.fit``, and the value returned by
        ``model.evaluate`` on the held-out split.
    """
    if seed is not None:
        tf.keras.utils.set_random_seed(seed)

    # The second split returned by load_data() is used for both per-epoch
    # validation and the final evaluation.
    (x_train, y_train), (x_valid, y_valid) = datasets.mnist.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    model = residual_network(input_shape=(28, 28, 1), n_outputs=10)
    # Labels are one-hot encoded by preprocess_data, hence the dense
    # categorical cross-entropy loss.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                        validation_data=(x_valid, y_valid))
    scores = model.evaluate(x_valid, y_valid, verbose=2)
    return model, history, scores

In [None]:
# Entry point: run the MNIST train/evaluate demo only when this code is
# executed as the main module, not when it is imported.
if __name__ == '__main__':
    test_mnist()

Epoch 1/5
Epoch 2/5
 259/1200 [=====>........................] - ETA: 5:33 - loss: 0.1774 - accuracy: 0.9537