In [None]:
import tensorflow as tf
import numpy as np


In [None]:
# Image geometry used throughout the notebook: 28x28 pixels, one channel.
height, width, channels = 28, 28, 1
n_inputs = height * width

# Fetch the MNIST train/test arrays through Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert images to float32, divide by 255.0 and flatten each one to a
# row of n_inputs values.
X_train = (X_train.astype(np.float32) / 255.0).reshape(-1, n_inputs)
X_test = (X_test.astype(np.float32) / 255.0).reshape(-1, n_inputs)

# Labels are stored as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# The first 5000 training examples become the validation set; the rest
# stay in the training set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Convolutional classifier: conv -> pool -> conv -> pool -> flatten -> dense -> softmax.
model = tf.keras.Sequential([
    # First convolution: 32 filters, 3x3 kernel, stride 1.
    tf.keras.layers.Conv2D(
        filters=32, kernel_size=3, strides=1,
        padding="same", activation=tf.nn.relu, name="conv1",
    ),
    # Pooling with the layer's default arguments.
    tf.keras.layers.MaxPool2D(),
    # Second convolution: 64 filters, 3x3 kernel, stride 2.
    tf.keras.layers.Conv2D(
        filters=64, kernel_size=3, strides=2,
        padding="same", activation=tf.nn.relu, name="conv2",
    ),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="valid"),
    tf.keras.layers.Flatten(),
    # Dense head: 64 hidden units, then one softmax output per class (10).
    tf.keras.layers.Dense(units=64, activation="relu", name="fc1"),
    tf.keras.layers.Dense(units=10, activation="softmax", name="outputs"),
])
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f3db80b5780>

In [None]:
def shuffle_batch_cnn(X, y, batch_size, image_shape=None):
    """Yield shuffled mini-batches with the inputs reshaped to image tensors.

    The sample indices are permuted once and split into
    ``len(X) // batch_size`` nearly equal chunks (at least one), so every
    sample is yielded exactly once and chunks may hold slightly more than
    ``batch_size`` samples when the division is not exact.

    Args:
        X: Array of samples indexable by an integer index array; each
            selected batch must be reshapeable to ``(-1, *image_shape)``.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of samples per batch; must be >= 1.
        image_shape: Per-sample shape to reshape to. Defaults to the
            notebook-level ``(height, width, channels)``.

    Yields:
        ``(X_batch, y_batch)`` tuples where ``X_batch`` has shape
        ``(n, *image_shape)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when
            iteration starts, since this is a generator).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if image_shape is None:
        # Fall back to the geometry defined at notebook level.
        image_shape = (height, width, channels)

    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; avoid yielding an empty batch.
        return

    rnd_idx = np.random.permutation(n_samples)
    # Fewer samples than batch_size would give 0 sections, which
    # np.array_split rejects; emit a single (smaller) batch instead.
    n_batches = max(1, n_samples // batch_size)

    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch.reshape(-1, *image_shape), y_batch

# Step size handed to the Adam optimizer below.
learning_rate = 0.01


def loss_sparse(labels, logits):
    """Return the mean sparse categorical cross-entropy over a batch.

    Args:
        labels: Integer class labels for the batch.
        logits: Model predictions, forwarded unchanged as the prediction
            argument. ``from_logits`` is left at its default, and the
            model in this notebook ends in a softmax layer, so despite
            the parameter name these are class probabilities.

    Returns:
        Scalar tensor: the per-example losses averaged over the batch.
    """
    per_example = tf.losses.sparse_categorical_crossentropy(labels, logits)
    return tf.reduce_mean(per_example)


optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

def run_training(X, y):
    """Run one optimisation step on a mini-batch and return its loss.

    Uses the notebook-level ``model``, ``loss_sparse`` and ``optimizer``.

    Args:
        X: Batch of inputs accepted by ``model``.
        y: Integer class labels for the batch.

    Returns:
        Scalar loss tensor computed on this batch before the weight update.
    """
    with tf.GradientTape() as tape:
        # training=True makes layers whose behaviour depends on the mode
        # (e.g. dropout, batch normalisation) run in training mode; it is a
        # no-op for layers that do not distinguish the two.
        pred = model(X, training=True)
        loss = loss_sparse(labels=y, logits=pred)

    # Differentiate outside the tape context, then apply the update.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss


In [None]:
# Compile so that model.evaluate() can report loss and accuracy; the weight
# updates themselves are done by the custom run_training() step.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Leave the batch dimension as None: the model is trained and evaluated with
# batches of varying size, so it should not be pinned to a batch of 1.
model.build(input_shape=(None, height, width, channels))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (1, 28, 28, 32)           320       
_________________________________________________________________
conv2 (Conv2D)               (1, 14, 14, 64)           18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (1, 7, 7, 64)             0         
_________________________________________________________________
flatten_2 (Flatten)          (1, 3136)                 0         
_________________________________________________________________
fc1 (Dense)                  (1, 64)                   200768    
_________________________________________________________________
outputs (Dense)              (1, 10)                   650       
Total params: 220,234
Trainable params: 220,234
Non-trainable params: 0
________________________________________________

In [None]:
num_epochs = 20
batch_size = 64

for epoch in range(num_epochs):
    # Collect every mini-batch loss so the epoch report reflects the whole
    # pass over the data rather than whichever batch happened to come last.
    batch_losses = []
    for X_batch, y_batch in shuffle_batch_cnn(X_train, y_train, batch_size=batch_size):
        loss = run_training(X_batch, y_batch)
        batch_losses.append(float(loss))
    # NaN (instead of a made-up sentinel) if an epoch produced no batches.
    epoch_loss = np.mean(batch_losses) if batch_losses else float("nan")
    print('Epoch %d Loss %.4f' % (epoch + 1, epoch_loss))


Epoch 1 Loss 0.0139
Epoch 2 Loss 0.0840
Epoch 3 Loss 0.0331
Epoch 4 Loss 0.0370
Epoch 5 Loss 0.0124
Epoch 6 Loss 0.0034
Epoch 7 Loss 0.0045
Epoch 8 Loss 0.1178
Epoch 9 Loss 0.0171
Epoch 10 Loss 0.1187
Epoch 11 Loss 0.0025
Epoch 12 Loss 0.0054
Epoch 13 Loss 0.0009
Epoch 14 Loss 0.0622
Epoch 15 Loss 0.0000
Epoch 16 Loss 0.0376
Epoch 17 Loss 0.1988
Epoch 18 Loss 0.0004
Epoch 19 Loss 0.0357
Epoch 20 Loss 0.0175


In [None]:
# X_batch / y_batch are the last mini-batch left over from the training loop
# (already reshaped to images by shuffle_batch_cnn).
acc_batch = model.evaluate(X_batch, y_batch)
# Evaluate on the held-out test split so the data matches the "Test" label;
# it is stored flattened, so reshape it to image tensors first.
acc_test = model.evaluate(X_test.reshape(-1, height, width, channels), y_test)
# evaluate() returns [loss, accuracy] for this compiled model; report only
# the accuracy entry under the "accuracy" labels.
print("Batch accuracy:", acc_batch[1], "Test accuracy:", acc_test[1])

Batch accuracy: [0.009051492437720299, 1.0] Test accuracy: [0.17800623178482056, 0.982200026512146]


In [None]:
# Display the version string of the imported TensorFlow package.
tf.__version__


'2.4.0'