In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
mnist= tf.keras.datasets.mnist.load_data()

In [3]:
input_width = 28
input_height = 28
input_channels = 1
input_pixels = 784

n_conv1 = 32
n_conv2 = 64

stride_conv1 = 1
stride_conv2 = 1
conv1_k = 5
conv2_k = 5

max_pool1_k = 2
max_pool2_k = 2

n_hidden = 1024
n_out = 10

input_size_to_hidden = input_width//(max_pool1_k*max_pool2_k) * input_height//(max_pool1_k*max_pool2_k) * n_conv2

In [4]:
weights = {
    # weights for conv layers
    "wc1" : tf.Variable(tf.random.normal([conv1_k, conv1_k, input_channels, n_conv1])),
    "wc2" : tf.Variable(tf.random.normal([conv2_k, conv2_k, n_conv1, n_conv2])),
    # weights for hidden layers
    "wh1" : tf.Variable(tf.random.normal([input_size_to_hidden, n_hidden])),
    # weights for output layers
    "wo" : tf.Variable(tf.random.normal([n_hidden, n_out]))
}

biases = {
    "bc1" : tf.Variable(tf.random.normal([n_conv1])),
    "bc2" : tf.Variable(tf.random.normal([n_conv2])),
    "bh1" : tf.Variable(tf.random.normal([n_hidden])),
    "bo" : tf.Variable(tf.random.normal([n_out]))
}

In [5]:
def conv(x, weights, bias, strides=1):   
    out = tf.nn.conv2d(x, weights, padding='SAME', strides= [1, strides, strides, 1])
    out = tf.nn.bias_add(out, bias)
    out = tf.nn.relu(out)
    return out

def maxpooling(x,k=2):
    return tf.nn.max_pool(x, padding='SAME', ksize=[1,k,k,1], strides=[1,k,k,1])

In [6]:
def cnn(x, weights, biases, dropout):
    # reshape images to nx28x28x1
    x = tf.reshape(x, shape = [-1 , input_height, input_width, input_channels])

    # passing to conv layer 1 
    conv1 = conv(x, weights["wc1"], biases["bc1"], stride_conv1)
    # passing through max pooling layer 1
    conv1_pool = maxpooling(conv1, max_pool1_k)

    # passing to conv layer 2 
    conv2 = conv(conv1_pool, weights["wc2"], biases["bc2"], stride_conv2)
    # passing through max pooling layer 2
    conv2_pool = maxpooling(conv2, max_pool2_k)

    # passing to dense layer
    hidden_input = tf.reshape(conv2_pool, shape = [-1, input_size_to_hidden])
    hidden_output_before_activation = tf.add(tf.matmul(hidden_input, weights["wh1"]), biases["bh1"])
    hidden_output_before_droput = tf.nn.relu(hidden_output_before_activation)

    # passing through droupout layer
    # dropout _rate  = 1-keep_prob
    hidden_output = tf.nn.dropout(hidden_output_before_droput, dropout)
    # output layer
    output = tf.add(tf.matmul(hidden_output, weights["wo"]), biases["bo"])

    return output

In [7]:
def loss_fn(predictions, y):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels = y))
    return cost

In [8]:
def model(x, dropout):
    return cnn(x, weights, biases, dropout)

In [9]:
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        predictions = model(inputs, dropout = 0.2)
        loss = loss_fn(predictions, labels)
    gradients = tape.gradient(loss, list(weights.values()) + list(biases.values()))
    optimizer.apply_gradients(zip(gradients, list(weights.values()) + list(biases.values())))
    return loss

In [10]:
optimizer = tf.optimizers.Adam(learning_rate=0.01)

In [11]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train = x_train.reshape(-1, input_pixels).astype('float32') / 255.0
x_test = x_test.reshape(-1, input_pixels).astype('float32') / 255.0

y_train = tf.one_hot(y_train, n_out)
y_test = tf.one_hot(y_test, n_out)

In [12]:
batch_size = 100
for i in range(25):
    num_batches = int(len(x_train)/batch_size)
    total_cost = 0
    for j in range(num_batches):
        batch_x = x_train[i*batch_size : (i+1)*batch_size]
        batch_y = y_train[i*batch_size : (i+1)*batch_size]
        loss = train_step(batch_x, batch_y)
        total_cost += loss
    print(f"Epoch {i+1}, Loss: {total_cost/num_batches}")
    print(total_cost)

Epoch 1, Loss: 988.6290893554688
tf.Tensor(593177.44, shape=(), dtype=float32)
Epoch 2, Loss: 17.55411148071289
tf.Tensor(10532.467, shape=(), dtype=float32)
Epoch 3, Loss: 10.601325988769531
tf.Tensor(6360.7954, shape=(), dtype=float32)
Epoch 4, Loss: 5.649898052215576
tf.Tensor(3389.939, shape=(), dtype=float32)
Epoch 5, Loss: 7.158421993255615
tf.Tensor(4295.053, shape=(), dtype=float32)
Epoch 6, Loss: 5.609836101531982
tf.Tensor(3365.9016, shape=(), dtype=float32)
Epoch 7, Loss: 6.1315999031066895
tf.Tensor(3678.96, shape=(), dtype=float32)
Epoch 8, Loss: 6.785881996154785
tf.Tensor(4071.5293, shape=(), dtype=float32)
Epoch 9, Loss: 7.912781715393066
tf.Tensor(4747.669, shape=(), dtype=float32)
Epoch 10, Loss: 7.835995197296143
tf.Tensor(4701.597, shape=(), dtype=float32)
Epoch 11, Loss: 10.647860527038574
tf.Tensor(6388.7163, shape=(), dtype=float32)
Epoch 12, Loss: 4.819756507873535
tf.Tensor(2891.854, shape=(), dtype=float32)
Epoch 13, Loss: 9.837686538696289
tf.Tensor(5902.612,

In [13]:
predictions = model(x_test, dropout = 0.0)
correct_predictions = tf.equal(tf.argmax(predictions, 1), tf.argmax(y_test, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
print(f"Test Accuracy: {accuracy.numpy()}")

Test Accuracy: 0.9280999898910522
