In [None]:
"""
Removed the unnecessary activation function from the output layer
(activation=tf.nn.softmax); the loss is computed directly from the logits.
"""

In [2]:
import tensorflow as tf
import os
from datasets import MNISTDataset
import time
# TensorBoard setup: the log directory name embeds the current timestamp, so
# each run writes its event files to a separate directory.
logdir = os.path.join("logs", "linear" + str(time.time()))
train_writer = tf.summary.create_file_writer(os.path.join(logdir, "train"))
test_writer = tf.summary.create_file_writer(os.path.join(logdir, "test"))

# Load MNIST and flatten every image into a 784-element vector.
(train_imgs, train_lbls), (test_imgs, test_lbls) = tf.keras.datasets.mnist.load_data()
# `time` is imported as a module, so the clock has to be read with
# time.time(); calling the module itself (`time()`) raises TypeError.
# NOTE(review): a clock-based seed presumably makes the batch order differ
# between runs -- switch to a fixed integer if reproducibility is wanted.
mnist = MNISTDataset(train_imgs.reshape((-1, 784)), train_lbls,
                     test_imgs.reshape((-1, 784)), test_lbls,
                     batch_size=256, seed=int(time.time()))


# Model definition: an ordered list of Keras layers, applied one after the
# other from input to output.

# Keep the network small.
n_units = 100
n_layers = 2

# Hidden stack: `n_layers` identical ReLU layers. Each layer gets its own
# initializer instances; kernels start in a narrow uniform range and biases
# at a small positive constant.
layers = [
    tf.keras.layers.Dense(
        n_units,
        activation=tf.nn.relu,
        kernel_initializer=tf.initializers.RandomUniform(minval=-0.01,
                                                         maxval=0.01),
        bias_initializer=tf.initializers.constant(0.001))
    for _ in range(n_layers)
]

# Output layer: 10 units, one per class. No activation is set here, so the
# layer emits raw logits; softmax is applied inside the cross-entropy loss.
output_kernel_init = tf.initializers.RandomUniform(minval=-0.01, maxval=0.01)
layers.append(tf.keras.layers.Dense(10, kernel_initializer=output_kernel_init))

# Plain gradient-descent training loop with TensorBoard logging.
lr = 0.1
for step in range(2000):
    img_batch, lbl_batch = mnist.next_batch()
    with tf.GradientTape() as tape:
        # Forward pass: feed the batch through every layer in order.
        out = img_batch
        for layer in layers:
            out = layer(out)
        # `out` holds raw logits; the loss op applies the softmax itself.
        xent = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=out, labels=lbl_batch))

    # Collected after the forward pass: Keras layers create their variables
    # lazily on first call, so the list would be empty before that.
    weights = [var for l in layers for var in l.trainable_variables]
    grads = tape.gradient(xent, weights)
    # Manual SGD update: var <- var - lr * grad.
    for grad, var in zip(grads, weights):
        var.assign_sub(lr*grad)

    # Per-step summaries. Every summary call must run inside a writer's
    # as_default() context; a call made outside one (as the former stray
    # "Loss" scalar was) has no default writer and records nothing.
    with train_writer.as_default():
        tf.summary.scalar("loss", xent, step=step)
        tf.summary.histogram("logits", out, step=step)
        for i, W in enumerate(weights):
            tf.summary.histogram("weights"+str(i), W, step=step)

    # Every 100 steps: report accuracy on the current training batch and log
    # the batch images.
    if not step % 100:
        preds = tf.argmax(out, axis=1, output_type=tf.int32)
        acc = tf.reduce_mean(tf.cast(tf.equal(preds, lbl_batch), tf.float32))
        print("Loss: {} Accuracy: {}".format(xent, acc))
        with train_writer.as_default():
            tf.summary.scalar("Trainaccuracy", acc, step=step)
            tf.summary.image("input", tf.reshape(img_batch, [-1, 28, 28, 1]), step=step)

# Final evaluation: a single forward pass over the complete test set.
out = mnist.test_data
for layer in layers:
    out = layer(out)

# Predicted class = index of the largest logit in each row.
test_preds = tf.argmax(out, axis=1, output_type=tf.int32)
test_hits = tf.equal(test_preds, mnist.test_labels)
acc = tf.reduce_mean(tf.cast(test_hits, tf.float32))

print("Final test accuracy: {}".format(acc))
# `step` still holds the last value assigned by the training loop, so the
# test accuracy is logged at the final training step.
with test_writer.as_default():
    tf.summary.scalar("Testaccuracy", acc, step=step)


Loss: 2.3025732040405273 Accuracy: 0.078125
Loss: 2.2994253635406494 Accuracy: 0.1328125
Loss: 2.301631450653076 Accuracy: 0.109375
Starting new epoch...
Loss: 2.2971246242523193 Accuracy: 0.13671875
Loss: 2.300426483154297 Accuracy: 0.125
Starting new epoch...
Loss: 2.0981760025024414 Accuracy: 0.265625
Loss: 1.1062638759613037 Accuracy: 0.61328125
Loss: 0.6072927713394165 Accuracy: 0.7890625
Starting new epoch...
Loss: 0.6619802713394165 Accuracy: 0.80859375
Loss: 0.5937100648880005 Accuracy: 0.83203125
Starting new epoch...
Loss: 0.4487733840942383 Accuracy: 0.86328125
Loss: 0.5037652254104614 Accuracy: 0.84765625
Starting new epoch...
Loss: 0.532645583152771 Accuracy: 0.828125
Loss: 0.44039982557296753 Accuracy: 0.86328125
Loss: 0.4009498655796051 Accuracy: 0.88671875
Starting new epoch...
Loss: 0.3070647716522217 Accuracy: 0.91796875
Loss: 0.27986472845077515 Accuracy: 0.91015625
Starting new epoch...
Loss: 0.26541846990585327 Accuracy: 0.92578125
Loss: 0.3387908637523651 Accuracy