In [1]:
#Import libraries
import tensorflow as tf
import numpy as np

In [2]:
# Report the library versions this notebook was executed with
print(f"TensorFlow Version: {tf.__version__}")
print(f"Numpy Version: {np.__version__}")

TensorFlow Version: 2.3.1
Numpy Version: 1.19.2


In [3]:
# Git build identifier and release version of the installed TensorFlow
print(f"{tf.version.GIT_VERSION} {tf.version.VERSION}")

unknown 2.3.1


In [4]:
# Bind the Keras MNIST dataset module to a short name. Nothing is loaded yet:
# the image/label arrays are only produced when mnist.load_data() is called
# in the next cell.
mnist = tf.keras.datasets.mnist

In [5]:
# load_data() returns MNIST already divided into a training pair and a test
# pair, each of the form (images, labels); unpack them in two steps.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [6]:
# Divide the pixel values by 255 to bring them into the [0.0, 1.0] range.
# NOTE: the arrays are rebound to their scaled versions, so running this cell
# a second time without reloading the data scales them twice.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [7]:
# Shapes of the training and test image arrays
tuple(images.shape for images in (x_train, x_test))

((60000, 28, 28), (10000, 28, 28))

In [8]:
# Distinct label values present in the training and test targets
tuple(np.unique(labels) for labels in (y_train, y_test))

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8))

In [9]:
# Build the Sequential classifier.
# Seed TensorFlow's global random generator first so that the randomly
# initialised layer weights are the same each time this cell is run; without
# a seed every Restart & Run All produces different logits, losses and
# accuracies. Later random ops that carry no seed of their own (e.g. dropout)
# also derive from this global seed. Some GPU kernels may still be
# non-deterministic.
SEED = 42
tf.random.set_seed(SEED)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),  # 28x28 image -> 784-vector
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),                   # active during training only
    tf.keras.layers.Dense(10)                       # raw logits, no softmax applied
])

In [10]:
# Print a per-layer table (layer type, output shape, parameter count) followed
# by the parameter totals. Every layer passed to Sequential is listed, including
# the final Dense output layer; the input itself has no row of its own.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Shapes of all weight tensors: one kernel and one bias per Dense layer
[weight.shape for weight in model.weights]

[TensorShape([784, 128]),
 TensorShape([128]),
 TensorShape([128, 64]),
 TensorShape([64]),
 TensorShape([64, 10]),
 TensorShape([10])]

In [12]:
# Total parameter count: number of elements in each weight tensor, summed.
# Agrees with the "Total params" line of model.summary().
np.sum([np.prod(weight.shape) for weight in model.weights])

109386

In [13]:
# Shape of a one-image batch: slicing with [:1] keeps the leading batch axis,
# giving (1, 28, 28), whereas plain indexing with [0] would drop that axis.
x_train[:1].shape #x_train[0].shape -> (28, 28)

(1, 28, 28)

In [14]:
# Forward pass on a one-image batch, before any training has been done.
# The final Dense(10) layer has no activation, so the result holds one raw
# logit per output unit, not probabilities.
pred = model(x_train[:1])
pred

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-0.2812133 , -0.5844016 ,  0.2925447 , -0.24334165, -0.5506126 ,
        -0.02314209,  0.49853683, -0.32296348, -0.10521667,  0.24225092]],
      dtype=float32)>

In [15]:
# Sum of the raw logits. Logits are unnormalised scores, so this total has no
# particular meaning (contrast with the softmax sum in the next cell).
np.sum(pred)

-1.0775589

In [16]:
# Softmax turns the logits into a probability distribution over the 10 outputs,
# so its entries sum to 1.
np.sum(tf.nn.softmax(pred))

1.0

In [17]:
# Configure training.
# The network emits raw logits and the labels are integer class ids, so the
# loss is sparse categorical cross-entropy built with from_logits=True. The
# plain string alias for this loss would not set from_logits, so the object
# form is used.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# NOTE: the 'accuracy' string is not interchangeable with
# tf.keras.metrics.Accuracy(), which tests predictions and labels for exact
# equality and would compare logits against class ids.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),  # Adam with its default settings
    loss=loss_fn,
    metrics=['accuracy'],
)

In [18]:
# Fit for 5 passes over the training set. No batch_size is given, so the Keras
# default of 32 applies (60000 / 32 = 1875 steps per epoch).
# verbose=2 prints one summary line per epoch instead of a progress bar.
model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 11s - loss: 0.3842 - accuracy: 0.8887
Epoch 2/5
1875/1875 - 11s - loss: 0.1702 - accuracy: 0.9519
Epoch 3/5
1875/1875 - 11s - loss: 0.1261 - accuracy: 0.9646
Epoch 4/5
1875/1875 - 11s - loss: 0.1015 - accuracy: 0.9705
Epoch 5/5
1875/1875 - 15s - loss: 0.0846 - accuracy: 0.9754


<tensorflow.python.keras.callbacks.History at 0x7fb5e4306160>

In [19]:
# Measure loss and accuracy on the held-out test images; returns [loss, accuracy]
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - loss: 0.0887 - accuracy: 0.9745


[0.08871984481811523, 0.9745000004768372]

In [20]:
# Display every weight variable of the model (kernels and biases) after training.
# NOTE(review): this echoes the full arrays and produces a very long output;
# the shape-only listing in cell 11 is a more compact view.
model.weights

[<tf.Variable 'dense/kernel:0' shape=(784, 128) dtype=float32, numpy=
 array([[ 0.07532626, -0.06384136,  0.06075919, ..., -0.06172321,
          0.01852404, -0.06608634],
        [-0.07155178,  0.04215815, -0.01569028, ...,  0.04940862,
          0.04105105, -0.01039482],
        [-0.0708271 , -0.02865387, -0.00497837, ..., -0.06653178,
          0.05019989, -0.07354483],
        ...,
        [-0.04881169, -0.04165126, -0.03424032, ...,  0.00023571,
         -0.06361999,  0.02430227],
        [ 0.06801414,  0.01041029, -0.01180425, ...,  0.0734253 ,
          0.03529762, -0.03592621],
        [ 0.06078602,  0.01741731, -0.07609256, ...,  0.02920101,
          0.05281069,  0.04570641]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(128,) dtype=float32, numpy=
 array([-0.00816435,  0.05467582,  0.0059404 , -0.04810855, -0.0122777 ,
        -0.03747416, -0.01295789,  0.02554084,  0.03455232, -0.0524217 ,
        -0.14098917, -0.0906456 ,  0.0900158 , -0.06211576,  0.08093797,
    