In [37]:
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)

2.6.0


**Load MNIST dataset**

In [38]:
data = tf.keras.datasets.mnist
x = data.load_data()

In [8]:
(xtrain, ytrain), (xtest, ytest) = x
# xtrain is 60,000 images with dimensions (28 x 28)
# ytrain are the label labels for the images
xtrain.shape, ytrain.shape

xtrain, xtest = xtrain / 255.0, xtest/255.0

**Build a tf.keras.Sequential model by stacking layers**

In [54]:
from tensorflow.keras.layers import Dense, Dropout, Flatten

model = keras.Sequential(
    [
        Flatten(input_shape=(28, 28)),
        Dense(128),
        Dropout(0.2),
        Dense(10)
    ], name= "First-Model"
)


In [None]:

print(xtrain[0:1].shape)
print(xtrain[:1].shape)

ut_pr = model([xtrain[0:1]])

# The output array is an ary of 'Logits'
print(ut_pr.numpy())

##### **LOGITS**
**vector of raw (non-normalized) predictions that a classification model generates**, passed to a normalization function(typically tf.nn.softmax)

**The tf.nn.softmax function converts these logits to probabilities for each class**

In [35]:
tf.nn.softmax(ut_pr.numpy())

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.11993641, 0.01811036, 0.04775747, 0.11147215, 0.20157707,
        0.11768109, 0.1428853 , 0.05624944, 0.08445621, 0.09987446]],
      dtype=float32)>

##### **Adding loss fn, Compiling and Summary**

In [56]:
from keras import optimizers, losses 

lossfn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
lossfn(ytrain[:1], ut_pr).numpy()

model.compile(
    optimizer= 'adam',
    loss= lossfn,
    metrics= ['accuracy']
)

model.build()


##### **Training**

In [57]:
model.fit(xtrain, ytrain, epochs=5, batch_size=32, verbose=1)
model.evaluate(xtest, ytest, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.2777617871761322, 0.9248999953269958]