In [None]:
import tensorflow as tf
# Report the installed TensorFlow release so the run can be reproduced.
print(f"TensorFlow version: {tf.__version__}")

TensorFlow version: 2.18.0


In [None]:
# Load the MNIST dataset that ships with Keras (fetched over the network on first use).
mnist = tf.keras.datasets.mnist

# load_data() yields two (images, labels) pairs: the training split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print("x train shape: ", x_train.shape)
# The label names the array whose shape is actually printed (x_test).
print("x test shape: ", x_test.shape)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
x train shape:  (60000, 28, 28)
y test shape:  (10000, 28, 28)


In [None]:
# Rescale pixel values by 1/255 so the inputs become floats (MNIST pixels are 0-255).
# NOTE: this cell rebinds x_train/x_test, so running it twice divides twice.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [None]:
# Feed-forward classifier for 28x28 images that outputs 10 raw logits.
# The input shape is declared with an explicit Input object: passing
# `input_shape` to the first layer is discouraged in Keras 3 and emits a UserWarning.
model = tf.keras.models.Sequential([
  tf.keras.Input(shape=(28, 28)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)  # no activation: the outputs are logits
])

  super().__init__(**kwargs)


* `tf.keras.models.Sequential()`: Creates a simple, linear stack of layers for the neural network.
* `tf.keras.layers.Flatten` (with a `(28, 28)` input shape): Flattens each 28x28 input image into a 1D array of 784 pixels (28 * 28 = 784), which is the form the fully connected layers expect.
* `tf.keras.layers.Dense(128, activation='relu')`: A fully connected (dense) layer with 128 neurons and the ReLU activation function. ReLU introduces non-linearity, enabling the network to learn complex patterns.
* `tf.keras.layers.Dropout(0.2)`: A dropout layer that randomly "drops" (sets to zero) 20% of its inputs during training to reduce overfitting; it is inactive at inference time. This forces the network to learn more robust features rather than relying on specific neurons.
* `tf.keras.layers.Dense(10)`: The output layer has 10 neurons, one for each of the 10 possible classes (digits 0-9). No activation is specified, so it is linear and the outputs are raw logits.





In [None]:
# Show the first normalized training image followed by its label.
print(x_train[0], y_train[0], sep="\n")

[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.    

In [None]:
# Forward pass on a batch holding only the first training image;
# the bare last expression displays the returned tensor.
predictions = model(x_train[0:1])
predictions

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-13.078039 ,  -5.698321 ,  -2.2054358,  10.04821  , -24.430523 ,
         11.505349 , -18.72177  ,  -6.1426544, -12.91408  ,  -6.521214 ]],
      dtype=float32)>

In [None]:
# Run the same single-image batch through the model and keep the result
# as a NumPy array.
predictions = model(x_train[0:1]).numpy()
predictions

array([[-13.078039 ,  -5.698321 ,  -2.2054358,  10.04821  , -24.430523 ,
         11.505349 , -18.72177  ,  -6.1426544, -12.91408  ,  -6.521214 ]],
      dtype=float32)

`model(x_train[:1])`: You pass a batch of input data to the model. The slice `x_train[:1]` contains only the first image in the dataset but keeps the batch dimension, so it is a batch of size 1.

The model computes a prediction for this image. This prediction is not a direct label (like 0, 1, 2), but a set of logits. These logits are raw outputs from the model (before they are converted to probabilities), and each logit corresponds to a "score" for each class (digits 0-9 in this case).

So, `predictions` has shape (1, 10) (1 image, 10 possible classes). Calling the model returns a tensor, and `.numpy()` converts it to a NumPy array. The values represent the model's raw predictions (logits) for each class.




In [None]:
# Convert the logits into class probabilities (softmax over the last axis).
tf.nn.softmax(predictions, axis=-1).numpy()

array([[0.05533126, 0.0758725 , 0.09113963, 0.0929936 , 0.04491719,
        0.13898942, 0.14895822, 0.15153258, 0.13664864, 0.06361692]],
      dtype=float32)

The Softmax function converts the logits into probabilities, where the sum of all 10 output values will be 1, and each value represents the probability of the image belonging to a particular class (digit 0-9).

In [None]:
# Cross-entropy for integer class labels; from_logits=True because the model
# outputs raw scores rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) defines the loss function used to calculate how far the model's predictions are from the true labels.

This function is suitable for multi-class classification problems where the labels are integers (not one-hot encoded).

from_logits=True specifies that the model outputs raw logits (not probabilities), so the loss function will internally apply Softmax to the logits.



In [None]:
# Loss of the model's prediction for the first training example.
loss_fn(y_true=y_train[:1], y_pred=predictions).numpy()

np.float32(1.9733574)

In [None]:
# Configure training: Adam optimizer, the logits-based loss in loss_fn, accuracy metric.
model.compile(
    optimizer="adam",
    loss=loss_fn,
    metrics=["accuracy"],
)

In [None]:
# Train for 5 passes over the training set. Binding the returned History object
# keeps the per-epoch metrics available and avoids echoing its repr as cell output.
history = model.fit(x_train, y_train, epochs=5)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8665 - loss: 0.4659
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9568 - loss: 0.1448
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9672 - loss: 0.1058
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9739 - loss: 0.0835
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9772 - loss: 0.0720


<keras.src.callbacks.history.History at 0x79652944eb50>

In [None]:
# Measure performance on the test split; the call returns [loss, accuracy].
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.9786 - loss: 0.0695


[0.06951350718736649, 0.978600025177002]

In [None]:
# Wrap the model with a Softmax layer so calls return probabilities instead of logits.
probability_model = tf.keras.Sequential(layers=[model, tf.keras.layers.Softmax()])

In [None]:
# Class probabilities for the first five test images.
probability_model(x_test[0:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[1.8560739e-08, 2.0493431e-09, 2.8229200e-05, 5.2320538e-04,
        2.1597375e-09, 2.2805173e-07, 7.9336180e-13, 9.9937290e-01,
        5.6077993e-06, 6.9755552e-05],
       [2.5326350e-09, 2.8616787e-06, 9.9999499e-01, 2.0348616e-06,
        4.5062126e-16, 7.5140001e-08, 8.5363722e-10, 7.0283893e-15,
        8.2256388e-09, 1.6509733e-14],
       [3.9340126e-08, 9.9868363e-01, 4.3213255e-05, 1.0642373e-06,
        3.2629436e-05, 1.5742535e-05, 7.6435244e-06, 1.1664330e-03,
        4.8532944e-05, 1.0099951e-06],
       [9.9873430e-01, 1.2993516e-08, 1.1583973e-03, 4.3123491e-06,
        3.2072234e-07, 1.8207531e-05, 3.4055531e-06, 2.2155032e-06,
        8.7198822e-07, 7.7932971e-05],
       [4.6447937e-07, 2.0228642e-08, 3.1893014e-05, 1.3218335e-07,
        9.9132991e-01, 1.5570490e-07, 1.0622097e-06, 3.2979235e-05,
        3.1157600e-05, 8.5721454e-03]], dtype=float32)>