In [1]:
from tensorflow.keras.datasets import mnist

# load_data() returns two (images, labels) pairs: the training split, then the test split.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [2]:
# Shape of the training image array: number of images, then the per-image dimensions.
train_images.shape

(60000, 28, 28)

In [3]:
# Number of training labels; there should be exactly one per training image.
len(train_labels)

60000

In [4]:
# Peek at the training labels: integer digit classes (NumPy abbreviates the long array).
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [5]:
# Shape of the test image array: number of images, then the per-image dimensions.
test_images.shape

(10000, 28, 28)

In [6]:
# Number of test labels; there should be exactly one per test image.
len(test_labels)

10000

In [7]:
# Peek at the test labels: integer digit classes (NumPy abbreviates the long array).
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

## The network architecture

In [8]:
from tensorflow import keras
from tensorflow.keras import layers

# Hidden layer: 512 fully connected units. ReLU (max(0, x)) supplies the
# non-linearity; it does not produce probabilities.
hidden_layer = layers.Dense(512, activation='relu')

# Output layer: 10 units, one per digit class 0-9. Softmax turns the layer's raw
# scores into 10 probabilities that sum to 1.
output_layer = layers.Dense(10, activation='softmax')

model = keras.Sequential([hidden_layer, output_layer])




## The compilation step

In [10]:
# The three choices that configure training:
# - loss: the quantity minimised during training. The sparse variant of categorical
#   cross-entropy is used because the labels are plain integer class ids, not one-hot vectors.
# - optimizer: the rule that updates the model's weights from the gradient of the loss.
# - metrics: values that are reported while training and evaluating but never optimised;
#   here, the fraction of images classified correctly.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)




## Preparing the image data

In [11]:
def _flatten_and_scale(images):
    """Flatten each image to a 1-D float32 vector, scaling raw pixels to [0, 1].

    The sample count is read from the array instead of being hard-coded, so the
    helper works for any number of images and any image size.

    Integer-typed input is treated as raw 0-255 pixel data and divided by 255.
    Input that is already floating point is only flattened and cast, which makes
    re-running this cell a no-op instead of dividing by 255 a second time.

    Args:
        images: array whose first axis indexes the samples.

    Returns:
        A float32 array of shape (num_samples, values_per_image).
    """
    flat = images.reshape((images.shape[0], -1)).astype('float32')
    if images.dtype.kind in "ui":  # still raw integer pixels, not yet normalised
        flat /= 255
    return flat


train_images = _flatten_and_scale(train_images)
test_images = _flatten_and_scale(test_images)

## "Fitting" the model 

In [12]:
# Train for 5 full passes over the training set, updating the weights after every
# mini-batch of 128 images.
model.fit(
    x=train_images,
    y=train_labels,
    batch_size=128,
    epochs=5,
)

Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x18a43e399d0>

## Using the model to make predictions

In [13]:
# Score the first ten test images; each row of `prediction` holds one score per digit class.
test_digits = test_images[:10]
prediction = model.predict(test_digits)
prediction[0]  # the ten class scores for the first test image



array([1.9511898e-08, 1.2194802e-08, 2.6855637e-06, 4.2575881e-05,
       3.1334643e-11, 6.5742118e-09, 1.4492460e-12, 9.9995387e-01,
       3.5296992e-08, 7.8237582e-07], dtype=float32)

In [14]:
# Index of the highest score, i.e. the digit class the model predicts for the first test image.
prediction[0].argmax()

7

In [15]:
# Score the model assigned to class 7 for the first test image.
prediction[0][7]

0.99995387