# ChatGPT - MNIST model

In [1]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist

# Settings a reader might want to tune, named once instead of repeated as
# magic numbers throughout the cell.
IMAGE_SHAPE = (28, 28, 1)   # (height, width, channels) fed to the model
NUM_CLASSES = 10            # width of the one-hot labels and of the output layer
MAX_PIXEL_VALUE = 255       # divisor that scales pixel values into [0, 1]
HIDDEN_UNITS = 128
BATCH_SIZE = 32
EPOCHS = 5

# Load the MNIST dataset as (images, labels) pairs for training and testing
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Preprocess the data
# Reshape to (samples, 28, 28, 1) and normalize to values between 0 and 1.
# The leading -1 lets NumPy infer the number of samples, so the cell keeps
# working if the arrays are subsampled instead of assuming 60000 / 10000 rows.
train_images = train_images.reshape((-1,) + IMAGE_SHAPE).astype('float32') / MAX_PIXEL_VALUE
test_images = test_images.reshape((-1,) + IMAGE_SHAPE).astype('float32') / MAX_PIXEL_VALUE

# Convert labels to one-hot encoded format.
# num_classes is passed explicitly so both label arrays are guaranteed to have
# the same width as the output layer, rather than a width inferred from the
# largest label that happens to be present in each array.
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=NUM_CLASSES)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES)

# Define the neural network model
model = models.Sequential()
# Declare the input with an explicit Input object; newer Keras releases warn
# when `input_shape` is passed to a layer inside a Sequential model.
model.add(layers.Input(shape=IMAGE_SHAPE))
# Flatten each (28, 28, 1) image into a 1D vector for the dense layers
model.add(layers.Flatten())
# Hidden dense layer with ReLU activation
model.add(layers.Dense(HIDDEN_UNITS, activation='relu'))
# Output layer: one unit per class, softmax turns the outputs into class probabilities
model.add(layers.Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
# Adam optimizer; categorical crossentropy matches the one-hot labels built above;
# accuracy is tracked as a metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training images and one-hot labels
model.fit(train_images, train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS)

# Evaluate the model on the held-out test data; evaluate() returns the loss
# followed by the metrics requested in compile()
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.9768999814987183


# Book - MNIST model

In [2]:
# Imports for this cell, grouped at the top so its dependencies are visible
# before any work starts (they were previously scattered between statements).
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Settings a reader might want to tune, named once instead of repeated as
# magic numbers throughout the cell.
FLAT_IMAGE_SIZE = 28 * 28   # each 28x28 image is flattened to a 784-vector
NUM_CLASSES = 10            # one output unit per digit
MAX_PIXEL_VALUE = 255       # divisor that scales pixel values into [0, 1]
HIDDEN_UNITS = 512
BATCH_SIZE = 128
EPOCHS = 5

# Loading the MNIST dataset into training and testing sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Defining the neural network model using Keras' Sequential API
# The model consists of two dense layers:
# - The first dense layer uses the ReLU activation function
# - The second dense layer has one neuron per digit and uses the softmax
#   activation function for classification
# No input shape is declared here; the model is built on first use from the
# shape of the data it receives.
model = keras.Sequential([
    layers.Dense(HIDDEN_UNITS, activation="relu"),
    layers.Dense(NUM_CLASSES, activation="softmax")
])

# Compiling the model with the following specifications:
# - Optimizer: RMSprop
# - Loss function: Sparse Categorical Crossentropy (suitable for integer labels,
#   which is why the labels are passed through without one-hot encoding)
# - Metrics: Accuracy (to monitor during training)
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Preprocessing the training images:
# - Reshaping the images from 28x28 matrices to flat vectors of size 784 (28*28);
#   the leading -1 lets NumPy infer the number of samples, so the cell keeps
#   working on a subsample instead of assuming exactly 60000 rows
# - Normalizing the pixel values to be between 0 and 1 by dividing by 255
train_images = train_images.reshape((-1, FLAT_IMAGE_SIZE))
train_images = train_images.astype("float32") / MAX_PIXEL_VALUE

# Preprocessing the testing images in the same way as the training images
test_images = test_images.reshape((-1, FLAT_IMAGE_SIZE))
test_images = test_images.astype("float32") / MAX_PIXEL_VALUE

# Training the model using the preprocessed training images and labels
# - Epochs: number of times the model will see the entire dataset
# - Batch size: number of samples processed before updating the model's weights
model.fit(train_images, train_labels, epochs=EPOCHS, batch_size=BATCH_SIZE)

# Extracting the first 10 test images for prediction
test_digits = test_images[0:10]

# Predicting the class probabilities for the extracted test images
predictions = model.predict(test_digits)

# Probabilities predicted for the first test image (one entry per class)
first_prediction = predictions[0]

# Displaying the predicted class probabilities for the first test image
print(first_prediction)

# Finding the class with the highest probability for the first test image
print(first_prediction.argmax())

# Displaying the probability of the class 7 for the first test image
print(first_prediction[7])

# Evaluating the model's performance on the entire test set; evaluate() returns
# the loss followed by the metrics requested in compile()
test_loss, test_acc = model.evaluate(test_images, test_labels)

# Printing the accuracy of the model on the test set
print(f"test_acc: {test_acc}")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[3.27006831e-08 1.71879311e-08 1.99961341e-05 4.68345315e-05
 9.22495587e-12 1.16408046e-07 1.15397657e-11 9.99931753e-01
 6.76901522e-08 1.08296399e-06]
7
0.99993175
test_acc: 0.9803000092506409
