In [None]:
# Load the dependencies
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential # to create an artificial neural network
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical # to change it to 0s and 1s for One Hot Encoding
import matplotlib.pyplot as plt
import numpy as np

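**One-Hot Encoding**

Each digit label is replaced by a length-10 vector that is 1 at the index equal to the digit and 0 everywhere else: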

0 =   [1,0,0,0,0,0,0,0,0,0]

1 =   [0,1,0,0,0,0,0,0,0,0]

2 =   [0,0,1,0,0,0,0,0,0,0]

3 =   [0,0,0,1,0,0,0,0,0,0]

4 =   [0,0,0,0,1,0,0,0,0,0]

5 =   [0,0,0,0,0,1,0,0,0,0]

6 =   [0,0,0,0,0,0,1,0,0,0]

7 =   [0,0,0,0,0,0,0,1,0,0]

8 =   [0,0,0,0,0,0,0,0,1,0]

9 =   [0,0,0,0,0,0,0,0,0,1]

1. **Load Dependencies:** Import necessary libraries like TensorFlow, Keras, and Matplotlib.
2. **Load and Preprocess Data:** Load the MNIST dataset, normalize pixel values, and apply one-hot encoding to the labels.
3. **Build Neural Network:** Create a sequential model with a Flatten layer, two hidden Dense layers (128 and 64 units, ReLU activation), and a final 10-unit Dense output layer (Softmax activation).
4. **Compile and Train Model:** Configure the model with Adam optimizer, categorical crossentropy loss, and accuracy metric. Train the model on the training data for a specified number of epochs and batch size, using a validation split.
5. **Evaluate and Predict:** Evaluate the trained model on the test data and make predictions.
6. **Visualize Results:** Display a selection of test images along with their predicted and true labels.

In [None]:
# Load the dataset
# mnist.load_data() returns (train, test) tuples of image arrays and integer digit labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize data
# Pixel intensities range from 0 to 255; scale them into [0, 1].
# Casting to float32 first avoids NumPy promoting the arrays to float64,
# which would double their memory footprint for no benefit.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode the integer labels into length-10 vectors (one slot per digit).
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Sanity checks: fail fast here rather than deep inside model.fit().
assert x_train.shape[1:] == (28, 28) and x_test.shape[1:] == (28, 28), "unexpected image shape"
assert y_train.shape == (len(x_train), 10) and y_test.shape == (len(x_test), 10), "unexpected label shape"

#Build Neural Network
# Seed Python, NumPy and TensorFlow so that weight initialisation (and other
# seeded randomness) is repeatable across Restart & Run All.
# NOTE: some GPU kernels are still non-deterministic, so metrics may vary slightly.
tf.keras.utils.set_random_seed(42)

model = Sequential(
    [ layers.Input(shape=(28, 28)), # explicit input spec; passing input_shape= to Flatten is deprecated and emits a UserWarning
      Flatten(), # unroll each 28x28 image into a single 784-element vector
      Dense(128, activation='relu'), # first hidden layer: 128 fully connected units
      Dense(64, activation='relu'), # second hidden layer: 64 fully connected units
      Dense(10, activation='softmax') # output layer: one probability per digit class (the 10 values sum to 1)
    ])

# Compile the model
# Categorical cross-entropy pairs with the softmax output and the one-hot labels:
# it measures how far the predicted class probabilities are from the true class.
# Accuracy is tracked as a human-readable metric alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

# Train the model
# Mini-batch training: weights are updated after every batch of 32 samples.
# validation_split=0.2 holds out 20% of the training data for validation, so the
# number of steps per epoch is (number of remaining training samples / 32).
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=5,
)

# Evaluate the model
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
test_loss, test_acc = model.evaluate(x_test, y_test)
# Report the held-out metrics explicitly instead of leaving them only in the progress bar.
print(f"Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.4f}")

# One row of 10 class probabilities per test image; used by the visualization below.
predictions = model.predict(x_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(**kwargs)


Epoch 1/5


In [None]:
# Visualize the data
# Show 10 distinct test images with the predicted and true digit.
# A seeded generator keeps the figure reproducible on re-run (change the seed to
# see a different sample); replace=False guarantees no image is shown twice.
rng = np.random.default_rng(42)
sample_indices = rng.choice(len(x_test), size=10, replace=False)

fig, axes = plt.subplots(2, 5, figsize=(12, 6))
for ax, idx in zip(axes.flat, sample_indices):
  ax.imshow(x_test[idx], cmap='gray')
  # np.argmax returns the index of the largest entry: the most probable class for
  # predictions, and the position of the 1 in the one-hot true label.
  ax.set_title(f"Predicted: {np.argmax(predictions[idx])}, True: {np.argmax(y_test[idx])}")
  ax.axis('off')
fig.tight_layout() # keep neighbouring titles from overlapping
plt.show()