<a href="https://colab.research.google.com/github/SeokhunEom/DeepLearning-study/blob/main/Section_11_MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
import random

In [2]:
# Grab the MNIST loader exposed by the Keras datasets API.
mnist = tf.keras.datasets.mnist

# load_data() yields the (train, test) pairs of images and labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255 so the inputs fall in the range [0, 1].
x_train, x_test = x_train / 255, x_test / 255

# Give every image an explicit channel axis -> (N, 28, 28, 1), as the Conv2D layers expect.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Turn the class labels of both splits into one-hot vectors over 10 classes.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Training hyperparameters read by the compile/fit cell further down.
learning_rate = 1e-3   # step size handed to the Adam optimizer
training_epochs = 12   # number of passes over the training set
batch_size = 128       # samples per gradient update

In [4]:
# Assemble the CNN in one shot by handing Sequential the ordered layer list.
# NOTE(review): the model is stored as an attribute on the `tf` module itself
# (`tf.model`); the cells below read it from there, so the name is kept as-is.
tf.model = tf.keras.Sequential([
    # Convolution 1: 16 filters of 3x3 with ReLU, applied to the (28, 28, 1) input.
    tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), input_shape=(28, 28, 1), activation='relu'),
    # 2x2 max pooling to down-sample the feature maps.
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Convolution 2: 32 filters of 3x3 with ReLU.
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    # Second 2x2 max pooling stage.
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps into a vector for the dense classifier.
    tf.keras.layers.Flatten(),
    # Classifier head: 10 softmax units (one per class), Glorot-normal initialised.
    tf.keras.layers.Dense(units=10, kernel_initializer='glorot_normal', activation='softmax'),
])

In [5]:
# Configure training: Adam at the chosen learning rate, categorical cross-entropy
# as the loss (the labels are one-hot), and accuracy as the reported metric.
tf.model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture and parameter counts.
tf.model.summary()

# Fit on the training split with the configured number of epochs and batch size.
tf.model.fit(x=x_train, y=y_train, epochs=training_epochs, batch_size=batch_size)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 16)        160       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 16)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 32)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 800)               0         
                                                                 
 dense (Dense)               (None, 10)                8

<keras.src.callbacks.History at 0x7e4b0c9eded0>

In [6]:
# Run the trained network over the whole test split.
y_predicted = tf.model.predict(x_test)

# Collapse the one-hot labels and the predicted score vectors to class indices once, up front.
actual_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(y_predicted, axis=1)

# Spot-check 10 test samples drawn at random (draws are independent, so repeats are possible).
last_index = x_test.shape[0] - 1
for _ in range(10):
    random_index = random.randint(0, last_index)
    print("index: ", random_index,
          "actual y: ", actual_classes[random_index],
          "predicted y: ", predicted_classes[random_index])

index:  3710 actual y:  0 predicted y:  0
index:  4836 actual y:  3 predicted y:  3
index:  2645 actual y:  4 predicted y:  4
index:  3821 actual y:  9 predicted y:  9
index:  3189 actual y:  7 predicted y:  7
index:  8137 actual y:  0 predicted y:  0
index:  9320 actual y:  4 predicted y:  4
index:  4029 actual y:  4 predicted y:  4
index:  3239 actual y:  2 predicted y:  2
index:  3745 actual y:  1 predicted y:  1


In [7]:
# Score the trained model on the test split; the first two entries of the
# result are printed as the loss and the accuracy, in that order.
evaluation = tf.model.evaluate(x_test, y_test)
for label, value in zip(('loss: ', 'accuracy'), evaluation):
    print(label, value)

loss:  0.042773839086294174
accuracy 0.9850999712944031


In [8]:
# Migrated from TensorFlow 1.x to TensorFlow 2 (conversion by ChatGPT)

import tensorflow as tf
import random
import numpy as np
from tensorflow.keras import layers, models, datasets

# Set random seeds for reproducibility.
# Seeding TensorFlow alone is not enough: the sample indices printed further
# down are drawn with Python's `random` module, so that generator (and NumPy's
# global one, for completeness) is seeded with the same value.
SEED = 777
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Pull MNIST through the Keras datasets API and unpack the train/test splits.
mnist = datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale the pixel values into [0, 1].
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add the trailing channel axis the Conv2D layers need: (N, 28, 28, 1).
x_train = x_train.reshape(len(x_train), 28, 28, 1)
x_test = x_test.reshape(len(x_test), 28, 28, 1)

# One-hot encode the labels of both splits over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Hyperparameters for this cell's model (these rebind the names set in the earlier cell).
learning_rate = 1e-3   # step size handed to the Adam optimizer
training_epochs = 15   # number of passes over the training set
batch_size = 100       # samples per gradient update

# Build the CNN by passing the ordered layer stack straight to Sequential.
model = models.Sequential([
    # Stage 1: 32-filter 3x3 convolution (ReLU) -> 2x2 max pooling -> dropout (rate 0.3)
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.3),

    # Stage 2: 64-filter 3x3 convolution (ReLU) -> 2x2 max pooling -> dropout (rate 0.3)
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.3),

    # Stage 3: 128-filter 3x3 convolution (ReLU) -> 2x2 max pooling -> dropout (rate 0.3),
    # then flatten for the dense layers
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.3),
    layers.Flatten(),

    # Stage 4: fully connected layer with 625 ReLU units, followed by dropout (rate 0.3)
    layers.Dense(625, activation='relu'),
    layers.Dropout(0.3),

    # Stage 5: output layer, 10 softmax units (one per class)
    layers.Dense(10, activation='softmax'),
])

# Configure training: Adam at `learning_rate`, categorical cross-entropy loss, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture.
model.summary()

# Fit the network on the training split for the configured epochs and batch size.
model.fit(x_train, y_train, batch_size=batch_size, epochs=training_epochs)

# Score the fitted model on the test split and report both numbers to 4 decimals.
test_metrics = model.evaluate(x_test, y_test)
loss, accuracy = test_metrics
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')

# Run inference over the whole test split, then spot-check 10 randomly drawn samples.
y_pred = model.predict(x_test)
last_test_index = x_test.shape[0] - 1
for _ in range(10):
    random_index = random.randint(0, last_test_index)
    print(f'Index: {random_index}, Actual: {np.argmax(y_test[random_index])}, Predicted: {np.argmax(y_pred[random_index])}')

# Function to evaluate the model on a given dataset
def evaluate(x_sample, y_sample, batch_size=512):
    """Return the accuracy of the notebook-level `model` on a labelled dataset.

    Args:
        x_sample: Array of input samples; the first axis indexes the samples.
        y_sample: Labels aligned with `x_sample`, in the format `model` was
            compiled for.
        batch_size: Number of samples Keras processes per evaluation step.

    Returns:
        The second entry of the `model.evaluate(...)` result, which is the
        accuracy because `model` is compiled with `metrics=['accuracy']`.

    Raises:
        ValueError: If `x_sample` contains no samples.
    """
    if x_sample.shape[0] == 0:
        raise ValueError("evaluate() needs at least one sample")

    # Model.evaluate already walks the data in `batch_size` chunks and
    # aggregates the metric over all of them, so no manual slicing loop
    # (and no per-chunk evaluate call) is needed.
    return model.evaluate(x_sample, y_sample, batch_size=batch_size, verbose=0)[1]

# Report the overall accuracy on both splits using the evaluate() helper.
print("\nAccuracy Evaluates")
print("-------------------------------")
train_accuracy = evaluate(x_train, y_train)
print('Train Accuracy:', train_accuracy)
test_accuracy = evaluate(x_test, y_test)
print('Test Accuracy:', test_accuracy)

# Draw a single random test sample and compare its label with the model's prediction.
print("\nGet one and predict")
print("-------------------------------")
r = random.randint(0, x_test.shape[0] - 1)
# The one-row slices keep the batch axis, so predict() receives a batch of one sample.
print(f'Label: {np.argmax(y_test[r:r + 1])}')
print(f'Prediction: {np.argmax(model.predict(x_test[r:r + 1]))}')


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 dropout (Dropout)           (None, 13, 13, 32)        0         
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 5, 5, 64)         