<a href="https://colab.research.google.com/github/SeokhunEom/DeepLearning-study/blob/main/Section_10_MNIST_Neural_Network_Dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random
import tensorflow as tf

In [2]:
# Set a fixed seed for random number generation for reproducibility.
random.seed(777)

# Define hyperparameters for the model training.
learning_rate = 0.001  # Learning rate for the optimizer.
batch_size = 100  # Number of samples per gradient update.
training_epochs = 15  # Number of epochs to train the model.
nb_classes = 10  # Number of output classes for classification (digits 0-9).
drop_rate = 0.3  # Dropout rate for regularization.

In [3]:
# Load the MNIST dataset, which is already split into training and testing sets.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape)  # Output the shape of the training data.

(60000, 28, 28)


In [4]:
# Reshape the training and testing data from (num_samples, 28, 28) to (num_samples, 784).
# This converts each 28x28 image into a 784-dimensional vector.
x_train = x_train.reshape(x_train.shape[0], 28 * 28)
x_test = x_test.reshape(x_test.shape[0], 28 * 28)

In [5]:
# Convert class vectors (integers) to binary class matrices (one-hot encoding).
# This is required for the categorical cross-entropy loss function.
y_train = tf.keras.utils.to_categorical(y_train, nb_classes)
y_test = tf.keras.utils.to_categorical(y_test, nb_classes)

In [6]:
# Initialize a Sequential model.
tf.model = tf.keras.Sequential()

In [7]:
# Add dense (fully connected) layers with Xavier normal initializer (also called Glorot normal initializer).
# The Glorot normal initializer draws samples from a truncated normal distribution centered on 0 with stddev = sqrt(2 / (fan_in + fan_out)).
# This helps in maintaining the variance of weights during initialization.

# First dense layer with 512 units and ReLU activation.
tf.model.add(tf.keras.layers.Dense(input_dim=784, units=512, kernel_initializer='glorot_normal', activation='relu'))
# Dropout layer to prevent overfitting by randomly setting a fraction of input units to 0.
tf.model.add(tf.keras.layers.Dropout(drop_rate))

# Second dense layer with 512 units and ReLU activation.
tf.model.add(tf.keras.layers.Dense(units=512, kernel_initializer='glorot_normal', activation='relu'))
tf.model.add(tf.keras.layers.Dropout(drop_rate))

# Third dense layer with 512 units and ReLU activation.
tf.model.add(tf.keras.layers.Dense(units=512, kernel_initializer='glorot_normal', activation='relu'))
tf.model.add(tf.keras.layers.Dropout(drop_rate))

# Fourth dense layer with 512 units and ReLU activation.
tf.model.add(tf.keras.layers.Dense(units=512, kernel_initializer='glorot_normal', activation='relu'))
tf.model.add(tf.keras.layers.Dropout(drop_rate))

# Output layer with units equal to the number of classes (10) and softmax activation.
# The softmax activation function outputs a probability distribution over the classes.
tf.model.add(tf.keras.layers.Dense(units=nb_classes, kernel_initializer='glorot_normal', activation='softmax'))

In [8]:
# Compile the model with categorical cross-entropy loss, Adam optimizer, and accuracy metric.
tf.model.compile(loss='categorical_crossentropy',
                 optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])

In [9]:
# Print a summary of the model architecture.
tf.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_2 (Dense)             (None, 512)               262656    
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_3 (Dense)             (None, 512)               2

In [10]:
# Train the model on the training data.
# The history object contains the training metrics (loss and accuracy) for each epoch.
history = tf.model.fit(x_train, y_train, batch_size=batch_size, epochs=training_epochs)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [11]:
# Predict the class labels for the test set.
y_predicted = tf.model.predict(x_test)



In [12]:
# Print the actual and predicted labels for 10 random test samples.
for x in range(0, 10):
    random_index = random.randint(0, x_test.shape[0] - 1)
    print("index: ", random_index,
          "actual y: ", np.argmax(y_test[random_index]),
          "predicted y: ", np.argmax(y_predicted[random_index]))

index:  4420 actual y:  5 predicted y:  5
index:  5507 actual y:  2 predicted y:  2
index:  8809 actual y:  1 predicted y:  1
index:  654 actual y:  5 predicted y:  5
index:  7302 actual y:  8 predicted y:  8
index:  8966 actual y:  2 predicted y:  2
index:  6383 actual y:  0 predicted y:  0
index:  9854 actual y:  8 predicted y:  8
index:  4734 actual y:  2 predicted y:  2
index:  1979 actual y:  9 predicted y:  9


In [13]:
# Evaluate the model on the test set to determine the loss and accuracy.
evaluation = tf.model.evaluate(x_test, y_test)
print('loss: ', evaluation[0])
print('accuracy', evaluation[1])

loss:  0.12168717384338379
accuracy 0.9715999960899353
