In [None]:
# Write a program to implement a three-layer neural network using the TensorFlow library (only, no Keras) to classify the MNIST handwritten digits dataset. Demonstrate the implementation of the feed-forward and back-propagation approaches.
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops

# Placeholders and Session.run() are graph-mode constructs, so eager
# execution has to be switched off before anything else is built.
tf.compat.v1.disable_eager_execution()

# Load MNIST dataset (training split and test split)
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Scale pixel values into [0, 1], then flatten every 28x28 image into a
# single 784-element row so it can be fed to a fully connected layer.
x_train = (x_train / 255.0).reshape(-1, 784)
x_test = (x_test / 255.0).reshape(-1, 784)

# One-hot encode the labels: row k of the 10x10 identity matrix is the
# encoding of digit k, so indexing by the label array builds every row at once.
one_hot_rows = np.eye(10)
y_train = one_hot_rows[y_train]
y_test = one_hot_rows[y_test]

# Reset default graph
# Start from an empty default graph so that re-running this cell does not keep
# adding duplicate ops and variables to the previous graph. The public
# tf.compat.v1 entry point is used rather than the private
# tensorflow.python.framework.ops module, which is not a supported API.
tf.compat.v1.reset_default_graph()

# Define placeholders
# X receives a batch of flattened images (784 features per row); Y receives
# the matching one-hot labels (10 classes per row). The leading None lets the
# batch size vary between training batches and full-dataset evaluation.
X = tf.compat.v1.placeholder(tf.float32, [None, 784])
Y = tf.compat.v1.placeholder(tf.float32, [None, 10])

# Define model parameters
def init_weights(shape):
    """Create a trainable weight variable of the given shape.

    The initial values are drawn from a normal distribution with a standard
    deviation of 0.1.
    """
    initial_values = tf.random.normal(shape, stddev=0.1)
    return tf.Variable(initial_values)

# Trainable parameters of the network: 784 inputs -> 128 hidden units ->
# 64 hidden units -> 10 output classes. Weights start as small random
# values (see init_weights); biases start at zero.

# Hidden layer 1
W1 = init_weights([784, 128])
b1 = tf.Variable(tf.zeros([128]))

# Hidden layer 2
W2 = init_weights([128, 64])
b2 = tf.Variable(tf.zeros([64]))

# Output layer (one score per digit class)
W3 = init_weights([64, 10])
b3 = tf.Variable(tf.zeros([10]))

# Feed-forward pass
def forward_propagation(X, *, return_logits=False):
    """Run the feed-forward pass of the three-layer network.

    Two ReLU hidden layers (W1/b1 and W2/b2) are followed by a linear output
    layer (W3/b3); softmax turns the output-layer scores into class
    probabilities.

    Args:
        X: Input tensor whose last dimension matches the rows of W1.
        return_logits: When True, also return the raw output-layer scores
            (the values before softmax), which the cross-entropy loss needs.

    Returns:
        The softmax class probabilities, or a ``(probabilities, logits)``
        tuple when ``return_logits`` is True.
    """
    z1 = tf.matmul(X, W1) + b1  # First layer linear transformation
    a1 = tf.nn.relu(z1)  # First layer activation (ReLU)

    z2 = tf.matmul(a1, W2) + b2  # Second layer linear transformation
    a2 = tf.nn.relu(z2)  # Second layer activation (ReLU)

    z3 = tf.matmul(a2, W3) + b3  # Output layer: raw class scores (logits)
    output = tf.nn.softmax(z3)  # Probabilities for each class

    if return_logits:
        return output, z3
    return output


# Class probabilities (used for the accuracy metric) and the raw logits
# (used for the loss) from a single forward pass.
y_pred, logits = forward_propagation(X, return_logits=True)

# Loss function
# softmax_cross_entropy_with_logits applies softmax internally, so it must be
# given the raw logits. Feeding it y_pred (already softmaxed) would apply
# softmax twice: its inputs would be confined to [0, 1], which keeps the loss
# well above zero and flattens the gradients used for back-propagation.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))

# Back-propagation: minimize() adds the gradient computation for `loss` and
# the plain gradient-descent update of every trainable variable to the graph.
learning_rate = 0.01  # Step size of each gradient-descent update
sgd = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
optimizer = sgd.minimize(loss)

# Accuracy metric: a sample counts as correct when the class with the highest
# predicted score is the class marked in its one-hot label.
predicted_class = tf.argmax(y_pred, 1)
true_class = tf.argmax(Y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Training configuration
epochs = 20  # Full passes over the training set
batch_size = 128  # Samples per gradient-descent step

# Everything below runs inside one session so the trained variable values are
# still available for the final test-set evaluation.
with tf.compat.v1.Session() as sess:
    # Variables hold no values until their initializers have been run.
    sess.run(tf.compat.v1.global_variables_initializer())

    num_train = x_train.shape[0]
    full_train_feed = {X: x_train, Y: y_train}

    for epoch in range(epochs):
        # One optimizer step per consecutive slice of the training data; the
        # last slice is simply shorter when the size is not a multiple of
        # batch_size.
        for start in range(0, num_train, batch_size):
            stop = start + batch_size
            batch_feed = {X: x_train[start:stop], Y: y_train[start:stop]}
            sess.run(optimizer, feed_dict=batch_feed)

        # Report loss and accuracy over the whole training set for this epoch.
        train_loss, train_acc = sess.run([loss, accuracy], feed_dict=full_train_feed)
        print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

    # Final evaluation on the held-out test set.
    test_acc = sess.run(accuracy, feed_dict={X: x_test, Y: y_test})
    print(f"\nTest Accuracy: {test_acc:.4f}")


Epoch 1, Loss: 2.2774, Accuracy: 0.1706
Epoch 2, Loss: 2.2158, Accuracy: 0.3039
Epoch 3, Loss: 2.0808, Accuracy: 0.4857
Epoch 4, Loss: 1.9519, Accuracy: 0.5673
Epoch 5, Loss: 1.8755, Accuracy: 0.6376
Epoch 6, Loss: 1.8433, Accuracy: 0.6483
Epoch 7, Loss: 1.8270, Accuracy: 0.6548
Epoch 8, Loss: 1.8171, Accuracy: 0.6591
Epoch 9, Loss: 1.8103, Accuracy: 0.6631
Epoch 10, Loss: 1.8053, Accuracy: 0.6656
Epoch 11, Loss: 1.8014, Accuracy: 0.6675
Epoch 12, Loss: 1.7983, Accuracy: 0.6693
Epoch 13, Loss: 1.7957, Accuracy: 0.6707
Epoch 14, Loss: 1.7934, Accuracy: 0.6719
Epoch 15, Loss: 1.7907, Accuracy: 0.6727
Epoch 16, Loss: 1.7703, Accuracy: 0.7072
Epoch 17, Loss: 1.7560, Accuracy: 0.7244
Epoch 18, Loss: 1.7464, Accuracy: 0.7316
Epoch 19, Loss: 1.7395, Accuracy: 0.7370
Epoch 20, Loss: 1.7344, Accuracy: 0.7406

Test Accuracy: 0.7421
