**Objective:**

The primary goal of this project is to train a simple fully connected neural network (ANN) on the MNIST dataset and analyze how different batch sizes and epoch values impact performance. 

**Description Of Model:**

1. Input: Flattened 28×28 images (784 features).
2. Hidden Layer: 256 neurons, ReLU activation.
3. Output Layer: 10 neurons (digits 0-9), softmax activation.
4. Loss Function: Categorical cross-entropy.
5. Optimizer: Adam.
6. Learning rate: 0.1.
7. Different batch sizes: 1, 10, 100
8. Different epochs: 10, 50, 100
9. Metrics: Train loss, train accuracy, test accuracy, confusion matrix.

In [None]:
import tensorflow as tf
import numpy as np
import time
import pandas as pd
import os
from openpyxl import Workbook, load_workbook
from openpyxl.drawing.image import Image
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Report how many GPUs TensorFlow can see and print a notice when the
# default GPU device is missing.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
    print('GPU not found. Using CPU instead.')


def _flatten_and_scale(images):
    """Flatten each image to 784 float32 features and divide by 255.0."""
    flat = images.reshape(-1, 784).astype(np.float32)
    return tf.convert_to_tensor(flat / 255.0)


# Load MNIST: images become (N, 784) float32 tensors, labels become one-hot
# float32 vectors of depth 10.
(train_images, train_digits), (test_images, test_digits) = tf.keras.datasets.mnist.load_data()
x_train = _flatten_and_scale(train_images)
x_test = _flatten_and_scale(test_images)

y_train = tf.one_hot(train_digits, depth=10, dtype=tf.float32)
# Integer class labels are kept separately for the confusion matrix.
y_test_labels = tf.convert_to_tensor(test_digits)
y_test = tf.one_hot(test_digits, depth=10, dtype=tf.float32)

# Model parameters: one hidden layer of n_hidden units between the 784 input
# features and the 10 output classes.
n_hidden = 256
tf.random.set_seed(42)


def _random_normal_variable(shape):
    """Return a float32 tf.Variable drawn from a normal distribution (stddev 0.1)."""
    return tf.Variable(tf.random.normal(shape, stddev=0.1, dtype=tf.float32))


# The creation order (W1, b1, W2, b2) determines which seeded random draw
# each variable receives, so it must not be rearranged.
W1 = _random_normal_variable([784, n_hidden])
b1 = _random_normal_variable([n_hidden])
W2 = _random_normal_variable([n_hidden, 10])
b2 = _random_normal_variable([10])

# Forward pass of the two-layer network
@tf.function
def model(X):
    """Return the output-layer logits for a batch of inputs.

    Args:
        X: Input batch that is matrix-multiplied with ``W1``.

    Returns:
        The pre-softmax scores ``relu(X @ W1 + b1) @ W2 + b2``; no softmax
        is applied here.
    """
    hidden = tf.nn.relu(tf.matmul(X, W1) + b1)
    return tf.matmul(hidden, W2) + b2

# Softmax cross-entropy averaged over the batch
def loss_fn(y_true, y_pred):
    """Return the mean softmax cross-entropy between labels and logits.

    Args:
        y_true: Label distribution per example (passed as ``labels``).
        y_pred: Unnormalised logits per example (passed as ``logits``).

    Returns:
        A scalar tensor: the per-example losses reduced with the mean.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(per_example)

# Optimizer
# NOTE(review): 0.1 is the learning rate prescribed in the model description;
# it is far above Adam's usual default (0.001).
optimizer = tf.optimizers.Adam(learning_rate=0.1)

# Training step
@tf.function
def train_step(batch_x, batch_y):
    """Run one optimisation step on a mini-batch and return its loss.

    Args:
        batch_x: Input batch forwarded through ``model``.
        batch_y: Matching labels handed to ``loss_fn``.

    Returns:
        The scalar loss computed on this batch before the update.
    """
    trainable = [W1, b1, W2, b2]
    with tf.GradientTape() as tape:
        loss = loss_fn(batch_y, model(batch_x))
    # Differentiate the loss w.r.t. every parameter and let Adam update them.
    optimizer.apply_gradients(zip(tape.gradient(loss, trainable), trainable))
    return loss

# Configurations for batch size and epochs.
# Each entry is a (batch_size, epochs) pair; the training loop unpacks them in
# that order and processes the list from left to right.
configs = [(10, 100), (10, 50), (10, 10), (100, 100), (100, 50), (100, 10), (1, 100), (1, 50), (1, 10)]

# Excel file setup
output_file = "mnist_nn_results.xlsx"
_RESULT_HEADER = ["Batch Size", "Epochs", "Train Loss", "Train Accuracy", "Test Accuracy", "Execution Time"]


def _create_results_workbook(path):
    """Create a workbook with an empty "Results" sheet, save it, and return it.

    Args:
        path: Destination file for the new workbook.

    Returns:
        A ``(workbook, worksheet)`` pair; the worksheet holds only the header row.
    """
    new_wb = Workbook()
    new_ws = new_wb.active
    new_ws.title = "Results"
    new_ws.append(_RESULT_HEADER)
    new_wb.save(path)
    return new_wb, new_ws


# Reuse an existing results file when it can be read; otherwise start fresh.
if os.path.exists(output_file):
    try:
        wb = load_workbook(output_file)
    except Exception as exc:  # top-level boundary: report the reason, then recover
        # Keep the unreadable file as a backup instead of deleting it, so
        # earlier results are never destroyed by a transient read error.
        backup_file = output_file + ".bak"
        os.replace(output_file, backup_file)
        print(f"Could not read {output_file} ({exc!r}); moved it to {backup_file}.")
        wb, ws = _create_results_workbook(output_file)
    else:
        # Prefer the sheet that holds the result rows; the active sheet of a
        # previously saved workbook is not guaranteed to be that one.
        ws = wb["Results"] if "Results" in wb.sheetnames else wb.active
else:
    wb, ws = _create_results_workbook(output_file)

# Run training
#
# Every (batch_size, epochs) configuration starts from the same initial
# weights and a brand-new optimizer, so the runs are independent and their
# results can be compared with each other.
params = [W1, b1, W2, b2]
# Snapshot of the parameter values at the moment this section starts; it is
# restored before each configuration.
initial_values = [p.numpy() for p in params]
num_train = int(x_train.shape[0])
# Pick the device explicitly instead of assuming a GPU is present.
device = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'


def _make_train_step():
    """Return a compiled training step bound to a freshly created optimizer.

    The new optimizer is rebuilt from the configuration of the module-level
    ``optimizer`` (same class and hyper-parameters) but has no accumulated
    state, so nothing leaks from a previous configuration.
    """
    fresh_optimizer = type(optimizer).from_config(optimizer.get_config())

    @tf.function
    def step(batch_x, batch_y):
        with tf.GradientTape() as tape:
            loss = loss_fn(batch_y, model(batch_x))
        grads = tape.gradient(loss, params)
        fresh_optimizer.apply_gradients(zip(grads, params))
        return loss

    return step


def _accuracy(logits, one_hot_labels):
    """Return the fraction of rows whose arg-max matches the one-hot label."""
    hits = tf.equal(tf.argmax(logits, axis=1), tf.argmax(one_hot_labels, axis=1))
    return float(tf.reduce_mean(tf.cast(hits, tf.float32)).numpy())


def _save_training_curves(path, batch_size, epochs, loss_curve, acc_curve):
    """Plot per-epoch loss and accuracy side by side and save the figure to *path*."""
    epoch_axis = range(len(loss_curve))
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, loss_curve, label='Loss', color='red')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'Loss Curve (Batch {batch_size}, Epochs {epochs})')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, acc_curve, label='Accuracy', color='blue')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title(f'Accuracy Curve (Batch {batch_size}, Epochs {epochs})')
    plt.legend()

    plt.tight_layout()
    plt.savefig(path)
    plt.close()


def _add_confusion_sheet(workbook, title, matrix):
    """Write *matrix* (rows: true label, columns: predicted label) to a new sheet."""
    sheet = workbook.create_sheet(title=title)
    sheet.append(["true \\ predicted"] + list(range(matrix.shape[1])))
    for true_label, row in enumerate(matrix):
        sheet.append([true_label] + [int(count) for count in row])


with tf.device(device):
    for batch_size, epochs in configs:
        # Restore the initial weights and drop any optimizer state.
        for param, value in zip(params, initial_values):
            param.assign(value)
        step = _make_train_step()

        start_time = time.time()
        loss_curve, acc_curve = [], []
        # Defined up front so a configuration with zero epochs still reports.
        train_loss = train_acc = float('nan')

        # Shuffle individual samples over the whole training set *before*
        # batching, so every epoch sees differently composed mini-batches.
        dataset = (
            tf.data.Dataset.from_tensor_slices((x_train, y_train))
            .shuffle(num_train)
            .batch(batch_size)
            .prefetch(tf.data.AUTOTUNE)
        )

        for _ in range(epochs):
            loss_sum = tf.zeros([], dtype=tf.float32)
            num_batches = 0
            for batch_x, batch_y in dataset:
                loss_sum += step(batch_x, batch_y)
                num_batches += 1
            # Report the mean mini-batch loss of the epoch rather than the
            # loss of whichever batch happened to come last.
            if num_batches:
                train_loss = float(loss_sum.numpy()) / num_batches

            # Compute train accuracy on the full training set
            train_acc = _accuracy(model(x_train), y_train)
            loss_curve.append(train_loss)
            acc_curve.append(train_acc)

        # Compute test accuracy and the confusion matrix
        test_logits = model(x_test)
        test_acc = _accuracy(test_logits, y_test)
        y_pred = tf.argmax(test_logits, axis=1)
        conf_matrix = confusion_matrix(
            y_test_labels.numpy(), y_pred.numpy(), labels=list(range(10))
        )
        exec_time = time.time() - start_time

        # Append results to Excel
        ws.append([batch_size, epochs, train_loss, train_acc, test_acc, exec_time])

        # Plot Loss and Accuracy Curves
        curve_file = f"mnist_curves_{batch_size}_{epochs}.png"
        _save_training_curves(curve_file, batch_size, epochs, loss_curve, acc_curve)

        # Save confusion matrix.  It is written through the same in-memory
        # workbook as everything else: appending to the file with a separate
        # writer would be undone by the next wb.save() of this workbook.
        _add_confusion_sheet(wb, f"Conf_Matrix_{batch_size}_{epochs}", conf_matrix)

        # Add accuracy/loss curve image to Excel
        ws_image = wb.create_sheet(title=f"Curve_{batch_size}_{epochs}")
        ws_image.add_image(Image(curve_file), "A1")

        # Save intermediate results so finished configurations survive a crash
        wb.save(output_file)

        print(f"Batch Size: {batch_size}, Epochs: {epochs}")
        print(f"Test Accuracy: {test_acc:.4f}")
        print(f"Execution Time: {exec_time:.2f} seconds\n")

# Final Save
wb.save(output_file)
wb.close()

print(f"Results saved to {output_file}")


Num GPUs Available:  1
Batch Size: 10, Epochs: 100
Test Accuracy: 0.3021
Execution Time: 1236.24 seconds

Batch Size: 10, Epochs: 50
Test Accuracy: 0.2982
Execution Time: 602.06 seconds

Batch Size: 10, Epochs: 10
Test Accuracy: 0.2984
Execution Time: 122.82 seconds

Batch Size: 100, Epochs: 100
Test Accuracy: 0.3281
Execution Time: 136.75 seconds

Batch Size: 100, Epochs: 50
Test Accuracy: 0.3284
Execution Time: 69.26 seconds

Batch Size: 100, Epochs: 10
Test Accuracy: 0.3305
Execution Time: 13.85 seconds

Results saved to mnist_nn_results.xlsx


**Description Of Code**


**1. Data Preprocessing**

   11. The MNIST dataset is loaded and split into training (x_train, y_train) and test (x_test, y_test) sets.
   12. Images are reshaped from (28,28) to (784,) and normalized to values between 0 and 1.
   13. Labels are converted to one-hot encoded vectors for categorical classification.


**2. Model Initialization**

   21. Weights (W1, W2) and biases (b1, b2) of both layers are initialized from a random normal distribution with a small standard deviation (0.1), using a fixed random seed (42).


**3. Forward Propagation (model())**

   31. Layer 1: Z1 = X × W1 + b1, followed by ReLU(Z1).
   32. Output Layer: Z2 = A1 × W2 + b2, which gives logits (pre-softmax scores).


**4. Loss Calculation (loss_fn())**

   41. Computes categorical cross-entropy loss between predicted logits and true labels.


**5. Training Step (train_step())**

   51. Uses Gradient Tape to compute gradients and updates weights via Adam optimizer.


**6. Training Loop**

   61. Iterates over multiple epochs, updating weights and tracking loss and accuracy.
   62. Runs for different batch size-epoch configurations, as listed below:
     (10, 100), (10, 50), (10, 10)
     (100, 100), (100, 50), (100, 10)
     (1, 100), (1, 50), (1, 10)


**7. Performance Evaluation**

   71. Computes training accuracy at each epoch.
   72. Computes test accuracy after training completion.
   73. Generates confusion matrices to visualize prediction errors.
   74. Saves loss and accuracy curves for better visualization.


**8. Saving Results to Excel**

   81. Stores results such as batch size, epochs, train loss, train accuracy, test accuracy, and execution time in an Excel file (mnist_nn_results.xlsx).
   82. Loss and accuracy plots are saved as images and embedded into the Excel file.

**Observations from Results:**

1. Best Train & Test Accuracy: 32.9% (Batch Size: 100, Epochs: 100).
2. Training with Batch Size = 1 took more time but did not improve accuracy.
3. Increasing epochs improves accuracy slightly, but beyond 50 epochs, improvements diminish.
4. Execution time is much lower for larger batch sizes.
5. Test accuracy remains around 32%, indicating an issue with learning stability.

**My Comments:**

1. Reducing the learning rate (e.g., to 0.01 or 0.001) can improve accuracy.
2. Increasing the number of hidden layers may improve model accuracy.
3. In the current model, using a batch size of 1 is not necessary; it only increases the training time.