## 1. Load and Preprocess the MNIST Dataset

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# Fetch MNIST as (images, labels) pairs for the training and test splits, then
# print the raw training-image array shape as a sanity check before reshaping.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape)

# Prepare both splits the same way: cast pixels to float32, flatten every
# 28x28 image into a 784-element row (the Dense layers used later take 1D
# input), and divide by 255 so the values land in [0, 1] (assuming the raw
# pixels are 8-bit intensities).
X_train, X_test = (
    images.astype("float32").reshape(len(images), 28 * 28) / 255.0
    for images in (X_train, X_test)
)

print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

# Preview the first ten training digits in a 2x5 grid. Each flattened row is
# restored to 28x28 for display and titled with its label so images and labels
# can be checked against each other by eye.
_, preview_axes = plt.subplots(2, 5, figsize=(10, 5))
for position, ax in enumerate(preview_axes.flat):
    ax.imshow(X_train[position].reshape(28, 28), cmap='gray')
    ax.set_title(f"Label: {y_train[position]}")
    ax.axis('off')
plt.show()


## Build the Neural Network Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

# Part 2 baseline: a fully connected classifier over flattened 28x28 images.
model = Sequential([
    Input(shape=(28*28,)),  # one 784-value vector per image
    Dense(8, activation='relu'),  # single hidden layer with 8 units
    Dense(10, activation='softmax')  # one output probability per digit class
])

# Print the layer-by-layer architecture for a quick sanity check.
model.summary()


## Train the Neural Network

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Configure training: Adam with its default settings, sparse categorical
# cross-entropy (the labels are integer class ids, not one-hot vectors), and
# accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Write the model to disk only when validation accuracy improves, so that
# 'best_model.keras' always holds the best epoch seen so far.
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# Train for 10 epochs in mini-batches of 32, validating after every epoch.
# NOTE: the test split doubles as the validation set here, so the checkpoint
# is selected using test data.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint],
)

# Learning curves, side by side: accuracy on the left, loss on the right.
# A widening gap between the training and validation curves hints at
# overfitting; two flat, poor curves hint at underfitting.
_, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

accuracy_ax.plot(history.history['accuracy'], label='Training Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Training and Validation Accuracy')
accuracy_ax.legend()

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

plt.show()


## Evaluate the Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns


# Reload the checkpoint written during training, i.e. the epoch with the
# highest validation accuracy.
best_model = tf.keras.models.load_model('best_model.keras')

# Loss and accuracy of that checkpoint on the test split. The same split was
# used as validation data when the checkpoint was selected, so this number is
# not a fully independent estimate of generalization.
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

# predict() returns one score per class for every sample; the arg-max along
# the class axis turns those scores into a single predicted digit.
y_pred = best_model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
print(classification_report(y_test, y_pred_classes))

# Rows are true digits, columns are predicted digits; off-diagonal cells count
# the mistakes for each (true, predicted) pair.
cm = confusion_matrix(y_test, y_pred_classes)

digit_labels = [str(digit) for digit in range(10)]
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap='Blues',
    xticklabels=digit_labels,
    yticklabels=digit_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()


## Visualize Predictions

In [None]:
# Draw ten distinct test indices at random to spot-check individual
# predictions.
indices = np.random.choice(len(X_test), 10, replace=False)

# Show each sampled digit with its true and predicted label in a 2x5 grid.
_, sample_axes = plt.subplots(2, 5, figsize=(10, 5))
for ax, idx in zip(sample_axes.flat, indices):
    ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    ax.set_title(f"True: {y_test[idx]}\nPred: {y_pred_classes[idx]}")
    ax.axis('off')
plt.tight_layout()
plt.show()

# Positions in the test set where the predicted digit differs from the label.
incorrect_indices = np.flatnonzero(y_test != y_pred_classes)

# Plot up to the first ten mistakes to look for recurring confusions.
plt.figure(figsize=(10, 5))
for slot, idx in enumerate(incorrect_indices[:10], start=1):
    plt.subplot(2, 5, slot)
    plt.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    plt.title(f"True: {y_test[idx]}\nPred: {y_pred_classes[idx]}")
    plt.axis('off')
plt.tight_layout()
plt.show()


## Part 3: Experimenting with a Larger Hidden Layer

In [None]:
# Building ------------------------------------------------------------------
# Part 3 model: same layout as the Part 2 network, but the hidden layer is
# widened to 128 units.
layers_128 = [
    Input(shape=(28*28,)),  # flattened 784-pixel image
    Dense(128, activation='relu'),  # hidden layer, 128 units
    Dense(10, activation='softmax'),  # one probability per digit class
]
model_128 = Sequential(layers_128)

model_128.summary()




# Training ------------------------------------------------------------------
# Same optimizer, loss and metric as the Part 2 model.
model_128.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Persist the 128-unit model only on epochs where validation accuracy improves.
checkpoint_128 = ModelCheckpoint(
    'best_model_128.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# 10 epochs, batches of 32; the test split is reused as the validation set.
history_128 = model_128.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_128],
)

# Plot training and validation accuracy (left) and loss (right) per epoch for
# the 128-neuron model.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history_128.history['accuracy'], label='Training Accuracy')
plt.plot(history_128.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy for 128 Neurons')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history_128.history['loss'], label='Training Loss')
plt.plot(history_128.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss for 128 Neurons')
# Without a legend the two loss curves cannot be told apart.
plt.legend()
# Render the figure here rather than leaving it pending until a later
# plt.show() call.
plt.show()



# Evaluation ----------------------------------------------------------------
# Reload the best checkpoint saved while training the 128-neuron model.
best_model_128 = tf.keras.models.load_model('best_model_128.keras')

# Loss and accuracy on the test split (the same split that was used as
# validation data when the checkpoint was selected).
test_loss_128, test_accuracy_128 = best_model_128.evaluate(X_test, y_test)
print(f"Test Accuracy for 128-Neuron Model: {test_accuracy_128:.4f}")

# Per-class scores -> predicted digit via arg-max over the class axis.
y_pred_128 = best_model_128.predict(X_test)
y_pred_classes_128 = y_pred_128.argmax(axis=1)

# Precision / recall / F1 per digit.
print(classification_report(y_test, y_pred_classes_128))

# Confusion matrix: rows are true digits, columns are predicted digits.
cm_128 = confusion_matrix(y_test, y_pred_classes_128)

digit_labels = [str(digit) for digit in range(10)]
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_128,
    annot=True,
    fmt="d",
    cmap='Blues',
    xticklabels=digit_labels,
    yticklabels=digit_labels,
)
plt.title('Confusion Matrix for 128 Neurons')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()





# Visualization -------------------------------------------------------------
# Ten distinct, randomly chosen test samples with the 128-neuron model's
# prediction next to the true label.
indices_128 = np.random.choice(len(X_test), 10, replace=False)

_, sample_axes_128 = plt.subplots(2, 5, figsize=(10, 5))
for ax, idx in zip(sample_axes_128.flat, indices_128):
    ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    ax.set_title(f"True: {y_test[idx]}\nPred: {y_pred_classes_128[idx]}")
    ax.axis('off')
plt.tight_layout()
plt.show()

# Test-set positions the 128-neuron model got wrong.
incorrect_indices_128 = np.flatnonzero(y_test != y_pred_classes_128)

# Show up to the first ten of those mistakes.
plt.figure(figsize=(10, 5))
for slot, idx in enumerate(incorrect_indices_128[:10], start=1):
    plt.subplot(2, 5, slot)
    plt.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    plt.title(f"True: {y_test[idx]}\nPred: {y_pred_classes_128[idx]}")
    plt.axis('off')
plt.tight_layout()
plt.show()

## Part 4: Custom Neural Network For 99% Average F1 Score

In [None]:
# Building ------------------------------------------------------------------

# Part 4 model: four ReLU hidden layers of decreasing width, each with an L2
# weight penalty of 0.001. Dropout follows the first three hidden layers; the
# last hidden layer feeds the softmax output directly.
custom_4_layers = [Input(shape=(28*28,))]  # flattened 784-pixel image
for units, drop_rate in ((1024, 0.4), (512, 0.4), (256, 0.3), (128, None)):
    custom_4_layers.append(
        Dense(
            units,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(0.001),
        )
    )
    if drop_rate is not None:
        custom_4_layers.append(Dropout(drop_rate))
custom_4_layers.append(Dense(10, activation='softmax'))  # one probability per digit

model_custom_4 = Sequential(custom_4_layers)

model_custom_4.summary()

# Training ------------------------------------------------------------------

# Same optimizer, loss and metric as the earlier parts.
model_custom_4.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Save the custom model only on epochs where validation accuracy improves.
checkpoint_custom_4 = ModelCheckpoint(
    'best_model_custom_4.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# Compared with Parts 2 and 3 this run uses more epochs (15 instead of 10) and
# a larger batch (64 instead of 32). The test split is again reused as the
# validation set.
history_custom_4 = model_custom_4.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=15,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_custom_4],
)

# Learning curves for the Part 4 model: accuracy on the left, loss on the
# right, training vs. validation in each panel.
_, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

accuracy_ax.plot(history_custom_4.history['accuracy'], label='Training Accuracy')
accuracy_ax.plot(history_custom_4.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Training and Validation Accuracy for Part 4')
accuracy_ax.legend()

loss_ax.plot(history_custom_4.history['loss'], label='Training Loss')
loss_ax.plot(history_custom_4.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss for Part 4')
loss_ax.legend()

plt.show()





# Evaluating ----------------------------------------------------------------

# Reload the best checkpoint saved while training the Part 4 model.
best_model_custom_4 = tf.keras.models.load_model('best_model_custom_4.keras')

# Loss and accuracy on the test split (the same split that was used as
# validation data when the checkpoint was selected).
test_loss_custom_4, test_accuracy_custom_4 = best_model_custom_4.evaluate(X_test, y_test)
print(f"Test Accuracy for Part 4: {test_accuracy_custom_4:.4f}")

# Per-class scores -> predicted digit via arg-max over the class axis.
y_pred_custom_4 = best_model_custom_4.predict(X_test)
y_pred_classes_custom_4 = y_pred_custom_4.argmax(axis=1)

# Precision / recall / F1 per digit.
print(classification_report(y_test, y_pred_classes_custom_4))

# Confusion matrix: rows are true digits, columns are predicted digits.
cm_custom_4 = confusion_matrix(y_test, y_pred_classes_custom_4)

digit_labels = [str(digit) for digit in range(10)]
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_custom_4,
    annot=True,
    fmt="d",
    cmap='Blues',
    xticklabels=digit_labels,
    yticklabels=digit_labels,
)
plt.title('Confusion Matrix for Part 4')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()



# Visualizing ---------------------------------------------------------------

# Ten distinct, randomly chosen test samples with the Part 4 model's
# prediction next to the true label.
indices_custom_4 = np.random.choice(len(X_test), 10, replace=False)

_, sample_axes_custom_4 = plt.subplots(2, 5, figsize=(10, 5))
for ax, idx in zip(sample_axes_custom_4.flat, indices_custom_4):
    ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    ax.set_title(f"True: {y_test[idx]}\nPred: {y_pred_classes_custom_4[idx]}")
    ax.axis('off')
plt.tight_layout()
plt.show()

# Test-set positions the Part 4 model got wrong.
incorrect_indices_custom_4 = np.flatnonzero(y_test != y_pred_classes_custom_4)

# Show up to the first ten of those mistakes.
plt.figure(figsize=(10, 5))
for slot, idx in enumerate(incorrect_indices_custom_4[:10], start=1):
    plt.subplot(2, 5, slot)
    plt.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    plt.title(f"True: {y_test[idx]}\nPred: {y_pred_classes_custom_4[idx]}")
    plt.axis('off')
plt.tight_layout()
plt.show()
