In [8]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

# Load MNIST dataset
# mnist.load_data() yields two (images, labels) pairs: the training split
# followed by the test split. The arrays are kept unmodified here; pixel
# scaling and label encoding happen in the next cell.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
# Normalise pixel intensities into the range [0, 1].
# NOTE: this cell rebinds x_train / x_test / y_train / y_test from their own
# previous values, so it must be run exactly once after the loading cell --
# re-running it would rescale and re-encode already processed arrays.
x_train, x_test = x_train / 255.0, x_test / 255.0

# One-hot encode the class labels.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Hold out the trailing 10% of the training samples as a validation set
# (plain positional split, no shuffling).
split_idx = int(len(x_train) * 0.9)
x_val, y_val = x_train[split_idx:], y_train[split_idx:]
x_train, y_train = x_train[:split_idx], y_train[:split_idx]

In [5]:
# Define MLP model architecture:
#   Flatten (28x28 image -> 784 vector) -> Dense(128, ReLU) -> Dense(10, softmax)
#
# The images are fed to the model as 2-D (28, 28) arrays: nothing in this
# notebook adds a channel axis. The declared input shape therefore has to be
# (28, 28); declaring (28, 28, 1) only works because Keras silently adjusts
# the rank of the incoming batch. The flattened width (784) is unchanged.
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    # One output unit per digit class; softmax turns them into probabilities.
    Dense(10, activation='softmax'),
])

# Compile the model.
# categorical_crossentropy matches the one-hot encoded labels produced by
# to_categorical; 'accuracy' is reported during fit() and by evaluate().
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Directory that receives the TensorBoard event files for this run.
# NOTE(review): this is a machine-specific absolute path; it has to be edited
# before the notebook can run anywhere else.
log_dir = "C:/Users/kstre_3ikvnbf/CS 4375/Project/MnistMLP"

# histogram_freq=1 asks the callback to record weight histograms every epoch.
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [None]:
# Fit the model for 30 epochs, validating on the held-out split after each
# epoch and streaming the training curves to TensorBoard via the callback.
model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_val, y_val),
    callbacks=[tensorboard_callback],
)

In [9]:
# Score the trained model on the test set; evaluate() returns the loss
# followed by the compiled 'accuracy' metric.
test_loss, test_acc = model.evaluate(x_test, y_test)

# Predicted class probabilities for every test image (one row per image).
y_pred = model.predict(x_test)

# Collapse each probability row to the index of its most likely class.
y_pred_classes = y_pred.argmax(axis=1)

# Decode the one-hot ground-truth labels back into integer class indices.
y_true = y_test.argmax(axis=1)



In [11]:
# Classification metrics on the test set. Precision, recall and F1 are
# macro-averaged, i.e. the unweighted mean of the per-class scores.
conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred_classes)
precision = precision_score(
    y_true=y_true, y_pred=y_pred_classes, average='macro'
)
recall = recall_score(
    y_true=y_true, y_pred=y_pred_classes, average='macro'
)
f1score = f1_score(
    y_true=y_true, y_pred=y_pred_classes, average='macro'
)
# AU-ROC is computed from the predicted probabilities (not the hard class
# labels) against the one-hot encoded targets.
auc_roc = roc_auc_score(y_true=y_test, y_score=y_pred, multi_class='ovr')

# Report everything, including the loss/accuracy from model.evaluate().
print("Test Loss:", test_loss)
print("Accuracy:", test_acc)
print("Confusion Matrix:\n", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1score)
print("AU-ROC:", auc_roc)

Test Loss: 0.12663140892982483
Accuracy: 0.9797999858856201
Confusion Matrix:
 [[ 971    1    1    1    1    1    1    1    2    0]
 [   0 1130    2    1    0    1    0    1    0    0]
 [   1    2 1000    7    3    0    1    6   12    0]
 [   2    0    5  988    0    4    0    3    5    3]
 [   2    1    1    1  962    2    2    0    1   10]
 [   1    0    0    7    0  871    3    0    6    4]
 [   4    2    2    1    5    6  934    0    4    0]
 [   1    5   10    2    2    0    0  997    4    7]
 [   2    0    3    4    1    2    1    4  953    4]
 [   1    2    0    3    6    2    0    2    1  992]]
Precision: 0.9797625985896017
Recall: 0.9796095136599968
F1-score: 0.9796623621896933
AU-ROC: 0.9996014383923276


In [17]:
# Write the final test-set metrics into the same TensorBoard log directory
# used during training. Each metric is a single scalar point at step 0.
test_writer = tf.summary.create_file_writer(log_dir)
with test_writer.as_default():
    tf.summary.scalar(name='Accuracy', data=test_acc, step=0)
    tf.summary.scalar(name='Precision', data=precision, step=0)
    tf.summary.scalar(name='Recall', data=recall, step=0)
    tf.summary.scalar(name='F1-score', data=f1score, step=0)
    tf.summary.scalar(name='AU-ROC', data=auc_roc, step=0)

In [12]:
# Shell command (run in a terminal, not in this notebook) to compare the logged runs side by side in TensorBoard:
# tensorboard --logdir_spec=FashionMLP:"C:/Users/kstre_3ikvnbf/CS 4375/Project/FashionMLP",FashionLeNet:"C:/Users/kstre_3ikvnbf/CS 4375/Project/FashionLeNet",MnistLeNet:"C:/Users/kstre_3ikvnbf/CS 4375/Project/MnistLeNet",MnistMLP:"C:/Users/kstre_3ikvnbf/CS 4375/Project/MnistMLP"