# MNIST Handwritten Digit Classifier

This Jupyter notebook is a complete, end-to-end implementation of a deep neural network (DNN) that classifies handwritten digits from the MNIST dataset using **TensorFlow / Keras**.

**What it includes:**
- Data loading and preprocessing
- Model definition (Flatten → Dense(300) → Dense(100) → Dense(10))
- Training with a held-out validation set
- Training/validation accuracy and loss plots
- Test-set evaluation, confusion matrix, classification report, and sample predictions
- Model save / load instructions


In [None]:
# Imports
import tensorflow as tf  # MNIST loader, model building and training
import numpy as np  # argmax over predicted class probabilities
import matplotlib.pyplot as plt  # image grids, training curves, heatmap figure
import pandas as pd  # training history as a DataFrame
import seaborn as sns  # confusion-matrix heatmap
from sklearn.metrics import confusion_matrix, classification_report  # test-set metrics

# Report which TensorFlow version this notebook is running against
print('TensorFlow version:', tf.__version__)


In [None]:
# Load the MNIST train/test splits through the Keras dataset helper
train_split, test_split = tf.keras.datasets.mnist.load_data()
mnist_X_train_full, mnist_y_train_full = train_split
mnist_X_test, mnist_y_test = test_split

# The first `n_valid` training samples are held out for validation; the
# remainder is used for training. Dividing by 255.0 rescales the images
# into the 0-1 range.
n_valid = 5000
X_valid = mnist_X_train_full[:n_valid] / 255.0
y_valid = mnist_y_train_full[:n_valid]
X_train = mnist_X_train_full[n_valid:] / 255.0
y_train = mnist_y_train_full[n_valid:]
X_test = mnist_X_test / 255.0

# Report the shape of each image split
for split_name, images in (
    ('Train', X_train),
    ('Validation', X_valid),
    ('Test', X_test),
):
    print(f'{split_name} shape:', images.shape)


In [None]:
# Preview the first 16 training images in a 4x4 grid, each titled with its label
fig, axes = plt.subplots(4, 4, figsize=(8, 8))
for ax, digit, label in zip(axes.flat, X_train, y_train):
    ax.imshow(digit, cmap='binary')
    ax.set_title(str(label))
    ax.axis('off')
fig.suptitle('Sample MNIST images')
plt.show()


In [None]:
# Build the model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input, Flatten, Dense

# Assemble the network one layer at a time:
# 28x28 input -> flatten -> Dense(300, relu) -> Dense(100, relu) -> Dense(10, softmax)
model = Sequential()
model.add(Input(shape=(28, 28)))
model.add(Flatten())
for hidden_units in (300, 100):
    model.add(Dense(hidden_units, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Print the layer-by-layer summary of the assembled model
model.summary()


In [None]:
# Training configuration
EPOCHS = 30
batch_size = 32

# Labels are plain integer class ids, hence the sparse cross-entropy loss
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split and track metrics on the validation split each epoch
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=EPOCHS,
    validation_data=(X_valid, y_valid),
)

# Keep the per-epoch metrics as a DataFrame and preview the first rows
hist_df = pd.DataFrame(history.history)
hist_df.head()


In [None]:
# Plot accuracy and loss (training vs. validation) side by side
hist = pd.DataFrame(history.history)
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))
hist[['accuracy', 'val_accuracy']].plot(title='Accuracy', ax=acc_ax)
hist[['loss', 'val_loss']].plot(title='Loss', ax=loss_ax)
fig.tight_layout()
plt.show()


In [None]:
# Score the trained model on the test split; the result unpacks as (loss, accuracy)
test_metrics = model.evaluate(X_test, mnist_y_test, verbose=2)
test_loss, test_acc = test_metrics

# Accuracy is reported first, preceded by a blank line, then the loss
for label, value in (('\nTest accuracy:', test_acc), ('Test loss:', test_loss)):
    print(label, value)


In [None]:
# Predicted class for each test image = index of the highest output probability
test_probs = model.predict(X_test)
y_test_pred = test_probs.argmax(axis=1)

# Confusion matrix with true labels first and predicted labels second
cm = confusion_matrix(mnist_y_test, y_test_pred)

plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d')
heatmap_ax.set(xlabel='Predicted', ylabel='Actual', title='Confusion Matrix')
plt.show()

# Text report from scikit-learn's classification_report
report = classification_report(mnist_y_test, y_test_pred)
print('\nClassification Report:\n')
print(report)


In [None]:
# Take the first nine test images together with their true labels
X_new, actual = X_test[:9], mnist_y_test[:9]

# Predicted digit = class with the highest predicted probability
y_pred_probs = model.predict(X_new)
y_pred = y_pred_probs.argmax(axis=1)

# 3x3 grid: each panel shows the image with its predicted and actual label
fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for ax, img, pred, act in zip(axes.flat, X_new, y_pred, actual):
    ax.imshow(img, cmap='binary')
    ax.set_title(f'Predicted: {pred} | Actual: {act}')
    ax.axis('off')
fig.suptitle('Sample predictions')
plt.show()


In [None]:
# Persist the trained model to disk (the .h5 suffix selects the HDF5 format).
# To restore it later: tf.keras.models.load_model('mnist_simple_clf.h5')
model_path = 'mnist_simple_clf.h5'
model.save(model_path)
print(f'Model saved to {model_path}')
