# MNIST Image Classification - MLOps Pipeline

This notebook walks through a complete ML pipeline for handwritten digit classification on MNIST:
1. Data Loading and Preprocessing
2. Model Architecture
3. Model Training (with early stopping and checkpointing callbacks)
4. Model Evaluation with Multiple Metrics
5. Confusion Matrix Visualization
6. Sample Predictions Visualization
7. Model Saving

In [None]:
# Import required libraries
# Standard library
import os
import sys

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.datasets import mnist

# Add parent directory to path.
# Guarded so that re-running this cell does not keep appending duplicates.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Fix the random seeds so weight initialisation, dropout and batch shuffling
# are repeatable across "Restart Kernel -> Run All".
# keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow.
SEED = 42
keras.utils.set_random_seed(SEED)

# Set style for visualizations
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Record library versions next to the results for provenance
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

## 1. Data Loading and Preprocessing

In [None]:
# Load the MNIST train/test splits (images and labels)
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes and how many distinct labels the training set has
n_classes = np.unique(y_train).size
print(f"Training set shape: {x_train.shape}")
print(f"Test set shape: {x_test.shape}")
print(f"Number of classes: {n_classes}")

In [None]:
# Show the first ten training images in a 2x5 grid, each titled with its label
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
fig.suptitle('Sample MNIST Images', fontsize=16)

# zip stops after the ten axes, so only the first ten samples are drawn
for ax, image, label in zip(axes.ravel(), x_train, y_train):
    ax.imshow(image, cmap='gray')
    ax.set_title(f'Label: {label}')
    ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# Bar chart of how many training samples each digit class has
unique, counts = np.unique(y_train, return_counts=True)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(unique, counts, color='steelblue', edgecolor='black')
ax.set_xlabel('Digit Class', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Training Data Class Distribution', fontsize=14)
ax.set_xticks(unique)
ax.grid(axis='y', alpha=0.3)
plt.show()

# Same information as text: absolute count and share of the training set
total_samples = len(y_train)
print("Class distribution:")
for digit, count in zip(unique, counts):
    print(f"Digit {digit}: {count} samples ({count/total_samples*100:.2f}%)")

In [None]:
# Preprocess data
# Reload the raw arrays first so this cell is idempotent. It rebinds
# x_train / y_train / x_test, so without a fresh load a second run would
# rescale already-normalized pixels and carve another validation slice off the
# (already reduced) training set. Keras is expected to reuse its local download
# cache here, so the reload should not trigger a new download.
(x_train_raw, y_train_raw), (x_test_raw, y_test) = mnist.load_data()

# Normalize pixel values to [0, 1] and reshape to include a channel dimension
x_train_full = x_train_raw.astype('float32').reshape(-1, 28, 28, 1) / 255.0
x_test = x_test_raw.astype('float32').reshape(-1, 28, 28, 1) / 255.0

# Create validation split: the first `val_split` fraction of the training
# data is held out for validation, the remainder is used for training.
val_split = 0.1
val_size = int(len(x_train_full) * val_split)

x_val, y_val = x_train_full[:val_size], y_train_raw[:val_size]
x_train, y_train = x_train_full[val_size:], y_train_raw[val_size:]

# Sanity checks: nothing lost in the split, images and labels stay aligned,
# and the scaling produced values inside [0, 1].
assert len(x_train) + len(x_val) == len(x_train_full)
assert len(x_train) == len(y_train) and len(x_val) == len(y_val)
assert x_train.min() >= 0.0 and x_train.max() <= 1.0

print(f"Training set: {x_train.shape}")
print(f"Validation set: {x_val.shape}")
print(f"Test set: {x_test.shape}")

## 2. Model Architecture

In [None]:
# Create CNN model
# The input shape is declared with an explicit keras.Input entry instead of an
# `input_shape=` argument on the first Conv2D layer; newer Keras releases warn
# about the latter when used inside a Sequential model.
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),

    # First convolutional block
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),

    # Second convolutional block
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),

    # Third convolutional block (no pooling)
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    layers.BatchNormalization(),

    # Flatten and dense layers, with dropout before each dense layer
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    # One softmax output per digit class
    layers.Dense(10, activation='softmax')
])

# Compile model
# The sparse loss takes integer class labels directly, so the labels are not
# one-hot encoded anywhere in this notebook.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display model architecture
model.summary()

## 3. Model Training

In [None]:
# Training hyperparameters, named so they are easy to find and tune
BATCH_SIZE = 128
MAX_EPOCHS = 20
EARLY_STOPPING_PATIENCE = 5

# Define callbacks
# Stop when validation loss has not improved for EARLY_STOPPING_PATIENCE
# epochs, and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
    verbose=1
)

# Make sure the checkpoint directory exists before training starts. The
# model-saving cell creates it too, but that runs only after training, and not
# every Keras version is guaranteed to create a missing directory itself.
checkpoint_path = '../models/cnn_model.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the epoch with the best validation accuracy on disk.
# NOTE(review): this monitors val_accuracy while early stopping monitors
# val_loss, so the two callbacks may select different epochs - confirm intent.
model_checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

# Train model
history = model.fit(
    x_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=MAX_EPOCHS,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

In [None]:
# Plot training history: accuracy on the left panel, loss on the right panel
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Each entry: (axes to draw on, key in history.history, display name)
panels = (
    (ax1, 'accuracy', 'Accuracy'),
    (ax2, 'loss', 'Loss'),
)

for panel, metric_key, metric_name in panels:
    # Training curve and its validation counterpart (stored under 'val_<key>')
    panel.plot(history.history[metric_key], label=f'Training {metric_name}', linewidth=2)
    panel.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}', linewidth=2)
    panel.set_xlabel('Epoch', fontsize=12)
    panel.set_ylabel(metric_name, fontsize=12)
    panel.set_title(f'Model {metric_name} Over Epochs', fontsize=14)
    panel.legend()
    panel.grid(alpha=0.3)

fig.tight_layout()
plt.show()

## 4. Model Evaluation with Multiple Metrics

In [None]:
# Predict class probabilities for the test set; the predicted digit is the
# index of the highest probability in each row
y_pred_probs = model.predict(x_test)
y_pred = y_pred_probs.argmax(axis=1)

# Overall accuracy, plus precision / recall / F1 with average='weighted'
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the four metrics as a small aligned table
banner = "=" * 50
metric_rows = (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
)

print(banner)
print("MODEL EVALUATION METRICS")
print(banner)
for metric_label, metric_value in metric_rows:
    # Left-pad the label to 10 characters so the numbers line up
    print(f"{metric_label:<10} {metric_value:.4f} ({metric_value*100:.2f}%)")
print(banner)

In [None]:
# Per-class metrics
# classification_report is imported in the notebook's top import cell with the
# other sklearn metrics, so the notebook's dependencies are declared in one place.
# The report is a pre-formatted text table, hence print() rather than display.
print("\nPer-Class Classification Report:")
print(classification_report(y_test, y_pred, target_names=[str(i) for i in range(10)]))

## 5. Confusion Matrix Visualization

In [None]:
# Compute confusion matrix from the true and predicted test labels
cm = confusion_matrix(y_test, y_pred)

# Normalized confusion matrix: each row is divided by its own row sum
cm_normalized = cm.astype(float) / cm.sum(axis=1, keepdims=True)

# Draw both matrices with the same layout; only the data, the annotation
# format, the colormap and the title differ.
digit_labels = range(10)
heatmap_specs = [
    (cm, 'd', 'Blues', 'Confusion Matrix - MNIST Classification'),
    (cm_normalized, '.2f', 'RdYlGn', 'Normalized Confusion Matrix'),
]

for matrix, number_format, colormap, heading in heatmap_specs:
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, annot=True, fmt=number_format, cmap=colormap, cbar=True,
                xticklabels=digit_labels, yticklabels=digit_labels)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.title(heading, fontsize=14)
    plt.tight_layout()
    plt.show()

## 6. Sample Predictions Visualization

In [None]:
def _plot_prediction_grid(indices, title, images, true_labels, pred_labels,
                          pred_probs, title_color=None):
    """Draw up to ten predictions in a 2x5 grid.

    Args:
        indices: Positions (into the arrays below) of the samples to show;
            only the first ten are drawn.
        title: Figure-level title.
        images: Image array; each selected image is reshaped to 28x28.
        true_labels: True label for every sample.
        pred_labels: Predicted label for every sample.
        pred_probs: Per-class probabilities for every sample, indexed as
            pred_probs[sample][class].
        title_color: Optional color for the per-image titles; the matplotlib
            default is used when None.
    """
    fig, axes = plt.subplots(2, 5, figsize=(14, 6))
    fig.suptitle(title, fontsize=16)

    # Only pass `color` when one was requested, so the default stays untouched
    title_kwargs = {} if title_color is None else {'color': title_color}

    for i, ax in enumerate(axes.flat):
        # Fewer than ten samples available: leave the remaining cells blank
        if i < len(indices):
            idx = indices[i]
            ax.imshow(images[idx].reshape(28, 28), cmap='gray')
            ax.set_title(
                f'True: {true_labels[idx]}, Pred: {pred_labels[idx]}\n'
                f'Conf: {pred_probs[idx][pred_labels[idx]]:.2f}',
                **title_kwargs
            )
        ax.axis('off')

    plt.tight_layout()
    plt.show()


# Visualize correct and incorrect predictions
correct_indices = np.where(y_pred == y_test)[0]
incorrect_indices = np.where(y_pred != y_test)[0]

# Plot correct predictions (guarded the same way as the incorrect ones, so a
# model with fewer than ten hits no longer raises IndexError)
if len(correct_indices) > 0:
    _plot_prediction_grid(correct_indices, 'Correct Predictions',
                          x_test, y_test, y_pred, y_pred_probs)
else:
    print("No correct predictions found!")

# Plot incorrect predictions, with red titles to set them apart
if len(incorrect_indices) > 0:
    _plot_prediction_grid(incorrect_indices, 'Incorrect Predictions',
                          x_test, y_test, y_pred, y_pred_probs,
                          title_color='red')
else:
    print("No incorrect predictions found!")

## 7. Model Saving

In [None]:
# Save final model, creating the target directory first if it is missing
model_path = '../models/cnn_model.h5'
model_dir = os.path.dirname(model_path)
os.makedirs(model_dir, exist_ok=True)
model.save(model_path)
print(f"Model saved to {model_path}")

# Round-trip check: the saved file must load back into a model object
loaded_model = keras.models.load_model(model_path)
print("Model loaded successfully!")

# Run the reloaded model on the first five test images and print its
# predicted digits next to the true labels for a visual comparison
sample_images = x_test[:5]
test_pred = loaded_model.predict(sample_images)
predicted_digits = test_pred.argmax(axis=1)

print("\nTest predictions from loaded model:")
print(predicted_digits)
print("True labels:")
print(y_test[:5])

## Summary

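This notebook demonstrated an end-to-end training pipeline for MNIST digit classification:

1. **Data Preprocessing**: Loaded the MNIST dataset, normalized pixel values to [0, 1], added a channel dimension, and held out 10% of the training data for validation
2. **Model Architecture**: Built a CNN with 3 convolutional blocks followed by a dense classifier with dropout
3. **Training**: Used early stopping (on validation loss) and model checkpointing (on validation accuracy)
4. **Evaluation**: Calculated 4 key metrics on the test set (Accuracy, and weighted Precision, Recall, F1), plus a per-class report
5. **Visualization**: Plotted training curves, raw and normalized confusion matrices, and sample correct/incorrect predictions
6. **Model Persistence**: Saved the model as an `.h5` file and verified that it reloads and predicts

The test-set performance is reported in Section 4; once those numbers meet your requirements, the saved model is ready to be handed off for deployment.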