# CNN Workshop: MNIST Digit Classification
## Teaching CNNs, Overfitting, and Data Augmentation

Welcome! In this notebook, you'll:
1. Build and train a CNN on MNIST data
2. Learn about overfitting
3. Experiment with data augmentation
4. Test the model with your own handwriting!

**Run each cell in order by pressing Shift+Enter**

In [None]:
import subprocess
import sys

# Install compatible versions for TensorFlow.js export.
# pip is invoked through the kernel's own interpreter (sys.executable) so the
# packages are installed into the environment this notebook imports from,
# rather than whichever `pip` executable happens to be first on PATH.
subprocess.check_call([
    sys.executable, '-m', 'pip', 'install', '-q',
    'tensorflow==2.15.0', 'tensorflowjs==4.11.0',
])

In [None]:
# Runtime imports: tensorflow/keras, plotting, and helpers
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# ImageDataGenerator moved under keras.preprocessing for TF 2.x
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt

# Small helper to plot training histories used later in the notebook
def plot_history(history, title=None):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: A Keras ``History`` object (its ``.history`` mapping is used)
            or a plain dict-like mapping metric names to per-epoch values.
            Recognised keys are 'loss', 'val_loss', 'accuracy' and
            'val_accuracy'; keys that are absent are simply skipped.
        title: Optional text appended to each panel title as ' - <title>'.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    hist = history.history if hasattr(history, 'history') else history
    suffix = f' - {title}' if title else ''

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))
    panels = (
        (loss_ax, 'Loss', (('loss', 'train'), ('val_loss', 'val'))),
        (acc_ax, 'Accuracy', (('accuracy', 'train'), ('val_accuracy', 'val'))),
    )
    for ax, panel_name, series in panels:
        drew_any = False
        for key, label in series:
            if key not in hist:
                continue
            values = hist[key]
            # Derive the epoch axis from this series' own length. Previously it
            # came from 'loss' only, so a history without 'loss' produced an
            # empty x-range and a shape-mismatch error when plotting accuracy.
            ax.plot(range(1, len(values) + 1), values, label=label)
            drew_any = True
        ax.set_title(panel_name + suffix)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(panel_name)
        # Only add a legend when something was drawn; an empty legend just
        # triggers a matplotlib warning.
        if drew_any:
            ax.legend()
    fig.tight_layout()
    plt.show()

print('✓ Imports and helper functions loaded')

In [None]:
# Python version check for Colab / runtimes
import sys
from IPython.display import display, Markdown

py = sys.version_info
# NOTE(review): the install cell pins tensorflow==2.15.0 — confirm that release
# actually supports Python 3.8; if it does not, this check (and the advice
# printed below) should target the Python versions that pin supports.
if py.major == 3 and py.minor == 8:
    display(Markdown(f'**Python {py.major}.{py.minor} detected — good to go.**'))
else:
    display(Markdown(f'**Warning:** Notebook running on Python {py.major}.{py.minor}. Colab may not be using Python 3.8.'))
    print('Options:')
    print('  1) Use a Colab runtime pre-configured with Python 3.8 (recommended for full kernel compatibility).')
    print('  2) Attempt to install Python 3.8 in this VM (experimental — may not switch the notebook kernel).')
    # The leading newline must be an escape sequence: a raw line break inside a
    # single-quoted string is a SyntaxError and stops the whole cell from running.
    print('\nTo try option (2) programmatically, call install_py38() below — this will install python3.8 and pip and may require a runtime restart.')

def install_py38():
    """Experimental: install python3.8 and pip into the Colab VM.

    Runs, in order: ``apt update``, ``apt install`` of python3.8 (plus the
    venv and dev packages), a ``curl`` download of get-pip.py to
    /tmp/get-pip.py, and ``python3.8 /tmp/get-pip.py``.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: if any of the commands exits non-zero
            (each one is run with ``subprocess.check_call``).
    """
    import subprocess

    cmds = [
        ['apt', 'update', '-y'],
        ['apt', 'install', '-y', 'python3.8', 'python3.8-venv', 'python3.8-dev'],
    ]
    for c in cmds:
        print('Running:', ' '.join(c))
        subprocess.check_call(c)
    # Install pip for python3.8
    # NOTE(review): confirm the generic get-pip.py still supports Python 3.8;
    # older interpreters may need a version-specific bootstrap script.
    subprocess.check_call(['curl', '-sS', 'https://bootstrap.pypa.io/get-pip.py', '-o', '/tmp/get-pip.py'])
    subprocess.check_call(['python3.8', '/tmp/get-pip.py'])
    # Leading newline written as an escape sequence; a raw line break inside
    # the quoted string is a SyntaxError.
    print('\nInstalled python3.8 and pip into the VM.')
    print('Note: the notebook kernel may still be the original Python. To use python3.8, either run training as a separate process (python3.8 train_script.py) or restart and reconfigure the runtime manually.')

print('Python check cell ready — call install_py38() if you want to attempt the experimental install.')

## Part 1: Load and Explore MNIST Data

In [None]:
# Fetch MNIST through the dataset loader bundled with Keras
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

print(f"Training images: {x_train.shape[0]}")
print(f"Test images: {x_test.shape[0]}")
print(f"Image size: {x_train.shape[1]} x {x_train.shape[2]} pixels")

# Preview the first ten training digits in a 2 x 5 grid, one axis per digit
preview_fig, preview_axes = plt.subplots(2, 5, figsize=(10, 4))
for i, ax in enumerate(preview_axes.flat):
    ax.imshow(x_train[i], cmap='gray')
    ax.set_title(f"Label: {y_train[i]}")
    ax.axis('off')
preview_fig.tight_layout()
plt.show()

## Part 2: Preprocess the Data

In [None]:
# Normalize pixel values to 0-1 range (originally 0-255) and add the trailing
# channel dimension the CNN expects: (N, 28, 28) -> (N, 28, 28, 1).
# Each array is built in a single assignment so re-running the cell always
# starts from the raw x_train / x_test arrays.
x_train_normalized = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test_normalized = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

# Convert labels to categorical (one-hot encoding) over the 10 digit classes
y_train_cat = keras.utils.to_categorical(y_train, 10)
y_test_cat = keras.utils.to_categorical(y_test, 10)

print("✓ Data preprocessed and ready!")
print(f"Training data shape: {x_train_normalized.shape}")
print(f"Training labels shape: {y_train_cat.shape}")

## Part 3: Build a CNN Model

In [None]:
def create_cnn_model():
    """Build and compile the workshop's CNN digit classifier.

    The network is two Conv2D + MaxPooling2D stages followed by a
    Flatten / Dense(128) / Dropout(0.5) / Dense(10, softmax) head, compiled
    with the Adam optimizer, categorical cross-entropy loss and an accuracy
    metric.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Reset Keras backend state so every call starts from a clean slate
    tf.keras.backend.clear_session()

    # Feature extractor: two convolution + pooling stages
    feature_layers = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]

    # Classifier head; dropout is there to help against overfitting,
    # and the final layer has one unit per digit class (0-9)
    head_layers = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ]

    network = keras.Sequential(feature_layers + head_layers)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Create the baseline model; `model` is the instance trained and exported
# by the baseline cells further down
model = create_cnn_model()

# Print the layer-by-layer architecture and parameter counts
model.summary()

## Part 4: Train the Baseline Model (No Augmentation)

In [None]:
# Split data for validation
# The first 10,000 training images are held out for validation; the remaining
# images are used for training.
x_val = x_train_normalized[:10000]
y_val = y_train_cat[:10000]
x_train_subset = x_train_normalized[10000:]
y_train_subset = y_train_cat[10000:]

print("Training baseline model...")
print("Watch the training and validation accuracy!\n")

# Train the model (verbose=1 prints per-epoch train/validation metrics)
history_baseline = model.fit(
    x_train_subset, y_train_subset,
    epochs=10,
    batch_size=128,
    validation_data=(x_val, y_val),
    verbose=1
)

print("\n✓ Baseline model trained!")

In [None]:
# Export baseline model for TensorFlow.js
# `tfjs` is imported here and reused by the later export cells.
import tensorflowjs as tfjs

print("Exporting baseline model to TensorFlow.js format...")
tfjs.converters.save_keras_model(model, 'tfjs_baseline')
print("✓ Baseline model exported to tfjs_baseline/")

## Part 6: Train Overfitting 🚨

Let's intentionally overfit by:
- Using a very small dataset (1000 images)
- Training for many epochs (50)

Watch training accuracy go up while validation accuracy plateaus or drops!

In [None]:
# Export overfitted model for TensorFlow.js
# NOTE: this cell sits above the cell that creates and trains `model_overfit`,
# so on a fresh kernel (Restart & Run All) the model does not exist yet.
# Skip with a hint instead of stopping the run with a NameError.
if 'model_overfit' in globals():
    print("Exporting overfitted model to TensorFlow.js format...")
    tfjs.converters.save_keras_model(model_overfit, 'tfjs_overfitted')
    print("✓ Overfitted model exported to tfjs_overfitted/")
else:
    print("model_overfit is not defined yet - run the overfitting training cell below, then re-run this cell.")

In [None]:
# Create a new model for overfitting experiment
model_overfit = create_cnn_model()

# Use only 1000 training images (way too small!)
# The slice starts at 10000, so it does not overlap the validation images
# taken from the first 10,000 samples.
x_train_tiny = x_train_normalized[10000:11000]
y_train_tiny = y_train_cat[10000:11000]

print("🧪 OVERFITTING EXPERIMENT")
print(f"Training on only {len(x_train_tiny)} images for 50 epochs...")
print("Watch how training and validation accuracy diverge!\n")

# Train with small dataset for many epochs
history_overfit = model_overfit.fit(
    x_train_tiny, y_train_tiny,
    epochs=50,
    batch_size=32,
    validation_data=(x_val, y_val),
    verbose=0  # Silent training, we'll see the plot
)

print("\n✓ Overfitting experiment complete!")
print("\nNotice in the plot:")
print("- Training accuracy keeps improving (model memorizes)")
print("- Validation accuracy plateaus or gets worse (doesn't generalize)")
print("- This is OVERFITTING!")

plot_history(history_overfit, "Overfitting Demonstration - Small Dataset")

# Export right after training. The standalone export cell for this model sits
# earlier in the notebook than this cell, so exporting here is what makes a
# top-to-bottom run actually produce the tfjs_overfitted/ folder.
import tensorflowjs as tfjs

print("Exporting overfitted model to TensorFlow.js format...")
tfjs.converters.save_keras_model(model_overfit, 'tfjs_overfitted')
print("✓ Overfitted model exported to tfjs_overfitted/")

## Download Models for Web App

After running all three export cells in this notebook (baseline, overfitted, and augmented), download the model folders and place them in your Next.js app:
1. In the Files panel (left sidebar), find the three folders: `tfjs_baseline`, `tfjs_overfitted`, `tfjs_augmented`
2. Download each folder (right-click > Download)
3. Extract and place them in your Next.js `public/models/` directory

Each folder contains:
- `model.json` - Model architecture
- `group1-shard1of1.bin` - Model weights

In [None]:
# Export augmented model for TensorFlow.js
# NOTE: this cell sits above the cell that creates and trains `model_augmented`,
# so on a fresh kernel (Restart & Run All) the model does not exist yet.
# Skip with a hint instead of stopping the run with a NameError.
if 'model_augmented' in globals():
    print("Exporting augmented model to TensorFlow.js format...")
    tfjs.converters.save_keras_model(model_augmented, 'tfjs_augmented')
    print("✓ Augmented model exported to tfjs_augmented/")
else:
    print("model_augmented is not defined yet - run the data augmentation training cell below, then re-run this cell.")

## Part 8: Train with Data Augmentation

In [None]:
# Create augmented data generator: small random rotations, shifts and zooms
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)

# Create a fresh model
model_augmented = create_cnn_model()

print("Training model WITH data augmentation...")
print("This creates more varied training examples!\n")

# One batch size shared by the generator and the steps-per-epoch calculation,
# so the two cannot drift apart if the value is changed.
batch_size = 128

# Train with augmented data
history_augmented = model_augmented.fit(
    datagen.flow(x_train_subset, y_train_subset, batch_size=batch_size),
    epochs=10,
    validation_data=(x_val, y_val),
    steps_per_epoch=len(x_train_subset) // batch_size,
    verbose=1
)

print("\n✓ Augmented model trained!")
plot_history(history_augmented, "Model with Data Augmentation")

# Export right after training. The standalone export cell for this model sits
# earlier in the notebook than this cell, so exporting here is what makes a
# top-to-bottom run actually produce the tfjs_augmented/ folder.
import tensorflowjs as tfjs

print("Exporting augmented model to TensorFlow.js format...")
tfjs.converters.save_keras_model(model_augmented, 'tfjs_augmented')
print("✓ Augmented model exported to tfjs_augmented/")