# In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split

# -----------------------------------------------------------------------------
# Functions to simulate the final preprocessing pipeline
# -----------------------------------------------------------------------------

def simulate_final_preprocessing(img_uint8, desired=64, padding=10):
    """
    Mimic the digit_recognition preprocessing on a single grayscale image.

    Steps:
      - Binarize with Otsu's threshold, inverted (THRESH_BINARY_INV + OTSU).
      - Locate the bounding box of the non-zero pixels of the binarized image.
      - Grow the box by `padding` pixels per side, clamped to the image bounds.
      - Scale the crop (aspect ratio preserved) so it fits inside a
        `desired` x `desired` black canvas and centre it there (letterbox).

    Args:
        img_uint8: 2-D uint8 grayscale image (callers in this file pass 64x64).
        desired: Side length, in pixels, of the square output image.
        padding: Margin, in pixels, added around the bounding box before cropping.

    Returns:
        A (desired, desired) uint8 array. It is all zeros when thresholding
        leaves no non-zero pixel.

    NOTE(review): because of the inversion, dark input pixels become the
    non-zero foreground. If the inputs are bright-digit-on-dark (as MNIST-style
    data usually is), the background is what ends up non-zero and the bounding
    box will span most of the frame -- confirm the polarity matches what
    digit_recognition actually receives.
    """
    # Otsu picks the threshold automatically; the explicit 0 is ignored.
    _, thresh = cv2.threshold(img_uint8, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Bounding box of the non-zero (white) pixels.
    coords = cv2.findNonZero(thresh)
    if coords is None:
        # Nothing survived thresholding: return a blank canvas.
        return np.zeros((desired, desired), dtype=np.uint8)

    x, y, w, h = cv2.boundingRect(coords)
    img_h, img_w = thresh.shape

    # Pad the box, clamping so the crop never leaves the image.
    x1 = max(x - padding, 0)
    y1 = max(y - padding, 0)
    x2 = min(x + w + padding, img_w)
    y2 = min(y + h + padding, img_h)
    digit_roi = thresh[y1:y2, x1:x2]

    # Uniform scale factor so the longer side of the crop becomes `desired`.
    h_roi, w_roi = digit_roi.shape
    scale = min(desired / w_roi, desired / h_roi)

    # Truncation (not rounding) is kept to stay in step with the pipeline being
    # mimicked, but a very thin crop could otherwise truncate to 0 pixels,
    # which cv2.resize rejects -- keep at least one pixel per axis.
    new_w = max(1, int(w_roi * scale))
    new_h = max(1, int(h_roi * scale))
    resized_digit = cv2.resize(digit_roi, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Paste the resized crop into the centre of a black square canvas.
    letterboxed = np.zeros((desired, desired), dtype=np.uint8)
    x_offset = (desired - new_w) // 2
    y_offset = (desired - new_h) // 2
    letterboxed[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_digit

    return letterboxed

def final_preprocessing(image):
    """
    Run one normalized image through the simulated digit_recognition pipeline.

    Steps:
      1. Drop singleton dimensions and convert from [0, 1] floats to uint8
         [0, 255].
      2. Apply thresholding, padded cropping and letterboxing via
         simulate_final_preprocessing (64x64 output, 10 px padding).
      3. Convert back to float32 in [0, 1] and append a channel axis.

    Args:
        image: Normalized float image; the docstring contract is 64x64x1 with
            values in [0, 1].

    Returns:
        float32 array of shape (64, 64, 1) with values in [0, 1].
    """
    # Round to the nearest level and clip before casting: a bare astype(uint8)
    # truncates (e.g. 254.999 -> 254) and wraps values that fall slightly
    # outside [0, 255] instead of saturating them.
    scaled = np.squeeze(image) * 255.0
    img_uint8 = np.clip(np.rint(scaled), 0, 255).astype(np.uint8)

    # Simulate the preprocessing as in digit_recognition.py
    processed = simulate_final_preprocessing(img_uint8, desired=64, padding=10)

    # Normalize back to [0, 1] and restore the trailing channel dimension.
    processed = processed.astype(np.float32) / 255.0
    return np.expand_dims(processed, axis=-1)

def batch_final_preprocessing(X):
    """
    Apply final_preprocessing to each image of a batch.

    Args:
        X: Array of images indexed along the first axis
           (assumed shape: (N, 64, 64, 1)).

    Returns:
        A new array with the same shape and dtype as X holding the processed
        images; X itself is not modified.
    """
    # Pre-allocate the result so it mirrors the input's shape and dtype.
    out = np.zeros_like(X)
    for idx, sample in enumerate(X):
        out[idx] = final_preprocessing(sample)
    return out

# -----------------------------------------------------------------------------
# Load and Prepare the Data (Kaggle's Digit Recognizer / MNIST)
# -----------------------------------------------------------------------------

# Seed NumPy's global RNG (only NumPy is seeded here).
np.random.seed(42)

# Adjust the file path to your train.csv file
data_path = "drive/MyDrive/digit_recognizer/train.csv"
train_df = pd.read_csv(data_path)

# The "label" column is the target; every other column is a pixel value.
y = train_df["label"]
X = train_df.drop(columns="label")

# Scale pixels by 1/255, then turn each flat row into a 28x28x1 image.
X = (X / 255.0).to_numpy().reshape(-1, 28, 28, 1)

# -----------------------------------------------------------------------------
# Resize to 64x64
# -----------------------------------------------------------------------------
def resize_to_64(images):
    """
    Resize every single-channel image of a batch to 64x64.

    Args:
        images: Array indexed as (sample, row, col, channel); only channel 0
            of each sample is read.

    Returns:
        float32 array of shape (N, 64, 64, 1), where N is images.shape[0].
    """
    target_size = (64, 64)
    upscaled = np.zeros((images.shape[0], 64, 64, 1), dtype=np.float32)
    for idx, sample in enumerate(images):
        # NOTE(review): INTER_AREA is kept as-is; OpenCV documents it mainly
        # for shrinking images -- confirm it is the intended choice for
        # enlarging 28x28 inputs.
        upscaled[idx, :, :, 0] = cv2.resize(
            sample[:, :, 0], target_size, interpolation=cv2.INTER_AREA
        )
    return upscaled

X_64 = resize_to_64(X)
print("After resizing, X_64 shape:", X_64.shape)  # Expected: (num_samples, 64, 64, 1)

# One-hot encode the ten digit classes (independent of the image pipeline).
y_cat = to_categorical(y, num_classes=10)

# -----------------------------------------------------------------------------
# Simulate the final preprocessing pipeline on the training images
# -----------------------------------------------------------------------------
print("Applying final preprocessing to training images to simulate scanning...")
X_final = batch_final_preprocessing(X_64)
print("X_final shape:", X_final.shape)

# -----------------------------------------------------------------------------
# Train/Validation Split
# -----------------------------------------------------------------------------
# Hold out 10% of the samples for validation; the fixed random_state keeps the
# split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_final,
    y_cat,
    test_size=0.1,
    random_state=2,
)
print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)

# -----------------------------------------------------------------------------
# Build an Improved CNN Model
# -----------------------------------------------------------------------------
# (We add BatchNormalization layers to help learning and use similar dropout.)
model = Sequential([
    # Block 1: two 5x5 convolutions (32 filters), each followed by batch norm,
    # then 2x2 max-pooling and dropout.
    Conv2D(32, kernel_size=(5, 5), padding='same', activation='relu', input_shape=(64, 64, 1)),
    BatchNormalization(),
    Conv2D(32, kernel_size=(5, 5), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Block 2: two 3x3 convolutions (64 filters), same layout as block 1.
    Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: 256-unit dense layer, then a 10-way softmax.
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# RMSprop optimizer (adjust the learning rate as needed); the loss matches the
# one-hot encoded labels.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# -----------------------------------------------------------------------------
# Data Augmentation
# -----------------------------------------------------------------------------
# Here we keep augmentation parameters; you might further tweak these
# Random geometric augmentation applied on the fly to training batches.
# Flips stay disabled: a mirrored digit is not a valid sample of its class.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=5,  # optional: add slight shear
    horizontal_flip=False,
    vertical_flip=False
)
# No datagen.fit(X_train) call: fitting is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is set here, so the call would only spend time and
# memory on the training array without changing the generator.

# -----------------------------------------------------------------------------
# Callbacks: Learning Rate Reduction and Early Stopping
# -----------------------------------------------------------------------------
# Halve the learning rate after 3 epochs without a better validation accuracy,
# never going below 1e-5.
lr_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

# Stop after 8 epochs without a better validation accuracy and roll the model
# back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=8,
    restore_best_weights=True,
    verbose=1,
)

# -----------------------------------------------------------------------------
# Train the Model
# -----------------------------------------------------------------------------
epochs = 50  # upper bound; the early-stopping callback may end training sooner
batch_size = 86

# Augmented batches drawn from the training split.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
# Floor division: a trailing partial batch is not counted as a step.
steps_per_epoch = X_train.shape[0] // batch_size

history = model.fit(
    train_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=[lr_reduction, early_stopping],
    verbose=2,
)

# -----------------------------------------------------------------------------
# Plot Training History
# -----------------------------------------------------------------------------
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

# One row per panel: axis, training key, validation key, legend labels, title.
panels = (
    (ax1, 'loss', 'val_loss', 'Training Loss', 'Validation Loss', "Loss"),
    (ax2, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', "Accuracy"),
)
for axis, train_key, val_key, train_label, val_label, title in panels:
    # Training curve in blue, validation curve in red.
    axis.plot(history.history[train_key], label=train_label, color='blue')
    axis.plot(history.history[val_key], label=val_label, color='red')
    axis.set_title(title)
    axis.legend()

plt.tight_layout()
plt.show()

# -----------------------------------------------------------------------------
# Save the Model
# -----------------------------------------------------------------------------
# Destination for the trained model (path is relative to the working directory).
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format --
# confirm against the installed Keras version.
save_path = "drive/MyDrive/handwritten_digit_cnn_improved.h5"
model.save(save_path)
# Report where the model was written.
print(f"Model saved as '{save_path}'")