In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

In [None]:
# Load MNIST and prepare the image arrays for the CNNs defined further down.
print("Loading dataset...")
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Keep only the first 30,000 training images (and their labels).
N_TRAIN_SAMPLES = 30000
X_train = X_train[:N_TRAIN_SAMPLES]
y_train = y_train[:N_TRAIN_SAMPLES]

# Scale pixel values from [0, 255] to [0, 1].
# Cast to float32 *before* dividing: dividing the raw integer images by 255.0
# first would materialise a full float64 copy that is thrown away immediately
# by the cast. The resulting arrays are float32 either way.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Add a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1), as Conv2D expects.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

Loading dataset...


In [None]:
# Hold out 20% of the training images as a validation set.
# `stratify=y_train` keeps the per-digit class proportions the same in the
# training and validation parts; `random_state` fixes the shuffle.
# NOTE: this cell rebinds X_train / y_train from themselves, so it is not
# idempotent. Re-run the data-loading cell before re-running this one,
# otherwise the already-reduced training set is split a second time.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

print("Training set:", X_train.shape, y_train.shape)
print("Validation set:", X_val.shape, y_val.shape)
print("Test set:", X_test.shape, y_test.shape)

Training set: (24000, 28, 28, 1) (24000,)
Validation set: (6000, 28, 28, 1) (6000,)
Test set: (10000, 28, 28, 1) (10000,)


In [None]:
# Baseline CNN: two conv/max-pool stages followed by a small dense classifier.
print("Building baseline CNN model...")

# Seed the Python, NumPy and backend RNGs so weight initialisation and batch
# shuffling are repeatable between runs (some GPU kernels may still introduce
# small run-to-run differences).
keras.utils.set_random_seed(42)

model = keras.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which is the form Keras recommends
    # for Sequential models.
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One output per digit class.
    layers.Dense(10, activation='softmax')
])

# Compile model.
# The labels are 1-D arrays of class ids (not one-hot vectors), hence the
# "sparse" variant of categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train model; validation metrics are computed on X_val / y_val after each epoch.
print("Training baseline model...")
training = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

# Evaluate model on the held-out test set.
print("Evaluating model...")
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

Building baseline CNN model...
Training baseline model...
Epoch 1/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 36ms/step - accuracy: 0.8404 - loss: 0.4897 - val_accuracy: 0.9740 - val_loss: 0.0914
Epoch 2/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 36ms/step - accuracy: 0.9797 - loss: 0.0653 - val_accuracy: 0.9793 - val_loss: 0.0714
Epoch 3/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 35ms/step - accuracy: 0.9872 - loss: 0.0373 - val_accuracy: 0.9850 - val_loss: 0.0596
Epoch 4/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 35ms/step - accuracy: 0.9903 - loss: 0.0303 - val_accuracy: 0.9820 - val_loss: 0.0658
Epoch 5/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 35ms/step - accuracy: 0.9936 - loss: 0.0189 - val_accuracy: 0.9857 - val_loss: 0.0584
Epoch 6/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 33ms/step - accuracy: 0.9958 - loss: 0.0141 

In [None]:
# Data augmentation
# Random rotations (up to 10 degrees), zooms (up to 10%) and horizontal /
# vertical shifts (up to 10% of the image size) are applied on the fly to each
# batch drawn from this generator.
print("Applying data augmentation...")
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
# No `datagen.fit(X_train)` call here: fitting only computes dataset statistics
# for featurewise_center, featurewise_std_normalization or zca_whitening, none
# of which are enabled, so the call would just copy the whole training array.



Applying data augmentation...


In [None]:
# Improved CNN model with dropout and batch normalization
# Two blocks of (Conv -> BatchNorm) x 2 -> MaxPool -> Dropout, followed by a
# dense head that is also regularised with BatchNorm and Dropout.
print("Building improved CNN model...")

# Seed the Python, NumPy and backend RNGs so weight initialisation and dropout
# are repeatable between runs (some GPU kernels may still introduce small
# run-to-run differences).
keras.utils.set_random_seed(42)

model = keras.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which is the form Keras recommends
    # for Sequential models.
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),
    layers.Dropout(0.4),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),
    layers.Dropout(0.4),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    # One output per digit class.
    layers.Dense(10, activation='softmax')
])

optimizer = Adam(learning_rate=0.001)
# Halve the learning rate whenever val_loss has not improved for 3 epochs.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# NOTE(review): run the train/validation split cell immediately before this
# one. The recorded log below shows 469 steps per epoch at batch_size=64,
# which matches 30,000 samples rather than the 24,000 left after the split,
# so that run presumably trained on an unsplit X_train that overlaps X_val.
print("Training improved model...")
history = model.fit(
    # Augmented batches are generated on the fly; the seed makes the shuffling
    # and the random transforms repeatable.
    datagen.flow(X_train, y_train, batch_size=64, seed=42),
    validation_data=(X_val, y_val),
    epochs=15,
    callbacks=[lr_scheduler],
)

Building improved CNN model...
Training improved model...
Epoch 1/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 540ms/step - accuracy: 0.7276 - loss: 0.9277 - val_accuracy: 0.7527 - val_loss: 0.7924 - learning_rate: 0.0010
Epoch 2/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 499ms/step - accuracy: 0.9482 - loss: 0.1670 - val_accuracy: 0.9843 - val_loss: 0.0499 - learning_rate: 0.0010
Epoch 3/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 494ms/step - accuracy: 0.9609 - loss: 0.1189 - val_accuracy: 0.9880 - val_loss: 0.0389 - learning_rate: 0.0010
Epoch 4/15
[1m275/469[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m1:32[0m 477ms/step - accuracy: 0.9682 - loss: 0.1042