In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

In [None]:
# Colab file-upload helper: calls `files.upload()` from the `google.colab`
# package and keeps whatever it returns in `uploaded`.
# NOTE(review): `uploaded` is not read by any cell visible in this part of the
# notebook (the MNIST data below comes from `keras.datasets`), and this cell has
# no execution count — confirm whether it is still needed before a Run All.
from google.colab import files
uploaded = files.upload()

In [7]:

# Load MNIST through Keras (downloads the archive on first use).
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Train on a subset only: keep the first N_TRAIN_SAMPLES training examples.
# The test set is left untouched.
N_TRAIN_SAMPLES = 30_000
X_train = X_train[:N_TRAIN_SAMPLES]
y_train = y_train[:N_TRAIN_SAMPLES]

# Scale pixel values to [0, 1]. Casting to float32 *before* dividing keeps the
# whole computation in float32, so no double-precision temporary of the full
# image array is allocated and then thrown away.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Append a trailing channel axis: (N, H, W) -> (N, H, W, 1), the layout the
# Conv2D layers below are given as their input shape.
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
# Hold out 20% of the training subset for validation, with a fixed seed so the
# partition is the same on every fresh run.
# NOTE: this rebinds X_train / y_train to the reduced training portion, so
# running this cell again without re-running the loading cell splits the
# already-reduced data a second time.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Report the (images, labels) shapes of each partition.
for split_label, split_X, split_y in (
    ("Training set:", X_train, y_train),
    ("Validation set:", X_val, y_val),
    ("Test set:", X_test, y_test),
):
    print(split_label, split_X.shape, split_y.shape)

Training set: (24000, 28, 28, 1) (24000,)
Validation set: (6000, 28, 28, 1) (6000,)
Test set: (10000, 28, 28, 1) (10000,)


In [9]:
# Baseline CNN: two Conv2D + MaxPooling2D stages followed by a dense
# classifier head with a 10-way softmax output.
print("Building baseline CNN model")
model = keras.Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer; this is the form Keras recommends for Sequential models.
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Compile model
# Labels are plain integer class ids (not one-hot), hence the sparse loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train model
# `training` keeps the History object returned by fit().
print("Training baseline model")
training = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
)

# Evaluate model
# Final check on the held-out test set, which is not used during training.
print("Evaluating model")
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

Building baseline CNN model
Training baseline model
Epoch 1/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.8633 - loss: 0.4618 - val_accuracy: 0.9683 - val_loss: 0.1099
Epoch 2/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 32ms/step - accuracy: 0.9781 - loss: 0.0668 - val_accuracy: 0.9803 - val_loss: 0.0707
Epoch 3/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 31ms/step - accuracy: 0.9875 - loss: 0.0381 - val_accuracy: 0.9830 - val_loss: 0.0677
Epoch 4/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 31ms/step - accuracy: 0.9900 - loss: 0.0288 - val_accuracy: 0.9852 - val_loss: 0.0560
Epoch 5/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 31ms/step - accuracy: 0.9933 - loss: 0.0197 - val_accuracy: 0.9813 - val_loss: 0.0710
Epoch 6/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 32ms/step - accuracy: 0.9942 - loss: 0.0174 - val_

In [10]:
# Data augmentation
# Random geometric jitter applied on the fly to each training batch.
print("Applying data augmentation")
datagen = ImageDataGenerator(
    rotation_range=10,       # rotate by up to +/-10 degrees
    zoom_range=0.1,          # zoom factor drawn from [0.9, 1.1]
    width_shift_range=0.1,   # horizontal shift, up to 10% of the width
    height_shift_range=0.1   # vertical shift, up to 10% of the height
)
# No `datagen.fit(X_train)` here: fit() only computes dataset statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled, so calling it would just copy the training array.



Applying data augmentation


In [11]:
# Improved CNN model with dropout and batch normalization
# Two blocks of (Conv2D -> BatchNorm) x2 -> MaxPool -> Dropout, then a dense
# head. Note that this rebinds `model`, replacing any model previously stored
# under that name.
print("Building improved CNN model")
model = keras.Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer; this is the form Keras recommends for Sequential models.
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),
    layers.Dropout(0.4),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),
    layers.Dropout(0.4),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

optimizer = Adam(learning_rate=0.001)
# Halve the learning rate whenever val_loss has not improved for 3 epochs.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train on augmented batches from `datagen`; validation uses the
# un-augmented hold-out arrays.
print("Training improved model")
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    validation_data=(X_val, y_val),
    epochs=15,
    callbacks=[lr_scheduler],
)

Building improved CNN model
Training improved model
Epoch 1/15


  self._warn_if_super_not_called()


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 570ms/step - accuracy: 0.6937 - loss: 1.0499 - val_accuracy: 0.2898 - val_loss: 2.7913 - learning_rate: 0.0010
Epoch 2/15
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 588ms/step - accuracy: 0.9359 - loss: 0.1976 - val_accuracy: 0.9827 - val_loss: 0.0563 - learning_rate: 0.0010
Epoch 3/15
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 550ms/step - accuracy: 0.9582 - loss: 0.1306 - val_accuracy: 0.9852 - val_loss: 0.0487 - learning_rate: 0.0010
Epoch 4/15
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 579ms/step - accuracy: 0.9687 - loss: 0.1064 - val_accuracy: 0.9857 - val_loss: 0.0430 - learning_rate: 0.0010
Epoch 5/15
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 570ms/step - accuracy: 0.9699 - loss: 0.0956 - val_accuracy: 0.9867 - val_loss: 0.0440 - learning_rate: 0.0010
Epoch 6/15
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m