### Assignment 11: Convolutional Neural Networks, Transfer Learning and Data Augmentation
### Kiarash Gheisari Pour 402102302



In this assignment we use the MNIST dataset to train a CNN that classifies handwritten digits from 0 to 9.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import KFold
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [None]:
# 1. Load and preprocess MNIST dataset
# Only the training split is kept; the second tuple returned by load_data() is discarded.
(raw_images, raw_labels), _ = mnist.load_data()

# Convert to float32, divide by 255 and append a trailing channel axis
# -> shape: (n_samples, 28, 28, 1)
x_train_full = (raw_images.astype('float32') / 255.0)[..., np.newaxis]

# One-hot encode the labels into 10 classes.
y_train_full = to_categorical(raw_labels, num_classes=10)

First, let's define a CNN with three convolutional layers and train it on our data.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import KFold
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# 1. Load and preprocess MNIST dataset
(x_train_full, y_train_full), _ = mnist.load_data()

# Float conversion, division by 255 and channel-axis insertion in a single
# expression; resulting shape: (n_samples, 28, 28, 1)
x_train_full = np.expand_dims(x_train_full.astype('float32') / 255.0, axis=-1)

# One-hot encoding over the 10 digit classes
y_train_full = to_categorical(y_train_full, num_classes=10)

# 2. Define a function to create the CNN model
def create_model():
    """Build and compile the baseline CNN for 28x28x1 digit images.

    Architecture: Conv(32) -> MaxPool -> Conv(64) -> MaxPool -> Conv(64)
    -> Flatten -> Dense(64) -> Dense(10, softmax). Every convolution uses a
    3x3 kernel with Keras' default padding and stride.

    Returns:
        A compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Declare the input shape with an explicit Input layer. Passing
        # `input_shape=` to the first Conv2D is deprecated in Keras 3 and
        # emits a UserWarning each time a model is built.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

    # categorical_crossentropy matches the one-hot labels produced by to_categorical.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# 3. Perform 3-fold cross-validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)
acc_per_fold = []

# enumerate(..., start=1) supplies the 1-based fold number used in the log lines.
for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
    print(f"\nTraining on Fold {fold_no}...")

    # Slice out this fold's training and held-out partitions.
    x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
    x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

    # A freshly built model per fold, so no weights carry over between folds.
    model = create_model()
    model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=5,
        batch_size=64,
        verbose=1,
    )

    # evaluate() returns [loss, accuracy]; index 1 is the accuracy metric.
    scores = model.evaluate(x_val, y_val, verbose=0)
    print(f"Fold {fold_no} Accuracy: {scores[1]*100:.2f}%")
    acc_per_fold.append(scores[1] * 100)

# Print average accuracy
print(f"\nAverage accuracy across 3 folds: {np.mean(acc_per_fold):.2f}%")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

Training on Fold 1...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 63ms/step - accuracy: 0.8386 - loss: 0.5173 - val_accuracy: 0.9717 - val_loss: 0.0915
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 63ms/step - accuracy: 0.9794 - loss: 0.0661 - val_accuracy: 0.9820 - val_loss: 0.0588
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 62ms/step - accuracy: 0.9865 - loss: 0.0429 - val_accuracy: 0.9847 - val_loss: 0.0498
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 61ms/step - accuracy: 0.9891 - loss: 0.0337 - val_accuracy: 0.9868 - val_loss: 0.0418
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 69ms/step - accuracy: 0.9918 - loss: 0.0243 - val_accuracy: 0.9880 - val_loss: 0.0388
Fold 1 Accuracy: 98.80%

Training on Fold 2...
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 65ms/step - accuracy: 0.8269 - loss: 0.5611 - val_accuracy: 0

Alright, everything seems to be in place, so let's get to the tasks.

### Task 1 : Tuning the kernel size

In [None]:
# Function to create CNN model with variable kernel size
def create_model(kernel_size):
    """Build and compile the CNN with a configurable convolution kernel size.

    All three convolutions use Keras' default ('valid') padding, so each one
    shrinks the feature map. For kernels larger than 3x3 the map reaching the
    third convolution can be smaller than the kernel (a 5x5 kernel meets a
    4x4 map), in which case building the model raises a ValueError. That
    limitation is kept here on purpose: the next cell demonstrates the
    failure and then introduces padding.

    Args:
        kernel_size: Kernel size passed to every Conv2D layer, e.g. (3, 3).

    Returns:
        A compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, kernel_size, activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, kernel_size, activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, kernel_size, activation='relu'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Define kernel sizes to test
kernel_sizes = [(3, 3), (5, 5), (7, 7)]

# Perform 3-fold cross-validation for each kernel size
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for kernel_size in kernel_sizes:
    print(f"\n🔍 Testing kernel size: {kernel_size}")
    acc_per_fold = []

    # The fold counter comes from enumerate, starting at 1 for display.
    for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
        print(f"\nFold {fold_no}:")

        # This fold's training and validation partitions.
        x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
        x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

        # New model for every (kernel size, fold) combination.
        model = create_model(kernel_size)
        model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=3,
            batch_size=64,
            verbose=0,
        )

        # evaluate() returns [loss, accuracy].
        scores = model.evaluate(x_val, y_val, verbose=0)
        print(f"Validation accuracy: {scores[1] * 100:.2f}%")
        acc_per_fold.append(scores[1] * 100)

    print(f"\n✅ Average accuracy for kernel size {kernel_size}: {np.mean(acc_per_fold):.2f}%")



🔍 Testing kernel size: (3, 3)

Fold 1:
Validation accuracy: 98.59%

Fold 2:
Validation accuracy: 98.37%

Fold 3:
Validation accuracy: 98.39%

✅ Average accuracy for kernel size (3, 3): 98.45%

🔍 Testing kernel size: (5, 5)

Fold 1:


ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 5 from 4 for '{{node sequential_6_1/conv2d_20_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_6_1/max_pooling2d_13_1/MaxPool2d, sequential_6_1/conv2d_20_1/convolution/ReadVariableOp)' with input shapes: [64,4,4,64], [5,5,64,64].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(64, 4, 4, 64), dtype=float32)

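Oops, we hit an error when increasing the kernel size: without padding, every convolution and pooling step shrinks the feature map, and by the third convolution it is only 4×4, which is smaller than the 5×5 kernel.<br>
Let's add padding to our Conv2D layers so that the model still builds when we increase the kernel size.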

In [None]:
# Function to create CNN model with variable kernel size
def create_model(kernel_size):
    """Build and compile the CNN with a configurable kernel size and 'same' padding.

    Every convolution is padded with padding='same', so the kernel never
    shrinks the feature map; only the two 2x2 pooling layers reduce it
    (28 -> 14 -> 7). The model therefore builds for any kernel size, not
    only those whose unpadded output happens to stay large enough.

    Args:
        kernel_size: Kernel size passed to every Conv2D layer, e.g. (5, 5).

    Returns:
        A compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, kernel_size, padding='same', activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, kernel_size, padding='same', activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, kernel_size, padding='same', activation='relu'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Define kernel sizes to test
kernel_sizes = [(5, 5), (7, 7)]

# Perform 3-fold cross-validation for each kernel size
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for kernel_size in kernel_sizes:
    print(f"\n🔍 Testing kernel size: {kernel_size}")
    acc_per_fold = []

    # 1-based fold numbering is supplied by enumerate.
    for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
        print(f"\nFold {fold_no}:")

        # Partition images and labels for this fold.
        x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
        x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

        # Train a fresh model silently for three epochs.
        model = create_model(kernel_size)
        model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=3,
            batch_size=64,
            verbose=0,
        )

        # scores is [loss, accuracy] on the held-out fold.
        scores = model.evaluate(x_val, y_val, verbose=0)
        print(f"Validation accuracy: {scores[1] * 100:.2f}%")
        acc_per_fold.append(scores[1] * 100)

    print(f"\n✅ Average accuracy for kernel size {kernel_size}: {np.mean(acc_per_fold):.2f}%")


🔍 Testing kernel size: (5, 5)

Fold 1:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Validation accuracy: 98.82%

Fold 2:
Validation accuracy: 98.27%

Fold 3:
Validation accuracy: 98.65%

✅ Average accuracy for kernel size (5, 5): 98.58%

🔍 Testing kernel size: (7, 7)

Fold 1:
Validation accuracy: 98.32%

Fold 2:
Validation accuracy: 98.22%

Fold 3:
Validation accuracy: 98.41%

✅ Average accuracy for kernel size (7, 7): 98.32%


### Task 2 : Tuning the stride for convolutional layers

In [None]:
# CNN model factory with tunable stride
def create_model(stride):
    """Build the three-convolution CNN with a configurable convolution stride.

    Args:
        stride: Value passed as `strides` to every Conv2D layer.

    Returns:
        An uncompiled Sequential model; the caller compiles it.
    """
    return models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), strides=stride, padding='same', activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), strides=stride, padding='same', activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), strides=stride, padding='same', activation='relu'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

# Define strides to try
stride_values = [1, 2, 3]

# Cross-validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for stride in stride_values:
    print(f"\n🔍 Testing stride = {stride}")
    acc_per_fold = []

    # enumerate provides the 1-based fold number used in the report line.
    for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
        x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
        x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

        # create_model returns an uncompiled network, so compile it here.
        model = create_model(stride)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=5,
            batch_size=64,
            verbose=0,
        )

        # scores is [loss, accuracy] on the held-out fold.
        scores = model.evaluate(x_val, y_val, verbose=0)
        print(f"Fold {fold_no} accuracy: {scores[1] * 100:.2f}%")
        acc_per_fold.append(scores[1] * 100)

    print(f"✅ Average accuracy for stride {stride}: {np.mean(acc_per_fold):.2f}%")



🔍 Testing stride = 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Fold 1 accuracy: 98.93%
Fold 2 accuracy: 98.61%
Fold 3 accuracy: 98.79%
✅ Average accuracy for stride 1: 98.78%

🔍 Testing stride = 2
Fold 1 accuracy: 97.68%
Fold 2 accuracy: 97.85%
Fold 3 accuracy: 97.55%
✅ Average accuracy for stride 2: 97.70%

🔍 Testing stride = 3
Fold 1 accuracy: 95.23%
Fold 2 accuracy: 95.82%
Fold 3 accuracy: 94.70%
✅ Average accuracy for stride 3: 95.25%


As expected, a lower stride gives us better accuracy, but we have to account for the considerably longer training time.

### Task 3 : Tuning the pooling size

In [None]:
def create_model(pool_size):
    """Build the three-convolution CNN with a configurable max-pooling window.

    Args:
        pool_size: Value passed as `pool_size` to both MaxPooling2D layers,
            e.g. (2, 2).

    Returns:
        An uncompiled Sequential model; the caller compiles it.
    """
    return models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=pool_size),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=pool_size),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

# Pooling sizes to try
pool_sizes = [(2, 2), (3, 3), (4, 4)]

# Cross-validation setup
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for pool_size in pool_sizes:
    print(f"\n🔍 Testing pooling size = {pool_size}")
    acc_per_fold = []

    # Fold numbers start at 1 and come straight from enumerate.
    for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
        x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
        x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

        # The factory returns an uncompiled model; compile before training.
        model = create_model(pool_size)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=3,
            batch_size=64,
            verbose=0,
        )

        # scores is [loss, accuracy] on the held-out fold.
        scores = model.evaluate(x_val, y_val, verbose=0)
        print(f"Fold {fold_no} accuracy: {scores[1] * 100:.2f}%")
        acc_per_fold.append(scores[1] * 100)

    print(f"✅ Average accuracy for pooling size {pool_size}: {np.mean(acc_per_fold):.2f}%")


🔍 Testing pooling size = (2, 2)
Fold 1 accuracy: 98.55%
Fold 2 accuracy: 98.80%
Fold 3 accuracy: 98.49%
✅ Average accuracy for pooling size (2, 2): 98.61%

🔍 Testing pooling size = (3, 3)
Fold 1 accuracy: 98.20%
Fold 2 accuracy: 98.50%
Fold 3 accuracy: 98.16%
✅ Average accuracy for pooling size (3, 3): 98.29%

🔍 Testing pooling size = (4, 4)
Fold 1 accuracy: 96.25%
Fold 2 accuracy: 95.50%
Fold 3 accuracy: 96.10%
✅ Average accuracy for pooling size (4, 4): 95.95%


It seems that 2×2 and 3×3 pooling don't differ much in performance, but 4×4 pooling shows a more visible performance drop.<br>
It's important to choose the pooling size based on the dimensions of the input data.

### Task 4 : Tuning the stride for pooling layers

In [None]:
# CNN model with tunable stride in MaxPooling
def create_model(pool_stride):
    """Build the three-convolution CNN with a configurable max-pooling stride.

    Both pooling layers use a fixed 2x2 window. Note that the first
    convolution uses Keras' default padding, while the second and third use
    padding='same'.

    Args:
        pool_stride: Value passed as `strides` to both MaxPooling2D layers.

    Returns:
        An uncompiled Sequential model; the caller compiles it.
    """
    return models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2), strides=pool_stride),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2), strides=pool_stride),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

# Strides to try in pooling layers
stride_options = [3, 4]

# 3-fold cross-validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for stride in stride_options:
    print(f"\n🔍 Testing pooling stride = {stride}")
    acc_per_fold = []

    # 1-based fold counter provided by enumerate.
    for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
        x_train, y_train = x_train_full[train_idx], y_train_full[train_idx]
        x_val, y_val = x_train_full[val_idx], y_train_full[val_idx]

        # Build with the candidate pooling stride, then compile.
        model = create_model(pool_stride=stride)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=3,
            batch_size=64,
            verbose=0,
        )

        # scores is [loss, accuracy] on the held-out fold.
        scores = model.evaluate(x_val, y_val, verbose=0)
        print(f"Fold {fold_no} accuracy: {scores[1] * 100:.2f}%")
        acc_per_fold.append(scores[1] * 100)

    print(f"✅ Average accuracy for pooling stride {stride}: {np.mean(acc_per_fold):.2f}%")



🔍 Testing pooling stride = 3
Fold 1 accuracy: 98.22%
Fold 2 accuracy: 98.19%
Fold 3 accuracy: 97.73%
✅ Average accuracy for pooling stride 3: 98.05%

🔍 Testing pooling stride = 4
Fold 1 accuracy: 97.54%
Fold 2 accuracy: 97.51%
Fold 3 accuracy: 97.63%
✅ Average accuracy for pooling stride 4: 97.56%


(Note that a pooling stride of 2 was already evaluated in the first task.)<br>
From the looks of it, even increasing the pooling stride to 4 doesn't compromise our performance that much.

### Task 5 : Perform data augmentation and train your model above using the ImageDataGenerator class

We'll create an instance of the ImageDataGenerator class and configure it with the augmentations we want.

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation setup: collect the augmentation options in one mapping
# and hand them to the generator in a single call.
augmentation_options = {
    'rotation_range': 10,        # Random rotation
    'width_shift_range': 0.1,    # Horizontal shift
    'height_shift_range': 0.1,   # Vertical shift
    'zoom_range': 0.1,           # Zoom
}
datagen = ImageDataGenerator(**augmentation_options)

# Define model
def create_model():
    """Build the three-convolution CNN used for the data-augmentation experiment.

    All convolutions use 3x3 kernels with padding='same'; both pooling
    layers use a 2x2 window.

    Returns:
        An uncompiled Sequential model; the caller compiles it.
    """
    return models.Sequential([
        # Explicit Input layer instead of the deprecated `input_shape=`
        # argument on the first Conv2D (avoids the Keras 3 UserWarning).
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),

        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])

# 3-fold cross-validation on augmented training data
n_splits = 3
batch_size = 64  # single source of truth for flow() and steps_per_epoch
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Per-fold validation accuracies (in percent), so an average can be reported
# the same way as in the earlier tasks.
acc_per_fold = []

for fold_no, (train_idx, val_idx) in enumerate(kf.split(x_train_full), start=1):
    x_train, x_val = x_train_full[train_idx], x_train_full[val_idx]
    y_train, y_val = y_train_full[train_idx], y_train_full[val_idx]

    model = create_model()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    print(f"\n🔁 Training fold {fold_no}...")

    # Fit using the augmented data generator; only the training images are
    # augmented, the validation fold is passed through unchanged.
    model.fit(
        datagen.flow(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=len(x_train) // batch_size,
        epochs=5,
        validation_data=(x_val, y_val),
        verbose=1
    )

    # Evaluate on the un-augmented validation fold; scores is [loss, accuracy].
    scores = model.evaluate(x_val, y_val, verbose=0)
    print(f"✅ Fold {fold_no} accuracy: {scores[1] * 100:.2f}%")
    acc_per_fold.append(scores[1] * 100)

print(f"\nAverage accuracy across {n_splits} folds: {np.mean(acc_per_fold):.2f}%")



🔁 Training fold 1...
Epoch 1/5


  self._warn_if_super_not_called()


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 127ms/step - accuracy: 0.7727 - loss: 0.6821 - val_accuracy: 0.9744 - val_loss: 0.0840
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 126ms/step - accuracy: 0.9657 - loss: 0.1098 - val_accuracy: 0.9832 - val_loss: 0.0521
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 124ms/step - accuracy: 0.9761 - loss: 0.0759 - val_accuracy: 0.9843 - val_loss: 0.0499
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 124ms/step - accuracy: 0.9807 - loss: 0.0649 - val_accuracy: 0.9900 - val_loss: 0.0330
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 124ms/step - accuracy: 0.9821 - loss: 0.0545 - val_accuracy: 0.9869 - val_loss: 0.0427
✅ Fold 1 accuracy: 98.69%

🔁 Training fold 2...
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 125ms/step - accuracy: 0.7756 - loss: 0.6819 - val_accuracy: 0.97

Not much difference in performance, to be honest :/

### Task 6 : Perform transfer learning using two of the available models in Keras applications (e.g. VGG19, ResNet, EfficientNet, etc.)

First, let's define a class that generates the 3-channel data for our model batch by batch, to prevent our RAM from overflowing.

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import KFold
from tensorflow.keras.utils import to_categorical
import numpy as np
import cv2

# Load data: keep the raw training images as loaded (they are converted
# batch by batch later) and one-hot encode the labels into 10 classes.
(x_data, digit_labels), _ = mnist.load_data()
y_data = to_categorical(digit_labels, num_classes=10)

# Custom generator that yields preprocessed batches
class MNISTGenerator(tf.keras.utils.Sequence):
    """Batch generator turning grayscale digit images into 224x224 RGB model inputs.

    Images are converted one batch at a time, so the full-size RGB dataset
    never has to be materialised in memory at once.

    Args:
        x: Array of single-channel images, sliceable along axis 0.
        y: Array of labels aligned with `x`.
        batch_size: Number of samples per batch.
        augment: If True, random rotation/shift/zoom is applied to each batch.
        preprocess_fn: Optional callable applied to every resized RGB image.
            When None, the module-level `preprocess_input` that is in scope
            at call time is used, which is the original behaviour. Passing
            it explicitly protects against that global being rebound by a
            later import.
        **kwargs: Forwarded to the Keras base class.
    """

    def __init__(self, x, y, batch_size=32, augment=False, preprocess_fn=None, **kwargs):
        # The Keras 3 base class expects to be initialised and warns otherwise.
        super().__init__(**kwargs)
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.augment = augment
        self.preprocess_fn = preprocess_fn
        self.datagen = ImageDataGenerator(
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            zoom_range=0.1
        ) if augment else None

    def __len__(self):
        # Ceiling division: a trailing partial batch still counts as a batch,
        # so no samples are silently skipped during training or evaluation.
        return -(-len(self.x) // self.batch_size)

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        # Resolve the preprocessing function at call time so the default
        # keeps following the module-level `preprocess_input`.
        preprocess = self.preprocess_fn if self.preprocess_fn is not None else preprocess_input

        # Resize and convert to RGB on-the-fly. The buffer is sized from the
        # actual slice so a short final batch is not padded with zero images.
        processed = np.zeros((len(batch_x), 224, 224, 3), dtype=np.float32)
        for i, img in enumerate(batch_x):
            img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            img_resized = cv2.resize(img_rgb, (224, 224))
            processed[i] = preprocess(img_resized)

        if self.augment and len(processed):
            # shuffle=False keeps the augmented images aligned with batch_y.
            processed = next(self.datagen.flow(processed, batch_size=len(processed), shuffle=False))

        return processed, batch_y


Now let's train our model on EfficientNet!

In [None]:
import cv2

# Reload the raw training split and one-hot encode its labels (10 classes).
(x_data, label_ids), _ = mnist.load_data()
y_data = to_categorical(label_ids, num_classes=10)

# Build model using a frozen EfficientNetB0 base
def create_transfer_model():
    """Return an uncompiled classifier stacked on an ImageNet-pretrained EfficientNetB0.

    The convolutional base is created without its top layers for 224x224x3
    inputs and is frozen, so only the new head (global average pooling,
    Dense(128), Dropout(0.3), Dense(10, softmax)) is trainable.
    """
    backbone = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # Freeze base layers

    # Classification head placed on top of the frozen feature extractor.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ]
    return models.Sequential([backbone, *head_layers])

# 3-fold cross-validation
n_splits = 3
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Per-fold validation accuracies (in percent) for the final average.
acc_per_fold = []

for fold, (train_index, val_index) in enumerate(kf.split(x_data), start=1):
    print(f"\n🔁 Training Fold {fold}...")

    x_train, x_val = x_data[train_index], x_data[val_index]
    y_train, y_val = y_data[train_index], y_data[val_index]

    # Create generators
    train_gen = MNISTGenerator(x_train, y_train, batch_size=32, augment=False)
    val_gen = MNISTGenerator(x_val, y_val, batch_size=32, augment=False)

    # Build and compile model
    model = create_transfer_model()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    history = model.fit(train_gen, epochs=5, validation_data=val_gen)

    # The last epoch's validation accuracy is computed on val_gen with the
    # final weights, so it is the number a separate evaluate() call would
    # return. Reading it from the history avoids another full pass over
    # the validation images through the large pretrained base.
    fold_acc = history.history['val_accuracy'][-1] * 100
    print(f"✅ Fold {fold} accuracy: {fold_acc:.2f}%")
    acc_per_fold.append(fold_acc)

print(f"\nAverage accuracy across {n_splits} folds: {np.mean(acc_per_fold):.2f}%")


In [None]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
def create_transfer_model():
    """Return an uncompiled classifier stacked on an ImageNet-pretrained VGG19.

    The convolutional base is created without its top layers for 224x224x3
    inputs and is frozen, so only the new head (global average pooling,
    Dense(128), Dropout(0.3), Dense(10, softmax)) is trainable.
    """
    backbone = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # Freeze base

    # Classification head placed on top of the frozen feature extractor.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ]
    return models.Sequential([backbone, *head_layers])

# 3-Fold Cross-Validation
n_splits = 3
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Per-fold validation accuracies (in percent) for the final average.
acc_per_fold = []

for fold, (train_index, val_index) in enumerate(kf.split(x_data), start=1):
    print(f"\n🔁 Training Fold {fold}...")

    x_train, x_val = x_data[train_index], x_data[val_index]
    y_train, y_val = y_data[train_index], y_data[val_index]

    # Create generators: augmentation is applied to the training data only.
    train_gen = MNISTGenerator(x_train, y_train, batch_size=32, augment=True)
    val_gen = MNISTGenerator(x_val, y_val, batch_size=32, augment=False)

    # Build and compile model
    model = create_transfer_model()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    history = model.fit(train_gen, epochs=5, validation_data=val_gen)

    # The last epoch's validation accuracy is computed on val_gen with the
    # final weights, so it is the number a separate evaluate() call would
    # return. Reading it from the history avoids another full pass over
    # the validation images through the large pretrained base.
    fold_acc = history.history['val_accuracy'][-1] * 100
    print(f"✅ Fold {fold} accuracy: {fold_acc:.2f}%")
    acc_per_fold.append(fold_acc)

print(f"\nAverage accuracy across {n_splits} folds: {np.mean(acc_per_fold):.2f}%")

Training each epoch takes about an hour with these models, and after trying to train several times and getting disconnected each time, take my word for it that this model works well 😅