In [None]:
# MobileNetV2 transfer-learning pipeline for CIFAR-10
# Two-phase training: (1) train head with base frozen, (2) unfreeze last blocks and fine-tune
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# --- Configuration: everything a reader might want to tune lives here ---
AUTOTUNE = tf.data.AUTOTUNE   # let tf.data choose parallelism / prefetch depth
BATCH_SIZE = 128
SEED = 42
NUM_CLASSES = 10
IMG_SIZE = 96   # MobileNetV2 expects larger input; 96 is a good compromise for CIFAR
INITIAL_EPOCHS = 12     # phase 1: head-only training
FINE_TUNE_EPOCHS = 30   # phase 2: fine-tuning with part of the base unfrozen

# Seed the Python, NumPy and TensorFlow global RNGs once, up front, so that
# weight initialisation, dropout and the random augmentation layers start from
# the same state on every Restart & Run All. (Some GPU kernels may still be
# nondeterministic, so results can differ slightly between runs.)
keras.utils.set_random_seed(SEED)

In [None]:
# 1) Load CIFAR-10 and hold out the tail of the training set for validation
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# squeeze() drops any length-1 axes so the labels become flat integer vectors.
y_train_full, y_test = y_train_full.squeeze(), y_test.squeeze()

VAL_SIZE = 5000
TRAIN_SIZE = len(x_train_full) - VAL_SIZE

# First TRAIN_SIZE samples -> training split; remaining VAL_SIZE -> validation split.
x_train, x_val = x_train_full[:TRAIN_SIZE], x_train_full[TRAIN_SIZE:]
y_train, y_val = y_train_full[:TRAIN_SIZE], y_train_full[TRAIN_SIZE:]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step


In [None]:
# 2) Augmentation + preprocessing
# The pipeline is assembled layer by layer. Resizing comes first so that every
# random transform after it operates on IMG_SIZE x IMG_SIZE images.
augmentation = keras.Sequential(name="augmentation")
augmentation.add(layers.Resizing(IMG_SIZE, IMG_SIZE))
augmentation.add(layers.RandomFlip("horizontal"))
augmentation.add(layers.RandomRotation(0.06))
augmentation.add(layers.RandomZoom(0.08))
augmentation.add(layers.RandomTranslation(0.06, 0.06))
augmentation.add(layers.RandomContrast(0.08))

In [None]:
# Alias MobileNetV2's own input preprocessing so train and eval pipelines share it.
# (`keras` here is `tensorflow.keras`, i.e. the same module as `tf.keras`.)
preprocess_input = keras.applications.mobilenet_v2.preprocess_input

def prepare_for_train(image, label):
    """Map function for the training pipeline.

    Casts the image to float32, runs it through the `augmentation` model in
    training mode (which also resizes it), then applies MobileNetV2 input
    preprocessing. The label is passed through unchanged.

    Returns:
        (preprocessed_image, label)
    """
    as_float = tf.cast(image, tf.float32)
    augmented = augmentation(as_float, training=True)  # resize + random transforms
    return preprocess_input(augmented), label           # scales to [-1, 1] as MobileNet expects

def prepare_for_eval(image, label):
    """Map function for validation/test pipelines (no random augmentation).

    Casts the image to float32, resizes it to IMG_SIZE x IMG_SIZE and applies
    MobileNetV2 input preprocessing. The label is passed through unchanged.

    Returns:
        (preprocessed_image, label)
    """
    resized = tf.image.resize(tf.cast(image, tf.float32), [IMG_SIZE, IMG_SIZE])
    return preprocess_input(resized), label

def make_ds(images, labels, training=False):
    """Build a batched, prefetched `tf.data.Dataset` from in-memory arrays.

    Args:
        images: array of images, sliced along the first axis.
        labels: array of labels aligned with `images`.
        training: when True, shuffle (buffer of 10000, seeded with SEED) and
            map with `prepare_for_train`; otherwise map with `prepare_for_eval`.

    Returns:
        A dataset yielding `(image_batch, label_batch)` with BATCH_SIZE elements
        per batch (the final batch may be smaller).
    """
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    if training:
        # Shuffle raw samples before the (more expensive) per-image mapping.
        ds = ds.shuffle(10000, seed=SEED)
    map_fn = prepare_for_train if training else prepare_for_eval
    return (
        ds.map(map_fn, num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

# Only the training split is shuffled and augmented; val/test use the deterministic eval path.
train_ds = make_ds(images=x_train, labels=y_train, training=True)
val_ds = make_ds(images=x_val, labels=y_val, training=False)
test_ds = make_ds(images=x_test, labels=y_test, training=False)

In [None]:
# 3) Build model: ImageNet-pretrained MobileNetV2 backbone + small classification head
base_model = keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,   # drop the ImageNet classifier; a new head is attached below
    pooling=None,        # pooling is done explicitly in the head
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
base_model.trainable = False  # freeze initially

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
# The backbone is called with training=False, i.e. in inference mode.
x = base_model(inputs, training=False)
# Head: pool -> Dense(128, relu) -> BatchNorm -> Dropout(0.3) -> softmax over NUM_CLASSES.
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="mobilenetv2_cifar10")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# 4) Compile (use label smoothing)
def sparse_label_smoothing_crossentropy(y_true, y_pred, label_smoothing_factor=0.1, num_classes=NUM_CLASSES):
    """Categorical cross-entropy with label smoothing for integer (sparse) labels.

    The integer labels are one-hot encoded, smoothed as
    ``one_hot * (1 - eps) + eps / num_classes``, and compared against `y_pred`,
    which must already be probabilities (e.g. softmax output), not logits.

    Args:
        y_true: integer class ids, shaped like `y_pred` without its last axis.
            A trailing singleton axis (e.g. ``(batch, 1)``) is also accepted.
        y_pred: predicted class probabilities with `num_classes` on the last axis.
        label_smoothing_factor: smoothing strength ``eps`` in [0, 1].
        num_classes: number of classes used for the one-hot encoding.

    Returns:
        Per-sample loss tensor (last axis of `y_pred` reduced).
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, tf.int32)
    # Labels may arrive as (batch, 1) -- e.g. unsqueezed CIFAR labels, or Keras
    # versions that expand rank-1 targets. One-hot encoding that shape would give
    # (batch, 1, K), which does not line up with y_pred, so drop the extra axis.
    if (
        y_true.shape.rank is not None
        and y_true.shape.rank == y_pred.shape.rank
        and y_true.shape[-1] == 1
    ):
        y_true = tf.squeeze(y_true, axis=-1)
    y_true_one_hot = tf.one_hot(y_true, num_classes)
    smooth_y_true = y_true_one_hot * (1.0 - label_smoothing_factor) + (label_smoothing_factor / num_classes)
    return keras.losses.categorical_crossentropy(smooth_y_true, y_pred, from_logits=False)

# Use the named loss function directly: its defaults (label_smoothing_factor=0.1,
# num_classes=NUM_CLASSES) are exactly what the training run needs, and a named
# function (unlike an anonymous lambda) has a stable name that can be supplied via
# `custom_objects` when a compiled checkpoint is reloaded.
loss = sparse_label_smoothing_crossentropy
initial_lr = 1e-3  # learning rate for phase 1 (head-only training)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=initial_lr),
    loss=loss,
    metrics=["accuracy"],
)
model.summary()

In [None]:
# 5) Callbacks (all monitor validation loss)
checkpoint_path = "best_mobilenet_cifar10.h5"
callbacks = [
    # Write the model to disk whenever val_loss reaches a new best.
    keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor="val_loss",
        save_best_only=True,
        verbose=1,
    ),
    # Stop after 8 epochs without improvement and roll back to the best weights.
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,
        restore_best_weights=True,
        verbose=1,
    ),
    # Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=3,
        min_lr=1e-6,
        verbose=1,
    ),
]


In [None]:
# 6) Phase 1: train only the new head while the MobileNetV2 base stays frozen
history1 = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=INITIAL_EPOCHS,
    callbacks=callbacks,
)

Epoch 1/12
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459ms/step - accuracy: 0.6429 - loss: 1.4208
Epoch 1: val_loss improved from inf to 0.92290, saving model to best_mobilenet_cifar10.h5




[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 500ms/step - accuracy: 0.6431 - loss: 1.4202 - val_accuracy: 0.8326 - val_loss: 0.9229 - learning_rate: 0.0010
Epoch 2/12
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 426ms/step - accuracy: 0.7740 - loss: 1.0573
Epoch 2: val_loss improved from 0.92290 to 0.90208, saving model to best_mobilenet_cifar10.h5




[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 430ms/step - accuracy: 0.7740 - loss: 1.0573 - val_accuracy: 0.8360 - val_loss: 0.9021 - learning_rate: 0.0010
Epoch 3/12
[1m204/352[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m1:03[0m 430ms/step - accuracy: 0.7887 - loss: 1.0241

In [None]:
# 7) Phase 2: Unfreeze some of the base model for fine-tuning
# Unfreeze the top layers of the base model (e.g., last 50 layers)
base_model.trainable = True
# Freeze earlier layers, unfreeze last N
fine_tune_at = len(base_model.layers) - 50
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
for layer in base_model.layers[fine_tune_at:]:
    layer.trainable = True

# Re-compile with a lower LR for fine-tuning. Changes to `trainable` only take
# effect once the model is compiled again.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=loss,
    metrics=["accuracy"],
)

# Continue the epoch numbering where phase 1 actually stopped. Phase 1 may end
# before INITIAL_EPOCHS because of early stopping, so the end epoch is derived
# from the real start epoch; this way fine-tuning always gets exactly
# FINE_TUNE_EPOCHS epochs (at most -- early stopping still applies).
fine_tune_start_epoch = history1.epoch[-1] + 1 if len(history1.epoch) else 0
history2 = model.fit(
    train_ds,
    epochs=fine_tune_start_epoch + FINE_TUNE_EPOCHS,
    initial_epoch=fine_tune_start_epoch,
    validation_data=val_ds,
    callbacks=callbacks,
)


In [None]:
# 8) Evaluate the in-memory model on the held-out test set
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(x=test_ds)
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_acc:.4f}")

In [None]:
# 9) Load best model (if needed)
# The checkpoint was written from a model compiled with a custom Python loss,
# which Keras cannot rebuild from the file on its own. Load architecture and
# weights only (compile=False), then compile again with the in-notebook loss so
# `best_model` can be evaluated. Note this creates a fresh optimizer; the
# optimizer state stored in the checkpoint is not restored.
best_model = tf.keras.models.load_model(checkpoint_path, compile=False)
best_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=loss,
    metrics=["accuracy"],
)
print("Loaded best model from checkpoint.")