In [None]:
# Core TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.utils import to_categorical

# Augmentation stage shared by the transfer-learning models below:
# a random horizontal flip, a random rotation (factor 0.1) and a random
# zoom (factor 0.1), applied in that order.
_augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps)



In [None]:
# Fetch CIFAR-10 through the Keras dataset helper.
# CIFAR-10 contains 60,000 32x32 RGB images across 10 classes
cifar10_train, cifar10_test = tf.keras.datasets.cifar10.load_data()
x_train, y_train = cifar10_train
x_test, y_test = cifar10_test

# One-hot encode the class labels (10 classes) to match the
# categorical_crossentropy loss the models are compiled with.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Per-example preprocessing used by both input pipelines:
# - resize to 224x224, the input shape every model below is built with
# - divide pixel values by 255 so they land in [0, 1]
def preprocess(image, label):
    """Resize `image` to 224x224 and scale it by 1/255; `label` passes through unchanged.

    Returns:
        An `(image, label)` tuple with the transformed image.
    """
    resized = tf.image.resize(image, (224, 224))
    return resized / 255.0, label

# Build the tf.data input pipelines.
#
# Training: shuffle the raw 32x32 images *before* resizing. Shuffling after
# the map would park resized 224x224 float images in the shuffle buffer, which
# costs far more memory per element. A buffer as large as the training set
# also gives a full, uniform reshuffle every epoch.
# The map runs in parallel so resizing does not become the bottleneck.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation: never shuffled, so batches always come out in dataset order.
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# Baseline: a small CNN trained from scratch (no pre-trained knowledge).
# It is the reference point the transfer-learning model is compared against.
baseline_layers = [
    # Two conv + max-pool stages ...
    layers.Conv2D(32, 3, activation='relu', input_shape=(224,224,3)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    # ... followed by a dense classifier over the 10 classes.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax'),
]
baseline_model = models.Sequential(baseline_layers)

baseline_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the preprocessed training pipeline; the preprocessed test pipeline
# is passed as validation data.
baseline_history = baseline_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=10,
)

In [None]:
# Load EfficientNetB0 pre-trained on ImageNet.
# include_top=False removes the original ImageNet classifier.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224,224,3)
)

# Freeze all layers of the pre-trained model so the ImageNet-learned weights
# are not updated while the new classifier head is being trained.
base_model.trainable = False

tl_model = models.Sequential([
    data_augmentation,              # augmentation
    # Keras' EfficientNet carries its own input normalization and expects
    # pixel values in [0, 255]. The input pipeline has already scaled images
    # to [0, 1], so scale them back up before they reach the backbone;
    # otherwise the pre-trained features see inputs ~255x too small.
    layers.Rescaling(255.0),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),            # regularization
    layers.Dense(10, activation='softmax')
])

tl_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
# Phase 1: fit tl_model for 5 epochs. Only the newly added classifier layers
# are meant to learn here.
tl_frozen_history = tl_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
)


In [None]:
# Phase 2: fine-tune the pre-trained backbone.
#
# Unfreeze the backbone but keep training the SAME tl_model. Building a new
# Sequential here would create freshly initialized Dense layers and throw
# away the classifier head that was just trained on top of the frozen base.
base_model.trainable = True

# Keep BatchNormalization layers frozen while fine-tuning: letting them update
# their moving statistics on the new data tends to wreck the pre-trained
# features (see the Keras transfer-learning guide).
for layer in base_model.layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True
)

# Recompile so the changed `trainable` flags take effect, and use a very small
# learning rate to avoid destroying pre-trained features (catastrophic
# forgetting).
tl_model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# NOTE(review): test_ds doubles as the validation set, so early stopping with
# restore_best_weights picks weights using test data and any test accuracy
# reported afterwards is optimistic. A held-out validation split would fix this.
tl_finetune_history = tl_model.fit(
    train_ds,
    epochs=15,
    validation_data=test_ds,
    callbacks=[early_stop]
)

In [None]:
import matplotlib.pyplot as plt

# Accuracy curves for the baseline and for both transfer-learning phases.
fig, ax = plt.subplots(figsize=(10, 6))

# Baseline
ax.plot(baseline_history.history['accuracy'], label='Baseline Train')
ax.plot(baseline_history.history['val_accuracy'], label='Baseline Val')

# Transfer learning - frozen backbone
frozen_train_acc = tl_frozen_history.history['accuracy']
frozen_val_acc = tl_frozen_history.history['val_accuracy']
ax.plot(frozen_train_acc, label='TL Train (Frozen)')
ax.plot(frozen_val_acc, label='TL Val (Frozen)')

# Transfer learning - fine-tuned. This phase ran after the frozen phase, so
# its curve is drawn starting at the epoch where the frozen phase ended.
finetune_train_acc = tl_finetune_history.history['accuracy']
finetune_val_acc = tl_finetune_history.history['val_accuracy']
finetune_epochs = range(
    len(frozen_train_acc),
    len(frozen_train_acc) + len(finetune_train_acc),
)
ax.plot(finetune_epochs, finetune_train_acc, label='TL Train (Fine-tuned)')
ax.plot(finetune_epochs, finetune_val_acc, label='TL Val (Fine-tuned)')

ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
ax.set_title("Baseline vs Transfer Learning Performance")
ax.legend()
ax.grid(True)
plt.show()



In [None]:
# Evaluate both models on the test pipeline. Index 1 of evaluate()'s result is
# the accuracy metric both models were compiled with (index 0 is the loss).
baseline_metrics = baseline_model.evaluate(test_ds, verbose=0)
tl_metrics = tl_model.evaluate(test_ds, verbose=0)
baseline_test_acc = baseline_metrics[1]
tl_test_acc = tl_metrics[1]

# Side-by-side bar chart of the two test accuracies.
model_names = ['Baseline CNN', 'Transfer Learning']
test_accuracies = [baseline_test_acc, tl_test_acc]

plt.figure(figsize=(6,4))
plt.bar(model_names, test_accuracies)
plt.ylabel("Accuracy")
plt.title("Test Accuracy Comparison")
plt.show()


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# CIFAR-10 class names, in label-index order.
CIFAR10_CLASS_NAMES = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]


def plot_confusion_matrix(model, dataset, title, class_names=None):
    """Plot a confusion matrix of `model`'s predictions over `dataset`.

    Args:
        model: Keras model whose output is a per-class score vector.
        dataset: iterable of `(images, labels)` batches, where `labels` are
            one-hot encoded tensors.
        title: title drawn above the matrix.
        class_names: optional list of class names in label-index order. When
            given, the matrix always covers every listed class and the axes
            are labelled with the names instead of bare indices.

    Raises:
        ValueError: if `dataset` yields no batches.
    """
    true_batches = []
    pred_batches = []

    # Predictions and labels are collected in the same pass so they stay
    # paired even if the dataset yields batches in a different order each time.
    for images, labels in dataset:
        # predict_on_batch is the single-batch entry point; calling
        # model.predict() once per batch inside a loop adds per-call overhead.
        probs = model.predict_on_batch(images)
        pred_batches.append(np.argmax(probs, axis=1))
        true_batches.append(np.argmax(labels.numpy(), axis=1))

    if not true_batches:
        raise ValueError("dataset yielded no batches; nothing to plot")

    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)

    # With explicit class names, pin the label set so classes that never occur
    # in this dataset still get a row/column and names line up with indices.
    label_ids = np.arange(len(class_names)) if class_names is not None else None

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(xticks_rotation=45)
    plt.title(title)
    plt.show()


plot_confusion_matrix(
    baseline_model, test_ds, "Baseline CNN Confusion Matrix",
    class_names=CIFAR10_CLASS_NAMES,
)
plot_confusion_matrix(
    tl_model, test_ds, "Transfer Learning Confusion Matrix",
    class_names=CIFAR10_CLASS_NAMES,
)


In [None]:
### Key Observations
# 1. Transfer learning can lead to overfitting on small datasets.
# 2. Data augmentation (flips, rotations, zoom) helps generalize the model.
# 3. Adding dropout layers in the fully connected part reduces overfitting.
# 4. Pre-trained models require careful fine-tuning to avoid memorizing training data.
# 5. Early stopping can further help prevent overfitting.