# Lab 8 - Transfer learning

## Shallow mode

### Imports and vars

In [2]:
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16, preprocess_input
from keras import models, layers

In [3]:
# --- Constants ---
IMG_ROWS = 32
IMG_COLS = 32
IMG_CHANNELS = 3
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

In [4]:
# --- Load CIFAR-10 ---
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape) # (50000, 32, 32, 3)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Y_train = to_categorical(y_train, NB_CLASSES)
Y_test = to_categorical(y_test, NB_CLASSES)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


### Model building

In [5]:
# --- Build the model with pre-trained VGG16 convolutional base ---
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))

# Freeze the convolutional base
conv_base.trainable = False
# ? Seems that this'll keep the model's pretraining (the 'convolutional base').

model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(NB_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIM,
              metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


### Training and evaluation

In [6]:
# --- Train ---
model.fit(X_train, Y_train,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCH,
          validation_split=VALIDATION_SPLIT,
          verbose=1)

Epoch 1/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 45ms/step - accuracy: 0.4296 - loss: 1.6333 - val_accuracy: 0.5531 - val_loss: 1.2954
Epoch 2/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.5591 - loss: 1.2637 - val_accuracy: 0.5603 - val_loss: 1.2468
Epoch 3/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.5908 - loss: 1.1773 - val_accuracy: 0.5635 - val_loss: 1.2250
Epoch 4/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 28ms/step - accuracy: 0.6023 - loss: 1.1339 - val_accuracy: 0.5884 - val_loss: 1.1894
Epoch 5/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.6149 - loss: 1.1000 - val_accuracy: 0.6003 - val_loss: 1.1554
Epoch 6/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.6256 - loss: 1.0690 - val_accuracy: 0.5988 - val_loss: 1.1584
Epoch 7/20
[1m313/3

<keras.src.callbacks.history.History at 0x7aa9b973ccd0>

In [7]:
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE,
                       verbose=1)

print("Test score:", score[0])
print("Test accuracy:", score[1])

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.6185 - loss: 1.1327
Test score: 1.1447113752365112
Test accuracy: 0.6136000156402588


## Fine-tuning mode

Fine-tuning mode unfreezes some of the top layers of the convolutional base which it uses for feature extraction, allowing them to learn features in the new data.

### Imports and vars

In [3]:
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import models, layers

In [4]:
# --- Constants ---
IMG_ROWS = 32
IMG_COLS = 32
IMG_CHANNELS = 3
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

In [5]:
# --- Load CIFAR-10 ---
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Y_train = to_categorical(y_train, NB_CLASSES)
Y_test = to_categorical(y_test, NB_CLASSES)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


### Freezing and unfreezing

In [6]:
# --- Build the model: VGG16 as convolutional base ---
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))

# Initially freeze the entire base
conv_base.trainable = True
set_trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


You can **choose** which layers to unfreeze.

In [7]:
# Example rule: unfreeze from 'block5_conv1' onward
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
        # When we get to here, set trainable to true.
        # This makes all following layers trainable.

    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

In [8]:
# Check trainable layers
for layer in conv_base.layers:
    print(layer.name, layer.trainable)

input_layer False
block1_conv1 False
block1_conv2 False
block1_pool False
block2_conv1 False
block2_conv2 False
block2_pool False
block3_conv1 False
block3_conv2 False
block3_conv3 False
block3_pool False
block4_conv1 False
block4_conv2 False
block4_conv3 False
block4_pool False
block5_conv1 True
block5_conv2 True
block5_conv3 True
block5_pool True


### Model building

In [9]:
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(NB_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIM,
              metrics=['accuracy'])

### Training and evaluation

In [10]:
model.fit(X_train, Y_train,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCH,
          validation_split=VALIDATION_SPLIT,
          verbose=1)

Epoch 1/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 51ms/step - accuracy: 0.0996 - loss: 2.4435 - val_accuracy: 0.0977 - val_loss: 2.3027
Epoch 2/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 32ms/step - accuracy: 0.1005 - loss: 2.3026 - val_accuracy: 0.0980 - val_loss: 2.3027
Epoch 3/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.0999 - loss: 2.3027 - val_accuracy: 0.0980 - val_loss: 2.3028
Epoch 4/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.0991 - loss: 2.3026 - val_accuracy: 0.0952 - val_loss: 2.3028
Epoch 5/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.1004 - loss: 2.3026 - val_accuracy: 0.0952 - val_loss: 2.3027
Epoch 6/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.0975 - loss: 2.3027 - val_accuracy: 0.0952 - val_loss: 2.3027
Epoch 7/20
[1m31

<keras.src.callbacks.history.History at 0x7937e12c63d0>

In [11]:
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE,
                       verbose=1)

print("Test score:", score[0])
print("Test accuracy:", score[1])

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.1009 - loss: 2.3026
Test score: 2.3026232719421387
Test accuracy: 0.10000000149011612


# Shallow yielded much better results than fine-tuned in this scenario. Perhaps you should try **both**.

Consider it like shallow mode splits the model into two stages, with the original pretrained convolutional base staying independent and then passed into a classifier, whereas finetuned extends the existing convolutional base.