# Lab 8 - Transfer learning

## Shallow mode

### Imports and vars

In [1]:
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16, preprocess_input
from keras import models, layers

In [2]:
# GPU init
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    tf.config.set_visible_devices(gpus[0], 'GPU')
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

2 Physical GPUs, 1 Logical GPU


In [3]:
# --- Constants ---
IMG_ROWS = 32
IMG_COLS = 32
IMG_CHANNELS = 3
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

In [4]:
# --- Load CIFAR-10 ---
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape) # (50000, 32, 32, 3)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Y_train = to_categorical(y_train, NB_CLASSES)
Y_test = to_categorical(y_test, NB_CLASSES)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


### Model building

In [5]:
# --- Build the model with pre-trained VGG16 convolutional base ---
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))

# Freeze the convolutional base
conv_base.trainable = False
# ? Seems that this'll keep the model's pretraining (the 'convolutional base').

model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(NB_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIM,
              metrics=['accuracy'])

### Training and evaluation

In [6]:
# --- Train ---
model.fit(X_train, Y_train,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCH,
          validation_split=VALIDATION_SPLIT,
          verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a78036add0>

In [7]:
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE,
                       verbose=1)

print("Test score:", score[0])
print("Test accuracy:", score[1])

Test score: 1.1852288246154785
Test accuracy: 0.6071000099182129


## Fine-tuning mode

Fine-tuning mode unfreezes some of the top layers of the convolutional base which it uses for feature extraction, allowing them to learn features in the new data.

### Imports and vars

In [1]:
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import models, layers

In [2]:
# --- Constants ---
IMG_ROWS = 32
IMG_COLS = 32
IMG_CHANNELS = 3
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

In [3]:
# --- Load CIFAR-10 ---
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Y_train = to_categorical(y_train, NB_CLASSES)
Y_test = to_categorical(y_test, NB_CLASSES)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


### Freezing and unfreezing

In [None]:
# --- Build the model: VGG16 as convolutional base ---
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))

# Initially freeze the entire base
conv_base.trainable = True
set_trainable = False

You can **choose** which layers to unfreeze.

In [None]:
# Example rule: unfreeze from 'block5_conv1' onward
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
        # When we get to here, set trainable to true.
        # This makes all following layers trainable.

    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

In [6]:
# Check trainable layers
for layer in conv_base.layers:
    print(layer.name, layer.trainable)

input_1 False
block1_conv1 False
block1_conv2 False
block1_pool False
block2_conv1 False
block2_conv2 False
block2_pool False
block3_conv1 False
block3_conv2 False
block3_conv3 False
block3_pool False
block4_conv1 False
block4_conv2 False
block4_conv3 False
block4_pool False
block5_conv1 True
block5_conv2 True
block5_conv3 True
block5_pool True


### Model building

In [7]:
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(NB_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIM,
              metrics=['accuracy'])

### Training and evaluation

In [8]:
model.fit(X_train, Y_train,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCH,
          validation_split=VALIDATION_SPLIT,
          verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1ea1b7867a0>

In [None]:
score = model.evaluate(X_test, Y_test,
                       batch_size=BATCH_SIZE,
                       verbose=1)

print("Test score:", score[0])
print("Test accuracy:", score[1])

Test score: 2.5911684036254883
Test accuracy: 0.7263999581336975


# Shallow yielded much better results than fine-tuned in this scenario **when using a COLAB GPU**. On the 9070XT, fine-tuned was better.

The 9070XT is verifiably 3-4x faster than Colab's Tesla T4.

Consider it like shallow mode splits the model into two stages, with the original pretrained convolutional base staying independent and then passed into a classifier, whereas finetuned extends the existing convolutional base.