In [1]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.initializers import glorot_uniform, RandomNormal
import random, os, pickle, copy
import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import lstsq
%matplotlib inline
savedir = os.path.join('./save/transfer_learning_cifar5_multiple_instantiations')

Using TensorFlow backend.


# SET UP DATA

In [2]:
def map_to_range(arr):
    narr = np.zeros_like(arr, dtype=np.int64)
    vals = np.unique(arr)
    for i, v in enumerate(vals):
        mask = (arr == v)
        narr[mask] = i
    return narr

In [3]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
input_shape = (32, 32, 3)
num_classes = 5

y_test = y_test.astype(np.int64).squeeze()
y_train = y_train.astype(np.int64).squeeze()

# labels found to give roughly equal classification performance
set_1_test_mask = np.isin(y_test, np.array([0, 3, 4, 6, 7])).squeeze()
set_1_train_mask = np.isin(y_train, np.array([0, 3, 4, 6, 7])).squeeze()

set_2_test_mask = np.isin(y_test, np.array([1, 2, 5, 8, 9])).squeeze()
set_2_train_mask = np.isin(y_train, np.array([1, 2, 5, 8, 9])).squeeze()

set_1_train_labels = y_train[set_1_train_mask]
x_train_1 = x_train[set_1_train_mask]
set_1_test_labels = y_test[set_1_test_mask]
x_test_1 = x_test[set_1_test_mask]
set_2_train_labels = y_train[set_2_train_mask]
x_train_2 = x_train[set_2_train_mask]
set_2_test_labels = y_test[set_2_test_mask]
x_test_2 = x_test[set_2_test_mask]

y_train_1 = keras.utils.to_categorical(map_to_range(set_1_train_labels), num_classes)
y_train_2 = keras.utils.to_categorical(map_to_range(set_2_train_labels), num_classes)
y_test_1 = keras.utils.to_categorical(map_to_range(set_1_test_labels), num_classes)
y_test_2 = keras.utils.to_categorical(map_to_range(set_2_test_labels), num_classes)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [4]:
datagen1 = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
    )
datagen1.fit(x_train_1)

datagen2= ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
    )
datagen2.fit(x_train_2)

In [14]:
def set_up_model(lr=1e-3, trainable=[True, True, True, True]):
    keras.backend.clear_session()
    inputs = Input(shape=input_shape)
    x = Conv2D(4, kernel_size=(7,7), strides=(1, 1), activation='relu', padding='same', trainable=trainable[0])(inputs)
    x = MaxPooling2D(pool_size=(2,2), strides=(2,2))(x)
    x = Conv2D(8, kernel_size=(3,3), strides=(1, 1), activation='relu', padding='same', trainable=trainable[1])(x)
    x = MaxPooling2D(pool_size=(2,2), strides=(2,2))(x)
    x = Conv2D(16, kernel_size=(3,3), strides=(1, 1), activation='relu', padding='same', trainable=trainable[2])(x)
    x = MaxPooling2D(pool_size=(2,2), strides=(2,2))(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu', trainable=trainable[3])(x)
    x = Dropout(0.5)(x)
    predictions = Dense(5, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=lr),
                  metrics=['accuracy'])
    return model

# Multiple instantiations of transfer learning

Using first-layer filters trained on dset2 were able to match performance on dset1 compared to first-layer filters trained on dset1, but not the other way around. Doing this procedure 10 times to see if the results are stable.

In [12]:
epochs = 1
batch_size = 128

for i in range(10):
    ##### DSET1 #####
    # Train model from scratch
    model = set_up_model(trainable=[True, True, True, True])
    ckpt_training = ModelCheckpoint(os.path.join(savedir, 'weights_training_dset1_%d.h5'%i), 
                                    monitor='val_loss', 
                                    verbose=0, 
                                    save_best_only=True, 
                                    save_weights_only=False,
                                    mode='auto', 
                                    period=1
                                   )
    
    history_training_dset1 = model.fit_generator(datagen1.flow(x_train_1, y_train_1, batch_size=batch_size),
                                  epochs=epochs,
                                  verbose=2,
                                  validation_data=(x_test_1, y_test_1),
                                  callbacks=[ckpt_training]
                                  )
    with open(os.path.join(savedir, 'history_training_dset1_%d.pkl'%i), 'wb') as f:
        pickle.dump(history_training_dset1.history, f)
    
    # Freeze first layer and load best performing network state
    model = set_up_model(trainable=[False, True, True, True])
    model.load_weights(os.path.join(savedir, 'weights_training_dset1_%d.h5'%i)
    orig_weights = model.get_weights()
    nweights = copy.deepcopy(orig_weights[:2])
    for w in orig_weights[2:]:
        nweights.append(glorot_uniform()(w.shape).eval(session=keras.backend.get_session()))
    model.set_weights(nweights)
    ckpt_transfer = ModelCheckpoint(os.path.join(savedir, 'weights_transfer_dset1_dset2.h5'), 
                                    monitor='val_loss', 
                                    verbose=0, 
                                    save_best_only=True, 
                                    save_weights_only=False, 
                                    mode='auto', 
                                    period=1)
                       
    history_transfer_dset1 = model.fit_generator(datagen2.flow(x_train_2, y_train_2, batch_size=batch_size),
                                                 epochs=epochs,
                                                 verbose=2,
                                                 validation_data=(x_test_2, y_test_2),
                                                 callbacks=[ckpt_dset1_transfer]
                                                 )

    with open(os.path.join(savedir, 'history_transfer_dset1_dset2_%d.pkl'%i), 'wb') as f:
        pickle.dump(history_transfer_dset1.history, f)
                       
                       
                       
                       
    #### DSET2 #####
    # Train model from scratch
    model = set_up_model(trainable=[True, True, True, True])
    ckpt_training = ModelCheckpoint(os.path.join(savedir, 'weights_training_dset2_%d.h5'%i), 
                                    monitor='val_loss', 
                                    verbose=0, 
                                    save_best_only=True, 
                                    save_weights_only=False,
                                    mode='auto', 
                                    period=1
                                   )
    
    history_training_dset2 = model.fit_generator(datagen2.flow(x_train_2, y_train_2, batch_size=batch_size),
                                                 epochs=epochs,
                                                 verbose=2,
                                                 validation_data=(x_test_2, y_test_2),
                                                 callbacks=[ckpt_training]
                                                 )
                       
    with open(os.path.join(savedir, 'history_training_dset2_%d.pkl'%i), 'wb') as f:
        pickle.dump(history_training_dset2.history, f)
    
    # Freeze first layer and load best performing network state
    model = set_up_model(trainable=[False, True, True, True])
    model.load_weights(os.path.join(savedir, 'weights_training_dset2_%d.h5'%i)
    orig_weights = model.get_weights()
    nweights = copy.deepcopy(orig_weights[:2])
    for w in orig_weights[2:]:
        nweights.append(glorot_uniform()(w.shape).eval(session=keras.backend.get_session()))
    model.set_weights(nweights)
    ckpt_transfer = ModelCheckpoint(os.path.join(savedir, 'weights_transfer_dset2_dset1.h5'), 
                                    monitor='val_loss', 
                                    verbose=0, 
                                    save_best_only=True, 
                                    save_weights_only=False, 
                                    mode='auto', 
                                    period=1
                                   )
                       
    history_transfer_dset2 = model.fit_generator(datagen1.flow(x_train_1, y_train_1, batch_size=batch_size),
                                                 epochs=epochs,
                                                 verbose=2,
                                                 validation_data=(x_test_1, y_test_1),
                                                 callbacks=[ckpt_dset1_transfer]
                                                 )

    with open(os.path.join(savedir, 'history_transfer_dset2_dset1_%d.pkl'%i), 'wb') as f:
        pickle.dump(history_transfer_dset2.history, f)

OSError: Unable to open file (unable to open file: name = './save/transfer_learning_cifar5_multiple_instantiations/weights_dset1_original.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [17]:
lr_histories = []
epochs = 150
batch_size = 128
lrs = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6, 1e-6]
for i, lr in enumerate(lrs):
    model = set_up_model(lr=lr)
    history = model.fit_generator(datagen1.flow(x_train_1, y_train_1, batch_size=batch_size),
                                  epochs=epochs, verbose=2, validation_data = (x_test_1, y_test_1))
    lr_histories.append(history.history)
with open(os.path.join(savedir, 'lr_histories.pkl'), 'wb') as f:
    pickle.dump(lr_histories, f)

Epoch 1/1
 - 23s - loss: 12.8444 - acc: 0.1996 - val_loss: 12.8945 - val_acc: 0.2000
Epoch 1/1
 - 24s - loss: 1.7272 - acc: 0.2059 - val_loss: 1.6151 - val_acc: 0.2000
Epoch 1/1
 - 24s - loss: 1.3386 - acc: 0.4301 - val_loss: 1.2022 - val_acc: 0.4946
Epoch 1/1
 - 24s - loss: 1.6129 - acc: 0.2007 - val_loss: 1.6097 - val_acc: 0.2000
Epoch 1/1
 - 24s - loss: 1.3752 - acc: 0.4095 - val_loss: 1.2127 - val_acc: 0.5092
Epoch 1/1
 - 24s - loss: 1.4530 - acc: 0.3612 - val_loss: 1.2466 - val_acc: 0.4946
Epoch 1/1
 - 24s - loss: 1.5843 - acc: 0.2608 - val_loss: 1.4976 - val_acc: 0.3704
Epoch 1/1
 - 24s - loss: 1.6026 - acc: 0.2332 - val_loss: 1.5641 - val_acc: 0.3066
Epoch 1/1
 - 24s - loss: 1.6096 - acc: 0.2086 - val_loss: 1.5879 - val_acc: 0.2004
Epoch 1/1
 - 24s - loss: 1.6134 - acc: 0.2177 - val_loss: 1.6075 - val_acc: 0.2298
Epoch 1/1
 - 25s - loss: 1.6120 - acc: 0.2170 - val_loss: 1.6061 - val_acc: 0.2320
