In [1]:
import os
import numpy as np
import tensorflow as tf
from keras.models import Model, load_model
from keras.layers import Input, Dense, BatchNormalization, Activation, AveragePooling2D, Flatten
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler
from keras.datasets import cifar100
from keras import backend as K
import pickle
import sklearn
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
%matplotlib inline
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
model_type = 'ResNet56v2'
seed = 4

embedding_type = 'glove'
dim = 100
save_dir = 'Data/Embeddings/Full/%s/' % (embedding_type)
data_dir = os.path.join(os.getcwd(), save_dir)

In [3]:
#Loading data and embeddings
label_embeddings = pickle.load(open(os.path.join(data_dir, '%s_%s_labels_to_embeddings.pk' % (embedding_type, dim)), "rb"))
embedding_len = len(label_embeddings[0])

label_embeddings_arr = np.zeros((100, embedding_len))
for i in range(100):
    label_embeddings_arr[i] = label_embeddings[i]
    
save_dir_feat = os.path.join(os.getcwd(), 'saved_models/zsl/%s/type1/seed%s/extracted_feat/' % (model_type, seed))

In [4]:
X_train_feat = np.load(os.path.join(save_dir_feat, 'X_train_feat_cifar100_%s.npy' % (model_type)))
X_test_seen_feat = np.load(os.path.join(save_dir_feat, 'X_test_seen_feat_cifar100_%s.npy' % (model_type)))
X_test_unseen_feat = np.load(os.path.join(save_dir_feat, 'X_test_unseen_feat_cifar100_%s.npy' % (model_type)))
X_test_all_feat = np.load(os.path.join(save_dir_feat, 'X_test_all_feat_cifar100_%s.npy' % (model_type)))

input_shape = X_train_feat[0].shape

In [5]:
y_train = np.load(os.path.join(save_dir_feat, 'y_train_cifar100_%s.npy' % (model_type)))
y_test_seen = np.load(os.path.join(save_dir_feat, 'y_test_seen_cifar100_%s.npy' % (model_type)))
y_test_unseen = np.load(os.path.join(save_dir_feat, 'y_test_unseen_cifar100_%s.npy' % (model_type)))
y_test_all = np.load(os.path.join(save_dir_feat, 'y_test_all_cifar100_%s.npy' % (model_type)))

In [6]:
y_train_embeddings = np.zeros((len(y_train), embedding_len))
for i in range(len(y_train)):
    y_train_embeddings[i] = label_embeddings[int(y_train[i])]
    
y_test_seen_embeddings = np.zeros((len(y_test_seen), embedding_len))
for i in range(len(y_test_seen)):
    y_test_seen_embeddings[i] = label_embeddings[int(y_test_seen[i])]
    
y_test_unseen_embeddings = np.zeros((len(y_test_unseen), embedding_len))
for i in range(len(y_test_unseen)):
    y_test_unseen_embeddings[i] = label_embeddings[int(y_test_unseen[i])]
    
y_test_all_embeddings = np.zeros((len(y_test_all), embedding_len))
for i in range(len(y_test_all)):
    y_test_all_embeddings[i] = label_embeddings[int(y_test_all[i])]

In [7]:
def build_model(input_shape=input_shape, embedding_len=embedding_len):
    inputs = Input(shape=input_shape)

    x = BatchNormalization()(inputs)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=2)(x)
    
    x = Flatten()(x)
    
    x = Dense(1024) (x)
    x = Activation('relu')(x)
    x = BatchNormalization()(x)
    
    x = Dense(512) (x)
    x = Activation('relu')(x)
    x = BatchNormalization()(x)
    
    x = Dense(embedding_len) (x)
    x = Activation('relu')(x)
    x = BatchNormalization()(x)
    
    outputs = Dense(embedding_len,
                    kernel_initializer='he_normal')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    
    return model

In [8]:
batch_size = 128
epochs = 20

In [9]:
model = build_model()
model.compile(loss='cosine_proximity', optimizer='adam', metrics=['mse'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 8, 8, 256)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 8, 8, 256)         1024      
_________________________________________________________________
activation_1 (Activation)    (None, 8, 8, 256)         0         
_________________________________________________________________
average_pooling2d_1 (Average (None, 4, 4, 256)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4096)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              4195328   
_________________________________________________________________
activation_2 (Activation)    (None, 1024)              0         
__________

In [10]:
save_dir = os.path.join(os.getcwd(), 'saved_models/zsl/%s/type1/seed%s/%s_transfer/' % (model_type, seed, embedding_type))
model_name = 'cifar100_%s_model.{epoch:03d}.h5' % (model_type)
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

In [11]:
def lr_schedule(epoch):
    """Learning Rate Schedule
    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.
    # Arguments
        epoch (int): The number of epochs
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 120:
        lr *= 0.5e-3
    elif epoch > 80:
        lr *= 1e-3
    elif epoch > 40:
        lr *= 1e-2
    elif epoch > 20:
        lr *= 1e-1
    print('Learning rate: ', lr)

    return lr

In [12]:
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

In [13]:
model.fit(X_train_feat, y_train_embeddings,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_test_seen_feat, y_test_seen_embeddings),
          shuffle=True,
          callbacks=callbacks,
          verbose=1
         )

Train on 40000 samples, validate on 8000 samples
Epoch 1/20
Learning rate:  0.001

Epoch 00001: val_loss improved from inf to -0.67504, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/type1/seed4/glove_transfer/cifar100_ResNet56v2_model.001.h5
Epoch 2/20
Learning rate:  0.001

Epoch 00002: val_loss improved from -0.67504 to -0.68780, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/type1/seed4/glove_transfer/cifar100_ResNet56v2_model.002.h5
Epoch 3/20
Learning rate:  0.001

Epoch 00003: val_loss improved from -0.68780 to -0.70657, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/type1/seed4/glove_transfer/cifar100_ResNet56v2_model.003.h5
Epoch 4/20
Learning rate:  0.001

Epoch 00004: val_loss improved from -0.70657 to -0.71546, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/type1/seed4/glove_transfer/cifar100_ResNet56v2_model.004.h5
Epoch 5/20
L

KeyboardInterrupt: 

In [14]:
best_model_filepath = os.path.join(save_dir, 'cifar100_%s_model.%03d.h5' % (model_type, 9))
best_model = load_model(best_model_filepath)

In [15]:
#Top K Predictions
def pred_top_k(y_test_pred, label_embeddings_arr=label_embeddings_arr, k=5):
    sim_table = cosine_similarity(y_test_pred, label_embeddings_arr)
    top_k_guesses = np.argpartition(sim_table,range(99-k+1,100),axis=1)[:,99-k+1:]
    return sim_table, top_k_guesses

#Top k Accuracy
def calc_top_k_acc(top_k, y_test):
    correct = 0
    for i in range(y_test.shape[0]):
        if np.squeeze(y_test)[i] in top_k[i]:
            correct += 1
    return correct/float(y_test.shape[0])

def evaluate(model, x_test, y_test, label_embeddings_arr, k=5):
    
    y_test_pred = model.predict(x_test)
    sim_table, top_k_guesses = pred_top_k(y_test_pred, label_embeddings_arr, k=k)

    #Top prediction
    label_predictions = sim_table.argmax(axis=1)
    
    #Accuracy
    acc = np.sum((np.squeeze(y_test) == label_predictions)) / float(y_test.shape[0])
    top_k_acc = calc_top_k_acc(top_k_guesses, y_test)
    print("Accuracy: " + str(acc))
    print("Top " + str(k) + " Accuracy: " + str(top_k_acc))

In [16]:
evaluate(best_model, X_test_seen_feat, y_test_seen, label_embeddings_arr)

Accuracy: 0.498
Top 5 Accuracy: 0.661125


In [17]:
# Regular ZSL setting where we only need to consider choose between the 20 unseen classes, rather than all 100

# Just replace the word vectors for "seen" labels with something really far so it won't be close to any predicted vector
unseen_labels = np.unique(y_test_unseen)
label_embeddings_arr_unseen = np.copy(label_embeddings_arr)
for i in range(100):
    if i in unseen_labels:
        continue
    label_embeddings_arr_unseen[i] = np.ones(label_embeddings_arr[0].shape) * 1000

evaluate(best_model, X_test_unseen_feat, y_test_unseen, label_embeddings_arr_unseen) # Significant improvement

Accuracy: 0.24266666666666667
Top 5 Accuracy: 0.5626666666666666


In [18]:
# Although accuracy is still 0%, we seen an improvement in top 5 % accuracy (0% -> 15%). Sign that w2v is useful for ZSL

evaluate(best_model, X_test_unseen_feat, y_test_unseen, label_embeddings_arr)

Accuracy: 0.0004166666666666667
Top 5 Accuracy: 0.186


In [19]:
"""
x = np.linspace(0,99,100) 
y = np.zeros(100)
y_test_pred = model.predict(x_test)

for i in range(100):
    k = i+1
    sim_table, top_k_guesses = pred_top_k(y_test_pred, k=k)
    y[i] = calc_top_k_acc(top_k_guesses, y_test)
    
plt.plot(x,y)
plt.title("Top K Accuracy")
"""

'\nx = np.linspace(0,99,100) \ny = np.zeros(100)\ny_test_pred = model.predict(x_test)\n\nfor i in range(100):\n    k = i+1\n    sim_table, top_k_guesses = pred_top_k(y_test_pred, k=k)\n    y[i] = calc_top_k_acc(top_k_guesses, y_test)\n    \nplt.plot(x,y)\nplt.title("Top K Accuracy")\n'