In [1]:
# Load pretrained model, and generate new transformed dataset by passing images through pretrained model 
import os
import numpy as np
import pickle
import keras
import pandas as pd
from resnetV2 import *

from keras.datasets import cifar100
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model, load_model
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
data_dir = os.path.join(os.getcwd(), 'Data/cifar-100-python/')
train = pickle.load(open(os.path.join(data_dir, 'train'), "rb"), encoding='latin1')
test = pickle.load(open(os.path.join(data_dir, 'test'), "rb"), encoding='latin1')
meta = pickle.load(open(os.path.join(data_dir, 'meta'), "rb"), encoding='latin1')

In [3]:
num_fine = 100
num_coarse = 20

In [4]:
coarse_label_names = meta['coarse_label_names']
fine_label_names = meta['fine_label_names']

x_train = train['data'].reshape((-1, 32, 32, 3))
y_train_coarse = np.array(train['coarse_labels'])
y_train_fine = np.array(train['fine_labels'])

x_test = test['data'].reshape((-1, 32, 32, 3))
y_test_coarse = np.array(test['coarse_labels'])
y_test_fine = np.array(test['fine_labels'])

In [5]:
d = {'coarse': y_train_coarse, 'coarse_cat_name': [coarse_label_names[i] for i in y_train_coarse],
     'fine': y_train_fine, 'fine_cat_name': [fine_label_names[i] for i in y_train_fine]}
df_train = pd.DataFrame(data=d)

In [6]:
d = {'coarse': y_test_coarse, 'coarse_cat_name': [coarse_label_names[i] for i in y_test_coarse],
     'fine': y_test_fine, 'fine_cat_name': [fine_label_names[i] for i in y_test_fine]}
df_test = pd.DataFrame(data=d)

In [7]:
# dictionary that maps each coarse label to the set of fine labels

coarse_to_fine = {}
for i in range(num_coarse):
    coarse_to_fine[i] = np.unique(df_train[df_train['coarse'] == i]['fine'])
    
# print(coarse_to_fine)

In [8]:
# dictionary that maps each coarse label to the fine label that will be unseen in the training set

np.random.seed(seed=0)
unseen_labels_ix = np.random.randint(5, size=num_coarse)

coarse_to_unseen = {}
unseen_fine_labels = []
for i in range(num_coarse):
    coarse_to_unseen[i] = coarse_to_fine[i][unseen_labels_ix[i]]
    unseen_fine_labels.append(coarse_to_fine[i][unseen_labels_ix[i]])

# print(coarse_to_unseen)

In [9]:
# list of label names to exclude from training

[fine_label_names[i] for i in unseen_fine_labels]

['whale',
 'aquarium_fish',
 'sunflower',
 'cup',
 'pear',
 'keyboard',
 'table',
 'butterfly',
 'wolf',
 'bridge',
 'cloud',
 'kangaroo',
 'possum',
 'lobster',
 'baby',
 'dinosaur',
 'mouse',
 'maple_tree',
 'bus',
 'tractor']

In [10]:
df_train['Unseen'] = [label in unseen_fine_labels for label in y_train_fine]
df_test['Unseen'] = [label in unseen_fine_labels for label in y_test_fine]

In [11]:
df_train

Unnamed: 0,coarse,coarse_cat_name,fine,fine_cat_name,Unseen
0,11,large_omnivores_and_herbivores,19,cattle,False
1,15,reptiles,29,dinosaur,True
2,4,fruit_and_vegetables,0,apple,False
3,14,people,11,boy,False
4,1,fish,1,aquarium_fish,True
5,5,household_electrical_devices,86,telephone,False
6,18,vehicles_1,90,train,False
7,3,food_containers,28,cup,True
8,10,large_natural_outdoor_scenes,23,cloud,True
9,11,large_omnivores_and_herbivores,31,elephant,False


In [12]:
train_ix_unseen = df_train[df_train['Unseen']].index.values
train_ix_seen = df_train[~df_train['Unseen']].index.values
test_ix_unseen = df_test[df_test['Unseen']].index.values
test_ix_seen = df_test[~df_test['Unseen']].index.values

print('Unseen train:', len(train_ix_unseen))
print('Seen train:', len(train_ix_seen))
print('Unseen test:', len(test_ix_unseen))
print('Seen test:', len(test_ix_seen))

Unseen train: 10000
Seen train: 40000
Unseen test: 2000
Seen test: 8000


## Zero Shot Learning

In [13]:
# Train on 40000 "seen" train examples
X_train = x_train[train_ix_seen]
y_train = y_train_fine[train_ix_seen]
print ('Train:', len(X_train))

# Test on 8000 "seen" test examples
X_test_seen = x_test[test_ix_seen]
y_test_seen = y_test_fine[test_ix_seen]
print ('Test seen:', len(X_test_seen))

# Test on 12000 "unseen" train+test examples
X_test_unseen = np.concatenate([x_train[train_ix_unseen], x_test[test_ix_unseen]])
y_test_unseen = np.concatenate([y_train_fine[train_ix_unseen], y_test_fine[test_ix_unseen]])
print ('Test unseen:', len(X_test_unseen))

# Test on all 20000 examples left over (generalized ZSL)
X_test_all = np.concatenate([X_test_seen, X_test_unseen])
y_test_all = np.concatenate([y_test_seen, y_test_unseen])
print ('Test all:', len(X_test_all))

input_shape = X_train[0].shape

Train: 40000
Test seen: 8000
Test unseen: 12000
Test all: 20000


In [14]:
def get_one_hot(y, num_classes):
    result = np.zeros((len(y), num_classes))
    result[(np.arange(len(y)), y)] = 1
    return result

y_train_oh = get_one_hot(y_train, num_fine)
y_test_seen_oh = get_one_hot(y_test_seen, num_fine)
y_test_unseen_oh = get_one_hot(y_test_unseen, num_fine)
y_test_all_oh = get_one_hot(y_test_all, num_fine)

In [15]:
# Training parameters
batch_size = 125
epochs = 150
num_classes = 100

data_augmentation = True
subtract_pixel_mean = True

In [16]:
# Model parameter (CIFAR-10)
# ----------------------------------------------------------------------------
#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
# ----------------------------------------------------------------------------
# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
# ---------------------------------------------------------------------------

version = 2
n = 6

In [17]:
depth = n * 9 + 2
model_type = 'ResNet%dv%d' % (depth, version)

In [18]:
save_dir = os.path.join(os.getcwd(), 'saved_models/zsl/%s/baseline/' % model_type)
model_name = 'cifar100_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

In [19]:
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

In [20]:
model = resnet_v2(input_shape=input_shape, depth=depth)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

Learning rate:  0.001
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 16)   448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 16)   64          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 16)   0           batch_normalization_1[0][0]      
_______________________________________________________________________________________

In [21]:
datagen = getImageDataGenerator(augment=data_augmentation)

In [22]:
history = model.fit_generator(datagen.flow(X_train, y_train_oh, batch_size=batch_size),
                              epochs=epochs, steps_per_epoch=x_train.shape[0] // batch_size,
                              validation_data=(X_test_seen, y_test_seen_oh),
                              verbose=1, workers=4, callbacks=callbacks)

Epoch 1/150
Learning rate:  0.001

Epoch 00001: val_acc improved from -inf to 0.15037, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.001.h5
Epoch 2/150
Learning rate:  0.001

Epoch 00002: val_acc improved from 0.15037 to 0.15212, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.002.h5
Epoch 3/150
Learning rate:  0.001

Epoch 00003: val_acc improved from 0.15212 to 0.19263, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.003.h5
Epoch 4/150
Learning rate:  0.001

Epoch 00004: val_acc improved from 0.19263 to 0.27025, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.004.h5
Epoch 5/150
Learning rate:  0.001

Epoch 00005: val_acc did not improve from 0.27025
Epoch 6/150
Learning rate:  0.001

Epoch 00006: val_ac


Epoch 00032: val_acc did not improve from 0.44400
Epoch 33/150
Learning rate:  0.001

Epoch 00033: val_acc improved from 0.44400 to 0.45200, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.033.h5
Epoch 34/150
Learning rate:  0.001

Epoch 00034: val_acc did not improve from 0.45200
Epoch 35/150
Learning rate:  0.001

Epoch 00035: val_acc did not improve from 0.45200
Epoch 36/150
Learning rate:  0.001

Epoch 00036: val_acc did not improve from 0.45200
Epoch 37/150
Learning rate:  0.001

Epoch 00037: val_acc did not improve from 0.45200
Epoch 38/150
Learning rate:  0.001

Epoch 00038: val_acc did not improve from 0.45200
Epoch 39/150
Learning rate:  0.001

Epoch 00039: val_acc improved from 0.45200 to 0.46325, saving model to /home/tliu/Dev/CMU/10-715/10715_Project/saved_models/zsl/ResNet56v2/baseline/cifar100_ResNet56v2_model.039.h5
Epoch 40/150
Learning rate:  0.001

Epoch 00040: val_acc did not improve from 0.46325


Epoch 00068: val_acc did not improve from 0.48463
Epoch 69/150
Learning rate:  0.001

Epoch 00069: val_acc did not improve from 0.48463
Epoch 70/150
Learning rate:  0.001

Epoch 00070: val_acc did not improve from 0.48463
Epoch 71/150
Learning rate:  0.001

Epoch 00071: val_acc did not improve from 0.48463
Epoch 72/150
Learning rate:  0.001

Epoch 00072: val_acc did not improve from 0.48463
Epoch 73/150
Learning rate:  0.001

Epoch 00073: val_acc did not improve from 0.48463
Epoch 74/150
Learning rate:  0.001

Epoch 00074: val_acc did not improve from 0.48463
Epoch 75/150
Learning rate:  0.001

Epoch 00075: val_acc did not improve from 0.48463
Epoch 76/150
Learning rate:  0.001

Epoch 00076: val_acc did not improve from 0.48463
Epoch 77/150
Learning rate:  0.001

Epoch 00077: val_acc did not improve from 0.48463
Epoch 78/150
Learning rate:  0.001

Epoch 00078: val_acc did not improve from 0.48463
Epoch 79/150
Learning rate:  0.001

Epoch 00079: val_acc did not improve from 0.48463
Epo


Epoch 00105: val_acc did not improve from 0.54175
Epoch 106/150
Learning rate:  0.0001

Epoch 00106: val_acc did not improve from 0.54175
Epoch 107/150
Learning rate:  0.0001

Epoch 00107: val_acc did not improve from 0.54175
Epoch 108/150
Learning rate:  0.0001

Epoch 00108: val_acc did not improve from 0.54175
Epoch 109/150
Learning rate:  0.0001

Epoch 00109: val_acc did not improve from 0.54175
Epoch 110/150
Learning rate:  0.0001

Epoch 00110: val_acc did not improve from 0.54175
Epoch 111/150
Learning rate:  0.0001

Epoch 00111: val_acc did not improve from 0.54175
Epoch 112/150
Learning rate:  0.0001

Epoch 00112: val_acc did not improve from 0.54175
Epoch 113/150
Learning rate:  0.0001

Epoch 00113: val_acc did not improve from 0.54175
Epoch 114/150
Learning rate:  0.0001

Epoch 00114: val_acc did not improve from 0.54175
Epoch 115/150
Learning rate:  0.0001

Epoch 00115: val_acc did not improve from 0.54175
Epoch 116/150
Learning rate:  0.0001

Epoch 00116: val_acc did not im


Epoch 00144: val_acc did not improve from 0.54175
Epoch 145/150
Learning rate:  1e-05

Epoch 00145: val_acc did not improve from 0.54175
Epoch 146/150
Learning rate:  1e-05

Epoch 00146: val_acc did not improve from 0.54175
Epoch 147/150
Learning rate:  1e-05

Epoch 00147: val_acc did not improve from 0.54175
Epoch 148/150
Learning rate:  1e-05

Epoch 00148: val_acc did not improve from 0.54175
Epoch 149/150
Learning rate:  1e-05

Epoch 00149: val_acc did not improve from 0.54175
Epoch 150/150
Learning rate:  1e-05

Epoch 00150: val_acc did not improve from 0.54175


In [23]:
best_model_filepath = os.path.join(save_dir, 'cifar100_%s_model.%03d.h5' % (model_type, 82))
best_model = load_model(best_model_filepath)

In [24]:
def top_k_acc(y_pred, y_true, k=5):
    top_k = y_pred.argsort()[:, -k:]
    num_correct = 0
    for i in range(len(top_k)):
        if y_true[i] in top_k[i]:
            num_correct += 1
    
    return num_correct * 1.0 / len(top_k)

In [25]:
scores = best_model.evaluate(X_test_seen, y_test_seen_oh, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

y_test_seen_pred = best_model.predict(X_test_seen)
print('Top 5 accuracy:', top_k_acc(y_test_seen_pred, y_test_seen))

Test loss: 2.8588431401252747
Test accuracy: 0.53675
Top 5 accuracy: 0.811


In [26]:
scores = best_model.evaluate(X_test_unseen, y_test_unseen_oh, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

y_test_unseen_pred = best_model.predict(X_test_unseen)
print('Top 5 accuracy:', top_k_acc(y_test_unseen_pred, y_test_unseen))

Test loss: 16.75167812093099
Test accuracy: 0.0
Top 5 accuracy: 0.0


In [27]:
scores = best_model.evaluate(X_test_all, y_test_all_oh, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

y_test_all_pred = best_model.predict(X_test_all)
print('Top 5 accuracy:', top_k_acc(y_test_all_pred, y_test_all))

Test loss: 11.194544128608703
Test accuracy: 0.2147
Top 5 accuracy: 0.3244


In [28]:
# Regular ZSL setting where we only need to consider choose between the 20 unseen classes, rather than all 100

y_test_unseen_pred = best_model.predict(X_test_unseen)
for i in range (100):
    if i not in unseen_fine_labels:
        y_test_unseen_pred[:, i] = -1
        
print('ZSL Test accuracy: ', np.mean(np.argmax(y_test_unseen_pred, axis=1) == y_test_unseen)) # 5%, pretty much random
print('Top 5 accuracy:', top_k_acc(y_test_unseen_pred, y_test_unseen))

ZSL Test accuracy:  0.0395
Top 5 accuracy: 0.23358333333333334


## Extract Features

In [29]:
print("Extracting layer: %s" % (best_model.get_layer(index=-6).name))
feat_extractor_model = Model(inputs= best_model.input, outputs = best_model.get_layer(index=-6).output)

Extracting layer: add_18


In [30]:
X_train_feat = feat_extractor_model.predict(X_train)
X_test_seen_feat = feat_extractor_model.predict(X_test_seen)
X_test_unseen_feat = feat_extractor_model.predict(X_test_unseen)
X_test_all_feat = feat_extractor_model.predict(X_test_all)

input_shape = X_train_feat[0].shape

In [31]:
save_dir_feat = os.path.join(os.getcwd(), 'saved_models/zsl/%s/extracted_feat/' % (model_type))
if not os.path.isdir(save_dir_feat):
    os.makedirs(save_dir_feat)

In [32]:
# Save X feature arrays
np.save(os.path.join(save_dir_feat, 'X_train_feat_cifar100_%s.npy' % (model_type)), X_train_feat)
np.save(os.path.join(save_dir_feat, 'X_test_seen_feat_cifar100_%s.npy' % (model_type)), X_test_seen_feat)
np.save(os.path.join(save_dir_feat, 'X_test_unseen_feat_cifar100_%s.npy' % (model_type)), X_test_unseen_feat)
np.save(os.path.join(save_dir_feat, 'X_test_all_feat_cifar100_%s.npy' % (model_type)), X_test_all_feat)

In [33]:
# Save y arrays
np.save(os.path.join(save_dir_feat, 'y_train_cifar100_%s.npy' % (model_type)), y_train)
np.save(os.path.join(save_dir_feat, 'y_test_seen_cifar100_%s.npy' % (model_type)), y_test_seen)
np.save(os.path.join(save_dir_feat, 'y_test_unseen_cifar100_%s.npy' % (model_type)), y_test_unseen)
np.save(os.path.join(save_dir_feat, 'y_test_all_cifar100_%s.npy' % (model_type)), y_test_all)