In [1]:
# Load pretrained model, and generate new transformed dataset by passing images through pretrained model 
import os
import numpy as np
import pickle
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.datasets import cifar100
from keras.models import Sequential, Model, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
num_classes = 100
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

label_embeddings = pickle.load(open("Data/Embeddings/CIFAR/CIFAR_100_label_to_embedding_google_news.pk", "rb"))
train_y_embeddings = np.zeros((50000, 300))
test_y_embeddings = np.zeros((10000, 300))

for i in range(y_train.shape[0]):
    train_y_embeddings[i] = label_embeddings[int(y_train[i][0])]

for i in range(y_test.shape[0]):
    test_y_embeddings[i] = label_embeddings[int(y_test[i][0])]

In [3]:
model = Sequential()

model.add(Conv2D(64, (3, 3), padding='same',input_shape=x_train[0].shape))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.3))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))


model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
# model.add(Dropout(0.4))

model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(512, input_dim=8192, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(512, input_dim=300, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(300, input_dim=300))

In [4]:
model.compile(loss='cosine_proximity', optimizer='adam', metrics=['mse'])

In [5]:
batch_size = 128
epochs = 3

datagen = ImageDataGenerator(rescale=1./255)
generator_train = datagen.flow(x_train, train_y_embeddings, batch_size=batch_size)
generator_test = datagen.flow(x_test, test_y_embeddings, batch_size=batch_size)

In [6]:
history = model.fit_generator(generator=generator_train,
                                    steps_per_epoch=x_train.shape[0] // batch_size,
                                    epochs=epochs,
                                    validation_data=generator_test,
                                    validation_steps=x_test.shape[0] // batch_size
                                   )

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [7]:
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

In [8]:
pred_test_y = model.predict(x_test)

label_embeddings
label_embeddings_arr = np.zeros((100, 300))
for i in range(100):
    label_embeddings_arr[i] = label_embeddings[i]

In [9]:
#Top K Predictions
def pred_top_k(k=5, pred_test_y=pred_test_y, label_embeddings_arr=label_embeddings_arr):
    sim_table = cosine_similarity(pred_test_y, label_embeddings_arr)
    top_k_guesses = np.argpartition(sim_table,range(99-k+1,100),axis=1)[:,99-k+1:]
    return top_k_guesses


#Top prediction
sim_table = cosine_similarity(pred_test_y, label_embeddings_arr)
label_predictions = sim_table.argmax(axis=1)
label_predictions

k=5
top_k_guesses = pred_top_k(5)


In [10]:
#Accuracy
acc = np.sum((np.squeeze(y_test) == label_predictions)) / float(y_test.shape[0])
print("Accuracy: " + str(acc))

#Top k Accuracy

def acc(top_k=top_k_guesses, test_y=y_test):
    correct = 0
    for i in range(test_y.shape[0]):
        if np.squeeze(test_y)[i] in top_k[i]:
            correct += 1
    return correct/float(test_y.shape[0])

print("Top " + str(k) + " Accuracy: " + str(acc()))

Accuracy: 0.01
Top 5 Accuracy: 0.0495
