# Vector extraction using Inception V3 pre-trained model
## Import necessary packages

In [1]:
from __future__ import division, print_function
from keras import backend as K
from keras.layers import Input
from keras.callbacks import ModelCheckpoint
from keras.layers.core import Activation, Dense, Dropout, Lambda
from keras.layers.merge import Concatenate
from keras.models import Model, load_model
from keras.utils import np_utils
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from keras.optimizers import Adam
from keras.applications.vgg19 import VGG19
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50

from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from keras.models import Model
import itertools
import numpy as np
import os
import pandas as pd

from PIL.Image import LANCZOS
from PIL import Image
from PIL import ImageFile
Image.MAX_IMAGE_PIXELS = 1000000000                                                                                              
ImageFile.LOAD_TRUNCATED_IMAGES = True

Using TensorFlow backend.


In [2]:
# List the GPU devices visible to the Keras TensorFlow backend.
from keras import backend as K
# NOTE(review): `_get_available_gpus` is underscore-prefixed (private) API of the
# TensorFlow backend module; it may not exist in other Keras versions — confirm
# before upgrading.
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [3]:
# Locations of the image data and of the files this notebook writes.
IMG_DIR = '/home/nelssalminen/painters/data/'
OUTPUT_DIR = '/home/nelssalminen/painters/data/output/'

# Metadata tables; the CSV files sit directly inside IMG_DIR.
all_data = pd.read_csv(IMG_DIR + 'all_data_info_custom.csv')
train_data = pd.read_csv(IMG_DIR + 'train_info.csv')
test_data = pd.read_csv(IMG_DIR + 'test_info.csv')

# Image file names per table (the train table uses 'filename', the other two
# use 'new_filename').
ALL_IMG_LIST = all_data['new_filename'].tolist()
TRAIN_IMG_LIST = train_data['filename'].tolist()
TEST_IMG_LIST = test_data['new_filename'].tolist()

os.makedirs(OUTPUT_DIR, exist_ok=True)

# The first line previously also said "train images", which made the report
# ambiguous: it is the size of the complete image list.
print("Amount of all images: " + str(len(ALL_IMG_LIST)))
print("Amount of train images: " + str(len(TRAIN_IMG_LIST)))
print("Amount of test images: " + str(len(TEST_IMG_LIST)))

# ImageNet-pretrained InceptionV3 without the classification top; global
# average pooling yields one flat feature vector per image.
model = InceptionV3(weights='imagenet',include_top=False,pooling='avg')

Amount of train images: 103250
Amount of train images: 79433
Amount of test images: 23818
Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Run every image through the InceptionV3 feature extractor and store the
# vectors in a TSV file: "<image name>\t<comma-separated values>" per line.
#
# InceptionV3 weights must be paired with InceptionV3's own input scaling; the
# `preprocess_input` imported at the top of the notebook belongs to VGG19.
from keras.applications.inception_v3 import preprocess_input as inception_preprocess_input

# NOTE(review): the file name matches the one the classification stage loads
# ("inceptionv3-vectors_alldata.tsv"); the previously commented-out `open`
# used 'inceptionv3-vectors.tsv' — confirm which file is intended.
VECTOR_OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'inceptionv3-vectors_alldata.tsv')

num_vecs = 0
# `with` guarantees the file is flushed and closed even if an image fails.
with open(VECTOR_OUTPUT_FILE, "w") as fvec:
    for image_ in ALL_IMG_LIST:
        # NOTE(review): 224x224 is kept from the original cell; InceptionV3's
        # native input size is 299x299 — confirm the intended resolution.
        img = image.load_img(IMG_DIR + image_, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # the model expects a batch dimension
        x = inception_preprocess_input(x)
        features = model.predict(x)[0]

        if num_vecs % 100 == 0:
            print("{:d} vectors generated".format(num_vecs))

        image_vector = ",".join(["{:.5e}".format(v) for v in features.tolist()])
        fvec.write("{:s}\t{:s}\n".format(image_, image_vector))
        num_vecs += 1

# Vector classification
## Create image triples
We start this stage by grouping the images by painter. Once the images are organized, we iterate over each
artist's group and take every pair of images in it as a reference image and a *similar* image. For each such pair,
we then randomly select an image from a different artist's group, creating a second pair made of the same reference
image combined with a dissimilar image.

In [4]:
def show_img(sid, img_file, img_title):
    """Draw one image, resized to 512x512, into a matplotlib subplot.

    Args:
        sid: Subplot specifier passed straight to `plt.subplot` (e.g. 131).
        img_file: Path of the image file to display.
        img_title: Title shown above the subplot.
    """
    plt.subplot(sid)
    plt.title(img_title)
    plt.xticks([])
    plt.yticks([])
    # `imresize` was never imported in this notebook (and is no longer shipped
    # with SciPy), so resize with PIL, which the notebook already imports.
    with Image.open(img_file) as source:
        img = source.resize((512, 512), LANCZOS)
    plt.imshow(img)

def get_triples(image_dir, dat, filename_label='filename', path_prefix=''):
    """Build labelled image pairs ("triples") grouped by artist.

    Every pair of images by the same artist becomes a positive triple
    `(ref_image, sim_image, 1)`. For each positive triple one negative triple
    `(ref_image, dif_image, 0)` is added, where `dif_image` is drawn at random
    from a randomly chosen different artist.

    Args:
        image_dir: Not used by this function; kept so existing calls still work.
        dat: DataFrame with an 'artist' column and a `filename_label` column.
        filename_label: Name of the column holding the image file names.
        path_prefix: String prepended to every file name.

    Returns:
        List of `(ref_image, other_image, label)` tuples, positives and
        negatives interleaved.

    Raises:
        ValueError: If a same-artist pair exists but `dat` contains only one
            artist, so no dissimilar image can be drawn (this case used to
            loop forever).
    """
    # Group the (prefixed) file names by artist, preserving row order.
    image_groups = {}
    for img_name, group_name in zip(dat[filename_label], dat['artist']):
        image_groups.setdefault(group_name, []).append(path_prefix + img_name)

    num_sims = 0
    image_triples = []
    group_list = sorted(image_groups.keys())
    num_groups = len(group_list)
    for i, g in enumerate(group_list):
        # Progress is only reported at group boundaries where the running
        # count happens to be a multiple of 100.
        if num_sims % 100 == 0:
            print("Generated {:d} pos + {:d} neg = {:d} total image triples"
                  .format(num_sims, num_sims, 2*num_sims))
        # For each similar pair, generate a corresponding different pair
        for ref_image, sim_image in itertools.combinations(image_groups[g], 2):
            if num_groups < 2:
                raise ValueError(
                    "get_triples needs at least two artists to draw a dissimilar image")
            image_triples.append((ref_image, sim_image, 1))
            num_sims += 1
            # Rejection-sample the index of a group other than the current one.
            while True:
                j = np.random.randint(low=0, high=num_groups, size=1)[0]
                if j != i:
                    break
            dif_image_candidates = image_groups[group_list[j]]
            k = np.random.randint(low=0, high=len(dif_image_candidates), size=1)[0]
            image_triples.append((ref_image, dif_image_candidates[k], 0))

    print("Generated {:d} pos + {:d} neg = {:d} total image triples"
          .format(num_sims, num_sims, 2*num_sims))
    return image_triples

Generate the image triples using the previously defined function.

In [9]:
# Training/validation triples come from the train table (file names need the
# 'train/' prefix); test triples come from the test table as-is.
train_val_triples = get_triples(
    image_dir=IMG_DIR,
    dat=train_data,
    filename_label='filename',
    path_prefix='train/',
)
test_triples = get_triples(
    image_dir=IMG_DIR,
    dat=test_data,
    filename_label='new_filename',
)

Generated 0 pos + 0 neg = 0 total image triples
Generated 5500 pos + 5500 neg = 11000 total image triples
Generated 759100 pos + 759100 neg = 1518200 total image triples
Generated 2213600 pos + 2213600 neg = 4427200 total image triples
Generated 2740300 pos + 2740300 neg = 5480600 total image triples
Generated 2949500 pos + 2949500 neg = 5899000 total image triples
Generated 3064500 pos + 3064500 neg = 6129000 total image triples
Generated 3755000 pos + 3755000 neg = 7510000 total image triples
Generated 4550100 pos + 4550100 neg = 9100200 total image triples
Generated 4648400 pos + 4648400 neg = 9296800 total image triples
Generated 5435200 pos + 5435200 neg = 10870400 total image triples
Generated 5492200 pos + 5492200 neg = 10984400 total image triples
Generated 5773652 pos + 5773652 neg = 11547304 total image triples
Generated 0 pos + 0 neg = 0 total image triples
Generated 0 pos + 0 neg = 0 total image triples
Generated 118000 pos + 118000 neg = 236000 total image triples
Generate

test/36766.jpg


In [None]:
# Build triples for the images flagged as 'test_only' in the full metadata table.
#
# The previous version of this cell referenced `image_triples`, `num_sims`,
# `group_list`, `i` and `image_groups`, which only exist inside `get_triples`,
# and called `len()` on an iterator, so it could not run. It also labelled
# every pair of test-only images as "similar" regardless of artist. Reusing
# `get_triples` applies the same by-artist pairing as for the other sets.
test_only_data = all_data[all_data['artist_group'] == 'test_only']
test_only_images = test_only_data['new_filename'].tolist()

# NOTE(review): assumes `all_data` has an 'artist' column like `train_data`
# and `test_data` do — confirm against the CSV.
test_only_triples = get_triples(IMG_DIR, test_only_data, 'new_filename')

print(len(test_only_triples))

Set several configuration and utility variables, including loading the vector files.

In [53]:
def load_vectors(vector_file, prefix_filter=''):
    """Load image vectors from a TSV file into a dictionary.

    Each non-blank line must look like "<image name>\\t<v1>,<v2>,...".

    Args:
        vector_file: Path of the TSV file to read.
        prefix_filter: When non-empty, only images whose name starts with this
            prefix are loaded. The default '' loads every image.

    Returns:
        Dict mapping image name to a 1-D float NumPy array.

    Raises:
        ValueError: If a non-blank line is not "<name>\\t<values>" or a value
            cannot be parsed as a float.
    """
    vec_dict = {}
    # `with` closes the file even when a malformed line raises.
    with open(vector_file, "r") as fvec:
        for line in fvec:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines, e.g. a trailing newline
            image_name, image_vec = line.split("\t")
            if not image_name.startswith(prefix_filter):
                continue
            vec_dict[image_name] = np.array(image_vec.split(","), dtype=float)
    return vec_dict

# Training hyper-parameters.
NUM_EPOCHS = 35
BATCH_SIZE = 256

# Scratch directory for files produced during training.
DATA_CONTAINER = '/home/nelssalminen/painters/data/scratch/'
os.makedirs(DATA_CONTAINER, exist_ok=True)

# Experiment grid; `scores` holds one cell per (vectorizer, merge mode) pair.
MERGE_MODES = ["Concat", "Euclidean"]
VECTORIZERS = ["InceptionV3"]
scores = np.zeros(shape=(len(VECTORIZERS), len(MERGE_MODES)))

# Vector width used for the batch arrays and model inputs, and the file the
# vectors are loaded from.
VECTOR_SIZE = 2048
VECTOR_FILE = os.path.join(OUTPUT_DIR, "inceptionv3-vectors_alldata.tsv")

vec_dict = load_vectors(vector_file=VECTOR_FILE)

In [54]:
# Sanity check: how many image vectors were loaded.
num_loaded_vectors = len(vec_dict)
print(num_loaded_vectors)

103250


Split the triples for training, validation and testing based on given ratios.

In [55]:
def train_val_split(triples, splits):
    """Shuffle `triples` and cut them into consecutive parts by ratio.

    Args:
        triples: Sequence of items to split (indexable by integer).
        splits: Ratios of the parts, e.g. `[0.9, 0.1]`; they must sum to 1
            (up to floating-point rounding).

    Returns:
        List with one list of items per ratio, in the order of `splits`.
        Together the parts contain every input item exactly once.

    Raises:
        AssertionError: If the ratios do not sum to 1.
    """
    splits = list(splits)
    # An exact `== 1.0` check rejects valid ratios such as [0.7, 0.2, 0.1],
    # whose floating-point sum is 0.9999999999999999.
    assert np.isclose(sum(splits), 1.0), "splits must sum to 1"

    num_triples = len(triples)
    split_pts = np.cumsum(np.array([0.] + splits))
    boundaries = [int(pt * num_triples) for pt in split_pts]
    # The last cumulative point can land just below 1.0; pin the final boundary
    # so rounding never drops the last item.
    boundaries[-1] = num_triples

    indices = np.random.permutation(np.arange(num_triples))
    shuffled_triples = [triples[i] for i in indices]
    return [shuffled_triples[start:end]
            for start, end in zip(boundaries[:-1], boundaries[1:])]

# 90% of the train/validation triples go to training, 10% to validation.
train_triples, val_triples = train_val_split(train_val_triples, splits=[0.9, 0.1])

size_report = (
    "Training set size: " + str(len(train_triples)),
    "Validation set size:" + str(len(val_triples)),
    "Test set size:" + str(len(test_triples)),
)
print(*size_report)

Training set size: 10392573 Validation set size:1154731 Test set size:1138540


In [56]:
def batch_to_vectors(batch, vec_size, vec_dict):
    """Turn a batch of triples into model inputs and one-hot labels.

    Args:
        batch: Sequence of `(first_name, second_name, label)` triples.
        vec_size: Length of each stored vector.
        vec_dict: Mapping from image name to its vector.

    Returns:
        `([X1, X2], Y)`: X1 and X2 have shape `(len(batch), vec_size)` and hold
        the vectors of the first and second image of every triple; Y has shape
        `(len(batch), 2)` and is `[1, 0]` for label 0, `[0, 1]` otherwise.
    """
    num_rows = len(batch)
    first_vecs = np.zeros((num_rows, vec_size))
    second_vecs = np.zeros((num_rows, vec_size))
    one_hot = np.zeros((num_rows, 2))
    for row, triple in enumerate(batch):
        first_vecs[row] = vec_dict[triple[0]]
        second_vecs[row] = vec_dict[triple[1]]
        # Column 0 marks label 0; any other label goes to column 1.
        hot_column = 0 if triple[2] == 0 else 1
        one_hot[row, hot_column] = 1
    return ([first_vecs, second_vecs], one_hot)


def data_generator(triples, vec_size, vec_dict, batch_size=32):
    """Endlessly yield shuffled batches of `([X1, X2], Y)` arrays.

    The triples are reshuffled at the start of every pass over the data. Only
    full batches are produced, so up to `batch_size - 1` triples are left out
    of each pass.

    Args:
        triples: Sequence of `(first_name, second_name, label)` triples.
        vec_size: Length of each stored vector.
        vec_dict: Mapping from image name to its vector.
        batch_size: Number of triples per yielded batch.

    Yields:
        The result of `batch_to_vectors` for each batch.

    Raises:
        ValueError: On the first `next()` call, if `batch_size` is not positive
            or there are fewer triples than one batch. Without this check the
            generator would loop forever without yielding anything.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(triples) < batch_size:
        raise ValueError(
            "need at least batch_size ({:d}) triples, got {:d}"
            .format(batch_size, len(triples)))
    while True:
        # shuffle once per pass over the data
        indices = np.random.permutation(np.arange(len(triples)))
        num_batches = len(triples) // batch_size
        for bid in range(num_batches):
            batch_indices = indices[bid * batch_size: (bid + 1) * batch_size]
            batch = [triples[i] for i in batch_indices]
            yield batch_to_vectors(batch, vec_size, vec_dict)
            
# Endless batch streams for training and validation.
train_gen = data_generator(
    triples=train_triples,
    vec_size=VECTOR_SIZE,
    vec_dict=vec_dict,
    batch_size=BATCH_SIZE,
)
val_gen = data_generator(
    triples=val_triples,
    vec_size=VECTOR_SIZE,
    vec_dict=vec_dict,
    batch_size=BATCH_SIZE,
)

In [57]:
# Two inputs, one per image vector of a pair, joined along the feature axis.
vector_shape = (VECTOR_SIZE,)
input_1 = Input(shape=vector_shape)
input_2 = Input(shape=vector_shape)
merged = Concatenate(axis=-1)([input_1, input_2])

Define the classifier: ten fully connected hidden layers on top of the merged vector pair, followed by a 2-way softmax output

In [58]:
def _dense_block(tensor, units):
    """Apply Dense(units) -> Dropout(0.2) -> ReLU to `tensor` and return the result."""
    tensor = Dense(units, kernel_initializer="glorot_uniform")(tensor)
    tensor = Dropout(0.2)(tensor)
    return Activation("relu")(tensor)


# Ten hidden blocks, narrowing from 2048 to 128 units.
fc1 = _dense_block(merged, 2048)
fc2 = _dense_block(fc1, 2048)
fc3 = _dense_block(fc2, 2048)

fc8 = _dense_block(fc3, 1024)
fc9 = _dense_block(fc8, 1024)
fc11 = _dense_block(fc9, 1024)

fc12 = _dense_block(fc11, 512)
fc13 = _dense_block(fc12, 512)

fc14 = _dense_block(fc13, 128)
fc15 = _dense_block(fc14, 128)

# Two-way softmax output, matching the one-hot labels.
pred = Dense(2, kernel_initializer="glorot_uniform")(fc15)
pred = Activation("softmax")(pred)

model = Model(inputs=[input_1, input_2], outputs=pred)
adam = Adam(lr=.00001)
model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
input_7 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
concatenate_5 (Concatenate)     (None, 4096)         0           input_6[0][0]                    
                                                                 input_7[0][0]                    
__________________________________________________________________________________________________
dense_23 (Dense)                (None, 2048)         8390656     concatenate_5[0][0]              
__________

Define functions for model evaluation

In [59]:
def evaluate_model(model_file, test_gen, num_test_steps=None):
    """Load a saved model, run it over batches from `test_gen` and print metrics.

    Prints the accuracy, confusion matrix and classification report.

    Args:
        model_file: Path of the saved Keras model to load.
        test_gen: Generator yielding `([X1, X2], Y)` batches with one-hot `Y`.
        num_test_steps: Number of batches to draw from `test_gen`. Defaults to
            `len(test_triples) // BATCH_SIZE`, read from the notebook globals,
            which is what this function always used before.

    Returns:
        The accuracy over all evaluated batches.
    """
    if num_test_steps is None:
        num_test_steps = len(test_triples) // BATCH_SIZE
    model_name = os.path.basename(model_file)
    model = load_model(model_file)
    print("=== Evaluating model: {:s} ===".format(model_name))
    ytrue, ypred = [], []
    for _ in range(num_test_steps):
        (X1, X2), Y = next(test_gen)
        Y_ = model.predict([X1, X2])
        # Collapse one-hot labels / class probabilities to class indices.
        ytrue.extend(np.argmax(Y, axis=1).tolist())
        ypred.extend(np.argmax(Y_, axis=1).tolist())
    accuracy = accuracy_score(ytrue, ypred)
    print("\nAccuracy: {:.3f}".format(accuracy))
    print("\nConfusion Matrix")
    print(confusion_matrix(ytrue, ypred))
    print("\nClassification Report")
    print(classification_report(ytrue, ypred))
    return accuracy
    
def get_model_file(data_dir, vector_name, merge_mode, borf):
    """Return the path `<data_dir>/models/<vector_name>-<merge_mode>-<borf>.h5`.

    All three name parts must be strings. The visible calls pass "best" or
    "final" as `borf`.
    """
    file_name = "{:s}-{:s}-{:s}.h5".format(vector_name, merge_mode, borf)
    models_dir = os.path.join(data_dir, "models")
    return os.path.join(models_dir, file_name)

In [60]:
best_model_name = get_model_file(DATA_CONTAINER, "inceptionv3r2", "cat", "best")
final_model_name = get_model_file(DATA_CONTAINER, "inceptionv3r2", "cat", "final")

# Both paths point into DATA_CONTAINER/models, which nothing else in the
# notebook creates; make sure it exists before Keras tries to write there.
for model_path in (best_model_name, final_model_name):
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Keep the best model seen during training (ModelCheckpoint's default monitor).
checkpoint = ModelCheckpoint(best_model_name, save_best_only=True)
train_steps_per_epoch = len(train_triples) // BATCH_SIZE
val_steps_per_epoch = len(val_triples) // BATCH_SIZE
history = model.fit_generator(train_gen, steps_per_epoch=train_steps_per_epoch,
                              epochs=NUM_EPOCHS,
                              validation_data=val_gen, validation_steps=val_steps_per_epoch,
                              callbacks=[checkpoint])

# Save the model as it stands after the last epoch and evaluate it.
model.save(final_model_name)
test_gen = data_generator(test_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE)
final_accuracy = evaluate_model(final_model_name, test_gen)

# Evaluate the checkpointed best model on a fresh test generator.
test_gen = data_generator(test_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE)
best_accuracy = evaluate_model(best_model_name, test_gen)

# Record the better of the two accuracies.
scores[0, 0] = max(best_accuracy, final_accuracy)

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
=== Evaluating model: inceptionv3r2-cat-final.h5 ===

Accuracy: 0.617

Confusion Matrix
[[355732 213494]
 [222201 347005]]

Classification Report
              precision    recall  f1-score   support

           0       0.62      0.62      0.62    569226
           1       0.62      0.61      0.61    569206

   micro avg       0.62      0.62      0.62   1138432
   macro avg       0.62      0.62      0.62   1138432
weighted avg       0.62      0.62      0.62   1138432

=== Evaluating model: inceptionv3r2-cat-best.h5 ===

Accuracy: 0.612

Confusion Matrix
[[371466 197754]
 [243692 325