# Model evaluation

In [None]:
from keras import backend as K
from keras.layers import Input
from keras.callbacks import ModelCheckpoint
from keras.layers.core import Activation, Dense, Dropout, Lambda
from keras.layers.merge import Concatenate
from keras.models import Model, load_model
from keras.utils import np_utils
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm
import pandas as pd
from keras.optimizers import Adam

def load_vectors(vector_file, prefix_filter=''):
    """Load image feature vectors from a tab-separated text file.

    Every non-blank line must contain an image name, a single tab, and a
    comma-separated list of numbers.

    Args:
        vector_file: Path of the vector file to read.
        prefix_filter: Accepted for backward compatibility but not applied;
            every row in the file is loaded regardless of its name.

    Returns:
        dict mapping each image name to a 1-D numpy array of floats.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields, or a value cannot be parsed as a float.
    """
    vec_dict = {}
    # The context manager closes the file even if a malformed line raises.
    with open(vector_file, "r") as fvec:
        for line in tqdm(fvec):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            image_name, image_vec = line.split("\t")
            vec_dict[image_name] = np.array(
                [float(v) for v in image_vec.split(",")])
    return vec_dict

def batch_to_vectors(batch, vec_size, vec_dict, corner_dict=None):
    """Convert a batch of image triples into model inputs and one-hot labels.

    Each triple is ``(first_image, second_image, label)``. For each image the
    full-image vector from ``vec_dict`` is concatenated with its four corner
    vectors (keys ``<stem>_a<ext>`` .. ``<stem>_d<ext>``) from ``corner_dict``.

    Args:
        batch: Sequence of triples as described above.
        vec_size: Length of one concatenated feature row (full image plus
            four corners).
        vec_dict: Mapping from image name to its full-image vector.
        corner_dict: Mapping from corner image name to its vector. Defaults
            to the module-level ``vec_corner_dict``.

    Returns:
        ``([X1, X2], Y)`` where ``X1``/``X2`` have shape
        ``(len(batch), vec_size)`` and hold the features of the first/second
        image of every triple, and ``Y`` has shape ``(len(batch), 2)`` with
        ``[1, 0]`` for label 0 and ``[0, 1]`` otherwise.

    Raises:
        KeyError: If a full-image or corner vector is missing.
        ValueError: If a concatenated row does not have length ``vec_size``.
    """
    if corner_dict is None:
        corner_dict = vec_corner_dict

    def _features(image_name):
        # splitext splits on the last dot only, so names that contain extra
        # dots still resolve to the right corner keys.
        stem, ext = os.path.splitext(image_name.strip())
        parts = [vec_dict[image_name]]
        parts.extend(corner_dict[stem + '_' + corner + ext]
                     for corner in 'abcd')
        return np.concatenate(parts, axis=0)

    X1 = np.zeros((len(batch), vec_size))
    X2 = np.zeros((len(batch), vec_size))
    Y = np.zeros((len(batch), 2))
    for tid, triple in enumerate(batch):
        X1[tid] = _features(triple[0])
        # The second input must come from the second image of the triple;
        # reading element 0 again would feed the model two identical inputs.
        X2[tid] = _features(triple[1])
        Y[tid] = [1, 0] if triple[2] == 0 else [0, 1]
    return ([X1, X2], Y)

def data_generator(triples, vec_size, vec_dict, batch_size=32):
    """Yield shuffled batches of model inputs and labels indefinitely.

    The triples are reshuffled at the start of every pass. Each pass yields
    ``len(triples) // batch_size`` full batches; a trailing partial batch is
    dropped.

    Args:
        triples: Sequence of ``(first_image, second_image, label)`` triples.
        vec_size: Length of one feature row, forwarded to
            ``batch_to_vectors``.
        vec_dict: Mapping from image name to full-image vector, forwarded to
            ``batch_to_vectors``.
        batch_size: Number of triples per batch.

    Yields:
        The ``([X1, X2], Y)`` tuple produced by ``batch_to_vectors``.

    Raises:
        ValueError: On the first ``next()`` call if ``batch_size`` is not
            positive or there are fewer triples than ``batch_size``.
    """
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be positive, got {}".format(batch_size))
    num_batches = len(triples) // batch_size
    if num_batches == 0:
        # Without this guard the outer loop would spin forever and never
        # yield anything.
        raise ValueError(
            "need at least batch_size={} triples, got {}".format(
                batch_size, len(triples)))
    while True:
        # Shuffle once per pass over the data.
        indices = np.random.permutation(len(triples))
        for bid in range(num_batches):
            batch_indices = indices[bid * batch_size: (bid + 1) * batch_size]
            batch = [triples[i] for i in batch_indices]
            yield batch_to_vectors(batch, vec_size, vec_dict)
            
def evaluate_model(model, test_gen, triple_len):
    """Run the model over batches from a generator and print its metrics.

    Pulls ``triple_len // BATCH_SIZE`` batches from ``test_gen`` (using the
    module-level ``BATCH_SIZE``), predicts each one, and compares the argmax
    of the predictions against the argmax of the one-hot labels.

    Args:
        model: Object with a ``predict`` method that accepts ``[X1, X2]``.
        test_gen: Generator yielding ``([X1, X2], Y)`` batches.
        triple_len: Number of test triples available.

    Returns:
        The accuracy over all evaluated batches. The accuracy, confusion
        matrix and classification report are also printed.
    """
    steps = triple_len // BATCH_SIZE
    labels_true = []
    labels_pred = []
    for _ in range(steps):
        inputs, targets = next(test_gen)
        left, right = inputs
        scores = model.predict([left, right])
        # Collapse one-hot rows / class scores to class indices.
        labels_true += np.argmax(targets, axis=1).tolist()
        labels_pred += np.argmax(scores, axis=1).tolist()

    accuracy = accuracy_score(labels_true, labels_pred)
    print("\nAccuracy: {:.3f}".format(accuracy))

    print("\nConfusion Matrix")
    print(confusion_matrix(labels_true, labels_pred))

    print("\nClassification Report")
    print(classification_report(labels_true, labels_pred))
    return accuracy

In [None]:
# Filesystem locations used by this notebook (machine-specific absolute paths).
IMG_DIR = '/home/nelssalminen/painters/data/'
OUTPUT_DIR = '/home/nelssalminen/painters/data/output/'
MODEL_DIR = '/home/nelssalminen/painters/data/scratch/models'
CORNER_IMG_DIR = '/home/nelssalminen/painters/fullres_data/output/'

# Length of one model input row. batch_to_vectors concatenates a full-image
# vector with four corner vectors, so this must equal their combined length
# (presumably 5 x 2048 -- TODO confirm against the vector files).
VECTOR_SIZE = 10240
VECTOR_FILE = os.path.join(OUTPUT_DIR, 'xception-vectors_alldata.tsv')
VECTOR_FILE_CORNERS = os.path.join(CORNER_IMG_DIR, "xception-segment-vectors-gpu.tsv")

# Load both vector tables into memory. batch_to_vectors reads vec_corner_dict
# as a module-level global, so it must exist before any batch is built.
vec_corner_dict = load_vectors(VECTOR_FILE_CORNERS)
vec_dict = load_vectors(VECTOR_FILE)

In [None]:
# Load the saved Keras model that the evaluation cells below will score.
# NOTE(review): the file name suggests this is the best training checkpoint
# -- confirm against the training notebook.
MODEL_FILE = os.path.join(MODEL_DIR, 'xception_corner-cat-best.h5')
model = load_model(MODEL_FILE)

In [None]:
# Every image name listed in the metadata CSV ('new_filename' column).
all_data = pd.read_csv('/home/nelssalminen/painters/data/all_data_info_custom.csv')
ALL_IMG_LIST = all_data['new_filename'].tolist()

# Images already recorded as unusable, one name per line.
CORRUPTED_IMG_LIST_PATH = os.path.join(CORNER_IMG_DIR, "error_images.tsv")
with open(CORRUPTED_IMG_LIST_PATH) as f:
    CORRUPTED_IMG_LIST = [line.replace('\n', '') for line in f]

# Find images that are not in the recorded list but still lack the full-image
# vector or any of the four corner vectors; they must be excluded as well.
known_corrupted = set(CORRUPTED_IMG_LIST)  # O(1) membership inside the loop
counter = 0  # number of additional unusable images found below
list_error_anyway = []
for imgpath in ALL_IMG_LIST:
    # splitext splits on the last dot only, so names with extra dots work.
    stem, ext = os.path.splitext(imgpath.strip())
    has_all_vectors = imgpath in vec_dict and all(
        (stem + '_' + corner + ext) in vec_corner_dict for corner in 'abcd')
    if not has_all_vectors and imgpath not in known_corrupted:
        counter += 1
        list_error_anyway.append(imgpath)

CORRUPTED_IMG_LIST = CORRUPTED_IMG_LIST + list_error_anyway

def get_triples(image_dir, dat, filename_label='filename', path_prefix='',
                corrupted=None):
    """Build balanced same-artist / different-artist image triples.

    Images are grouped by the ``'artist'`` column. For every pair of images
    from the same artist a positive triple ``(ref, other, 1)`` is emitted,
    immediately followed by a negative triple ``(ref, random_image, 0)``
    whose second image is drawn at random from a different artist.

    Args:
        image_dir: Unused; kept so existing callers keep working.
        dat: DataFrame with an ``'artist'`` column and a filename column.
        filename_label: Name of the filename column in ``dat``.
        path_prefix: String prepended to every filename.
        corrupted: Iterable of image names to exclude. Defaults to the
            module-level ``CORRUPTED_IMG_LIST``.

    Returns:
        List of ``(image, image, label)`` tuples, alternating positive and
        negative triples.

    Raises:
        ValueError: If a same-artist pair exists but there is no second
            artist to draw a negative example from.
    """
    # Build the exclusion set once so each row costs an O(1) lookup.
    excluded = set(CORRUPTED_IMG_LIST if corrupted is None else corrupted)

    image_groups = {}
    for img_name, group_name in zip(dat[filename_label], dat['artist']):
        img_path = path_prefix + img_name
        if img_path not in excluded:
            image_groups.setdefault(group_name, []).append(img_path)

    num_sims = 0
    image_triples = []
    group_list = sorted(image_groups)
    for i, g in enumerate(group_list):
        if num_sims % 100 == 0:
            print("Generated {:d} pos + {:d} neg = {:d} total image triples"
                  .format(num_sims, num_sims, 2*num_sims))
        # For each similar pair, generate a corresponding different pair.
        for ref_image, sim_image in itertools.combinations(image_groups[g], 2):
            if len(group_list) < 2:
                # The rejection loop below could never find j != i.
                raise ValueError(
                    "cannot build negative triples: only one artist group")
            image_triples.append((ref_image, sim_image, 1))
            num_sims += 1
            # Rejection-sample a group other than the current one.
            while True:
                j = np.random.randint(low=0, high=len(group_list), size=1)[0]
                if j != i:
                    break
            dif_image_candidates = image_groups[group_list[j]]
            k = np.random.randint(low=0, high=len(dif_image_candidates), size=1)[0]
            dif_image = dif_image_candidates[k]
            image_triples.append((ref_image, dif_image, 0))

    print("Generated {:d} pos + {:d} neg = {:d} total image triples"
          .format(num_sims, num_sims, 2*num_sims))
    return image_triples

# Metadata for the three evaluation splits.
# NOTE(review): judging by the file names, "seen"/"unseen" presumably refer to
# artists present/absent in the training set -- confirm.
test_data_seen = pd.read_csv(
    '/home/nelssalminen/painters/data/test_info-train_only.csv')
test_data_unseen = pd.read_csv(
    '/home/nelssalminen/painters/data/test_info-test_only.csv')
test_data_mix = pd.read_csv(
    '/home/nelssalminen/painters/data/test_info.csv')

# Build positive/negative image triples for each split.
test_triples_seen = get_triples(
    IMG_DIR, test_data_seen, 'new_filename')
test_triples_unseen = get_triples(
    IMG_DIR, test_data_unseen, 'new_filename')
test_triples_mix = get_triples(
    IMG_DIR, test_data_mix, 'new_filename')

In [None]:
# Batch size shared by the generators below and by evaluate_model, which
# reads it as a module-level global to work out how many batches to pull.
BATCH_SIZE = 256

# Evaluate each split and keep its accuracy under its own name, so a later
# split no longer overwrites an earlier result.
test_gen_seen = data_generator(test_triples_seen, VECTOR_SIZE, vec_dict, BATCH_SIZE)
accuracy_seen = evaluate_model(model, test_gen_seen, len(test_triples_seen))

test_gen_unseen = data_generator(test_triples_unseen, VECTOR_SIZE, vec_dict, BATCH_SIZE)
accuracy_unseen = evaluate_model(model, test_gen_unseen, len(test_triples_unseen))

test_gen_mix = data_generator(test_triples_mix, VECTOR_SIZE, vec_dict, BATCH_SIZE)
accuracy_mix = evaluate_model(model, test_gen_mix, len(test_triples_mix))

# Kept for backward compatibility: as before, this ends up holding the
# accuracy of the last (mixed) split.
final_accuracy = accuracy_mix