In [6]:
from backbones import Vgg16, resnet50, siamese
from data import rgb2ycbcr, ImageDataLabelGenerator
from models import CenterLossNet
from tensorflow import keras
import numpy as np
from tqdm import tqdm_notebook
from PIL import Image
import pickle
import os

In [2]:

# --- Notebook configuration -------------------------------------------------
# Input geometry: H and W are passed as `target_size=(H, W)` when images are
# loaded; C is presumably the channel count (RGB) -- confirm against the model.
H = 150
W = 300
C = 3

# Model / training hyper-parameters. Only `lambda_c` is consumed by the cells
# visible here (forwarded to `create_model`); the others are kept for code
# outside this view.
nb_classes = 5004
feature_size = 512
lambda_c = 0.2
lr = 6e-4
weight_decay = 5e-4
final_active = 'sigmoid'  # for siamese net


In [3]:

# Image generator. In the cells visible here it is only used through
# `train_data_gen.standardize(x)`, i.e. for per-image normalisation.
train_data_gen = keras.preprocessing.image.ImageDataGenerator(
    # Per-sample preprocessing / normalisation settings.
    preprocessing_function=rgb2ycbcr,  # presumably RGB -> YCbCr; see `data` module
    rescale=1. / 255,
    samplewise_center=True,
    samplewise_std_normalization=True,
    zca_whitening=False,
    zca_epsilon=1e-6,
    # Random augmentation settings (geometry / flips).
    rotation_range=16,
    width_shift_range=0.2,
    height_shift_range=0.1,
    zoom_range=0.2,
    fill_mode='reflect',
    horizontal_flip=True,
    vertical_flip=False,
    # Fraction of the data reserved for validation.
    validation_split=0.1)

# Build the network on the `siamese` backbone, load the stored weights and
# keep only what `get_embedding()` returns; it is used below via
# `model.predict(...)` to turn image batches into embedding vectors.
center_loss_net = CenterLossNet(siamese, "./trainSpace/", "CenterLossNet")
model = center_loss_net.create_model(
    _compile=True,
    use_weightnorm=False,
    database_init=False,
    load_weights=True,
    weights_path="./trainSpace/weights/CenterLossNet.h5",
    lambda_c=lambda_c,
).get_embedding()


In [19]:

# Load the pickled metadata tables.
# NOTE: `pickle.load` can execute arbitrary code embedded in the file, so
# these files must come from a trusted source.

# p2l: used below as a mapping whose keys are image file names (joined onto
# "../Dataset/train") and whose values are labels.
with open("../Dataset/metadata/p2l.pickle", "rb") as p2l_file:
    p2l = pickle.load(p2l_file)

# l2ps: used below as a mapping from a label to the entry stored in `match`
# (presumably the list of images carrying that label -- confirm).
with open("../Dataset/metadata/tr_l2ps.pickle", "rb") as l2ps_file:
    l2ps = pickle.load(l2ps_file)

def cal_dis(pa, pb):
    """Return the Euclidean distance between the embeddings of two images.

    Both keys are located in the module-level list ``ps``; the resulting
    positions select rows of the module-level ``embeds``, so the two globals
    must exist and be aligned row-for-row when this is called.

    Args:
        pa: key of the first image, as stored in ``ps``.
        pb: key of the second image, as stored in ``ps``.

    Returns:
        The L2 norm of the difference between the two embedding rows.

    Raises:
        ValueError: if either key is not present in ``ps``.
    """
    # NOTE: each `list.index` call is a linear scan of `ps`.
    row_a, row_b = ps.index(pa), ps.index(pb)
    delta = embeds[row_a] - embeds[row_b]
    return np.sqrt(np.sum(np.square(delta)))


In [24]:
# Ordered list of the keys of `p2l`; its order defines the row order of the
# embeddings computed from it.
ps = list(p2l)

In [21]:
# Sanity check: number of image keys collected from `p2l`.
print(len(ps))

15697


In [25]:
# Compute one embedding per image in `ps`, in `ps` order, so that row k of
# `embeds` belongs to `ps[k]`.
#
# Images are loaded, standardised and pushed through `model.predict` in chunks
# of `batch_size` so that only one chunk of pixel data is held in memory.
batch_size = 1000
embed_chunks = []  # one prediction array per flushed chunk
batch_x = []       # standardised images waiting for the next flush

# Wrapping `ps` itself (not `enumerate(ps)`) lets the progress bar know the
# total number of images.
for p in tqdm_notebook(ps):
    img = keras.preprocessing.image.load_img(
        os.path.join("../Dataset/train", p),
        color_mode='rgb',
        target_size=(H, W),
        interpolation="bicubic")
    x = keras.preprocessing.image.img_to_array(
        img, data_format="channels_last")
    # Release the underlying file handle as soon as the pixels are copied.
    if hasattr(img, 'close'):
        img.close()
    x = train_data_gen.standardize(x)
    batch_x.append(x)
    # Flush on a *full* chunk. (Testing `i % batch_size == 0` instead would
    # flush a one-image chunk at the very first image.)
    if len(batch_x) == batch_size:
        embed_chunks.append(model.predict(np.array(batch_x)))
        batch_x = []

# Flush the remainder, but never call the model on an empty array, which
# would happen whenever the last image completes a chunk exactly.
if batch_x:
    embed_chunks.append(model.predict(np.array(batch_x)))

embeds = np.concatenate(embed_chunks, axis=0)
print(embeds.shape)


(15697, 512)


In [27]:

# For every image whose label is a key of `l2ps`, record the `l2ps` entry of
# that label; images whose label is absent from `l2ps` are left out.
match = {
    p: l2ps[l]
    for p, l in tqdm_notebook(p2l.items())
    if l in l2ps
}
# For every image, collect up to 10 images carrying a *different* label,
# ranked by descending embedding distance (largest distance first).
#
# NOTE(review): descending order selects the *farthest* differently-labelled
# images. If the intent is hard-negative mining (closest non-matching images),
# the ordering should be ascending -- confirm before changing.
#
# The distances from one image to all candidates are computed in a single
# vectorised step. Looking each pair up through `cal_dis` costs two linear
# `ps.index` scans per pair, which makes the whole loop cubic in len(ps).
if len(embeds) != len(ps):
    raise ValueError("embeds and ps are out of sync: %d rows vs %d keys"
                     % (len(embeds), len(ps)))

top_k = 10

# Integer code per label, aligned row-for-row with `ps` / `embeds`, so that
# "different label" becomes a vectorised comparison.
label_codes = {}
codes = np.fromiter(
    (label_codes.setdefault(p2l[p], len(label_codes)) for p in ps),
    dtype=np.int64,
    count=len(ps))

unmatch = {}
for row, p in enumerate(tqdm_notebook(ps)):
    # Rows whose label differs from this image's label, in `ps` order.
    candidates = np.flatnonzero(codes != codes[row])
    # Euclidean distance from this image to every candidate.
    dists = np.sqrt(
        np.sum(np.square(embeds[candidates] - embeds[row]), axis=1))
    # A stable sort on the negated distances yields descending order while
    # keeping tied candidates in their original order, like
    # `sorted(..., reverse=True)`.
    ranked = candidates[np.argsort(-dists, kind="stable")[:top_k]]
    # With no differently-labelled candidate this is simply an empty tuple.
    unmatch[p] = tuple(ps[j] for j in ranked)



In [29]:
# Persist the `unmatch` table next to the other metadata pickles.
with open("../Dataset/metadata/unmatch.pickle", "wb") as unmatch_file:
    pickle.dump(unmatch, unmatch_file)

In [30]:
# Sanity check: number of entries stored in `unmatch`.
print(len(unmatch))

15697
