In [1]:
#Ref: https://cloud.tencent.com/developer/article/1498076
import csv
import os
import re

import h5py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tensorflow.keras.applications.resnet import ResNet152
from tensorflow.keras.applications.resnet import preprocess_input as preprocess_input_resnet
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as preprocess_input_vgg
from tensorflow.keras.preprocessing import image
from tqdm import tqdm

# Dataset locations, relative to the notebook's working directory.
rootpath='./'
train_path = './train'
test_path = './test'


def _list_jpgs(directory):
    """Return the sorted file names in `directory` that end with '.jpg'."""
    return sorted(name for name in os.listdir(directory) if name.endswith('.jpg'))


# File names only (no directory component).
# The previous filter, re.match(r'(.*?.jpg)', name), used an unescaped '.' and
# no end anchor, so it also accepted names such as 'notes_jpg.txt'.
# Sorting makes the processing order reproducible, because os.listdir returns
# entries in arbitrary order.
train_dir = _list_jpgs(train_path)
test_dir = _list_jpgs(test_path)

In [2]:
# Build both backbones from ImageNet weights with one shared set of options:
# no classification head (include_top=False), pooling='max' on the last
# feature maps, and a fixed 680x490 RGB input.
_backbone_options = dict(
    weights='imagenet',
    input_shape=(680,490,3),
    pooling='max',
    include_top=False,
)
resnet = ResNet152(**_backbone_options)
vgg = VGG19(**_backbone_options)

In [3]:
def extract_feature(model,process_input,img):
    """Compute a norm-scaled feature for a single image file.

    Parameters
    ----------
    model : object exposing ``predict``
        Called once on a batch containing the single preprocessed image.
    process_input : callable
        Preprocessing function applied to the batch before ``predict``
        (e.g. the ``preprocess_input`` that belongs to ``model``).
    img : str
        Path of the image; it is loaded resized to 680x490.

    Returns
    -------
    numpy.ndarray
        The first row of ``model.predict`` divided by its ``np.linalg.norm``.
        If that norm is zero the row is returned unscaled, so the result
        never contains NaNs from a division by zero.
    """
    loaded = image.load_img(img,target_size=(680,490))
    batch = np.expand_dims(image.img_to_array(loaded),axis=0)
    # Use the preprocessing function passed by the caller. The earlier code
    # called an undefined global `preprocess_input`, which raised NameError.
    batch = process_input(batch)
    feat = model.predict(batch)[0]
    norm = np.linalg.norm(feat)
    if norm == 0:
        return feat
    return feat / norm

In [None]:
# Extract a normalised VGG19 feature for every training image and store the
# features together with the image names in an HDF5 file, so later cells can
# reload them instead of recomputing.
feats = []
names = []
index = './vgg_extracted.h5'
for img_path in tqdm(train_dir):
    norm_feat = extract_feature(vgg,preprocess_input_vgg,os.path.join(train_path,img_path))
    # Strip only the trailing extension. re.sub(".jpg", "", ...) treated '.'
    # as a wildcard and removed every match anywhere in the name.
    img_name = os.path.splitext(img_path)[0]
    feats.append(norm_feat)
    names.append(img_name)
feats = np.array(feats)
output = index
# The context manager closes the file even if a write fails.
with h5py.File(output,'w') as h5f:
    h5f.create_dataset('feature',data=feats)
    # np.string_ was removed in NumPy 2.0; an explicit byte-string array
    # stores the same data.
    h5f.create_dataset('name',data=np.array(names, dtype='S'))

In [None]:
# Extract a normalised ResNet152 feature for every training image and store
# the features together with the image names in an HDF5 file, so later cells
# can reload them instead of recomputing.
feats = []
names = []
index = './resnet_extracted.h5'
for img_path in tqdm(train_dir):
    norm_feat = extract_feature(resnet,preprocess_input_resnet,os.path.join(train_path,img_path))
    # Strip only the trailing extension. re.sub(".jpg", "", ...) treated '.'
    # as a wildcard and removed every match anywhere in the name.
    img_name = os.path.splitext(img_path)[0]
    feats.append(norm_feat)
    names.append(img_name)
feats = np.array(feats)
output = index
# The context manager closes the file even if a write fails.
with h5py.File(output,'w') as h5f:
    h5f.create_dataset('feature',data=feats)
    # np.string_ was removed in NumPy 2.0; an explicit byte-string array
    # stores the same data.
    h5f.create_dataset('name',data=np.array(names, dtype='S'))

In [None]:
# Training labels. The "id" column is read as text because it is later
# compared with image names decoded from the HDF5 files. Numeric-looking ids
# would otherwise be parsed as integers (dropping leading zeros) and never
# equal a string.
train_csv = pd.read_csv("train.csv", dtype={"id": str})

Find best match by VGG

In [None]:
# Reload the VGG features and image names. The path matches the file written
# by the VGG extraction cell ('./vgg_extracted.h5'); the previous name
# 'vgg_featureCNN.h5' is never produced by this notebook.
index = './vgg_extracted.h5'
with h5py.File(index,'r') as h5f:
    # [:] copies the datasets into memory so they stay usable after the file
    # is closed.
    feats = h5f['feature'][:]
    imgNames = h5f['name'][:]

In [None]:
# For every test image, find the training image whose stored VGG feature has
# the highest dot-product score with the query feature, then write the test
# image's name with that training image's (x, y) from train_csv.
# The output file is opened once in 'w' mode, so re-running the cell replaces
# the results instead of appending duplicate rows.
with open('vgg.csv','w', newline='') as f:
    writer = csv.writer(f)
    for i in test_dir:
        query_feat = extract_feature(vgg,preprocess_input_vgg,os.path.join(test_path,i))
        scores = np.dot(query_feat,feats.T)
        # Only the top score is needed, so argmax replaces a full sort.
        best = np.argmax(scores)

        s = str(imgNames[best],'utf-8')
        matched = train_csv.loc[train_csv["id"]==s]
        if matched.empty:
            raise IndexError(f"no row in train_csv with id {s!r}")
        x = matched["x"].values[0]
        y = matched["y"].values[0]
        # i[0] was only the first character of the file name; write the full
        # name without its extension instead.
        writer.writerow((os.path.splitext(i)[0],x,y))

Find best match by ResNet

In [None]:
# Reload the ResNet features and image names. The path matches the file
# written by the ResNet extraction cell ('./resnet_extracted.h5'); the
# previous name 'resnet_featureCNN.h5' is never produced by this notebook.
index = './resnet_extracted.h5'
with h5py.File(index,'r') as h5f:
    # [:] copies the datasets into memory so they stay usable after the file
    # is closed.
    feats = h5f['feature'][:]
    imgNames = h5f['name'][:]

In [None]:
# For every test image, find the training image whose stored ResNet feature
# has the highest dot-product score with the query feature, then write the
# test image's name with that training image's (x, y) from train_csv.
# The output file is opened once in 'w' mode, so re-running the cell replaces
# the results instead of appending duplicate rows.
with open('resnet.csv','w', newline='') as f:
    writer = csv.writer(f)
    for i in test_dir:
        query_feat = extract_feature(resnet,preprocess_input_resnet,os.path.join(test_path,i))
        scores = np.dot(query_feat,feats.T)
        # Only the top score is needed, so argmax replaces a full sort.
        best = np.argmax(scores)

        s = str(imgNames[best],'utf-8')
        matched = train_csv.loc[train_csv["id"]==s]
        if matched.empty:
            raise IndexError(f"no row in train_csv with id {s!r}")
        x = matched["x"].values[0]
        y = matched["y"].values[0]
        # i[0] was only the first character of the file name; write the full
        # name without its extension instead.
        writer.writerow((os.path.splitext(i)[0],x,y))