In [1]:
import json
import os
import random

import h5py
import matplotlib.pyplot as plt
import numpy as np
import tqdm
from skimage.color import rgb2gray
from skimage.feature import ORB
from sklearn.model_selection import train_test_split

# Load data

In [2]:
# Map (database) side: read the JSON index, then pull out image paths and locations
with open("Data/database/database_lite.json","r") as f:
    m_idx = json.load(f)
m_imgs = np.array(m_idx["im_paths"])
m_loc = np.array(m_idx["loc"])

# Query side: same index layout as the map
with open("Data/query/query_lite.json","r") as f:
    q_idx = json.load(f)
q_imgs = np.array(q_idx["im_paths"])
q_loc = np.array(q_idx["loc"])

# Relevance judgements: read both datasets fully into memory before the file closes
with h5py.File("Data/london_lite_gt.h5","r") as f:
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)  # stored as uint8 to keep the matrix compact

# Preprocessing

## Create feature vectors for images

In [3]:
def get_descriptors(x_imgs, n_keypoints=10):
    """Extract one fixed-length ORB feature vector per image.

    Each image is read from ``Data/<img_name>``, converted to grayscale and
    passed through ORB. The per-image descriptor matrix is flattened into a
    single row. ORB returns *at most* ``n_keypoints`` descriptors, so an image
    with fewer detected keypoints is padded with ``False`` up to the full row
    length; this keeps every row aligned with its image instead of failing
    (or silently misaligning) in a global reshape.

    Parameters
    ----------
    x_imgs : sequence of str
        Image paths relative to the ``Data/`` directory.
    n_keypoints : int, default 10
        Maximum number of ORB keypoints to keep per image.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape ``(len(x_imgs), n_keypoints * descriptor_size)``.
        For empty input the result has zero rows.
    """
    # skimage's ORB emits 256-bit binary descriptors (10 keypoints -> 2560
    # columns in this notebook). Only needed to shape the empty result.
    orb_descriptor_bits = 256

    if len(x_imgs) == 0:
        return np.zeros((0, n_keypoints * orb_descriptor_bits), dtype=bool)

    # Initialize the ORB descriptor
    descriptor_extractor = ORB(n_keypoints=n_keypoints)

    # One flattened (and, if necessary, padded) row per image
    rows = []

    for img_name in x_imgs:
        img = plt.imread(os.path.join('Data/', img_name))
        img = rgb2gray(img)

        # Extract ORB descriptors
        descriptor_extractor.detect_and_extract(img)
        descriptors_img = descriptor_extractor.descriptors  # (n_found, descriptor_size)

        # Fixed-length row: unused trailing slots stay False when n_found < n_keypoints
        row = np.zeros(n_keypoints * descriptors_img.shape[1], dtype=descriptors_img.dtype)
        row[:descriptors_img.size] = descriptors_img.ravel()
        rows.append(row)

    # Single stack at the end instead of repeated np.vstack inside the loop
    return np.stack(rows)

In [4]:
def get_feture_dict(m_imgs, q_imgs, n_keypoints=10):
    """Build a lookup from image path to its descriptor vector.

    Descriptors are computed for the map images first and the query images
    second; both the paths and the descriptor rows are concatenated in that
    same order so that they stay aligned.

    Parameters
    ----------
    m_imgs, q_imgs : array-like of str
        Map and query image paths.
    n_keypoints : int, default 10
        Forwarded to ``get_descriptors``.

    Returns
    -------
    dict
        ``{image_path: descriptor_row}`` covering map and query images.
    """
    all_vectors = np.concatenate((
        get_descriptors(m_imgs, n_keypoints=n_keypoints),
        get_descriptors(q_imgs, n_keypoints=n_keypoints),
    ))
    all_paths = np.concatenate((m_imgs, q_imgs))

    return {path: vector for path, vector in zip(all_paths, all_vectors)}

In [5]:
# Number of ORB keypoints requested per image
n_keypoints = 10

# Lookup table: image path -> flattened keypoint-descriptor vector
img2vector = get_feture_dict(m_imgs, q_imgs, n_keypoints)

In [None]:
# Spot-check: show the descriptor vector of one randomly chosen query image
file_path = q_imgs[random.randrange(len(q_imgs))]

img2vector[file_path]

array([False, False, False, ..., False, False,  True])

## Create data pairs

In [None]:
# Build every (query image, map image) pair together with its label.
# Pairs are ordered query-major: all map images for query 0, then query 1, ...
# which matches the row-major flattening of `fovs`.
n_queries, n_maps = fovs.shape  # 500 x 1000 for the "lite" dataset

# Fail loudly if the ground-truth matrix refers to more images than were loaded
if n_queries > len(q_imgs) or n_maps > len(m_imgs):
    raise IndexError(
        f"fovs has shape {fovs.shape} but only {len(q_imgs)} query and "
        f"{len(m_imgs)} map images were loaded"
    )

# Vectorized equivalent of the nested Python loop over (i, j)
X_raw = np.column_stack((
    np.repeat(q_imgs[:n_queries], n_maps),   # q0, q0, ..., q1, q1, ...
    np.tile(m_imgs[:n_maps], n_queries),     # m0, m1, ..., m0, m1, ...
))
Y = fovs.flatten()  # flatten() copies, so Y does not alias fovs

print(X_raw.shape)
print(Y.shape)

(500000, 2)
(500000,)


## Split training, test and validation datasets

In [None]:
# Swap each image path for its descriptor vector: one (query_vec, map_vec) entry per pair
X = np.asarray([
    (img2vector[pair[0]], img2vector[pair[1]])
    for pair in X_raw
])

In [None]:
# `train_test_split` is imported in the notebook's top import cell.
#
# 80% of the pairs go to training; the remaining 20% is halved into
# 10% test and 10% validation. random_state is fixed for reproducibility.
#
# NOTE(review): the split is over (query, map) pairs, so the same image can
# appear in more than one subset — confirm that this is intended.
X_train, X_rest, y_train, y_rest = train_test_split(X, Y, test_size=0.20, random_state=0)
X_test, X_validate, y_test, y_validate = train_test_split(X_rest, y_rest, test_size=0.50, random_state=0)

print(X_train.shape, X_test.shape, X_validate.shape)

(400000, 2, 2560) (50000, 2, 2560) (50000, 2, 2560)


In [None]:
# Persist the six splits as .npy files under ./Data/Dataset/.
dataset_dir = "./Data/Dataset"

# np.save does not create missing parent directories, so make sure it exists
os.makedirs(dataset_dir, exist_ok=True)

for file_stem, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("X_validate", X_validate),
    ("Y_train", y_train),
    ("Y_test", y_test),
    ("Y_validate", y_validate),
):
    np.save(os.path.join(dataset_dir, f"{file_stem}.npy"), split)