The dataset is COCO train2017, which contains 118,287 images in total.

In [1]:
import glob
import os
import random

import cv2
import numpy as np
from sklearn.model_selection import train_test_split

Divide the data into two non-overlapping parts: part-A (~30%) and part-B (~70%)

In [2]:
# Collect the whole image dataset (118287 images for COCO train2017).
# glob returns paths in arbitrary, filesystem-dependent order, so the list is
# sorted to make the split below reproducible across machines and runs.
image_name_list = sorted(glob.glob('train2017/*.jpg'))

# Fixed seed so that part-A / part-B membership is identical on every
# "Restart Kernel -> Run All"; without it every run builds a different benchmark.
SPLIT_SEED = 0
image_part_A, image_part_B = train_test_split(image_name_list, test_size = 0.7, random_state = SPLIT_SEED)

# 35486 images in part_A, 82801 images in part_B

Crop out halves for all images in both parts

In [3]:
# Part-A crops: for every readable image, one random 224x224 crop from the left
# half and one from the right half, plus an 8x8x8 colour histogram of each crop.
# The four lists are index-aligned (entry k of each list comes from the same image).
im_A_left = []
im_A_right = []
hist_A_left = []
hist_A_right = []

H_least = 300   # images larger than (H_least, W_least) are not resized
W_least = 500   # others resized to (H_least, W_least)

crop_side = 224                            # side length of the square crops
hist_channels = [0, 1, 2]                  # use all three colour channels
hist_bins = [8, 8, 8]                      # 8 bins per channel
hist_ranges = [0, 256, 0, 256, 0, 256]     # full 8-bit range per channel

for i in range(min(len(image_part_A), 12000)):
    im = cv2.imread(image_part_A[i])
    if im is None:
        # cv2.imread returns None (it does not raise) for unreadable files;
        # skip them instead of crashing on im.shape. The output lists stay
        # aligned with each other because nothing is appended for this image.
        continue

    # Images that are not strictly larger than (H_least, W_least) are resized to
    # exactly that size, so each half is wide enough to hold a 224x224 crop.
    if not (im.shape[0] > H_least and im.shape[1] > W_least):
        im = cv2.resize(im, (W_least, H_least))  # cv2.resize takes (width, height)

    # Random crop that lies entirely inside the left half. Random crops are used
    # (rather than simply cutting the image in two) to avoid a trivial
    # continuity clue between the halves.
    left_x = random.randint(0, im.shape[1]//2 - crop_side)
    left_y = random.randint(0, im.shape[0] - crop_side)
    # .copy() detaches the crop from the full image; a plain slice is a view that
    # would keep every full-size decoded image alive in memory.
    im_l = im[left_y:left_y+crop_side, left_x:left_x+crop_side].copy()  # y:y+h, x:x+w

    # Random crop that starts at or right of the vertical midline.
    right_x = random.randint(im.shape[1]//2, im.shape[1] - crop_side)
    right_y = random.randint(0, im.shape[0] - crop_side)
    im_r = im[right_y:right_y+crop_side, right_x:right_x+crop_side].copy()

    im_A_left.append(im_l)
    im_A_right.append(im_r)
    hist_A_left.append(cv2.calcHist([im_l], hist_channels, None, hist_bins, hist_ranges))
    hist_A_right.append(cv2.calcHist([im_r], hist_channels, None, hist_bins, hist_ranges))

In [4]:
# Part-B crops (the distractor pool): for every readable image, one random
# 224x224 crop from the left half and one from the right half, plus an 8x8x8
# colour histogram of each crop. The four lists are index-aligned.
im_B_left = []
im_B_right = []
hist_B_left = []
hist_B_right = []

crop_side_B = 224                            # side length of the square crops
hist_channels_B = [0, 1, 2]                  # use all three colour channels
hist_bins_B = [8, 8, 8]                      # 8 bins per channel
hist_ranges_B = [0, 256, 0, 256, 0, 256]     # full 8-bit range per channel

for i in range(min(len(image_part_B), 28000)):
    im = cv2.imread(image_part_B[i])
    if im is None:
        # cv2.imread returns None (it does not raise) for unreadable files;
        # skip them instead of crashing on im.shape. The output lists stay
        # aligned with each other because nothing is appended for this image.
        continue

    # Images that are not strictly larger than (H_least, W_least) are resized to
    # exactly that size, so each half is wide enough to hold a 224x224 crop.
    if not (im.shape[0] > H_least and im.shape[1] > W_least):
        im = cv2.resize(im, (W_least, H_least))  # cv2.resize takes (width, height)

    # Random crop that lies entirely inside the left half. Random crops are used
    # (rather than simply cutting the image in two) to avoid a trivial
    # continuity clue between the halves.
    left_x = random.randint(0, im.shape[1]//2 - crop_side_B)
    left_y = random.randint(0, im.shape[0] - crop_side_B)
    # .copy() detaches the crop from the full image; a plain slice is a view that
    # would keep every full-size decoded image alive in memory.
    im_l = im[left_y:left_y+crop_side_B, left_x:left_x+crop_side_B].copy()  # y:y+h, x:x+w

    # Random crop that starts at or right of the vertical midline.
    right_x = random.randint(im.shape[1]//2, im.shape[1] - crop_side_B)
    right_y = random.randint(0, im.shape[0] - crop_side_B)
    im_r = im[right_y:right_y+crop_side_B, right_x:right_x+crop_side_B].copy()

    im_B_left.append(im_l)
    im_B_right.append(im_r)
    hist_B_left.append(cv2.calcHist([im_l], hist_channels_B, None, hist_bins_B, hist_ranges_B))
    hist_B_right.append(cv2.calcHist([im_r], hist_channels_B, None, hist_bins_B, hist_ranges_B))

3.	Build a task case for each image in part-A:

o	randomly pick a half (left/right) as the reference image and the other half as the ground-truth target image 

o	from all images in part-B (only consider the "correct" side, e.g. if the reference image is from the left side, use only the right side of the part-B images), randomly sample (#Choices-1) of them that are among the top K in terms of color histogram similarity to the GT target image. 

o	save the task as task_i = <ref_img, <choices>, gt_idx>, where <choices> are shuffled (#Choices-1) image halves from part-B together with the target image, and 'gt_idx' is the index of the target image within <choices>. 




In [5]:
# Build one task per part-A image.
#
# Outputs (index-aligned, one entry per part-A image):
#   task_ref             signed index of the reference half:  -i => A_left[i],  +i => A_right[i]
#   task_tgt             signed index of the target half:     +i => A_right[i], -i => A_left[i]
#   task_choices_minus_1 array of (num_choice-1) signed part-B indices:
#                        positive => B_right, negative => B_left
# NOTE: the sign encoding cannot represent index 0 (-1*0 == 0), so for i == 0
# the side has to be inferred from the sign of the distractor indices.
num_ref = len(im_A_left)
num_B = len(im_B_left)
compare_method = cv2.HISTCMP_BHATTACHARYYA  # == 3; the smaller the compareHist value, the more similar
num_choice = 10
K = 100

task_ref = []
task_tgt = []
task_choices_minus_1 = []

# Size of the candidate pool; capped so a part-B set of K or fewer images no
# longer makes np.argpartition fail with an out-of-bounds kth.
top_k = min(K, num_B)
if num_ref > 0 and top_k < num_choice - 1:
    raise ValueError('need at least num_choice-1 = %d part-B images among the top K, got %d'
                     % (num_choice - 1, top_k))

for i_ref in range(num_ref):
    lr = np.random.randint(2)
    if lr == 0:  # pick left half as the reference image
        task_ref.append(-1*i_ref)  # negative value means ref images are picked from left
        task_tgt.append(i_ref)     # positive value means tgt images are picked from right
        hist_target = hist_A_right[i_ref]   # GT target is the right half
        hist_pool = hist_B_right            # distractors come from the same (right) side
        side_sign = 1                       # B_right indices are stored as positive values
    else:        # pick right half as the reference image
        task_ref.append(i_ref)          # positive value means ref images are picked from right
        task_tgt.append(-1*i_ref)       # negative value means tgt images are picked from left
        hist_target = hist_A_left[i_ref]    # GT target is the left half
        hist_pool = hist_B_left             # distractors come from the same (left) side
        side_sign = -1                      # B_left indices are stored as negative values

    # Distance between the GT *target* half and every same-side part-B half.
    # (Previously the reference half's histogram was compared, which
    # contradicted the task definition: distractors must resemble the target.)
    hist_Comp = np.array([cv2.compareHist(hist_target, hist_pool[j], compare_method)
                          for j in range(num_B)])

    # the top K in terms of color histogram similarity to the GT target image.
    if top_k < num_B:
        # partitioning at top_k-1 puts the top_k smallest distances first (unordered)
        matchest_topK = np.argpartition(hist_Comp, top_k - 1)[:top_k]
    else:
        matchest_topK = np.arange(num_B)

    # randomly sample (#Choices-1) among the top K
    arg_choices_minus_1 = np.random.choice(top_k, num_choice-1, replace=False)
    choices_minus_1 = matchest_topK[arg_choices_minus_1]
    task_choices_minus_1.append(choices_minus_1 * side_sign)

In [6]:
# Preview the first few tasks: signed reference index, signed target index and
# the signed part-B indices of the distractors.
preview_count = 5 if num_ref > 5 else num_ref
for task_idx in range(preview_count):
    print(task_ref[task_idx], task_tgt[task_idx], task_choices_minus_1[task_idx])

0 0 [25273  9669 13902 25968 18037  6039  9597  5832 25923]
1 -1 [ -8825  -3223 -21979  -1810  -2898  -8666 -11967 -27008 -15255]
-2 2 [ 1085 18456 10811  5219 15971 18980 18273  3249  2850]
3 -3 [-18211 -15821 -22868 -16873  -5372 -18096 -24643 -17479 -24265]
4 -4 [-13210 -14021 -13638 -24850  -7478  -8205  -5089  -3521  -5538]


Generate a few task samples once you are done with this. We might also want to use more carefully picked images (indoor scenes, e.g. from topic models, or scenes in general) later, depending on how challenging this initial benchmark looks. 

A good folder structure can be having one folder for each sample containing the reference image together with all choices: 
sample_id/
 - reference.png
 - choice_0.png
 - choice_1.png
 ...
 - choice_9.png

The ground-truth can be saved in a single .txt file with two columns: sample_id and true_choice_idx. 


In [7]:
task_num = len(im_A_left)  # total number of tasks (one per part-A image)
samples_num = 30 # take samples_num of samples to show the results

# Create the output folder; exist_ok=True makes the cell safe to re-run and
# avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs('./task', exist_ok=True)

In [8]:
# Write the first samples to disk, one folder per sample:
#   ./task/sample_<i>/reference.jpg
#   ./task/sample_<i>/choice_0.jpg ... choice_<num_choice-1>.jpg
# and record the answer key in ./task/ground_truth.txt with two tab-separated
# columns: sample_id and true_choice_idx.
#
# The ground-truth half is placed at a random choice index. Always storing it
# as the last choice would make the answer position constant across samples.
os.makedirs('./task', exist_ok=True)

with open('./task/ground_truth.txt', 'w') as gt_file:
    for i in range(min(task_num, samples_num)):
        distractor_ids = task_choices_minus_1[i]

        # task_ref uses the sign to encode the side, which is ambiguous for
        # index 0; in that case the sign of the distractor indices decides.
        if task_ref[i] < 0 or (task_ref[i] == 0 and sum(distractor_ids) > 0):
            # ref in A_left, ground_truth in A_right, target choices in B_right
            ref_img, gt_img = im_A_left[i], im_A_right[i]
            pool, side_sign = im_B_right, 1
        elif task_ref[i] > 0 or (task_ref[i] == 0 and sum(distractor_ids) < 0):
            # ref in A_right, ground_truth in A_left, target choices in B_left
            ref_img, gt_img = im_A_right[i], im_A_left[i]
            pool, side_sign = im_B_left, -1   # B_left indices are stored negated
        else:
            # side cannot be determined (index 0 with distractor indices summing to 0)
            continue

        sample_dir = './task/sample_' + str(i)
        os.makedirs(sample_dir, exist_ok=True)

        cv2.imwrite(sample_dir + "/reference" + ".jpg", ref_img)

        # Random slot for the ground truth; distractors fill the remaining slots
        # in order, so all num_choice files are (re)written on every run.
        gt_idx = int(np.random.randint(num_choice))
        cv2.imwrite(sample_dir + "/choice_" + str(gt_idx) + ".jpg", gt_img)

        free_slots = [slot for slot in range(num_choice) if slot != gt_idx]
        for slot, signed_idx in zip(free_slots, distractor_ids):
            cv2.imwrite(sample_dir + "/choice_" + str(slot) + ".jpg", pool[int(signed_idx) * side_sign])

        gt_file.write('sample_' + str(i) + '\t' + str(gt_idx) + '\n')
                