In [1]:
import numpy as np
import cv2
import glob
import random
from sklearn.model_selection import train_test_split

Divide the whole dataset into a train set (~58K images) and a test set (~60K images).

Divide each set into two non-overlapping parts: part-A (~30%) and part-B (~70%)

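The whole image dataset contains 118,287 images:

- train: 57,960 images; test: 60,327 images
- train_part_A: 17,388 images; train_part_B: 40,572 images
- test_part_A: 18,098 images; test_part_B: 42,229 images

Note: the arrays loaded in the next cell are annotated with different sizes (29,757 / 69,435 for train, 30,972 / 72,270 for test), so the counts above appear to describe an earlier split; confirm which split is current.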

In [2]:
# Load the two non-overlapping parts of the training split.
# Entries are passed straight to cv2.imread in the following cells, so each
# array presumably holds one image file path per element.
train_part_A = np.load('train_part_A.npy')  #train_part_A - 29757 images
train_part_B = np.load('train_part_B.npy')  #train_part_B - 69435 images   

# The test-split arrays are not loaded in this notebook (lines kept disabled).
#test_part_A = np.load('test_part_A.npy')    #test_part_A - 30972 images
#test_part_B = np.load('test_part_B.npy')    #test_part_B - 72270 images


Crop out halves for all images in both parts

In [3]:
# Crop one random SIZE x SIZE patch out of the left half and one out of the
# right half of every part-A image, and compute an 8x8x8 colour histogram for
# each patch. Position i in all four lists corresponds to train_part_A[i].
im_A_left = []
im_A_right = []
hist_A_left = []
hist_A_right = []

SIZE = 224                        # side length of every cropped patch
margin = 0.1                      # slack so the crop position can be randomised
H_least = int(SIZE * (1+margin))  # 246: image height after resizing
W_least = H_least * 2             # 492: image width after resizing (two halves)

# Iterate over exactly the images that were loaded. The previous
# range(max(len(train_part_A), 120)) indexed past the end of the array
# (IndexError) whenever fewer than 120 images were available.
for i in range(len(train_part_A)):
    im = cv2.imread(train_part_A[i])
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        # Fail loudly rather than skip, so list positions keep matching the
        # indices of train_part_A that later cells rely on.
        raise IOError('cv2.imread could not read image: ' + str(train_part_A[i]))

    # Every image is resized to W_least x H_least; cv2.resize takes (width, height).
    im = cv2.resize(im, (W_least, H_least))
    half_w = im.shape[1] // 2

    # Random crops (rather than simply cutting the image in two) so that the
    # halves do not share a boundary that gives away a trivial continuity clue.
    left_x = random.randint(0, half_w - SIZE)
    left_y = random.randint(0, im.shape[0] - SIZE)
    im_l = im[left_y:left_y+SIZE, left_x:left_x+SIZE]  # rows y:y+h, cols x:x+w
    im_A_left.append(im_l)

    right_x = random.randint(half_w, im.shape[1] - SIZE)
    right_y = random.randint(0, im.shape[0] - SIZE)
    im_r = im[right_y:right_y+SIZE, right_x:right_x+SIZE]
    im_A_right.append(im_r)

    # 3-channel histogram, 8 bins per channel over [0, 256).
    hist_A_left.append(cv2.calcHist([im_l], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]))
    hist_A_right.append(cv2.calcHist([im_r], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]))


print(len(im_A_left))

29757


In [4]:
# Same cropping as for part A, applied to every part-B image: one random
# SIZE x SIZE patch per half plus an 8x8x8 colour histogram per patch.
# SIZE, W_least and H_least are the constants defined in the part-A cell.
# Position j in all four lists corresponds to train_part_B[j].
im_B_left = []
im_B_right = []
hist_B_left = []
hist_B_right = []

# Iterate over exactly the images that were loaded. The previous
# range(max(len(train_part_B), 280)) indexed past the end of the array
# (IndexError) whenever fewer than 280 images were available.
for i in range(len(train_part_B)):
    im = cv2.imread(train_part_B[i])
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        # Fail loudly rather than skip, so list positions keep matching the
        # part-B indices stored in the tasks.
        raise IOError('cv2.imread could not read image: ' + str(train_part_B[i]))

    # Every image is resized to W_least x H_least; cv2.resize takes (width, height).
    im = cv2.resize(im, (W_least, H_least))
    half_w = im.shape[1] // 2

    # Random crops (rather than simply cutting the image in two) so that the
    # halves do not share a boundary that gives away a trivial continuity clue.
    # SIZE replaces the hard-coded 224 so the patch size is set in one place.
    left_x = random.randint(0, half_w - SIZE)
    left_y = random.randint(0, im.shape[0] - SIZE)
    im_l = im[left_y:left_y+SIZE, left_x:left_x+SIZE]  # rows y:y+h, cols x:x+w
    im_B_left.append(im_l)

    right_x = random.randint(half_w, im.shape[1] - SIZE)
    right_y = random.randint(0, im.shape[0] - SIZE)
    im_r = im[right_y:right_y+SIZE, right_x:right_x+SIZE]
    im_B_right.append(im_r)

    # 3-channel histogram, 8 bins per channel over [0, 256).
    hist_B_left.append(cv2.calcHist([im_l], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]))
    hist_B_right.append(cv2.calcHist([im_r], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]))

print(len(hist_B_left))

69435


3.	Build a task case for each image in part-A:

o	randomly pick a half (left/right) as the reference image and the other half as the ground-truth target image 

o	from all images in part-B (only consider the "correct" side, e.g. if the reference image is from the left side, use only the right side of the part-B images), randomly sample (#Choices-1) of them from among the top K in terms of color histogram similarity to the GT target image. 

o	save the task as task_i = <ref_img, <choices>, gt_idx>, where <choices> are shuffled (#Choices-1) image halves from part-B together with the target image, and 'gt_idx' is the index of the target image within <choices>. 




In [5]:
# Build one multiple-choice task per part-A image.
#
# Encoding used by the three output lists (position i == part-A image i):
#   task_ref[i]             -i if the reference is the LEFT half, +i if it is the RIGHT half
#   task_tgt[i]             +i if the target is the RIGHT half,   -i if it is the LEFT half
#   task_choices_minus_1[i] part-B indices of the distractors; positive for
#                           B_right halves, negated for B_left halves
# Index 0 cannot carry a sign, so for task 0 the side has to be recovered from
# the sign of the distractor indices.
num_ref = len(im_A_left)
print(num_ref)
num_B = len(im_B_left)
compare_method = 3  # cv2.HISTCMP_BHATTACHARYYA: the smaller the distance, the more similar
num_choice = 10
K = 100

if num_B < num_choice - 1:
    raise ValueError('need at least num_choice-1 part-B images to build a task')


def _sample_distractors(target_hist, candidate_hists):
    """Return num_choice-1 distinct indices into candidate_hists.

    The indices are drawn uniformly at random, without replacement, from the
    K candidates whose histogram is closest to target_hist under
    compare_method (all candidates when there are fewer than K).
    """
    distances = np.array([cv2.compareHist(target_hist, cand, compare_method)
                          for cand in candidate_hists])
    top_k = min(K, len(distances))
    # kth = top_k - 1 puts the top_k smallest distances in the first top_k
    # slots and stays in bounds when len(distances) == K (kth = K did not).
    closest = np.argpartition(distances, top_k - 1)[:top_k]
    picked = np.random.choice(top_k, num_choice - 1, replace=False)
    return closest[picked]


task_ref = []
task_tgt = []
task_choices_minus_1 = []

for i_ref in range(num_ref):
    lr = np.random.randint(2)
    if lr == 0:  # left half is the reference, right half is the GT target
        task_ref.append(-1*i_ref)  # negative value means ref images are picked from left
        task_tgt.append(i_ref)     # positive value means tgt images are picked from right

        # Distractors come from B_right and are ranked by similarity to the GT
        # target (A_right), as the task description requires. The previous
        # code ranked them against the reference half (A_left) instead.
        choices_minus_1 = _sample_distractors(hist_A_right[i_ref], hist_B_right)
        task_choices_minus_1.append(choices_minus_1)         # choices in B_right, positive

    else:        # right half is the reference, left half is the GT target
        task_ref.append(i_ref)          # positive value means ref images are picked from right
        task_tgt.append(-1*i_ref)       # negative value means tgt images are picked from left

        # Distractors come from B_left, ranked by similarity to the GT target (A_left).
        choices_minus_1 = _sample_distractors(hist_A_left[i_ref], hist_B_left)
        task_choices_minus_1.append(choices_minus_1 * (-1))  # choices in B_left, negative value

29757


In [6]:
# Show the first few tasks: signed reference id, signed target id, and the
# signed part-B indices of the sampled distractors.
shown = min(num_ref, 5)
for ref_id, tgt_id, distractor_ids in zip(task_ref[:shown], task_tgt[:shown], task_choices_minus_1[:shown]):
    print(ref_id, tgt_id, distractor_ids)

0 0 [-23134  -2334 -57183 -44087 -23188 -18203 -30691 -67231 -15543]
1 -1 [-52323 -24988 -60274 -14685  -3763 -33755 -68610   -141 -41427]
2 -2 [-18096 -59367 -27658 -53748 -48471 -15750 -61249  -1025  -4429]
3 -3 [-46939 -67862 -30855 -51176 -42730 -40369 -21936  -2395 -10888]
4 -4 [-39997 -44314 -35535 -44420 -33556  -1347  -8764 -67861 -41674]


In [7]:
'''np.save('train_im_A_left', im_A_left)
np.save('train_im_B_left', im_B_left)
np.save('train_im_A_right', im_A_right)
np.save('train_im_B_right', im_B_right)

np.save('train_hist_A_left', hist_A_left)
np.save('train_hist_B_left', hist_B_left)
np.save('train_hist_A_right', hist_A_right)
np.save('train_hist_B_right', hist_B_right)

np.save('train_task_choices_minus_1',task_choices_minus_1 )'''
# The np.save calls above are disabled: they sit inside a string literal, so
# nothing is written and the cell only echoes that string as its output.
# Original note: "all deleted" - presumably the previously saved .npy files
# were removed; TODO confirm before relying on them.

"np.save('train_im_A_left', im_A_left)\nnp.save('train_im_B_left', im_B_left)\nnp.save('train_im_A_right', im_A_right)\nnp.save('train_im_B_right', im_B_right)\n\nnp.save('train_hist_A_left', hist_A_left)\nnp.save('train_hist_B_left', hist_B_left)\nnp.save('train_hist_A_right', hist_A_right)\nnp.save('train_hist_B_right', hist_B_right)\n\nnp.save('train_task_choices_minus_1',task_choices_minus_1 )"

In [8]:
'''np.save('train_task_ref.npy',task_ref )
np.save('train_task_tgt.npy',task_tgt )'''
# Disabled as well: the np.save calls are inside a string literal, so task_ref
# and task_tgt are not written to disk by this cell.

"np.save('train_task_ref.npy',task_ref )\nnp.save('train_task_tgt.npy',task_tgt )"

Produce a few task samples once you are done with this. We might also want to use more carefully picked images (indoor scenes, e.g. from topic models, or scenes in general) later, depending on how challenging this initial benchmark looks. 

A good folder structure is one folder per sample, containing the reference image together with all choices: 
sample_id/
 - reference.png
 - choice_0.png
 - choice_1.png
 ...
 - choice_9.png

The ground-truth can be saved in a single .txt file with two columns: sample_id and true_choice_idx. 


In [9]:
# Configuration and output directory for writing the tasks to disk.
task_num = len(im_A_left)  # one task per part-A image
print(task_num)

samples_num = 16 # take samples_num of samples to show the results
len_sample_id = 9  # e.g 000000666

gt_train = []  # filled with [sample_id, gt_idx] rows when the tasks are written

import os
# exist_ok=True replaces the check-then-mkdir pair: one call, no window
# between the existence test and the creation.
os.makedirs('./task_train', exist_ok=True)

29757


In [10]:
# Write every task to ./task_train/<sample_id>/ as reference.jpg plus
# choice_0.jpg ... choice_<num_choice-1>.jpg, and record the position of the
# ground-truth half among the choices as a [sample_id, gt_idx] row in gt_train.

# Start from an empty list so that re-running this cell does not append a
# second copy of every row.
gt_train = []

# Iterate over exactly the tasks that exist. The previous
# range(max(task_num, samples_num)) raised IndexError whenever fewer than
# samples_num tasks were available.
for i in range(task_num):
    str_i = str(i).zfill(len_sample_id)  # zero-padded sample id, e.g. 000000666
    choice_ids = task_choices_minus_1[i]

    # A negative task_ref means the reference is the left half. Task 0 cannot
    # carry a sign, so its side is inferred from the sign of the distractor
    # indices (positive -> B_right, negative -> B_left).
    if task_ref[i] < 0 or (task_ref[i] == 0 and sum(choice_ids) > 0):
        # ref in A_left, ground truth in A_right, distractors in B_right
        ref_img, gt_img, pool = im_A_left[i], im_A_right[i], im_B_right
        distractor_ids = choice_ids
    elif task_ref[i] > 0 or (task_ref[i] == 0 and sum(choice_ids) < 0):
        # ref in A_right, ground truth in A_left, distractors in B_left
        ref_img, gt_img, pool = im_A_right[i], im_A_left[i], im_B_left
        distractor_ids = [-c for c in choice_ids]  # undo the sign encoding
    else:
        # Side cannot be determined (task 0 with distractor indices summing to
        # zero); such a task was silently skipped before and still is.
        continue

    sample_dir = './task_train/' + str_i
    os.makedirs(sample_dir, exist_ok=True)

    cv2.imwrite(sample_dir + "/reference" + ".jpg", ref_img)
    gt_id = random.randint(0, num_choice-1)  # slot of the ground truth, from {0, ..., num_choice-1}
    gt_train.append([str_i, str(gt_id)])

    # The ground truth goes into slot gt_id; the distractors fill the
    # remaining slots in their stored order.
    remaining = iter(distractor_ids)
    for choice_i in range(num_choice):
        img = gt_img if choice_i == gt_id else pool[next(remaining)]
        cv2.imwrite(sample_dir + "/choice_" + str(choice_i) + ".jpg", img)

In [11]:
# Sanity check: the first 16 [sample_id, gt_idx] rows, then the total row count.
head_rows = gt_train[:16]
print(head_rows)
total_rows = len(gt_train)
print(total_rows)

[['000000000', '1'], ['000000001', '8'], ['000000002', '9'], ['000000003', '0'], ['000000004', '7'], ['000000005', '9'], ['000000006', '4'], ['000000007', '2'], ['000000008', '9'], ['000000009', '6'], ['000000010', '5'], ['000000011', '4'], ['000000012', '1'], ['000000013', '6'], ['000000014', '7'], ['000000015', '6']]
29757


In [12]:
import csv

# Write the ground truth as two columns: sample_id, true_choice_idx.
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, blank rows appear between records on Windows.
# Mode "w" is enough, since the file is never read back through this handle.
with open("gt_train.csv", "w", newline="") as my_csv:
    csvWriter = csv.writer(my_csv, delimiter=',')
    csvWriter.writerows(gt_train)