In [1]:
#ref: WorkSheet 7 & 8
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
from tqdm import tqdm
import pandas as pd
import re
import csv
rootpath='./'
train_path = './train'
test_path = './test'

In [3]:
train_dir = [i for i in os.listdir("train") if re.match(r'(.*?.jpg)',i)]
test_dir = [i for i in os.listdir("test") if re.match(r'(.*?.jpg)',i)]

In [4]:
sift = cv2.SIFT_create()
orb = cv2.ORB_create()

In [7]:
def orb_res():
    kps = {}
    dess = {}
    for i in tqdm(train_dir):
        target_img = cv2.imread(os.path.join(train_path,i),0)
        kp, des =orb.detectAndCompute(target_img,None)
        kps[i] = kp
        dess[i] = des
    return kps,dess

In [8]:
def orb_find_top(img_path, target,num):
    res = {}
    img = cv2.imread(os.path.join(test_path,img_path), 0)
    kp1,des1 = orb.detectAndCompute(img,None)
    for i in target:
        target_img = cv2.imread(os.path.join(train_path,i),0)
        kp2 = kps[i]
        des2 = dess[i]
        matcher = cv2.BFMatcher()
        if(len(kp1)>=2 and len(kp2)>=2):
            matches = matcher.knnMatch(des1,des2,k=2)
            matchesMask = []
            good = []
            for m,n in matches:
                if m.distance < 0.7*n.distance:
                    good.append(m)
                    matchesMask.append([1,0]) # Match
                else:
                    matchesMask.append([0,0]) # Mismatch           
            res[i] = len(good)/len(matchesMask)
        else:
            res[i]=0
    return sorted(res.items(), key = lambda x:(x[1],x[0]), reverse=True)[:num]

In [9]:
# img_test = "IMG4318_2.jpg"
# top_50 = orb_find_top(img_test,train_dir,200)
# top_10 = find_top_n(img_test, top_50,50)
# b = find_best(top_10,img_test)[0]
# plt.figure(figsize=(15, 15))
# img = cv2.imread(os.path.join(train_path,b))
# img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
# plt.imshow(img)
# plt.axis('off')
# plt.show()

In [10]:
# for i in top_10:
#     plt.figure(figsize=(15, 15))
#     img = cv2.imread(os.path.join(train_path,i[0]))
#     img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
#     plt.imshow(img)
#     plt.axis('off')
#     plt.show()

In [11]:
def sift_res():
    kps = {}
    dess = {}
    for i in tqdm(train_dir):
        target_img = cv2.imread(os.path.join(train_path,i),0)
        kp, des =sift.detectAndCompute(target_img,None)
        kps[i] = kp
        dess[i] = des
    return kps,dess

In [12]:
def find_top_n(img_path, target,num):
    res = {}
    img = cv2.imread(os.path.join(test_path,img_path),cv2.IMREAD_GRAYSCALE)
    kp1,des1 = sift.detectAndCompute(img,None)
    for i in target:
        target_img = cv2.imread(os.path.join(train_path,i[0]),cv2.IMREAD_GRAYSCALE)
        kp2 = kps_sift[i[0]]
        des2 = dess_sift[i[0]]
        # FLANN parameters and initialize
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
        search_params = dict(checks=50)   # or pass empty dictionary
        flann = cv2.FlannBasedMatcher(index_params,search_params)
        if(len(kp1)>=2 and len(kp2)>=2):
            # Matching descriptor using KNN algorithm
            matches = flann.knnMatch(des1, des2, k=2) 
            # Create a mask to draw all good matches
            matchesMask = []
            # Store all good matches as per Lowe's Ratio test.
            good = []
            for m,n in matches:
                if m.distance < 0.7*n.distance:
                    good.append(m)
                    matchesMask.append([1,0]) # Match
                else:
                    matchesMask.append([0,0]) # Mismatch
            # Print total number of good matches between two images
            res[i[0]] = len(good)/len(matchesMask)
    return sorted(res.items(), key = lambda x:(x[1],x[0]), reverse=True)[:num]

In [27]:
def find_best(top_n,img_path):
    ret = []
    for i in top_n:
        img = cv2.imread(os.path.join(test_path,img_path),0)
        candidate = cv2.imread(os.path.join(train_path,i[0]),0)
        kp1, des1 = sift.detectAndCompute(img,None)
        kp2 = kps_sift[i[0]]
        des2 = dess_sift[i[0]]
        # FLANN parameters and initialize
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
        search_params = dict(checks=50)   # or pass empty dictionary
        flann = cv2.FlannBasedMatcher(index_params,search_params)
        # Matching descriptor using KNN algorithm
        matches = flann.knnMatch(des1,des2,k=2)
        # Create a mask to draw all good matches
        matchesMask = []
        # Store all good matches as per Lowe's Ratio test.
        good = []
        for m,n in matches:
            if m.distance < 0.7*n.distance:
                good.append(m)
                matchesMask.append([1,0]) # Match
            else:
                matchesMask.append([0,0]) # Mismatch
        # Now we set a condition that at least N matches (defined by MIN_MATCH_NUM) are required to find the object. 
        MIN_MATCH_NUM = 4
        if len(good)> MIN_MATCH_NUM:
            # If enough matches are found, we extract the positions of the matched keypoints in both images. 
            # They are passed to find the perspective transformation. 

            # Estimate homography between two images
            ptsA = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
            ptsB = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
            H, status = cv2.findHomography(ptsA, 
                                           ptsB, 
                                           cv2.RANSAC, 
                                           ransacReprojThreshold = 5, 
                                           maxIters = 10) # try to change maxIters and see the effect
            success = status.ravel().tolist()
            #Threshold to avoid match wall
            if(success.count(1)/len(good) > 0.15):
                ret.append(i[0])
                if len(ret)>1:
                    return ret
    if len(ret)<1:
        ret.append(top_n[0][0])
        ret.append(top_n[1][0])
    return ret

In [26]:
train_csv = pd.read_csv("train.csv")
image_names = pd.read_csv("imagenames.csv").values.flatten()
def main_func(hash_num=1000,flann_num=50):
    errors = []
    for test_file in tqdm(test_dir):
        #test_file+=".jpg"
        #pHash_top = rank_pic_by_phash(test_file,hash_num,hash_values)
        orb_top = orb_find_top(test_file,train_dir,200)
        print(orb_top)
        flann_top = find_top_n(test_file,orb_top,flann_num)
        best_match = find_best(flann_top,test_file)
        if len(best_match)==1:
            best_match = re.sub(".jpg","",best_match[0])
            x = train_csv.loc[train_csv["id"]==best_match]["x"].values[0]
            y = train_csv.loc[train_csv["id"]==best_match]["y"].values[0]
            f = open('test.csv','a')
            writer = csv.writer(f)
            test_file = re.sub(".jpg","",test_file)
            writer.writerow((test_file,x,y,best_match,0))
            f.close()
        elif len(best_match)==2:
            best_match1 = re.sub(".jpg","",best_match[1])
            best_match = re.sub(".jpg","",best_match[0])
            x = train_csv.loc[train_csv["id"]==best_match]["x"].values[0]
            y = train_csv.loc[train_csv["id"]==best_match]["y"].values[0]
            f = open('test.csv','a')
            writer = csv.writer(f)
            test_file = re.sub(".jpg","",test_file)
            writer.writerow((test_file,x,y,best_match,best_match1))
            f.close()
        else:
            errors.append(test_file)
            print(test_file)
    return errors

In [17]:
kps, dess = orb_res()
kps_sift, dess_sift = sift_res()

100%|██████████| 7500/7500 [01:11<00:00, 105.29it/s]
100%|██████████| 7500/7500 [06:42<00:00, 18.62it/s]


In [25]:
err = main_func()

  0%|          | 0/2 [00:00<?, ?it/s]

[('IMG4243_5.jpg', 0), ('IMG4243_4.jpg', 0), ('IMG4243_3.jpg', 0), ('IMG4243_2.jpg', 0), ('IMG4243_1.jpg', 0), ('IMG4242_5.jpg', 0), ('IMG4242_4.jpg', 0), ('IMG4242_3.jpg', 0), ('IMG4242_2.jpg', 0), ('IMG4242_1.jpg', 0), ('IMG4241_5.jpg', 0), ('IMG4241_4.jpg', 0), ('IMG4241_3.jpg', 0), ('IMG4241_2.jpg', 0), ('IMG4241_1.jpg', 0), ('IMG4240_5.jpg', 0), ('IMG4240_4.jpg', 0), ('IMG4240_3.jpg', 0), ('IMG4240_2.jpg', 0), ('IMG4240_1.jpg', 0), ('IMG4239_5.jpg', 0), ('IMG4239_4.jpg', 0), ('IMG4239_3.jpg', 0), ('IMG4239_2.jpg', 0), ('IMG4239_1.jpg', 0), ('IMG4238_5.jpg', 0), ('IMG4238_4.jpg', 0), ('IMG4238_3.jpg', 0), ('IMG4238_2.jpg', 0), ('IMG4238_1.jpg', 0), ('IMG4237_5.jpg', 0), ('IMG4237_4.jpg', 0), ('IMG4237_3.jpg', 0), ('IMG4237_2.jpg', 0), ('IMG4237_1.jpg', 0), ('IMG4236_5.jpg', 0), ('IMG4236_4.jpg', 0), ('IMG4236_3.jpg', 0), ('IMG4236_2.jpg', 0), ('IMG4236_1.jpg', 0), ('IMG4235_5.jpg', 0), ('IMG4235_4.jpg', 0), ('IMG4235_3.jpg', 0), ('IMG4235_2.jpg', 0), ('IMG4235_1.jpg', 0), ('IMG4234

  0%|          | 0/2 [00:13<?, ?it/s]

[]





IndexError: list index out of range