# Import Requirements

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import os
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import cKDTree
from skimage.feature import plot_matches
from skimage.measure import ransac
from skimage.transform import AffineTransform
from PIL import Image
import imagehash
import time
import tensorflow as tf

from tensorflow.python.platform import app
from delf import feature_io

# Not referenced by any of the visible cells; presumably a leftover from a
# command-line version of this code (argparse is imported above) -- TODO confirm.
cmd_args = None

# Passed as `distance_upper_bound` to the KD-tree query in delf_compare():
# descriptor pairs farther apart than this are not treated as putative matches.
_DISTANCE_THRESHOLD = 0.8


# Setup for recognition

Train list based on hash

In [None]:
train = pd.read_csv("train_with_hash.csv")

In [None]:
train.head()

In [None]:
# Build (hash, landmark_id) pairs, one per training row. itertuples() puts the
# DataFrame index at position 0, so row[6] / row[4] are the 6th / 4th CSV
# columns -- presumably the hash and landmark_id columns of
# train_with_hash.csv; verify against the CSV header.
train_list = [(row[6],row[4]) for row in train.itertuples()]

In [None]:
train_list[:10]

In [None]:
# Sort the pairs by hash (tuples compare on their first element first).
# get_landmark() walks this list with a forward-only cursor and treats list
# neighbours as hash neighbours, so it depends on this ordering.
train_list.sort()

In [None]:
train_list[:10]

In [None]:
df_tl = pd.DataFrame(train_list,columns=['Hash','landmark_id'])

In [None]:
df_tl.head()

In [None]:
df_tl.to_csv("train_list.csv")

Test list based on hash

In [None]:
test = pd.read_csv("test_with_hash.csv")

In [None]:
test.head()

In [None]:
# Build (hash, id) pairs, one per test row. itertuples() puts the DataFrame
# index at position 0, so row[4] / row[2] are the 4th / 2nd CSV columns --
# presumably the hash and id columns of test_with_hash.csv; verify against
# the CSV header.
test_list = [(row[4],row[2]) for row in test.itertuples()]

In [None]:
test_list[:10]

In [None]:
# Sort the test pairs by hash so that consecutive entries have adjacent
# hashes; the classification loop compares each image with the previous entry
# and passes hashes to get_landmark() in this order.
test_list.sort()

In [None]:
test_list[5000:5010]

In [None]:
df_test_list = pd.DataFrame(test_list,columns=['Hash','id'])

In [None]:
df_test_list.to_csv("test_list.csv")

# DELF for comparison

Function to compare 2 files using DELF features

In [None]:
def delf_compare(file1,file2):
    """Count geometrically verified DELF feature matches between two files.

    Descriptors from `file2` are matched to their nearest neighbour among the
    descriptors of `file1` (within `_DISTANCE_THRESHOLD`), and the putative
    matches are then filtered by fitting an affine transform with RANSAC.

    Args:
        file1: Path of the first feature file, read with
            `feature_io.ReadFromFile`.
        file2: Path of the second feature file, read the same way.

    Returns:
        A tuple `(inliers, confidence)` where `inliers` is the number of
        RANSAC inliers and `confidence` is
        `2 * inliers / (num_features_1 + num_features_2)`.
        Returns `(0, 0.01)` when the comparison cannot be carried out
        (unreadable file, too few putative matches, RANSAC finds no model,
        or any other error) -- the comparison is deliberately best-effort.
    """
    no_match = (0, 0.01)
    min_samples = 3  # an affine model needs at least three correspondences
    try:
        # Read features.
        locations_1, _, descriptors_1, _, _ = feature_io.ReadFromFile(file1)
        num_features_1 = locations_1.shape[0]
        locations_2, _, descriptors_2, _, _ = feature_io.ReadFromFile(file2)
        num_features_2 = locations_2.shape[0]

        # Find nearest-neighbor matches using a KD tree.
        d1_tree = cKDTree(descriptors_1)
        _, indices = d1_tree.query(
            descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD)

        # The query reports "no neighbour within the bound" as an index equal
        # to the number of points in the tree, so those rows are filtered out.
        matched = np.asarray(indices) != num_features_1
        if np.count_nonzero(matched) < min_samples:
            return no_match

        # Select feature locations for putative matches.
        locations_2_to_use = np.asarray(locations_2)[matched]
        locations_1_to_use = np.asarray(locations_1)[indices[matched]]

        # Perform geometric verification using RANSAC.
        _, inliers = ransac(
            (locations_1_to_use, locations_2_to_use),
            AffineTransform,
            min_samples=min_samples,
            residual_threshold=20,
            max_trials=1000)
        if inliers is None:
            # RANSAC could not fit any model to the putative matches.
            return no_match

        answer = int(np.sum(inliers))
        confidence = (2.0*answer)/(num_features_1+num_features_2)
        return (answer,confidence)
    except Exception:
        # Best-effort: any failure counts as "no match". Unlike a bare
        # `except:`, this lets KeyboardInterrupt / SystemExit propagate so a
        # long-running loop can still be stopped.
        return no_match

In [None]:
df1 = pd.read_csv("train_list.csv")

In [None]:
df1.head()

In [None]:
# Rebuild the in-memory list as [hash, landmark_id] rows from the saved CSV;
# selecting the columns by name drops the extra index column that to_csv wrote.
train_list = df1[['Hash','landmark_id']].values.tolist()

In [None]:
train_list[:10]

In [None]:
df2 = pd.read_csv("test_list.csv")

In [None]:
df2.head()

In [None]:
# Rebuild the in-memory list as [hash, id] rows from the saved CSV;
# selecting the columns by name drops the extra index column that to_csv wrote.
test_list = df2[['Hash','id']].values.tolist()

In [None]:
test_list[:5]

# Performing Landmark Recognition

In [None]:
# Directory prefixes for the feature files. File paths are built by plain
# string concatenation as <prefix> + <image id> + ".delf", so the trailing
# slash is required.
test_delf = "/beegfs/ss8464/code/data/query_features/"
train_delf = "/beegfs/ss8464/code/train_features/"

In [None]:
# get_landmark() uses this frame's 'landmark_id' and 'id' columns to pick a
# training image for each candidate landmark.
train = pd.read_csv("train_with_hash.csv")

In [None]:
# Resume support: start from an empty result list unless an earlier run left
# a checkpoint file behind.
test_landmarks = []
if os.path.isfile("test_landmarks.csv"):
    # The checkpoint was written with a header row and an index column.
    # Explicit names make the header come back as the first data row, so both
    # the index column and that first row are discarded.
    landmarks = pd.read_csv("test_landmarks.csv",names = ["index","id","landmarks"])
    landmarks = landmarks.drop("index", axis=1)
    landmarks = landmarks.iloc[1:]
    test_landmarks = landmarks[["id","landmarks"]].values.tolist()

Find the training entries whose hashes are closest to a test image's hash, then use DELF matching against those candidates to determine the landmark class.

In [None]:
def get_landmark(Hash,Id,pos):
    """Pick a landmark for one test image from its hash neighbours.

    Relies on module-level state: `train_list` (rows of hash, landmark_id,
    sorted by hash), `train` (DataFrame with 'landmark_id' and 'id' columns),
    the `test_delf` / `train_delf` directory prefixes and `delf_compare`.

    Args:
        Hash: Hash of the test image.
        Id: Id of the test image; its features are read from
            `test_delf + str(Id) + ".delf"`.
        pos: Cursor into `train_list` from the previous call. It is only ever
            moved forward, so callers must pass hashes in ascending order.

    Returns:
        A tuple `(answer, pos)`. `answer` is the string
        "<landmark_id> <confidence>", or "-1 0.01" when no candidate reaches
        5 verified matches. `pos` is the advanced cursor to pass to the next
        call.
    """
    n_train = len(train_list)

    # Advance the cursor to the first training hash that is >= Hash. The bound
    # check stops the scan at the end of the list when Hash is larger than
    # every training hash.
    while pos < n_train and Hash > train_list[pos][0]:
        pos += 1

    # Each neighbour within a window of up to 999 entries on either side votes
    # for its landmark with weight 1/x, x being its distance from the cursor.
    # NOTE(review): the entry at `pos` itself casts no vote because both scans
    # start at offset 1 -- confirm this is intended.
    votes = {}
    for x in range(1, min(1000, pos)):
        lm = train_list[pos - x][1]
        votes[lm] = votes.get(lm, 0) + 1.0 / x
    for x in range(1, min(1000, n_train - pos)):
        lm = train_list[pos + x][1]
        votes[lm] = votes.get(lm, 0) + 1.0 / x

    # Keep the ten landmarks with the HIGHEST vote totals, best first.
    ranked = sorted(((score, lm) for lm, score in votes.items()), reverse=True)[:10]

    best_count = 0
    answer = "-1 0.01"  # default when there are no candidates or none is good enough
    for _, lm in ranked:
        # Compare against the first training image listed for this landmark.
        Id1 = train[train['landmark_id']==lm]['id'].iloc[0]
        check,confidence = delf_compare(test_delf+str(Id)+".delf",train_delf+str(Id1)+".delf")
        if check >= 20:
            # Strong match: accept immediately.
            return (str(lm)+" "+str(confidence),pos)
        if check > best_count:
            best_count = check
            # A best candidate only replaces the default once it has at least
            # 5 verified matches.
            if check >= 5:
                answer = str(lm)+" "+str(confidence)
    return (answer,pos)

Classify all the test images in a loop using DELF features.

In [None]:
# Classify every test image that is not already in `test_landmarks`, in hash
# order. Each entry appended is [id, label], where label is either
# "<landmark_id> <confidence>", "-1 0.01" (no landmark found) or -2 (the
# image's feature file is missing).
pos = 0  # cursor into train_list, advanced by get_landmark()
start = time.time()
for i in range(len(test_landmarks),len(test_list)):
    if(i%100==0):
        # Periodic checkpoint so an interrupted run can be resumed.
        landmarks = pd.DataFrame(test_landmarks,columns=["id","landmarks"])
        landmarks.to_csv("test_landmarks.csv")
        print (time.time()-start)
    Hash,Id = test_list[i]
    query_file = test_delf+str(Id)+".delf"
    if (not os.path.isfile(query_file)):
        test_landmarks.append([Id,-2])
        continue

    landmark = None
    # The previous entry in hash order may be a near-duplicate of this image;
    # if the two match strongly, reuse its label instead of searching again.
    # There is no previous entry for the very first image.
    if i > 0 and test_landmarks:
        Id_prev = test_list[i-1][1]
        check,confidence = delf_compare(query_file,test_delf+str(Id_prev)+".delf")
        # The stored label is "<landmark_id> <confidence>"; reuse only the id
        # part and pair it with the confidence of this comparison.
        prev_fields = str(test_landmarks[-1][1]).split()
        if check > 15 and prev_fields:
            landmark = prev_fields[0]+" "+str(confidence)
    if landmark is None:
        landmark,pos = get_landmark(Hash,Id,pos)
    test_landmarks.append([Id,landmark])

# Final save: without it, everything classified after the last multiple-of-100
# checkpoint would be lost.
landmarks = pd.DataFrame(test_landmarks,columns=["id","landmarks"])
landmarks.to_csv("test_landmarks.csv")