# Demo_OrbMatch

## Run name

In [1]:
import time

# Identify this run as "<project>_<step>_<timestamp>".
project_name = 'Google_LandMark_Rec'
step_name = 'Demo_OrbMatch'
# With no explicit time tuple, strftime formats the current local time.
time_str = time.strftime("%Y%m%d_%H%M%S")
run_name = '_'.join([project_name, step_name, time_str])
print('run_name: ' + run_name)

# Wall-clock reference taken once the run name has been printed.
t0 = time.time()

run_name: Google_LandMark_Rec_Demo_OrbMatch_20180509_025627


## Import PKGs

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from IPython.display import display

import os
import gc
import math
import shutil
import zipfile
import pickle
import h5py
from PIL import Image

from tqdm import tqdm
from multiprocessing import cpu_count

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

  from ._conv import register_converters as _register_converters


## Project folders

In [3]:
# Top-level project directories, all rooted at the current working directory.
cwd = os.getcwd()
input_folder, output_folder, model_folder = (
    os.path.join(cwd, name) for name in ('input', 'output', 'model')
)

# Image directories that live under the input folder.
(
    org_train_folder,
    org_test_folder,
    train_folder,
    val_folder,
    test_folder,
) = (
    os.path.join(input_folder, name)
    for name in ('org_train', 'org_test', 'data_train', 'data_val', 'data_test')
)
test_sub_folder = os.path.join(test_folder, 'test')

# CSV files under the input folder. Note that `sample_submission_folder`
# holds the path of a .csv file despite its name.
train_csv_file, test_csv_file, sample_submission_folder = (
    os.path.join(input_folder, name)
    for name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

## Preview csv

In [5]:
# Training index: report its shape and show the first two rows.
train_csv = pd.read_csv(filepath_or_buffer=train_csv_file)
print('train_csv.shape is {0}.'.format(train_csv.shape))
display(train_csv.iloc[:2])

# Test index: same preview as above.
test_csv = pd.read_csv(filepath_or_buffer=test_csv_file)
print('test_csv.shape is {0}.'.format(test_csv.shape))
display(test_csv.iloc[:2])

train_csv.shape is (1225029, 3).


Unnamed: 0,id,url,landmark_id
0,cacf8152e2d2ae60,http://static.panoramio.com/photos/original/70...,4676
1,0a58358a2afd3e4e,http://lh6.ggpht.com/-igpT6wu0mIA/ROV8HnUuABI/...,6651


test_csv.shape is (117703, 2).


Unnamed: 0,id,url
0,000088da12d664db,https://lh3.googleusercontent.com/-k45wfamuhT8...
1,0001623c6d808702,https://lh3.googleusercontent.com/-OQ0ywv8KVIA...


In [6]:
# Columns needed to map an image id to its landmark label.
train_id, train_landmark_id = train_csv['id'], train_csv['landmark_id']

# Lookup table: image id -> landmark_id.
id_2_landmark_id_dict = {
    image_id: landmark
    for image_id, landmark in zip(train_id, train_landmark_id)
}
print('len(id_2_landmark_id_dict)=%d' % len(id_2_landmark_id_dict))

# Spot-check the entries labelled 0 and 1 (`index` is left at 1 afterwards).
for index in (0, 1):
    print('id: %s, \tlandmark_id:%s' % (train_id[index], id_2_landmark_id_dict[train_id[index]]))

len(id_2_landmark_id_dict)=1225029
id: cacf8152e2d2ae60, 	landmark_id:4676
id: 0a58358a2afd3e4e, 	landmark_id:6651


## OrbMatch

In [4]:
class OrbMatch(object):
    """Rank landmark classes for one image by ORB descriptor matching.

    The query image is compared against every file in ``original_folder``.
    A reference image "votes" for its class when at least ``min_good_match``
    of its brute-force Hamming matches have a distance below
    ``min_distance``. ``match()`` returns the ``top`` most-voted classes.

    NOTE(review): this class uses ``cv2`` (OpenCV), but no ``import cv2`` is
    visible in the import cell of this file -- confirm it is imported before
    the class is instantiated.

    Args:
        image_file: Path of the query image.
        original_folder: Directory holding the reference images.
        n_features: Maximum number of features passed to ``cv2.ORB_create``.
        is_crossCheck: ``crossCheck`` flag passed to ``cv2.BFMatcher``.
        n_matches: Stored on the instance; not used by the visible code.
        min_distance: A match counts as "good" when its distance is strictly
            below this value.
        min_good_match: Minimum number of good matches for a reference image
            to cast a vote.
        n_class: Number of classes (length of the vote vector).
        top: Number of class indices returned by ``match()``.
        class_index_fn: Optional callable ``file_name -> class index``. When
            omitted, the module-level ``get_class_indx`` function is called
            instead; it must then exist in the enclosing module.
    """

    def __init__(
        self,
        image_file,
        original_folder,
        n_features=500,
        is_crossCheck=True,
        n_matches=100,
        min_distance=60,
        min_good_match=50,
        n_class=14951,
        top=3,
        class_index_fn=None,
    ):
        self._image_file = image_file
        self._original_folder = original_folder

        self._n_features = n_features
        self._is_crossCheck = is_crossCheck
        self._n_matches = n_matches
        self._min_distance = min_distance
        self._min_good_match = min_good_match
        self._n_class = n_class
        self._top = top
        self._class_index_fn = class_index_fn

        # Placeholders kept from the original code; nothing visible here
        # reads them (the second name keeps its original spelling).
        self._key_point = None
        self._destance = None

        # ORB descriptors are binary, hence the Hamming norm in the matcher.
        self._clf = cv2.ORB_create(self._n_features)
        self._bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=self._is_crossCheck)

    def get_class_weight(self):
        """Placeholder for class weighting; not implemented, returns None."""
        pass

    def image_detect_and_compute(self, image_file):
        """Detect interest points and compute their ORB descriptors.

        Args:
            image_file: Path of the image to process.

        Returns:
            The descriptor array from ``detectAndCompute``, or ``None`` when
            the file cannot be read as an image or yields no descriptors.
        """
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals an unreadable / non-image path with None;
            # passing that on to cvtColor would raise.
            return None
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, des = self._clf.detectAndCompute(gray, None)
        return des

    def _class_index(self, file_name):
        """Map a reference file name to the index of its class."""
        if self._class_index_fn is not None:
            return self._class_index_fn(file_name)
        # Fallback to the helper the original code called; it is not defined
        # inside this class and must exist at module level.
        return get_class_indx(file_name)

    @staticmethod
    def _top_classes(votes, top):
        """Return the indices of the ``top`` largest entries of ``votes``.

        Indices are ordered by descending vote count; a stable sort makes
        ties resolve to the lower class index.
        """
        return np.argsort(-np.asarray(votes), kind='stable')[:top]

    def match(self):
        """Vote over all reference images and return the best classes.

        Prints each reference path and its number of good matches.

        Returns:
            Array of ``top`` class indices, most-voted first. When no
            reference image votes (including the case where the query image
            has no descriptors) every count is zero and the result is simply
            the lowest class indices, which carries no information.
        """
        votes = np.zeros(self._n_class)

        des1 = self.image_detect_and_compute(self._image_file)
        if des1 is None:
            # Nothing to match against: no reference image can vote.
            return self._top_classes(votes, self._top)

        for file_name in os.listdir(self._original_folder):
            image_file = os.path.join(self._original_folder, file_name)
            print(image_file)
            des2 = self.image_detect_and_compute(image_file)
            if des2 is None:
                # Unreadable entry or no descriptors; BFMatcher cannot take None.
                continue

            matches = self._bf.match(des1, des2)
            n_good = sum(1 for m in matches if m.distance < self._min_distance)
            print(n_good)
            # A reference image votes only when it has ENOUGH good matches.
            if n_good >= self._min_good_match:
                votes[self._class_index(file_name)] += 1

        # Ranking raw counts equals ranking normalised frequencies, and it
        # avoids dividing by zero when there are no votes at all.
        return self._top_classes(votes, self._top)