Load needed libraries

In [None]:
from google.colab import drive, files
# Mount Google Drive so the project images stored under /content/gdrive can be read.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install opencv-contrib-python==4.4.0.44

Collecting opencv-contrib-python==4.4.0.44
  Downloading opencv_contrib_python-4.4.0.44-cp37-cp37m-manylinux2014_x86_64.whl (55.7 MB)
[K     |████████████████████████████████| 55.7 MB 1.2 MB/s 
Installing collected packages: opencv-contrib-python
  Attempting uninstall: opencv-contrib-python
    Found existing installation: opencv-contrib-python 4.1.2.30
    Uninstalling opencv-contrib-python-4.1.2.30:
      Successfully uninstalled opencv-contrib-python-4.1.2.30
Successfully installed opencv-contrib-python-4.4.0.44


In [None]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from sklearn.cluster import MeanShift, estimate_bandwidth
import warnings
# NOTE: this silences every warning for the rest of the session, including NumPy
# RuntimeWarnings (e.g. the mean of an empty slice). Remove it while debugging.
warnings.filterwarnings('ignore')

STEP A: 
Test on scene images: {e1.png, e2.png, e3.png, e4.png, e5.png}

Use product images: {0.jpg, 1.jpg, 11.jpg, 19.jpg, 24.jpg, 26.jpg, 25.jpg}

In [None]:
# Scene images e1.png .. e5.png
scene_paths = [f"e{scene_number}.png" for scene_number in range(1, 6)]
# Product (model) images; the order of this list is the order of the per-scene results
model_paths = [f"{product_id}.jpg" for product_id in (0, 1, 11, 19, 24, 26, 25)]


def load_images(paths,dir,base_dir="/content/gdrive/My Drive/CV Project/"):
  """Load images from disk and return them as RGB arrays.

  Args:
    paths: iterable of file names, e.g. ["e1.png", "e2.png"].
    dir: sub-directory (with trailing slash) appended to base_dir, e.g. "scenes/".
    base_dir: root folder of the project data; defaults to the Drive folder
      used by this notebook.

  Returns:
    A list with one RGB image per entry of paths, in the same order.

  Raises:
    FileNotFoundError: if a file is missing or cannot be decoded.
  """
  images = []
  for path in paths:
    full_path = base_dir + dir + path
    bgr_image = cv2.imread(full_path)
    # cv2.imread does not raise on failure, it returns None; fail here with the
    # offending path instead of a cryptic cvtColor error.
    if bgr_image is None:
      raise FileNotFoundError("Could not read image: " + full_path)
    # OpenCV loads images as BGR; convert to RGB.
    images.append(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
  return images
  
# Read the scene pictures ("train") and the product pictures ("query") as RGB arrays
train_images = load_images(scene_paths, "scenes/")
query_images = load_images(model_paths, "models/")

# A single SIFT detector is shared by all images
sift = cv2.SIFT_create()

# image index -> (keypoints, descriptors) as returned by detectAndCompute
train_features = dict(enumerate(sift.detectAndCompute(image, None) for image in train_images))
query_features = dict(enumerate(sift.detectAndCompute(image, None) for image in query_images))

In [None]:
# Peek at the first few keypoints of scene 0 instead of dumping the whole collection
train_features[0][0][:5]

In [None]:
def compute_matches(FLANN_INDEX_KDTREE,trees,checks,k,lowe,des_query,des_train):
  """Match query descriptors to train descriptors with FLANN and Lowe's ratio test.

  Args:
    FLANN_INDEX_KDTREE: value passed as the FLANN `algorithm` index parameter.
    trees: number of trees of the FLANN index.
    checks: value passed as the FLANN `checks` search parameter.
    k: number of nearest neighbours requested per query descriptor. The ratio
      test needs two neighbours, so values below 2 are raised to 2.
    lowe: ratio threshold; a match is kept when best.distance < lowe * second.distance.
    des_query: descriptors of the query image (may be None).
    des_train: descriptors of the train image or of a subset of it (may be None).

  Returns:
    List of the best-neighbour matches that passed the ratio test. Empty when
    either descriptor set is missing or the train set has fewer than two entries.
  """
  # Without two candidate neighbours the ratio test cannot be evaluated.
  if des_query is None or des_train is None:
    return []
  if len(des_query) == 0 or len(des_train) < 2:
    return []
  index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = trees)
  search_params = dict(checks = checks)
  flann = cv2.FlannBasedMatcher(index_params, search_params)
  matches = flann.knnMatch(des_query,des_train,k=max(2, k))
  # store all the good matches as per Lowe's ratio test.
  good = []
  for neighbours in matches:
    # the matcher may return fewer neighbours than requested for some descriptors
    if len(neighbours) < 2:
      continue
    m, n = neighbours[0], neighbours[1]
    if m.distance < lowe*n.distance:
      good.append(m)
  return good

def get_dom_color(img_in):
  """Return the dominant colour of an image using k-means clustering.

  The pixels are flattened into rows of three channel values and clustered
  with cv2.kmeans using a single cluster (K = 1), so every pixel is assigned
  to the same centre. The image is rebuilt from the uint8 centres and the
  value at position [0][0] of that rebuilt image is returned.
  """
  pixels = np.float32(img_in.reshape((-1,3)))
  n_clusters = 1
  # termination: at most 10 iterations, or an accuracy (epsilon) of 1.0
  stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
  attempts = 10
  _, label, center = cv2.kmeans(pixels, n_clusters, None, stop_criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)
  # centres come back as floats; cast to uint8 and map every pixel to its centre
  palette = np.uint8(center)
  quantized = palette[label.flatten()].reshape(img_in.shape)
  return quantized[0][0]

def get_roi(x1,x2,x3,x4,y1,y2,y3,y4,img):
  """Crop the axis-aligned bounding box of four corner points out of img.

  Args:
    x1..x4: x coordinates of the four corners.
    y1..y4: y coordinates of the four corners.
    img: array indexed as img[row, column].

  Returns:
    The slice img[top:bottom, left:right] of the bounding box, with all bounds
    clamped at 0. The result is empty when the box lies entirely to the left
    of or above the image.
  """
  xs = (x1,x2,x3,x4)
  ys = (y1,y2,y3,y4)
  top_left_x = int(max(0, min(xs)))
  top_left_y = int(max(0, min(ys)))
  # The far corner is clamped as well: a negative stop index would be read by
  # NumPy as "count from the end" and return a large, wrong crop.
  bot_right_x = int(max(0, max(xs)))
  bot_right_y = int(max(0, max(ys)))
  return img[top_left_y:bot_right_y, top_left_x:bot_right_x]

def cluster_kp(keypoints, quantile = 0.22):
  """Cluster keypoints by their x coordinate with mean shift.

  Args:
    keypoints: sequence of objects exposing `.pt`, whose first element is the
      x coordinate.
    quantile: quantile passed to estimate_bandwidth to derive the mean-shift
      bandwidth.

  Returns:
    (labels, n_clusters): one cluster label per keypoint and the number of
    distinct labels. Empty input yields an empty label array and 0.
  """
  x = np.array([keypoint.pt[0] for keypoint in keypoints]).reshape(-1,1)
  if x.size == 0:
    return np.array([], dtype=int), 0
  bandwidth = estimate_bandwidth(x, quantile=quantile)
  # A zero bandwidth (e.g. a single keypoint, or many identical x values) is
  # rejected by MeanShift; treat all points as one cluster in that case.
  if bandwidth <= 0:
    return np.zeros(len(x), dtype=int), 1
  ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
  ms.fit(x)
  return ms.labels_, len(np.unique(ms.labels_))


In [None]:
def object_retrieve(img_query, img_train, query_features, train_features, min_match_count = 50, COLOR_DIFF_THRESHOLD = 50, verbose = False):
    """Decide whether the query (product) image appears in the train (scene) image.

    The scene keypoints are split into clusters with cluster_kp. For each
    cluster the query descriptors are matched against the cluster descriptors;
    with enough matches a homography is estimated, the query outline is
    projected into the scene and the mean colour of the resulting region is
    compared with the mean colour of the query image.

    Args:
        img_query: query image, indexed as (rows, columns, channels).
        img_train: scene image.
        query_features: (keypoints, descriptors) of the query image.
        train_features: (keypoints, descriptors) of the scene image.
        min_match_count: a cluster is analysed only with more matches than this.
        COLOR_DIFF_THRESHOLD: maximum Euclidean distance between the two mean
            colours for a detection to be accepted.
        verbose: accepted for compatibility; not used by this implementation.

    Returns:
        True as soon as one cluster yields an accepted detection, else False.
    """
    kp_query, des_query = query_features[0], query_features[1]
    kp_train, des_train = train_features[0], train_features[1]
    # An image without keypoints/descriptors cannot be matched.
    if des_query is None or des_train is None:
        return False
    if len(kp_query) == 0 or len(kp_train) == 0:
        return False

    # Partition the keypoints of train image into clusters
    labels,n_clusters = cluster_kp(kp_train)
    # Converted once so each cluster can be selected with a boolean mask.
    kp_train_all = np.array(kp_train)
    des_train_all = np.array(des_train)

    # We analyze each cluster
    for i in range(n_clusters):
        in_cluster = labels == i
        key_train_local = kp_train_all[in_cluster]
        des_train_local = des_train_all[in_cluster]
        # Compute the matches on the subset of keypoints
        good = compute_matches(0,5,50,2,0.55,des_query,des_train_local)
        # findHomography needs at least 4 point pairs, whatever min_match_count is.
        if len(good) <= min_match_count or len(good) < 4:
            print("Not enough matches were found")
            continue

        src_pts = np.float32([kp_query[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([key_train_local[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
        M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 2)
        if M is None:
            print ("No Homography was found")
            continue

        h,w,_ = img_query.shape
        # Project the four corners of the query image into the scene
        pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
        corners = cv2.perspectiveTransform(pts,M).reshape(-1,2)
        roi = get_roi(*corners[:, 0], *corners[:, 1], img_train)
        # Skip crops with no pixels at all (zero height OR zero width); their
        # mean colour would be NaN.
        if roi.size == 0:
            continue

        # check if the euclidean distance between the colors (mean) of the two rectangle is lower than the chosen threshold
        color_diff = np.linalg.norm(roi.mean(axis = 0).mean(axis = 0) - img_query.mean(axis = 0).mean(axis = 0))
        if color_diff <= COLOR_DIFF_THRESHOLD:
            print("The object was found")
            return True
        print("A match was discarded")

    return False

# For every scene, try to retrieve each product; pred_labels maps
# scene index -> list of detection results in product order.
pred_labels = {}
for index_train, scene_image in enumerate(train_images):
  scene_labels = []
  for index_query, product_image in enumerate(query_images):
    was_found = object_retrieve(product_image, scene_image, query_features[index_query], train_features[index_train])
    scene_labels.append(was_found)
  pred_labels[index_train] = scene_labels




In [None]:
# Expected results: key = scene index, one 0/1 entry per product in the same
# order as the predictions stored in pred_labels.
true_labels = dict(enumerate([
    [1,0,1,0,0,0,0],
    [0,0,0,0,1,1,1],
    [1,1,1,0,0,0,0],
    [1,0,1,0,0,1,1],
    [0,0,0,1,0,0,1],
]))
# Prints True only when every prediction equals its expected value
print(true_labels == pred_labels)

True
