# Color search algorithm for image matching

In [12]:
import os
import cv2
import pickle
import numpy as np
from glob import glob
from ntpath import basename
from sklearn.cluster import MiniBatchKMeans
from tqdm.notebook import tqdm

In [2]:
# Load saved files
# NOTE: pickle.load can execute arbitrary code while deserialising; only point
# these paths at files produced by a trusted run of this project.

def _read_pickle(pickle_path):
    """Open ``pickle_path`` in binary mode and return the unpickled object."""
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)

# Mapping whose keys are image paths and whose values are color-label vectors.
file_color_map = _read_pickle('../../saved_data/13 Jun/file_color_map.pkl')

# Fitted clustering model; used below through its ``predict`` method.
kmeans_clt = _read_pickle('../../saved_data/13 Jun/kmeans_clt.pkl')

In [4]:
# Test algorithm for images

def jaccard_score(list_1, list_2):
    """Return the Jaccard similarity of the distinct elements of two iterables.

    Both arguments are converted to sets, so element order and repeated
    elements do not influence the result.

    Args:
        list_1: Iterable of hashable items.
        list_2: Iterable of hashable items.

    Returns:
        float: ``len(intersection) / len(union)`` in the range [0, 1]. Two
        empty inputs are treated as identical and score 1.0 (the union is
        empty, so the ratio itself would be a division by zero).
    """
    set_1 = set(list_1)
    set_2 = set(list_2)
    union = set_1 | set_2
    if not union:
        # Both inputs are empty: avoid ZeroDivisionError.
        return 1.0
    return len(set_1 & set_2) / len(union)

paths = list(file_color_map.keys())
# The stored vectors never change during the evaluation, so build the list once
# instead of on every iteration. Its order matches `paths`.
stored_vectors = list(file_color_map.values())
correct = 0

# Wrap `paths` itself (not `enumerate(paths)`): enumerate has no len(), so tqdm
# could not show a total or an ETA.
for i, path in enumerate(tqdm(paths)):
    # Read and prepare the image
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising; count the image as a miss instead of crashing below.
        print("Could not read image: {}".format(path))
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Flatten to one row per pixel.
    img = img.reshape(img.shape[0]*img.shape[1], -1)
    # Fit mini batch kmeans and generate colors
    # Fixed seed so that re-running the cell reproduces the same accuracy.
    clt = MiniBatchKMeans(n_clusters=5, random_state=0)
    clt.fit(img)
    # Use kmeans_clt to predict color classes
    labels = kmeans_clt.predict(clt.cluster_centers_)
    # Find the image with maximum intersection
    # NOTE: np.argmax returns the first index among equal scores, so an image
    # whose best score is tied with an earlier entry is counted as a miss.
    max_index = np.argmax([jaccard_score(v, labels) for v in stored_vectors])
    if max_index == i:
        correct += 1

# Guard against an empty file_color_map (division by zero).
accuracy = correct*100./len(paths) if paths else 0.0
print("Accuracy: {:.2f}".format(accuracy))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Accuracy: 1.97


In [14]:
def list_join(l):
    """Concatenate the ``str()`` form of every item in ``l`` with no separator."""
    return ''.join(map(str, l))

# Stored color vectors, in the iteration order of file_color_map.
color_vectors = [vector for vector in file_color_map.values()]
# Reverse lookup from a joined color vector to a file path. Files that share the
# same vector share a key, so only the last such file is kept.
color_file_map = dict(
    (list_join(vector), path) for path, vector in file_color_map.items()
)

def diagnose(paths, num_samples, random_state=None):
    """Print a matching report for randomly sampled images.

    For each sampled image the function clusters its pixels into 5 colors,
    maps the cluster centers to color classes with ``kmeans_clt``, and looks
    up the stored vector in ``file_color_map`` with the highest Jaccard score.
    It then prints the input vector, the best match, the expected target and
    the score of the best match.

    Args:
        paths: Sequence of image paths. Each path must be a key of
            ``file_color_map``, because the target vector is looked up by path.
        num_samples: Number of images to sample. Sampling uses
            ``np.random.choice`` with its default settings, i.e. with
            replacement, so the same image can be reported more than once.
        random_state: Optional seed forwarded to ``MiniBatchKMeans``. The
            default ``None`` keeps the clustering unseeded.

    Returns:
        None. Results are printed.
    """
    if len(paths) == 0:
        # np.random.choice raises on an empty population.
        print("No images to diagnose.")
        return

    # Build the lookup lists once; they do not change between samples. File
    # names are taken from here, not from a vector -> path reverse map, because
    # several files can share one vector and a reverse map keeps only one.
    known_paths = list(file_color_map.keys())
    known_vectors = list(file_color_map.values())

    idx = np.random.choice(len(paths), size=num_samples)
    for i in idx:
        img = cv2.imread(paths[i])
        if img is None:
            # cv2.imread returns None for a missing/unreadable file.
            print("Could not read image: {}".format(paths[i]))
            print("\n------------------------------------------\n")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Flatten to one row per pixel.
        img = img.reshape(img.shape[0]*img.shape[1], -1)
        clt = MiniBatchKMeans(n_clusters=5, random_state=random_state)
        clt.fit(img)

        labels = kmeans_clt.predict(clt.cluster_centers_)
        scores = [jaccard_score(v, labels) for v in known_vectors]
        # np.argmax returns the first index among tied scores.
        max_index = np.argmax(scores)

        print("Input vector: {}".format(labels))
        print("Matched vector: {} ({})".format(
            known_vectors[max_index], known_paths[max_index]
        ))
        print("Target vector: {} ({})".format(
            file_color_map[paths[i]], paths[i]
        ))
        print("Calculated Jaccard score: {}".format(scores[max_index]))
        print("\n------------------------------------------\n")
        
        
# Diagnose: print match reports for 5 randomly sampled images
diagnose(paths, num_samples=5)

Input vector: [3 2 0 1 2]
Matched vector: [1, 0, 2, 3, 2] (../../data/Snacks/700191.jpg)
Target vector: [1, 3, 2, 2, 0] (../../data/IceCreams/120464.jpg)
Calculated Jaccard score: 1.0

------------------------------------------

Input vector: [1 3 3 4 2]
Matched vector: [1, 2, 1, 3, 4] (../../data/Snacks/700245.jpg)
Target vector: [3, 1, 4, 3, 2] (../../data/HairCare/300208.jpg)
Calculated Jaccard score: 1.0

------------------------------------------

Input vector: [1 4 2 1 3]
Matched vector: [1, 2, 1, 3, 4] (../../data/Snacks/700245.jpg)
Target vector: [1, 2, 3, 4, 1] (../../data/HairCare/300290.jpg)
Calculated Jaccard score: 1.0

------------------------------------------

Input vector: [1 4 4 3 2]
Matched vector: [1, 2, 1, 3, 4] (../../data/Snacks/700245.jpg)
Target vector: [4, 1, 4, 3, 2] (../../data/Soaps/200269.jpg)
Calculated Jaccard score: 1.0

------------------------------------------

Input vector: [1 4 2 3 1]
Matched vector: [1, 2, 1, 3, 4] (../../data/Snacks/700245.jpg)
T