# Performing PCA on the color space
Let's see whether PCA can capture the color information we need.

In [24]:
import os
import cv2
import pickle
import numpy as np
from tqdm.notebook import tqdm
from imutils import resize
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

In [9]:
# Image paths
test_path = '../../data/test_images/test_image_1.jpeg'
path_1 = '../../data/Biscuits/400018.jpg'
path_2 = '../../data/DentalCare/100004.jpg'


def _read_gray(path):
    """Load the image at *path* as a single-channel grayscale array.

    cv2.imread reports a missing or undecodable file by returning None
    instead of raising, which would otherwise only surface later as an
    opaque error inside resize() or PCA. Fail here, naming the path.

    Raises:
        OSError: if OpenCV could not load the file.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise OSError(f"cv2.imread could not load image: {path!r}")
    return image


# NOTE(review): the images are loaded in grayscale, so the colour channels
# are discarded even though the notebook title refers to the colour space.
# Confirm this is intended.
test_img = _read_gray(test_path)
img_1 = _read_gray(path_1)
img_2 = _read_gray(path_2)

# The two reference product images are scaled to a common width of 960 px
# (imutils.resize keeps the aspect ratio).
# NOTE(review): test_img is left at its native size here - confirm that is
# intended, since later cells compare its projection against resized images.
img_1 = resize(img_1, width=960)
img_2 = resize(img_2, width=960)

In [15]:
# One PCA object keeping the 30 leading components; it is re-fitted on every
# image passed to fit_transform, so after this cell it holds the fit of img_2.
pca = PCA(n_components=30)

# Project each image (rows are treated as samples, columns as features) in
# the same order as before: test image first, then the two references.
projections = [pca.fit_transform(image) for image in (test_img, img_1, img_2)]

# Scale every projection to unit norm (np.linalg.norm of a 2-D array is the
# Frobenius norm) so that overall magnitude does not dominate comparisons.
test_img_pca, img_1_pca, img_2_pca = (
    projected / np.linalg.norm(projected) for projected in projections
)

In [22]:
# Restore the previously saved collection of image paths.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
paths_file = '../../saved_data/25 Jun/all_paths.pkl'
with open(paths_file, 'rb') as handle:
    all_paths = pickle.load(handle)

In [28]:
# Rank every image in all_paths by the distance between its PCA projection
# and the projection of the test image, then display the closest matches.
dist_dict = {}

# Number of best matches to display.
top_k = 5

# Each candidate must have as many rows as the reference projection,
# otherwise the element-wise difference below cannot be computed.
ref_rows = test_img_pca.shape[0]

# NOTE(review): fit_transform re-fits `pca` on every image, so each image is
# projected onto its own principal components rather than a shared basis.
# Consider fitting once and using pca.transform if a common basis is wanted.
for path in tqdm(all_paths):
    img = cv2.imread(path, 0)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        continue
    img = resize(img, height=ref_rows)
    img_pca = pca.fit_transform(img)

    # Scale to unit norm exactly as test_img_pca was, so the distance is not
    # dominated by the raw magnitude of the un-normalised projection.
    norm = np.linalg.norm(img_pca)
    if norm > 0:
        img_pca = img_pca / norm

    dist_dict[path] = np.linalg.norm(test_img_pca - img_pca)

# Paths ordered from most to least similar (smallest distance first).
sorted_paths = sorted(dist_dict, key=dist_dict.get)
sorted_imgs = [cv2.imread(p, 0) for p in sorted_paths[:top_k]]

plt.imshow(test_img, cmap='gray')
plt.show()

print("\nMatched images ...")
fig = plt.figure()
# Iterate over what was actually found: there may be fewer than top_k images.
for i, matched in enumerate(sorted_imgs):
    fig.add_subplot(1, top_k, i + 1)
    plt.imshow(matched, cmap='gray')
plt.tight_layout()
plt.show()