In [1]:
import numpy as np
import cv2 as cv
import os

In [2]:
# Root directory of the image collection; it is passed to load_dataset(), which
# treats each of its entries as one class, and joined with image names before reading.
path = "images-photographes"

In [3]:
def load_dataset(dir_sc, images_per_class=None):
    """Index an image collection stored as one sub-directory per class.

    Args:
        dir_sc: Root directory; every sub-directory of it is treated as a class.
        images_per_class: Optional cap on the number of images taken from each
            class. ``None`` keeps every image.

    Returns:
        A tuple ``(inames, ilabels, cnames)``:
        ``inames`` is a list of image paths relative to ``dir_sc``
        (``<class>/<file>``), ``ilabels`` is an integer NumPy array giving the
        class index of each image, and ``cnames`` is the sorted list of class
        names (``ilabels`` values index into it).
    """
    # Only directories are classes: a stray file in the root (.DS_Store,
    # README, ...) would otherwise make os.listdir() raise further down.
    cnames = sorted(
        entry for entry in os.listdir(dir_sc)
        if os.path.isdir(os.path.join(dir_sc, entry))
    )
    inames = []
    ilabels = []
    for ilabel, cl in enumerate(cnames):
        dir_cl = os.path.join(dir_sc, cl)
        # os.listdir() order is arbitrary; sorting makes the images_per_class
        # selection (and the resulting image order) reproducible.
        for iname in sorted(os.listdir(dir_cl))[:images_per_class]:
            inames.append(os.path.join(cl, iname))
            ilabels.append(ilabel)
    # Explicit dtype keeps the labels integer even when no image was found.
    ilabels = np.array(ilabels, dtype=int)
    return inames, ilabels, cnames


In [9]:
def ComputeSift(I,p):
    """Detect SIFT keypoints in an image and save a visualisation of them.

    Args:
        I: Image in BGR channel order (it is converted with
            ``cv.COLOR_BGR2GRAY``). It is left unmodified.
        p: Path, relative to the ``Sifts`` directory, of the image file in
            which the keypoint visualisation is written.

    Returns:
        The ``(kp, des)`` pair produced by ``detectAndCompute``: the detected
        keypoints and their descriptors. ``des`` may be ``None`` when no
        keypoint is detected, so callers should be prepared for that.
    """
    gray = cv.cvtColor(I, cv.COLOR_BGR2GRAY)
    sift = cv.SIFT_create()
    kp, des = sift.detectAndCompute(gray, None)
    # Let OpenCV allocate the output image (None) instead of passing I, which
    # would draw the keypoints over the caller's image as a side effect.
    img = cv.drawKeypoints(gray, kp, None, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    out_path = os.path.join('Sifts', p)
    # p may contain sub-directories (e.g. "<class>/<file>"); cv.imwrite does
    # not create missing directories, so make sure the target folder exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv.imwrite(out_path, img)
    return kp, des

In [5]:
def ComputeSiftDataset(inames):
    """Compute SIFT keypoints and descriptors for every image of the dataset.

    Args:
        inames: Image paths relative to the module-level ``path`` directory.

    Returns:
        A tuple ``(keypoints, descriptors)`` of two lists aligned with
        ``inames``: entry ``i`` holds what ``ComputeSift`` returned for
        ``inames[i]``.

    Raises:
        OSError: If an image cannot be read.
    """
    keypoints = []
    descriptors = []
    for x in inames:
        p = os.path.join(path, x)
        img = cv.imread(p)
        if img is None:
            # cv.imread reports failure by returning None rather than raising;
            # fail here with the offending path instead of deep inside OpenCV.
            # Skipping the image is not an option: the results must stay
            # aligned with inames.
            raise OSError(f"Could not read image: {p}")
        kp, des = ComputeSift(img, x)
        keypoints.append(kp)
        descriptors.append(des)

    return keypoints, descriptors
        
    

In [7]:
# Index the whole collection: relative image paths, one integer label per image,
# and the list of class names the labels refer to.
inames, ilabels, class_names = load_dataset(path)

In [8]:
# Class names are the sorted entries of the dataset root, as returned by load_dataset().
class_names

['daido-moriyama',
 'dorothea-lange',
 'felix-nadar',
 'henri-cartier-bresson',
 'irving-penn',
 'malik-sidibe',
 'marc-riboud',
 'seidou-keita',
 'shomei-tomatsu',
 'walter-evans']

In [17]:
# Sanity check: load_dataset() appends exactly one label per indexed image.
ilabels.shape

(280,)

In [10]:
# Run SIFT on every indexed image; both result lists follow the order of inames.
# ComputeSift also writes a keypoint visualisation for each image under 'Sifts'.
keypoints, descriptors = ComputeSiftDataset(inames)

In [16]:
# Sanity check: one descriptor entry per image, so this should match len(inames).
len(descriptors)

280

In [13]:
from sklearn.cluster import KMeans

In [21]:
def compute_split(length, seed=1337, pc=0.80):
    """Split the indices ``0 .. length-1`` into two disjoint random subsets.

    Args:
        length: Number of items to split.
        seed: Seed of the private ``RandomState`` used for the draw, so the
            same arguments always give the same split.
        pc: Fraction of the indices assigned to the first subset;
            ``int(length * pc)`` indices are drawn without replacement.

    Returns:
        A tuple ``(train_ids, test_ids)`` of integer index arrays.
        ``train_ids`` is in draw order; ``test_ids`` holds every remaining
        index in ascending order.
    """
    train_ids = np.random.RandomState(seed=seed).choice(
        length,
        size=int(length * pc),
        replace=False)
    # setdiff1d yields the sorted complement and keeps an integer dtype even
    # when it is empty; a Python set round-trip gives an unspecified order and
    # a float64 array (unusable as an index) when nothing is left.
    test_ids = np.setdiff1d(np.arange(length), train_ids)
    return train_ids, test_ids

In [22]:
def compute_visual_dict(sift, n_clusters=1000, n_init=1, verbose=1):
    """Build a visual dictionary by clustering SIFT descriptors with k-means.

    Args:
        sift: Sequence of per-image descriptor arrays whose last dimension is
            the descriptor size. ``None`` entries (images for which no
            descriptor was computed) are ignored.
        n_clusters: Number of k-means clusters, i.e. visual words learnt from
            the data.
        n_init: Number of k-means initialisations, forwarded to ``KMeans``.
        verbose: Verbosity level, forwarded to ``KMeans``.

    Returns:
        Array of shape ``(n_clusters + 1, dim)``: the k-means cluster centres
        followed by one all-zero vector.

    Raises:
        ValueError: If ``sift`` contains no descriptor array at all.
    """
    # Images without any descriptor contribute nothing to the dictionary.
    sift = [s for s in sift if s is not None]
    if not sift:
        raise ValueError("compute_visual_dict: no descriptors to cluster")

    # reorder data: stack everything into one (n_descriptors, dim) matrix
    dim_sift = sift[0].shape[-1]
    sift = np.concatenate([s.reshape(-1, dim_sift) for s in sift], axis=0)
    # remove zero vectors
    keep = ~np.all(sift == 0, axis=1)
    sift = sift[keep]
    # randomly pick sift: cluster a 5% sample of the descriptors
    ids, _ = compute_split(sift.shape[0], pc=0.05)
    sift = sift[ids]

    # n_init and verbose are part of the signature, so honour them.
    kmeans = KMeans(n_clusters=n_clusters, n_init=n_init, verbose=verbose).fit(sift)
    centers = kmeans.cluster_centers_
    # Add the zero vector as an extra row. np.append returns a new array
    # (and flattens without an axis), so its result must not be discarded;
    # vstack states the intent directly.
    zeros_vect = np.zeros((1, dim_sift), dtype=centers.dtype)
    vdict = np.vstack([centers, zeros_vect])

    return vdict

In [None]:
# Learn the visual dictionary by running k-means on a sample of the dataset's
# SIFT descriptors, then display it.
vdict = compute_visual_dict(descriptors)
vdict

In [None]:
# Sanity check on the dictionary size: one row per visual word.
vdict.shape

In [None]:
import matplotlib.pyplot as plt
def display_images(images):
    """Show a stack of equally sized grayscale images as one square mosaic.

    Args:
        images: Array of shape ``(n_images, w, h)``.

    The images are tiled on an ``n x n`` grid with
    ``n = ceil(sqrt(n_images))``. The grid is filled column by column, and
    unused cells are left at zero.
    """
    n_images,w,h = images.shape
    n = int(np.ceil(np.sqrt(n_images)))
    im = np.zeros((n*w, n*h))
    for k in range(n_images):
        # k runs down a column first (row index i), then moves to the next column j.
        i = k % n
        j = k // n
        im[i*w:i*w+w, j*h:j*h+h] = images[k]

    # pyplot is imported as `plt`; the previous `mplt` alias was never defined
    # and raised a NameError on the first call.
    plt.figure(figsize=(0.7*n,0.7*n))
    plt.gray()
    plt.imshow(im)
    plt.axis('off')
    plt.show()
    

In [None]:
# NOTE(review): `sifts_list_by_image`, `get_regions_and_sifts` and `dir_sc` are not
# defined in the visible part of this notebook — confirm they are provided
# elsewhere before running this cell (TODO verify).

# Draw 30 image indices at random. np.random.randint samples with replacement,
# so the same image can be picked more than once, and no seed is set here.
indexes = np.random.randint(0, len(inames), 30)
sifts = [sifts_list_by_image[i] for i in indexes]
chosen_inames = [inames[i] for i in indexes]

regions, sifts = get_regions_and_sifts(dir_sc, chosen_inames, sifts) # Compute SIFT and regions from 30 random images
display_images(regions[np.random.choice(len(regions), 100)]) # Show 100 random regions

# For each of the first 20 visual words, show the regions whose descriptor is
# closest to it. Presumably `regions` and `sifts` are aligned row by row — confirm.
centers = list(range(20))
for center in centers:
  center_vect = vdict[center]
  # Squared Euclidean distance between every descriptor and this visual word.
  dist = ((sifts - center_vect)**2).sum(axis=1)
  # get best 100: indices of the 100 smallest distances
  top100 = dist.argsort()[:100]
  top100_regions = regions[top100]
  display_images(top100_regions) # Show the 100 regions closest to this visual word