In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from random import choice
import math

from tensorflow.keras.preprocessing import image

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from matplotlib.offsetbox import OffsetImage,AnnotationBbox


In [None]:
# Mount Google Drive when running on Colab. On any other setup the
# `google.colab` package is not installed, so the mount is skipped and the
# rest of the notebook still runs top to bottom.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping the Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
# Folder holding the scraped image files; image names are joined onto this
# path before the files are loaded for display.
scraped_images_folder = '/set/the/path/to/your/scraped/images/'
# .npz archive that is read with np.load to obtain the per-image features.
feature_file = '/path/to/feature-file.npz'


In [None]:
# Distance metric used as the default `metric` argument of the functions in
# this notebook; the value is handed to scipy's cdist.
default_metric = 'cosine'

In [None]:
# Metric names accepted by scipy.spatial.distance.cdist (which of them are
# available depends on the installed SciPy version):
# 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard',
# 'jensenshannon', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
# 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'

### Load Features

In [None]:
# Copy every array out of the .npz archive into an ordinary dict held in RAM
# (for speed). np.load hands back a lazy NpzFile that keeps the archive open;
# the `with` block closes it again once all arrays have been read.
with np.load(feature_file) as npz_archive:
    feature_dict = {name: npz_archive[name] for name in npz_archive.files}

In [None]:
legalize_it = True # the filter is switched on (True) or off (False)

if legalize_it:
    # JSON file mapping each image name to a flag; only images whose flag is
    # truthy are kept in feature_dict.
    copyright_file = '/set/the/path/to/the/file/is_public.json'
    with open(copyright_file) as json_file:
        is_public = json.load(json_file)
    new_feature_dict = {
        image_name: image_feature
        for image_name, image_feature in feature_dict.items()
        if is_public[image_name]
    }
    feature_dict = new_feature_dict

In [None]:
# Split the dict into two parallel arrays: entry k of `image_names` is the key
# whose value sits at position k of `features` (a dict iterates its keys and
# its values in the same order).
image_names = np.array([*feature_dict.keys()])
features = np.array([*feature_dict.values()])

In [None]:
def compute_distances(feature,neighbours,metric = default_metric):
    """Return the distances between `feature` and every row of `neighbours`.

    feature:    reference feature; a 1-D array is treated as a single row
    neighbours: features to compare against; a 1-D array is treated as a
                single row
    metric:     metric name handed to scipy's cdist

    Only the first row of the cdist result is returned, i.e. the distances
    measured from the first (or only) row of `feature`.
    """
    # cdist works on 2-D inputs, so promote plain vectors to one-row matrices.
    query = feature[np.newaxis,:] if feature.ndim == 1 else feature
    pool = neighbours[np.newaxis,:] if neighbours.ndim == 1 else neighbours
    return cdist(query,pool,metric = metric)[0]

In [None]:
def find_neighbours(image_name,image_group,N,mode='nearest',metric =default_metric,
                    feature_dict = feature_dict):
    """ Find N neighbours of image_name within image_group.

        image_name:   filename of the reference image (a key of feature_dict)
        image_group:  list of images to search neighbours in; the reference
                      image itself is never returned
        N:            number of neighbours to return; fewer are returned when
                      the group holds fewer candidates, and N <= 0 gives []
        mode:         can be 'nearest' or 'farthest'
        metric:       distance metric to use
        feature_dict: mapping from filenames to features; the default is the
                      object `feature_dict` referred to when this cell was
                      executed, so re-run this cell after rebuilding it

        Returns a list of filenames ordered by ascending distance to the
        reference image.
        """
    # make sure mode is correct:
    assert mode in ['nearest','farthest'], "mode must one of 'nearest' or 'farthest' "
    reference_feature = feature_dict[image_name]
    # Remove the reference image first and keep the filtered names: the
    # argsort indices computed below refer to this filtered list. Indexing the
    # unfiltered group instead would shift every hit located after the
    # reference image by one position.
    candidate_names = [name for name in np.array(image_group) if name != image_name]
    if not candidate_names or N <= 0:
        # Nothing to search in / nothing requested (cdist cannot handle an
        # empty neighbour set, and order[-0:] would return everything).
        return []
    candidate_features = np.array([feature_dict[name] for name in candidate_names])
    distances = compute_distances(reference_feature,candidate_features,metric)
    order = np.argsort(distances)
    if mode == 'nearest':
        neighbour_inds = order[:N]
    elif mode == 'farthest':
        # explicit start index instead of order[-N:], clamped for N > len(order)
        neighbour_inds = order[max(len(order) - N, 0):]
    neighbours = [candidate_names[ni] for ni in neighbour_inds]
    return neighbours

### Plot Feature-Neighbours

In [None]:
# Pick one image name at random and display that image with its name as title.
random_image = choice(image_names)
# The name is stripped of surrounding single quotes before it is used as a
# file name inside the image folder.
im_name = os.path.join(scraped_images_folder, random_image.strip("'"))
im = image.load_img(im_name, target_size=(224,224))
plt.imshow(im)
preview_axes = plt.gca()
preview_axes.set_axis_off()
preview_axes.set_title(random_image)

In [None]:
# Search settings: reference image, the group to search in, how many
# neighbours to fetch and from which end of the distance ranking.
reference_image = 'imagename.jpg'
group = image_names
N = 10
mode = 'nearest'  # 'nearest' or 'farthest'
neighbours = find_neighbours(reference_image, group, N=N, mode=mode)

# One row of N+1 panels: the reference image first, then its neighbours.
images = [reference_image, *neighbours]
plt.figure(figsize=(30, 25))
for panel, image_name in enumerate(images, start=1):
    plt.subplot(1, N + 1, panel)
    im_name = os.path.join(scraped_images_folder, image_name.strip("'"))
    im = image.load_img(im_name, target_size=(224,224))
    plt.imshow(im)
    panel_axes = plt.gca()
    panel_axes.set_axis_off()
    panel_axes.set_title(image_name)

## Walk through the latent space
A path is created in feature space from a start image to an end image. At each of *N* evenly spaced intermediate points on the straight line between the two feature vectors, an image whose features lie close to that point is displayed. To create some variance, the parameter *N_neighbours* controls how strictly the closest neighbour is selected (*N_neighbours = 1*: the closest one is taken; *N_neighbours > 1*: one is selected at random from the *N_neighbours* closest ones). Since the same image was often selected as the neighbour at consecutive points, the parameter *unique* was added. If this parameter is set to *True*, each step is forced to select an image that has not yet been selected.

In [None]:
def find_neighbours_on_path(start_image,end_image,N,N_neighbours=1,
                            metric = default_metric,unique = True):
    """ Pick N images along the straight line between two images in feature space.

        Reads the notebook-level `feature_dict`, `image_names` and `features`.

        start_image:  filename of the first image of the path
        end_image:    filename of the last image of the path
        N:            number of intermediate steps
        N_neighbours: at every step one image is drawn at random from the
                      N_neighbours images closest to the interpolated feature
        metric:       distance metric to use
        unique:       if True, images already on the path (including start
                      and end) are not selected again

        Returns [start_image, <N chosen filenames>, end_image].
        Raises ValueError when a step has no candidate image left.
        """
    start_feature = feature_dict[start_image]
    end_feature = feature_dict[end_image]
    path_images = []
    # Indices of images already on the path, stored as plain ints in a set.
    # This gives a well-defined and fast membership test, instead of comparing
    # integers against the arrays returned by np.where.
    used_inds = set()
    for endpoint in (start_image, end_image):
        used_inds.update(int(ind) for ind in np.flatnonzero(image_names == endpoint))
    for i in range(1,N+1):
        # linear interpolation: the weight moves from start to end in equal steps
        w2 = i/(N+1)
        w1 =  1 -  w2
        feature = start_feature * w1 + end_feature * w2
        distances =  compute_distances(feature,features,metric)
        order = np.argsort(distances)
        if unique:
            order = [o for o in order if int(o) not in used_inds]
        candidates = order[:N_neighbours]
        if len(candidates) == 0:
            # choice() would fail here with an unexplained IndexError
            raise ValueError(
                "no candidate image left at step %d of %d: N_neighbours must be "
                "at least 1 and, with unique=True, there must be more images "
                "than path positions" % (i, N))
        chosen = choice(candidates)
        used_inds.add(int(chosen))
        path_images.append(image_names[chosen])
    return [start_image] + path_images + [end_image]

In [None]:
# Pick one image name at random and display that image with its name as
# title, e.g. to choose a start or end image for the path.
random_image = choice(image_names)
# The name is stripped of surrounding single quotes before it is used as a
# file name inside the image folder.
im_name = os.path.join(scraped_images_folder, random_image.strip("'"))
im = image.load_img(im_name, target_size=(224,224))
plt.imshow(im)
preview_axes = plt.gca()
preview_axes.set_axis_off()
preview_axes.set_title(random_image)

### Linear representation


In [None]:
# Settings for the path that is shown as a single row of images.
start_image = 'imagename.jpg' # random = choice(image_names)
end_image = 'imagename.jpg'
N = 9 # intermediate steps
N_neighbours = 1
metric = 'cosine' # choose metric
unique = True # function is forced to select the next image that has not yet been selected

# Resulting list: start image, N intermediate images, end image.
image_path = find_neighbours_on_path(
    start_image,
    end_image,
    N,
    N_neighbours=N_neighbours,
    metric=metric,
    unique=unique,
)

In [None]:
# One row of N+2 panels: start image, the N intermediate images, end image.
plt.figure(figsize=(30, 25))
for panel, image_name in enumerate(image_path, start=1):
    plt.subplot(1, N + 2, panel)
    im_name = os.path.join(scraped_images_folder, image_name.strip("'"))
    im = image.load_img(im_name, target_size=(224,224))
    plt.imshow(im)
    panel_axes = plt.gca()
    panel_axes.set_axis_off()
    panel_axes.set_title(image_name)

### Representation as scatterplot in 2D projection of the latent space

In [None]:
# Settings for the path that is shown as a 2D scatter plot: many steps and a
# random pick among the closest images at every step.
start_image = 'imagename.jpg' # random = choice(image_names)
end_image = 'imagename.jpg'
N = 100 # intermediate steps
N_neighbours = 5
metric = 'cityblock' # choose metric
unique = True # function is forced to select the next image that has not yet been selected

# Resulting list: start image, N intermediate images, end image.
image_path = find_neighbours_on_path(
    start_image,
    end_image,
    N,
    N_neighbours=N_neighbours,
    metric=metric,
    unique=unique,
)

In [None]:
randseed = 0 # set the initialization of the random number generator
np.random.seed(randseed)
projection = 'tsne' # 'pca' or 'tsne'
zoom = 0.5  # size of the images

# Feature vectors of the images on the path, in path order.
path_features = np.array([feature_dict[img] for img in image_path])

# Look the projection up in a table and fail loudly on any other value.
# Otherwise `transformer` would be undefined, or silently left over from an
# earlier run of this cell.
transformer_classes = {'pca': PCA, 'tsne': TSNE}
if projection not in transformer_classes:
    raise ValueError("projection must be 'pca' or 'tsne', got %r" % (projection,))
transformer = transformer_classes[projection](n_components = 2)
transformed_features = transformer.fit_transform(path_features)

# Draw every image of the path at its projected 2D position.
plt.figure(figsize = (30,30))
ax = plt.subplot(111)
for pos,image_name in zip(transformed_features,image_path):
    im_name = os.path.join(scraped_images_folder, image_name.strip("'"))
    img = image.load_img(im_name, target_size=(224,224))
    im = OffsetImage(img, zoom=zoom)
    ab = AnnotationBbox(im, pos, xycoords='data', frameon=False)
    ax.add_artist(ab)
# The data limits are extended by hand for the artists added above. This is
# done once for all points after the loop rather than once per image.
ax.update_datalim(transformed_features)
ax.autoscale()
plt.axis(False)