# ***MODOAP - Image Similarity***

This notebook calculates the distance/similarity between a given image and each image of a dataset stored in Google Drive.

Two steps to follow :
1. Creation of a similarity model from the images of the dataset
2. Submit a local or web image and request its n nearest neighbors from the model

The code is based on the turicreate library: https://github.com/apple/turicreate

**The runtime must be set to GPU (Runtime -> Change Runtime Type -> GPU)**

In [None]:
#@markdown ### Connect to Google Drive and install libraries

#@markdown On the first run, you may need to restart the runtime if asked.

from google.colab import drive
import os

if not os.path.exists("/content/drive/My Drive"):
  drive.mount('/content/drive')
else : print("Drive already mounted")

!apt install libnvrtc8.0
!pip install turicreate

import turicreate as tc
from IPython.display import display
from IPython.display import HTML
from PIL import Image
from io import BytesIO
import glob
import random
import base64
import pandas as pd
from zipfile import ZipFile
from shutil import copy
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from google.colab import files
from google.colab import drive
from time import gmtime, strftime
import shutil

def get_thumbnail(path):
  """Open the image file at `path` and reduce it to a thumbnail.

  The opened image is shrunk in place with `thumbnail((150, 150))` using the
  LANCZOS filter, then returned.
  """
  thumb = Image.open(path)
  max_size = (150, 150)
  thumb.thumbnail(max_size, Image.LANCZOS)
  return thumb
  
def get_thumbnail_from_image(img):
  """Return a thumbnail made from a copy of `img`.

  `img` itself is not modified: the copy is shrunk in place with
  `thumbnail((150, 150))` using the LANCZOS filter and returned.
  """
  thumb = img.copy()
  max_size = (150, 150)
  thumb.thumbnail(max_size, Image.LANCZOS)
  return thumb

def image_base64(im):
  """Return `im` encoded as a base64 JPEG string.

  Args:
    im: either a file path (str), which is first opened and reduced through
      `get_thumbnail`, or an image object exposing `save` (and, when its
      `mode` is not JPEG-compatible, `convert`), e.g. a PIL image.

  Returns:
    The JPEG bytes of the image, base64-encoded and decoded to `str`.
  """
  if isinstance(im, str):
    im = get_thumbnail(im)
  # JPEG cannot store an alpha channel or a palette: saving e.g. an RGBA or P
  # image (common for PNG/GIF files) raises an error, so convert to RGB first.
  if getattr(im, 'mode', 'RGB') not in ('RGB', 'L'):
    im = im.convert('RGB')
  with BytesIO() as buffer:
    im.save(buffer, 'jpeg')
    return base64.b64encode(buffer.getvalue()).decode()

def image_formatter(im):
  """Build an inline HTML <img> tag embedding `im` as a base64 JPEG data URI."""
  encoded = image_base64(im)
  return '<img style="display:inline;margin:1px" src="data:image/jpeg;base64,' + encoded + '">'
  
def preview_images(reference_data, num_previews=30):
  """Display thumbnails for the first `num_previews` rows of `reference_data`.

  The 'path' column of the slice `reference_data[0:num_previews]` is read; each
  path is turned into a thumbnail, formatted as an <img> tag, and all tags are
  rendered together as a single HTML output.
  """
  preview_paths = reference_data[0:num_previews]['path']
  tags = [image_formatter(get_thumbnail(image_path)) for image_path in preview_paths]
  display(HTML(''.join(tags)))

# GPU selection for turicreate: keep exactly one of the calls below active.
# -1 : use all GPUs (default)
tc.config.set_num_gpus(-1) 

# 1 : use only 1 GPU
#tc.config.set_num_gpus(1)

# 0 : use the CPU
#tc.config.set_num_gpus(0)

# Step 1

In [None]:

#@markdown ### Create a similarity model from a dataset
#@markdown Enter a path to a folder containing the image files :
corpus = "/content/drive/MyDrive/EDUC/Image_SImilarity_WS/images_kagan" #@param {type:"string"}
#@markdown Enter a path to a destination folder to save the model  :

destination_modele = "/content/drive/MyDrive/EDUC/Image_SImilarity_WS/modele2" #@param {type:"string"}

# Make sure the destination folder exists (no error if it is already there).
os.makedirs(destination_modele, exist_ok=True)

# Load the images from the `corpus` folder and add a row-number column; the
# query cell later matches neighbors against this column under the name 'id'.
reference_data = tc.image_analysis.load_images(corpus)
reference_data = reference_data.add_row_number()

# From the path name, create a label column: the name of the parent folder.
reference_data['label'] = reference_data['path'].apply(lambda path: path.split('/')[-2])

# Save the SFrame for future use
reference_data.save(os.path.join(destination_modele, 'reference_data.sframe'))

# Show the number of images per label and the first rows. Only the last
# expression of a cell is rendered automatically, so these summaries must go
# through display(); as bare expressions they were computed and then discarded.
label_counts = reference_data.groupby('label', [tc.aggregate.COUNT]).sort("Count", ascending=False)
display(label_counts)
display(reference_data.head())

# Create an image similarity model using the data, then save it next to the SFrame.
model = tc.image_similarity.create(reference_data)
model.save(os.path.join(destination_modele, 'image_similarity.model'))

In [None]:
#@markdown ### Import a model
#@markdown If you've already created a model you can import its two files.

#@markdown Enter the file paths :

sframe = "" #@param {type:"string"}
modele = "" #@param {type:"string"}

# Load a previously saved model only when both paths have been filled in.
# With the default empty fields, loading would fail and interrupt a "Run all";
# in that case this cell is skipped and any `reference_data` / `model` already
# created in Step 1 are left untouched.
if sframe.strip() and modele.strip():
  reference_data = tc.load_sframe(sframe.strip())
  model = tc.load_model(modele.strip())
else:
  print("No model imported: fill in both file paths to load a saved model.")

In [None]:
#@markdown ### Dataset preview (optional)

#@markdown Number of images to preview :
# Slider value from the form; it is cast to int before being passed on.
img = 40 #@param {type:"slider", min:0, max:40, step:1}

# Render thumbnails for the first `img` rows of `reference_data`, which must
# already exist (created in Step 1 or loaded by the import cell).
preview_images(reference_data, int(img))

# Step 2

In [None]:
#@markdown ### Request the N nearest neighbors of an image

#@markdown Enter the path of a local image or the URL of a web-based image :
image_url = "" #@param {type:"string"}

#@markdown Example :
#@markdown https://apprendre-la-photo.fr/wp-content/uploads/2011/06/photo-portrait_apprendre-la-photo_laurent-breillat-6-947x511.jpg

#@markdown Enter the value of N : the number of similar images to show
N = "3"#@param {type:"string"}

# Validate the form inputs first so that a mistake yields a clear message
# instead of an opaque toolkit error further down.
if not image_url.strip():
  raise ValueError("image_url is empty: enter the path or URL of an image.")
nb_images = int(N)
if nb_images < 1:
  raise ValueError("N must be a positive integer.")

print("Image request : ")
sample_image = tc.Image(image_url.strip())

# Tall images are shown as a reduced copy. The copy keeps the aspect ratio and
# is used for display only: the query below always runs on the original image
# rather than on a squashed, downsampled version of it.
if int(sample_image.height) > 600:
  preview_height = 240
  preview_width = max(1, round(sample_image.width * preview_height / sample_image.height))
  display(tc.image_analysis.resize(sample_image, preview_width, preview_height))
else:
  display(sample_image)

print("-----------------------------------------")
query_results = model.query(sample_image, k=nb_images)

# Keep the neighbors of the single query image (query_label 0); their
# reference labels are matched against the 'id' column of reference_data.
similar_rows = query_results[query_results['query_label'] == 0]['reference_label']
similar_rows_data = reference_data.filter_by(similar_rows, 'id')
# Build the id -> path lookup once instead of filtering the SFrame per neighbor.
path_by_id = dict(zip(similar_rows_data['id'], similar_rows_data['path']))

print("--------------------------------------------------------------")
print("Showing {} nearest neighbors : ".format(nb_images))
print("--------------------------------------------------------------")
# Iterate over `similar_rows` (not the filtered SFrame) so the images are shown
# in the order returned by the query. Each file is checked individually, so a
# single missing file no longer hides the neighbors that are available.
for idi in similar_rows:
  chem1 = path_by_id[idi]
  if os.path.isfile(chem1):
    with Image.open(chem1) as piil:
      display(piil)
  else:
    print(chem1)
    print("(Problem in importation) ")
