In [77]:
import os

import requests
from PIL import Image
import imagehash
import faiss
import numpy as np
import pandas as pd
from faiss import IndexBinaryFlat

In [98]:
def load_index(filename:str = 'faiss_index', hash_size: int = 16) -> IndexBinaryFlat:
    """Load the binary index stored for this hash size, or start a new one.

    The index lives in a file named ``<filename>_<hash_size**2>``, the same
    name that ``save_index`` writes to.

    Args:
        filename: prefix of the index file name.
        hash_size: side length of the image hash; the index dimension is
            ``hash_size**2`` bits.

    Returns:
        The index read from disk. If faiss raises ``RuntimeError`` while
        reading (presumably because nothing has been saved yet), a new empty
        ``IndexBinaryFlat`` of the matching dimension is returned instead.
    """
    n_bits = hash_size ** 2
    index_path = f'{filename}_{n_bits}'
    try:
        loaded = faiss.read_index_binary(index_path)
    except RuntimeError:
        # Nothing readable at index_path: fall back to an empty index.
        loaded = faiss.IndexBinaryFlat(n_bits)
    return loaded

def save_index(index: IndexBinaryFlat, filename:str = 'faiss_index', hash_size: int = 16) -> None:
    """Write ``index`` to the file ``<filename>_<hash_size**2>``.

    Args:
        index: the binary index to persist.
        filename: prefix of the index file name.
        hash_size: side length of the image hash; its square is appended to
            ``filename`` to form the file name.
    """
    n_bits = hash_size ** 2
    target_path = f'{filename}_{n_bits}'
    faiss.write_index_binary(index, target_path)

def hash_image(im: Image.Image, hash_size: int) -> np.ndarray:
    """Compute the average hash of an image, packed into bytes.

    Args:
        im: the PIL image to hash.
        hash_size: side length passed to ``imagehash.average_hash``; the
            resulting hash has ``hash_size**2`` bits.

    Returns:
        A ``uint8`` array with a single row holding the ``hash_size**2`` hash
        bits packed eight per byte.

    Note:
        faiss binary indexes expect a bit count that is a multiple of 8, so
        ``hash_size**2`` should be one as well.
    """
    im_hash = imagehash.average_hash(im, hash_size=hash_size)
    # Flatten the boolean hash grid into one row before packing.
    bits = np.asarray(im_hash.hash).reshape(1, hash_size ** 2)
    return np.packbits(bits, axis=1)

def check_duplicate(index: "IndexBinaryFlat", img_hash: np.ndarray, thresh: int, k: int = 2) -> bool:
    """Report whether the index holds a hash within ``thresh`` bits of ``img_hash``.

    Args:
        index: binary index to query.
        img_hash: packed hash to look up, with one row per query (the shape
            ``hash_image`` returns).
        thresh: largest Hamming distance (number of differing bits) that
            still counts as a duplicate.
        k: number of nearest neighbours to fetch and print. Only the closest
            one decides the result; the default of 2 also shows the nearest
            other entry when the queried hash is itself stored in the index.

    Returns:
        True if at least one returned neighbour is at distance ``<= thresh``,
        otherwise False.

    Note:
        A hash that has already been added to the index matches itself at
        distance 0, so call this before adding the hash when checking a new
        image.
    """
    distances, ids = index.search(img_hash, k)
    print(ids)
    print(distances)
    print('---------------------------------')
    # faiss pads the result with id -1 when fewer than k neighbours exist;
    # those slots are not real matches and must not count.
    is_real_match = ids >= 0
    return bool(np.any(is_real_match & (distances <= thresh)))

In [99]:
hash_size = 8
index_name = 'faiss_index'
image_dir = 'images/test2/'
index = load_index(index_name, hash_size)
hashes = []
# NOTE: load_index returns a previously saved index when one can be read, and
# the loop below appends to whatever it returned.
# File names are expected to look like "<number>.<ext>"; files are processed
# in ascending numeric order, and any other name makes int() raise ValueError.
for name in sorted(os.listdir(image_dir), key=lambda fname: int(fname.split('.')[0])):
    # The context manager closes the image file once it has been hashed.
    with Image.open(f"{image_dir}{name}") as file:
        image_hash = hash_image(file, hash_size)
    index.add(image_hash)
    hashes.append(image_hash)
hashes = np.array(hashes)

In [100]:
# Look up every collected hash in the index; check_duplicate prints the
# neighbour ids and distances it gets back. All of these hashes were added to
# the index in the previous cell, so each query has a match at distance 0
# (itself).
for query_hash in hashes:
    check_duplicate(index, query_hash, 2)

[[0 1]]
[[0 7]]
---------------------------------
[[1 0]]
[[0 7]]
---------------------------------
[[2 8]]
[[0 3]]
---------------------------------
[[3 9]]
[[0 4]]
---------------------------------
[[4 1]]
[[ 0 18]]
---------------------------------
[[5 4]]
[[ 0 18]]
---------------------------------
[[6 4]]
[[ 0 20]]
---------------------------------
[[7 1]]
[[ 0 21]]
---------------------------------
[[8 2]]
[[0 3]]
---------------------------------
[[9 3]]
[[0 4]]
---------------------------------
[[10  9]]
[[0 4]]
---------------------------------
[[11 12]]
[[0 8]]
---------------------------------
[[12 11]]
[[0 8]]
---------------------------------
[[13 12]]
[[ 0 20]]
---------------------------------
[[14 12]]
[[ 0 15]]
---------------------------------
[[15 12]]
[[ 0 18]]
---------------------------------
