## Importing libraries

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import pandas as pd
import pickle
import numpy as np
from fastai.vision import *
from fastai.callbacks.hooks import *
import matplotlib.pyplot as plt
from lshash import LSHash
from PIL import Image
from tqdm import tqdm_notebook
from pathlib import Path
pd.set_option('display.max_columns', 500)

In [None]:
import sys
# Ensure tatrec package is in the path: append the "tatrec" directory that
# sits next to the current working directory.
# The path is built with pathlib (already imported above) because no explicit
# `import os` is visible in this notebook; `os` presumably only arrives via
# the fastai star imports, which is fragile.
# NOTE(review): the later `from tatrec... import` lines only resolve if a
# `tatrec` package is reachable from this entry - confirm the repo layout.
sys.path.append(str(Path.cwd() / ".." / "tatrec"))

In [None]:
from tatrec.notebook_funcs import (get_data_from_folder, print_data_classes_size, 
                                   plot_similar_tats_idx)
from tatrec.config import (path_models, path_train_chicago, path_train_cnn, 
                           path_data_clean, path_models_chicago, path_cleaned_chicago)

In [None]:
# Model configuration (neither value is referenced by the LSH cells visible below)
arch = models.resnet50  # network architecture
bs = 128  # batch size

## Using Locality-Sensitive Hashing to find near-similar images

In [None]:
## Loading Feature dictionary
# The cells below iterate this dict as key -> vector and call `.flatten()` on each value.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project's own pipeline.
# The `with` block closes the file handle as soon as loading finishes.
with open(path_cleaned_chicago/"feature_dict.pkl", "rb") as feature_file:
    feature_dict = pickle.load(feature_file)

In [None]:
## Locality Sensitive Hashing
# Parameters; `k` and `L` are also reused to build the exported model's file name.
k = 30   # hash size
L = 5    # number of tables
d = 512  # dimension of feature vector; must match the flattened vectors indexed below
lsh_params = {"hash_size": k, "input_dim": d, "num_hashtables": L}
lsh = LSHash(**lsh_params)

In [None]:
# Index every feature vector in the LSH tables, storing its dict key
# (img_path) as the entry's extra data; tqdm shows progress in the notebook.
indexing_progress = tqdm_notebook(feature_dict.items())
for img_path, vec in indexing_progress:
    flat_vec = vec.flatten()  # the index is fed the flattened vector
    lsh.index(flat_vec, extra_data=img_path)

In [None]:
## Exporting as pickle
# The file name encodes the hash size and the number of tables, e.g. "lsh30-5.pkl".
lsh_model_name = 'lsh{}-{}.pkl'.format(k, L)
lsh_save_path = path_models_chicago/lsh_model_name
# An existing export is never overwritten: to re-export with the same k and L
# (for example after rebuilding feature_dict), delete the old file first.
if not Path(lsh_save_path).is_file():
    # `with` guarantees the pickle is flushed and the handle closed.
    with open(lsh_save_path, "wb") as lsh_file:
        pickle.dump(lsh, lsh_file)

## Visualizing Output

In [None]:
# Use a sample of tattoos to visualize the differences between the parameters.
# Each value is passed as the first argument to plot_similar_tats_idx below
# (presumably an index into feature_dict - confirm in tatrec.notebook_funcs).
output_tats = [438, 1203, 1231, 1285, 1349, 1548, 1870, 2194, 3387, 4585, 5251, 6248]

In [None]:
# Show results for each sample tattoo with the "hamming" option.
# NOTE(review): the 4th argument (5) is presumably the number of similar
# images to show and the 5th the distance function used for the LSH query -
# confirm in tatrec.notebook_funcs.
distance_name = "hamming"
for item in output_tats:
    print(item)  # label the plot that follows with the sample's index
    plot_similar_tats_idx(item, feature_dict, lsh, 5, distance_name)

In [None]:
# Show results for each sample tattoo with the "l1norm" option.
# NOTE(review): the 4th argument (5) is presumably the number of similar
# images to show and the 5th the distance function used for the LSH query -
# confirm in tatrec.notebook_funcs.
distance_name = "l1norm"
for item in output_tats:
    print(item)  # label the plot that follows with the sample's index
    plot_similar_tats_idx(item, feature_dict, lsh, 5, distance_name)

In [None]:
# Show results for each sample tattoo with the "euclidean" option.
# NOTE(review): the 4th argument (5) is presumably the number of similar
# images to show and the 5th the distance function used for the LSH query -
# confirm in tatrec.notebook_funcs.
distance_name = "euclidean"
for item in output_tats:
    print(item)  # label the plot that follows with the sample's index
    plot_similar_tats_idx(item, feature_dict, lsh, 5, distance_name)