## Importing libraries

In [None]:
# Notebook setup: (re)load the autoreload extension in mode 2 so edited modules
# are re-imported before each cell runs, and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import pandas as pd
import pickle
import numpy as np
from fastai.vision import *
from fastai.callbacks.hooks import *
import matplotlib.pyplot as plt
from lshash import LSHash
from PIL import Image
from tqdm import tqdm_notebook
from pathlib import Path
# Let pandas display up to 500 columns before truncating wide frames.
pd.set_option('display.max_columns', 500)

In [None]:
import sys
# Ensure tatrec package is in the path.
# The entry is built with pathlib (`Path` is imported above) instead of `os`,
# which this notebook never imports explicitly. The membership check keeps
# re-running this cell from appending duplicate entries to sys.path.
tatrec_dir = str(Path.cwd() / ".." / "tatrec")
if tatrec_dir not in sys.path:
    sys.path.append(tatrec_dir)

In [None]:
from tatrec.notebook_funcs import get_data_from_folder, print_data_classes_size, plot_similar_tats
from tatrec.config import path_models, path_train_chicago, path_train_cnn, path_data_clean

In [None]:
# Shared configuration used by the data loading and learner cells below.
bs = 128   # batch size
arch = models.resnet50   # architecture handed to cnn_learner

## Loading Data and Model

In [None]:
# Transforms applied when loading the data, with vertical flipping turned off.
tfms = get_transforms(flip_vert = False)

In [None]:
# Use same dataset used to train the model originally
# NOTE(review): the literal 64 is presumably the image size expected by
# get_data_from_folder — confirm against tatrec.notebook_funcs.
data = get_data_from_folder(path_train_cnn, bs, 64, tfms)

In [None]:
# Build the learner on the original training data with the configured
# architecture, reporting error rate as its metric.
learn = cnn_learner(data, arch, metrics=error_rate)

In [None]:
# Load the saved "tatrec-stage-2-1" checkpoint from the models directory.
learn.load(path_models / "tatrec-stage-2-1")

In [None]:
# Point the learner at the Chicago dataset (loaded with the same batch size,
# size argument and transforms as above), replacing the data it was built on.
# Note that `data` is rebound here: from this cell on it refers to the Chicago set.
data = get_data_from_folder(path_train_chicago, bs, 64, tfms)
learn.data = data
# NOTE(review): presumably clears state cached for the previous data — confirm
# against the fastai documentation for Learner.purge.
learn.purge()

In [None]:
# Summarise the classes and their sizes for the currently loaded dataset.
# NOTE(review): exact output format is defined in tatrec.notebook_funcs.
print_data_classes_size(data)

In [None]:
## Show sample data
# Display 4 rows of samples from the current dataset with the axes left visible.
data.show_batch(rows=4, figsize=(10,9), hide_axis=False) 

## Extracting Feature

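Creating a forward hook on `learn.model[1][5]`, a layer in the model head that sits after the convolutional part of ResNet-50 and its pooling layer and just before the last fully connected layer. The hook captures one feature vector per image. These vectors are 512-dimensional (the `d = 512` used for LSH below), not 4096, and the images are loaded with `64` passed to `get_data_from_folder` above rather than at 256*256.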

In [None]:
# this is a hook (used for saving intermediate computations)
# used to extract before the last FC layer for use in similarity matching
class SaveFeatures():
    """Accumulate a module's forward outputs into a single NumPy array.

    A forward hook is registered on `m`. On every forward pass the module's
    output is detached, moved to the CPU, converted to NumPy and stacked
    row-wise under whatever has been collected so far in `self.features`.
    `features` is `None` until the first forward pass; assign `None` to it
    again to start a fresh accumulation.
    """
    features = None

    def __init__(self, m):
        # Keep the handle so the hook can be detached later through remove().
        self.hook = m.register_forward_hook(self.hook_fn)
        self.features = None

    def hook_fn(self, module, input, output):
        """Forward-hook callback: append this pass's output to `self.features`."""
        out = output.detach().cpu().numpy()
        if self.features is None:
            self.features = out
        else:
            # np.vstack is the function np.row_stack aliased; the alias is
            # deprecated as of NumPy 2.0.
            self.features = np.vstack((self.features, out))

    def remove(self):
        """Detach the forward hook from the module."""
        self.hook.remove()
        
# Attach the feature-saving hook to learn.model[1][5]; every forward pass of the
# model from here on adds that module's output to sf.features.
sf = SaveFeatures(learn.model[1][5]) ## Output before the last FC layer

Creating Feature Vector

In [None]:
# Running this stores the feature vectors in the `sf` variable initiated above.
# Clear anything the hook captured during earlier forward passes first, so that
# re-running this cell does not stack duplicate rows onto sf.features.
sf.features = None
# DatasetType.Fix selects the training set in fixed order (no shuffling), so the
# captured rows line up with data.train_ds.items. The previous call passed the
# dataset object where a DatasetType is expected and only reached this same
# loader by falling through the DatasetType comparisons.
preds = learn.get_preds(ds_type=DatasetType.Fix)[0]

In [None]:
# Shows the fraction of tattoos that are recognized as not a tattoo, i.e. images
# whose predicted probability for class index 1 is below 0.5.
# NOTE(review): assumes class index 1 is the tattoo class — confirm with data.classes.
# The old expression took len() of the tuple returned by np.where (always 1), so it
# printed 0 regardless of the predictions; count the matching rows directly instead.
n_below = int((preds[:, 1] < 0.5).sum())
print(n_below / len(preds))

Converting into a dictionary of {img_path: feature vector}

In [None]:
# Training-set item paths as strings, in dataset order.
img_path = [str(x) for x in data.train_ds.items]
# zip() silently truncates to the shorter input, which would hide a mismatch
# between images and captured features (e.g. the hook having run more than once).
# Require exactly one feature row per image before pairing them up.
assert len(img_path) == len(sf.features), (
    "expected one feature row per image, got {} rows for {} images".format(
        len(sf.features), len(img_path)
    )
)
feature_dict = dict(zip(img_path, sf.features))

In [None]:
## Exporting as pickle
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes, instead of leaving the handle open.
with open(path_data_clean/"feature_dict.pkl", "wb") as f:
    pickle.dump(feature_dict, f)

## Using Locality Sensitive hashing to find near similar images

In [None]:
## Loading Feature dictionary
# Context manager closes the file handle after loading.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(path_data_clean/"feature_dict.pkl", 'rb') as f:
    feature_dict = pickle.load(f)

In [None]:
## Locality Sensitive Hashing
# params
k = 30 # hash size
L = 5  # number of tables
d = 512 # Dimension of Feature vector
lsh = LSHash(hash_size=k, input_dim=d, num_hashtables=L)

# LSH on all the images: index each flattened feature vector and store its image
# path as the extra data attached to that entry.
# The loop variable is named `path` so it does not overwrite the `img_path` list
# built earlier in the notebook.
for path, vec in tqdm_notebook(feature_dict.items()):
    lsh.index(vec.flatten(), extra_data=path)

In [None]:
## Exporting as pickle
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes, instead of leaving the handle open.
with open(path_models/'lsh.pkl', "wb") as f:
    pickle.dump(lsh, f)

## Visualizing Output

In [None]:
## Loading Feature and lsh dictionaries
# Context managers close each file handle after loading.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(path_data_clean/'feature_dict.pkl', 'rb') as f:
    feature_dict = pickle.load(f)
with open(path_models/'lsh.pkl', 'rb') as f:
    lsh = pickle.load(f)

In [None]:
# Query item handed to plot_similar_tats.
# NOTE(review): presumably a positional index into feature_dict — confirm
# against tatrec.notebook_funcs.
extract_item = 438

In [None]:
# Plot the tattoos most similar to the chosen query item using the LSH index.
# NOTE(review): the trailing 5 is presumably the number of matches to show —
# confirm against tatrec.notebook_funcs.
plot_similar_tats(extract_item, feature_dict, lsh, 5)