# Generating embeddings using VGG16

In [9]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

#from tensorflow.keras.applications.resnet50 import ResNet50
#from tensorflow.keras.applications.resnet50 import preprocess_input

from tensorflow.keras.preprocessing import image

from tensorflow.keras import Model

import numpy as np
from os import path

import tensorflow as tf
import keras

print('Keras', keras.__version__)
print('Tensorflow', tf.__version__)

Keras 2.2.4
Tensorflow 2.0.0-beta1


### Loading dataset

In [10]:
import pandas as pd

testing_sign = pd.read_csv( '../dataset/signatures/X_test.csv' )
print( testing_sign.shape )
testing_sign.head()

(10019, 3)


Unnamed: 0,Anchor,Positive,Negative
0,original_45_3.png,original_45_19.png,forgeries_45_19.png
1,original_37_18.png,original_37_15.png,forgeries_37_15.png
2,original_24_11.png,original_24_24.png,forgeries_24_24.png
3,original_33_20.png,original_33_3.png,forgeries_33_3.png
4,original_2_9.png,original_2_22.png,forgeries_2_22.png


### Loading model

Create new model using base model and remove last fully connected layer (classification layer). Thus, we are generating a embeding (vector representation of input image).


In [11]:
base = VGG16( weights='imagenet')
#base = ResNet50( weights='imagenet' )
base.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [12]:
from tensorflow.keras import Model

model = Model(base.input, [base.layers[-2].output])

In [13]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [15]:
def load_image( img_path ):
    
    from tensorflow.keras.preprocessing import image
    from tensorflow.keras.layers import Input
    
    img = image.load_img( img_path, target_size=(224, 224) )
    img = image.img_to_array( img )
    img = np.expand_dims( img, axis=0 )
    img = preprocess_input( img )
    
    return img

In [16]:
from tqdm import tqdm
from sklearn.metrics.pairwise import euclidean_distances

genuine_signatures_path = '../dataset/signatures/full_org/'
forged_signatures_path = '../dataset/signatures/full_forg/'

results_df = pd.DataFrame( columns = ['PositiveDistances', 'NegativeDistances'] )

for index, row in tqdm( testing_sign.iterrows() ): 

    anchor = load_image( genuine_signatures_path + row['Anchor'] )
    positive = load_image( genuine_signatures_path + row['Positive'] )
    negative = load_image( forged_signatures_path + row['Negative'] )
    
    anchor_emb = model.predict( anchor )
    positive_emb = model.predict( positive )
    negative_emb = model.predict( negative )
    
     # distance between the anchor and the positive
    positive_dist = euclidean_distances(anchor_emb, positive_emb)

    # distance between the anchor and the negative
    negative_dist = euclidean_distances(anchor_emb, negative_emb)
    
    results_df = results_df.append( { 'PositiveDistances' : round( positive_dist[0][0], 2 ),
                                      'NegativeDistances' : round( negative_dist[0][0], 2 )
                                    }, ignore_index = True )

10019it [2:39:37,  1.08it/s]


In [17]:
results_df.head()

Unnamed: 0,PositiveDistances,NegativeDistances
0,30.26,34.630001
1,20.610001,34.419998
2,34.639999,39.029999
3,38.84,47.080002
4,27.950001,40.209999


In [18]:
results_df.describe()

Unnamed: 0,PositiveDistances,NegativeDistances
count,10019.0,10019.0
mean,26.603494,38.555126
std,7.090846,7.354767
min,8.56,16.870001
25%,21.389999,33.295
50%,25.85,38.25
75%,31.005,43.555
max,57.790001,65.980003


In [19]:
results_df.to_csv( 'signatures_similarity_vgg.csv' ,  index='False' )