## Generating embeddings using our TNN

In [7]:
import keras
import tensorflow as tf

print('Keras', keras.__version__)
print('Tensorflow', tf.__version__)

Keras 2.2.4
Tensorflow 2.0.0-beta1


In [13]:
IMAGE_SHAPE = (128, 128)

## Auxiliary methods

**Method for processing input images**

In [16]:
def processing_image( image_path, image_shape ):
    
    import cv2
    
    '''
    Method for generating input image
    Args:
        image_path - full path for image
        image_shape - tuple (w, h) with new image width and hight
    Return:
        processed_image - a binary image with size (w, h)
    '''
    # Read image as grayscale
    image = cv2.imread( image_path, cv2.IMREAD_GRAYSCALE )
    
    # Binarize image through Otsu's thresholding. 
    # Background will be back and signature will be white.
    _, image_bin = cv2.threshold( image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU )
    
    # Compute signature bounding box
    points = cv2.findNonZero( image_bin )
    x, y, w, h = cv2.boundingRect( points )
    
    # Crop image according to boundingbox
    proc_image = image_bin[y:y+h, x:x+w]
    
    resized_image = cv2.resize( proc_image, image_shape )
    
    return resized_image

**Method for computing Euclidian distance between vectors (embedings)**

In [55]:
def distance( embedings ):
    
    from sklearn.metrics.pairwise import euclidean_distances
    
    lenght = embedings.shape[-1]
    
    anchor   = embedings[:, 0               : int(lenght*1/3) ]
    positive = embedings[:, int(lenght*1/3) : int(lenght*2/3) ]
    negative = embedings[:, int(lenght*2/3) : int(lenght*3/3) ]
    
     # distance between the anchor and the positive
    positive_dist = euclidean_distances(ancho, positive)

    # distance between the anchor and the negative
    negative_dist = euclidean_distances(anchor, negative)
    
    return positive_dist, negative_dist    

## Loading model

**Custom function**   
When loading a model which we used a custom functin, we have to pass this function(s) in the load_model function.

In [5]:
def triplet_loss( labels, embedings, marging = 0.4 ):
    """
    Implementation of the triplet loss function
    Args:
        labels -- true labels, required when you define a loss in Keras. In fact, we don't need it in this function.
        embedings -- python list containing three objects:
                  anchor   -- the embedings for the anchor data
                  positive -- the embedings for the positive data (similar to anchor)
                  negative -- the embedings for the negative data (different from anchor)
    Returns:
    loss -- real number, value of the loss
    """
    lenght = embedings.shape[-1]
    
    anchor   = embedings[:, 0               : int(lenght*1/3) ]
    positive = embedings[:, int(lenght*1/3) : int(lenght*2/3) ]
    negative = embedings[:, int(lenght*2/3) : int(lenght*3/3) ]

    # distance between the anchor and the positive
    positive_dist = K.sum( K.square(anchor - positive), axis = 1 )

    # distance between the anchor and the negative
    negative_dist = K.sum( K.square(anchor - negative), axis = 1 )

    # compute loss
    loss = K.maximum( (positive_dist - negative_dist + marging), 0.0 )
 
    return loss

**Loading model and printing its summary**

In [8]:
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K

# load model.
# We have to pass the custom functions in the load_model function
model = load_model( 'trained_model.h5', custom_objects={'triplet_loss': triplet_loss} )

# summarize model.
model.summary()

W0718 12:48:08.989509 11908 deprecation.py:323] From C:\Users\gefausti\AppData\Local\Continuum\anaconda3\envs\ML-Env-Py37\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
anchor_input (InputLayer)       [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
positive_input (InputLayer)     [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
negative_input (InputLayer)     [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 128)          70399616    anchor_input[0][0]               
                                                                 positive_input[0][0]         

## Loading data

In [36]:
import pandas as pd
testing_sign = pd.read_csv( '../../dataset/signatures/X_test.csv' )
print( testing_sign.shape )
testing_sign.head()

(10019, 3)


Unnamed: 0,Anchor,Positive,Negative
0,original_45_3.png,original_45_19.png,forgeries_45_19.png
1,original_37_18.png,original_37_15.png,forgeries_37_15.png
2,original_24_11.png,original_24_24.png,forgeries_24_24.png
3,original_33_20.png,original_33_3.png,forgeries_33_3.png
4,original_2_9.png,original_2_22.png,forgeries_2_22.png


In [37]:
genuine_signatures_path = '../../dataset/signatures/full_org/'
forged_signatures_path = '../../dataset/signatures/full_forg/'

In [43]:
from tqdm import tqdm

results_df = pd.DataFrame( columns = ['PositiveDistances', 'NegativeDistances'] )

for index, row in tqdm( testing_sign.iterrows() ):

    anchor = processing_image( genuine_signatures_path + row['Anchor'], IMAGE_SHAPE )   
    positive = processing_image( genuine_signatures_path + row['Positive'], IMAGE_SHAPE )   
    negative = processing_image( forged_signatures_path + row['Negative'], IMAGE_SHAPE )   

    anchor = anchor / 255.
    positive = positive / 255.
    negative = negative / 255.
    
    embedings = model.predict( [ anchor.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1) ,
                                 positive.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1),
                                 negative.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1) ] )
    
    p_dist, n_dist = distance( embedings )
    
    results_df = results_df.append( { 'PositiveDistances' : round( p_dist[0][0], 2 ),
                                      'NegativeDistances' : round( n_dist[0][0], 2 )
                                    }, ignore_index=True )

10019it [3:54:48,  1.33it/s] 


In [46]:
results_df.describe()

Unnamed: 0,PositiveDistances,NegativeDistances
count,10019.0,10019.0
mean,2.660647,5.543197
std,1.046132,2.105239
min,0.62,1.21
25%,1.81,3.92
50%,2.56,5.61
75%,3.4,7.025
max,7.04,13.75


**Saving results**

In [47]:
results_df.to_csv( 'signatures_similarity_tnn.csv' ,  index='False' )

## Testing it in another dataset

In [49]:
sample_signatures_df = pd.read_csv( '../../dataset/sample_Signature/sample_signatures.csv' )
sample_signatures_df.head()

Unnamed: 0,PersonID,GenuineSignature,ForgedSignature
0,1,"NFI-00101001.png,NFI-00102001.png,NFI-00103001...","NFI-00301001.png,NFI-00302001.png,NFI-00303001..."
1,2,"NFI-00201002.png,NFI-00202002.png,NFI-00203002...","NFI-00301002.png,NFI-00302002.png,NFI-00303002..."
2,3,"NFI-00301003.png,NFI-00302003.png,NFI-00303003...","NFI-00401003.png,NFI-00402003.png,NFI-00403003..."
3,4,"NFI-00401004.png,NFI-00402004.png,NFI-00403004...","NFI-08801004.png,NFI-08802004.png,NFI-08803004..."
4,5,"NFI-00501005.png,NFI-00502005.png,NFI-00503005...","NFI-00301005.png,NFI-00302005.png,NFI-00303005..."


In [53]:
results_2_df = pd.DataFrame( columns = ['PersonID', 'PositiveDistances', 'NegativeDistances'] )

genuine_img_path = '../../dataset/sample_Signature/genuine/'
forged_img_path = '../../dataset/sample_Signature/forged/'

for idx in tqdm(sample_signatures_df.index):
    
    genuine_signatures = sample_signatures_df.loc[ idx, 'GenuineSignature'].split(',')
    forged_signatures = sample_signatures_df.loc[ idx, 'ForgedSignature'].split(',')
    
    anchor = processing_image( genuine_img_path + genuine_signatures[0], IMAGE_SHAPE )
        
    for positive, negative in zip(genuine_signatures[1:], forged_signatures[1:]):
        
        positive = processing_image( genuine_img_path + positive, IMAGE_SHAPE) 
        negative = processing_image( forged_img_path + negative, IMAGE_SHAPE )
        
        embedings = model.predict( [ anchor.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1) ,
                                     positive.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1),
                                     negative.reshape(-1, IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1) ] )
        
        p_dist, n_dist = distance( embedings )
        
        results_2_df = results_2_df.append( {'PersonID' : idx+1, 
                                             'PositiveDistances' : round(p_dist[0][0], 2), 
                                             'NegativeDistances' : round(n_dist[0][0], 2)
                                             }, ignore_index=True )

100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:40<00:00,  4.45s/it]


In [54]:
results_2_df.describe()

Unnamed: 0,PersonID,PositiveDistances,NegativeDistances
count,119.0,119.0,119.0
mean,15.546218,1628.456135,1905.50034
std,8.713663,927.667119,967.386199
min,1.0,275.220001,257.579987
25%,8.0,939.495026,1163.175049
50%,16.0,1328.079956,1758.0
75%,23.0,2158.869995,2563.644897
max,30.0,4438.839844,4149.470215
