# Siamese Convolutional Neural Network<br>(Contrastive Loss)

In [None]:
from model import siamese_CNN_contrastive

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pickle
import numpy as np
from pandas import DataFrame

import tensorflow as tf
import keras.backend as K
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping

# plotting
from tensorflow.keras.utils import plot_model
import pydotplus as pydot
import matplotlib.pyplot as plt
%matplotlib inline

## Setting up datasets

In [None]:
def load_pickle(file):
    """Read a pickle file and return the object stored in it.

    Args:
        file -- str : path of the pickle file to open (read in binary mode).

    Returns:
        The object deserialized from the file.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file, mode='rb') as stream:
        contents = pickle.load(stream)
    return contents

In [None]:
def load_dataset(i):
    """Load the i-th dataset listed in the module-level PATHS table.

    Each PATHS entry holds three pickle paths, read in this order:
    training data, pairs, targets.

    Args:
        i -- int : index into PATHS selecting the dataset.

    Returns:
        data -- object unpickled from PATHS[i][0] (training data).
        pairs -- list : the first two elements of the object unpickled
            from PATHS[i][1].
        targets -- object unpickled from PATHS[i][2] (targets of the data).
    """
    print("\nLoading dataset...", end="")

    paths = PATHS[i]

    data = load_pickle(paths[0])  # training data

    # keep only the two sides of each pair, as a plain list
    loaded_pairs = load_pickle(paths[1])  # pairs of data
    pairs = [loaded_pairs[0], loaded_pairs[1]]

    targets = load_pickle(paths[2])  # targets of the data

    # report the requested index directly instead of searching PATHS for it
    print("dataset {0} loaded successfully!\n".format(i))

    return data, pairs, targets

In [None]:
def data_shapes():
    """Print size information about the currently loaded dataset.

    Reads the module-level `data`, `pairs` and `targets`; takes no
    arguments and returns nothing.
    """
    class_count = data.shape[0]
    print("\nNumber of classes               : ", class_count)

    # the first class is used as the representative for per-class counts
    first_class = data[0]
    print("Original signatures             : ", len(first_class[0]))
    print("Forged signatures               : ", len(first_class[1]))
    print("Image shape                     : ", first_class[0][0].shape)

    pair_count = pairs[0].shape[0]
    print("Total number of pairs           : ", pair_count)
    print("Number of pairs for each class  : ", pair_count // class_count)
    print("Targets shape                   : ", targets.shape)
    print()

In [None]:
def plot_13(id1, id2, id3, pairs_per_class=42):
    """Show one anchor image next to two comparison images.

    Reads the module-level `pairs` and `targets`.

    Args:
        id1 -- int : index into pairs[0] of the anchor image.
        id2 -- int : index into pairs[1] of the first comparison image.
        id3 -- int : index into pairs[1] of the second comparison image.
        pairs_per_class -- int : number of consecutive pairs belonging to
            one class; used only to derive the class number shown in the
            anchor title (default 42, the value previously hard-coded).
    """
    fig, axes = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(8, 8))

    # anchor image, titled with the class its index falls into
    axes[0].imshow(pairs[0][id1])
    axes[0].set_title(
        'Anchor image of class {0}'.format(id1 // pairs_per_class))

    # the two comparison images, each titled with its own target value
    for axis, idx in zip(axes[1:], (id2, id3)):
        axis.imshow(pairs[1][idx])
        axis.set_title('Target: {0}'.format(targets[idx]))

    fig.tight_layout()
    plt.show()

## Setting up models

In [None]:
def model_setup_contrastive(verbose=False):
    """Create the siamese CNN used with the contrastive loss.

    The model is built for inputs of shape (224, 224, 1). When the
    module-level IS_TPU flag is set, it is instantiated inside
    `tpu_strategy.scope()` so that it is created on the TPU.

    Args:
        verbose -- bool : when True, print the model summary and write the
            architecture diagram to "model_plot_contrastive.png".

    Returns:
        model : the object returned by siamese_CNN_contrastive.
    """
    input_shape = (224, 224, 1)

    if not IS_TPU:
        model = siamese_CNN_contrastive(input_shape)
    else:
        # instantiating the model in the strategy scope
        # creates the model on the TPU
        with tpu_strategy.scope():
            model = siamese_CNN_contrastive(input_shape)

    if not verbose:
        return model

    model.summary()
    tf.keras.utils.plot_model(
        model,
        to_file="model_plot_contrastive.png",
        show_shapes=True,
        show_layer_names=True,
    )
    return model

In [None]:
def model_setup_triplets(verbose=False):
    """Create the siamese CNN built by siamese_CNN_triplets.

    The model is built for inputs of shape (224, 224, 1). When the
    module-level IS_TPU flag is set, it is instantiated inside
    `tpu_strategy.scope()` so that it is created on the TPU.

    Args:
        verbose -- bool : when True, print the model summary and write the
            architecture diagram to "model_plot_triplets.png".

    Returns:
        model : the object returned by siamese_CNN_triplets.
    """
    # NOTE(review): siamese_CNN_triplets is not imported in the visible part
    # of this file -- confirm it is in scope before calling this function.
    input_shape = (224, 224, 1)

    if not IS_TPU:
        model = siamese_CNN_triplets(input_shape)
    else:
        # instantiating the model in the strategy scope
        # creates the model on the TPU
        with tpu_strategy.scope():
            model = siamese_CNN_triplets(input_shape)

    if not verbose:
        return model

    model.summary()
    tf.keras.utils.plot_model(
        model,
        to_file="model_plot_triplets.png",
        show_shapes=True,
        show_layer_names=True,
    )
    return model

In [None]:
def model_setup_quadruplets(verbose=False):
    """Create the siamese CNN built by siamese_CNN_quadruplets.

    The model is built for inputs of shape (224, 224, 1). When the
    module-level IS_TPU flag is set, it is instantiated inside
    `tpu_strategy.scope()` so that it is created on the TPU.

    Args:
        verbose -- bool : when True, print the model summary and write the
            architecture diagram to "model_plot_quadruplets.png".

    Returns:
        model : the object returned by siamese_CNN_quadruplets.
    """
    # NOTE(review): siamese_CNN_quadruplets is not imported in the visible
    # part of this file -- confirm it is in scope before calling this function.
    input_shape = (224, 224, 1)

    if not IS_TPU:
        model = siamese_CNN_quadruplets(input_shape)
    else:
        # instantiating the model in the strategy scope
        # creates the model on the TPU
        with tpu_strategy.scope():
            model = siamese_CNN_quadruplets(input_shape)

    if not verbose:
        return model

    model.summary()
    tf.keras.utils.plot_model(
        model,
        to_file="model_plot_quadruplets.png",
        show_shapes=True,
        show_layer_names=True,
    )
    return model

## Training

In [None]:
def model_training(model, weights_name):
    """Fit `model` on the loaded dataset and save its weights.

    Trains on the module-level `pairs` / `targets` with 30% of the samples
    held out for validation, stops early when `val_loss` has not improved
    for 3 epochs, appends the returned history to the module-level
    ALL_HISTORY list and writes the weights to
    './weights/<weights_name>.h5'.

    Args:
        model : model to train; must provide `fit` and `save_weights`.
        weights_name -- str : file name (without extension) for the weights.
    """
    print("\nStarting training!\n")

    # hyperparameters
    epochs = 100  # number of epochs
    batch_size = 128  # batch size

    # callbacks
    callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=1)]

    history = model.fit(
        pairs, targets,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        validation_split=0.3,
    )

    ALL_HISTORY.append(history)

    print("\nSaving weight for model...", end="")
    # make sure the target directory exists so a finished training run is
    # not lost to a missing folder
    weights_dir = './weights'
    os.makedirs(weights_dir, exist_ok=True)
    # save the model that was passed in, not whatever global happens to exist
    model.save_weights('{0}/{1}.h5'.format(weights_dir, weights_name))
    print("saved successfully!")

## Evaluation

In [None]:
def compute_accuracy_roc(predictions, labels):
    """Compute ROC accuracyand threshold.

    Also, plot FAR-FRR curves and P-R curves for input data.
    
    Args:
        predictions -- np.array : array of predictions.
        labels -- np.array : true labels (0 or 1).
        plot_far_frr -- bool : plots curves of True.
    
    Returns:
        max_acc -- float : maximum accuracy of model.
        best_thresh --float : best threshold for the model.
    """
    dmax = np.max(predictions)
    dmin = np.min(predictions)

    nsame = np.sum(labels == 1)  #similar
    ndiff = np.sum(labels == 0)  #different

    step = 0.01
    max_acc = 0
    best_thresh = -1

    frr_plot = []
    far_plot = []
    pr_plot = []
    re_plot = []

    ds = []
    for d in np.arange(dmin, dmax+step, step):
        idx1 = predictions.ravel() <= d  # guessed genuine
        idx2 = predictions.ravel() > d  # guessed forged

        tp = float(np.sum(labels[idx1] == 1))
        tn = float(np.sum(labels[idx2] == 0))
        fp = float(np.sum(labels[idx1] == 0))
        fn = float(np.sum(labels[idx2] == 1))

        tpr = float(np.sum(labels[idx1] == 1)) / nsame       
        tnr = float(np.sum(labels[idx2] == 0)) / ndiff
        
        
        acc = 0.5 * (tpr + tnr)
        pr = tp / (tp + fp)
        re = tp / (tp + fn)
       
        if (acc > max_acc):
            max_acc, best_thresh = acc, d

        far = fp / (fp + tn)
        frr = fn / (fn + tp)
        frr_plot.append(frr)
        pr_plot.append(pr)
        re_plot.append(re)
        far_plot.append(far)
        ds.append(d)

    plot_metrics = [ds, far_plot, frr_plot, pr_plot, re_plot]

    return max_acc, best_thresh, plot_metrics

In [None]:
def model_evaluation(model):
    """Score `model` on the loaded pairs and record the results.

    Runs `model.predict` on the module-level `pairs`, passes the output and
    the module-level `targets` to compute_accuracy_roc, and appends the
    resulting accuracy, threshold and plot metrics to the module-level
    ACCURACIES, THRESHOLDS and PLOTS lists respectively.

    Args:
        model : model to evaluate; must provide `predict`.
    """
    print("\nEvaluating model...", end="")

    best_acc, best_thresh, curves = compute_accuracy_roc(
        model.predict(pairs), targets)

    print("evaluation finished!\n")

    # store each result in its matching accumulator
    recorded = (
        (ACCURACIES, best_acc),
        (THRESHOLDS, best_thresh),
        (PLOTS, curves),
    )
    for store, value in recorded:
        store.append(value)

## Visualizing models

In [None]:
def visualize_history():
    """Plot training and validation loss for every recorded training run.

    Draws one subplot per entry of the module-level ALL_HISTORY list, each
    showing the 'loss' and 'val_loss' series of that entry's `history`
    dict. Prints a message and returns when no history has been recorded.
    """
    losses = ['loss', 'val_loss']

    if not ALL_HISTORY:
        print("No training history to plot.")
        return

    # squeeze=False keeps a 2-D axes array even when there is a single run
    fig, axes = plt.subplots(
        1, len(ALL_HISTORY),
        sharex=True, sharey=True,
        figsize=(13, 4),
        squeeze=False,
    )

    for axis, run in zip(axes[0], ALL_HISTORY):
        for key in losses:
            axis.plot(run.history[key])

        axis.set_title('Losses')
        axis.legend(losses)
        axis.grid(True)

    plt.tight_layout()

In [None]:
def evaluation_plots(metrics):
    """Plot FAR/FRR and precision/recall against the threshold.

    Args:
        metrics -- sequence : [thresholds, FAR, FRR, precision, recall], in
            the order produced by compute_accuracy_roc.
    """
    ds, far_plot, frr_plot, pr_plot, re_plot = metrics[:5]

    fig = plt.figure(figsize=(15, 7))

    # error rate
    ax = fig.add_subplot(121)
    ax.plot(ds, far_plot, color='red')
    ax.plot(ds, frr_plot, color='blue')
    ax.set_title('Error rate')
    ax.legend(['FAR', 'FRR'])
    ax.set(xlabel='Thresholds', ylabel='Error rate')

    # precision-recall curve
    ax1 = fig.add_subplot(122)
    ax1.plot(ds, pr_plot, color='green')
    ax1.plot(ds, re_plot, color='magenta')
    ax1.set_title('P-R curve')
    ax1.legend(['Precision', 'Recall'])
    # label the second axes (the original relabelled the first one again)
    ax1.set(xlabel='Thresholds', ylabel='Precision / Recall')

    plt.show()

## Everything put together

In [None]:
# directory holding the preprocessed pickle files
# (for kaggle use: DATA_DIR = '../usr/lib/preprocess')
DATA_DIR = os.path.join('data', 'pickle-files')

# dataset file prefixes, in the order they are trained and evaluated
DATASET_NAMES = ['cedar', 'bengali', 'hindi']

# paths to datasets: one [train, pairs, targets] entry per dataset,
# built with os.path.join so they work on every operating system
PATHS = [
    [
        os.path.join(DATA_DIR, '{0}_pairs1_{1}.pickle'.format(name, part))
        for part in ('train', 'pairs', 'targets')
    ]
    for name in DATASET_NAMES
]

# evaluation: one entry is appended to each list per trained dataset
ALL_HISTORY = []  # training histories
ACCURACIES = []  # best accuracies
THRESHOLDS = []  # thresholds giving the best accuracies
PLOTS = []  # per-threshold metrics for the evaluation plots

In [None]:
# Set to True to build the models inside a TPU distribution strategy;
# the model_setup_* functions read this flag.
IS_TPU = False

if IS_TPU:
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

    # instantiate a distribution strategy
    # NOTE(review): newer TensorFlow releases expose this class as
    # tf.distribute.TPUStrategy -- confirm against the installed version.
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
# weight-file name for each dataset, in the same order as PATHS
WEIGHTS_NAMES = [
    'siamese_contrastive_cedar',
    'siamese_contrastive_bengali',
    'siamese_contrastive_hindi',
]

for i, weights_name in enumerate(WEIGHTS_NAMES):
    # data, pairs and targets are module-level names read by the helpers
    data, pairs, targets = load_dataset(i)

    data_shapes()

    # preview samples from the first three blocks of 42 pairs
    # (presumably 42 pairs per class -- verify against the preprocessing)
    for bs in range(0, 3*42, 42):
        plot_13(0+bs, 20+bs, 41+bs)
        print()

    # a fresh model per dataset; only the first run prints the summary and
    # writes the architecture diagram
    siamese_contrastive = model_setup_contrastive(verbose=(i == 0))
    model_training(siamese_contrastive, weights_name)

    model_evaluation(siamese_contrastive)

    # release the dataset before the next one is loaded
    del data
    del pairs
    del targets

In [None]:
# plot the loss curves of the training runs stored in ALL_HISTORY
visualize_history()

In [None]:
# summary table: best accuracy and matching threshold, one row per dataset
df = DataFrame.from_dict(
    dict(Accuracies=ACCURACIES, Thresholds=THRESHOLDS)
)
df.index = [
    'Cedar',
    'BhSig260 Bengali',
    'BhSig260 Hindi',
]
df

In [None]:
# draw the FAR/FRR and precision/recall figures for each entry of PLOTS
for met in PLOTS:
    evaluation_plots(met)