# Siamese Convolutional Neural Network<br>(Triplet Loss)

In [None]:
from preprocess import SiameseTriplets
from triplet_utils import *

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pickle
from datetime import datetime, timedelta
import numpy as np
from pandas import DataFrame

import tensorflow as tf
import keras.backend as K
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping

from sklearn.metrics import roc_curve, roc_auc_score
import wandb

# plotting
from tensorflow.keras.utils import plot_model
import pydotplus as pydot
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:

# Output locations for this run, derived from the project name.
# NOTE(review): `projectName` is not assigned in this cell; the only visible
# assignment is in the wandb setup cell further down, so that definition must
# have been executed before this cell runs.
project_path = './{0}/'.format(projectName)
model_path = '../azurenb_temp/{0}/'.format(projectName)

# os.makedirs(..., exist_ok=True) creates missing parent directories
# (e.g. ../azurenb_temp) and does nothing when the directory already exists.
os.makedirs(project_path, exist_ok=True)
os.makedirs(model_path, exist_ok=True)

In [None]:
# Make sure the local folder used for saved weights is present and report
# whether it had to be created.
if os.path.exists('./weights'):
    print("Weights directory exists")
else:
    os.makedirs('./weights')
    print("Weights directory created")

## Setting up datasets

In [None]:
def data_shapes(data):
    """Print a short summary of ``data.train_images``.

    Reports the size of the first axis (number of classes), the lengths of
    the two groups stored for the first class (printed as original and forged
    signatures) and the shape of the first image of the first group.

    Args:
        data: object exposing a ``train_images`` array indexed as
            ``[class][group][sample]``.
    """
    images = data.train_images
    print("\nNumber of classes   : ", images.shape[0])

    first_class = images[0]
    print("Original signatures : ", len(first_class[0]))
    print("Forged signatures   : ", len(first_class[1]))
    print("Image shape         : ", first_class[0][0].shape)
    print()

In [None]:
def plot_triplets(data):
    """Show the first anchor / positive / negative images side by side.

    Args:
        data: object exposing ``triplets``, where ``triplets[k][0]`` is the
            image drawn in the k-th panel (k = 0, 1, 2).
    """
    figure, axes = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(8,8))

    # one panel per triplet member, titled by its role
    for position, title in enumerate(('Anchor', 'Positive', 'Negative')):
        panel = axes[position]
        panel.imshow(data.triplets[position][0])
        panel.set_title(title)

    figure.tight_layout()
    plt.show()

In [None]:
def model_training(model, weights_name):
    """Fit ``model`` with early stopping and save its weights to ./weights.

    The training data is read from the module-level names ``pairs`` and
    ``targets`` (they are not parameters of this function), and the history
    object returned by ``fit`` is appended to the module-level
    ``ALL_HISTORY`` list.

    Args:
        model: model object exposing ``fit`` and ``save_weights``.
        weights_name: file name (without extension) used for the saved
            weights, written as ``./weights/<weights_name>.h5``.
    """
    print("\nStarting training!\n")

    # hyperparameters
    EPOCHS = 100  # number of epochs
    BS = 128  # batch size

    # callbacks: stop once val_loss has not improved for 3 epochs
    callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=1,)]

    history = model.fit(
        pairs, targets,
        batch_size=BS,
        epochs=EPOCHS,
        verbose=1,
        callbacks=callbacks,
        validation_split=0.3,  # 30% of the data is held out for val_loss
    )

    ALL_HISTORY.append(history)

    print("\nSaving weight for model...", end="")
    # Save the model that was just trained (the one passed in), not an
    # unrelated global.
    model.save_weights('./weights/{0}.h5'.format(weights_name))
    print("saved successfully!")

## Everything put together

In [None]:
# DATA_PATH = "../input/handwritten-signature-datasets/CEDAR/CEDAR"  # path to dataset (kaggle)
# SAVE_PATH = "./"                                                   # path to save pickle files (kaggle)

# Paths are built with os.path.join so the separator matches the host OS
# (yields "data\\CEDAR" on Windows and "data/CEDAR" elsewhere).
DATA_PATH = os.path.join("data", "CEDAR")         # path to dataset
SAVE_PATH = os.path.join("data", "pickle-files")  # path to save pickle files

CLASSES = len(os.listdir(DATA_PATH))  # number of classes (entries in DATA_PATH)
NAME = "CEDAR"

# size of images
SIZE = 224
CHANNELS = 1
INPUT_SHAPE = (SIZE, SIZE, CHANNELS)

# evaluation
ALL_HISTORY = []  # history objects appended by model_training()
ACCURACIES = []
THRESHOLDS = []
PLOTS = []

# NOTE(review): presumably the flag meant for SiameseTriplets(reset=...) — confirm
TO_RESET = True

In [None]:
# Optional TPU setup; nothing below runs unless IS_TPU is switched to True.
IS_TPU = False

if IS_TPU:
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

    # instantiate a distribution strategy
    # NOTE(review): tpu_strategy is not used anywhere else in the visible code —
    # the model is not built inside a strategy scope here; confirm intent.
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
# loading dataset
# SiameseTriplets comes from the local `preprocess` module; all arguments are
# the constants from the configuration cell. `TO_RESET` is the reset flag
# defined there (the bare name `reset` is not defined in this notebook).
dataset = SiameseTriplets(name=NAME,
                          data_path=DATA_PATH,
                          save_path=SAVE_PATH,
                          channels=CHANNELS,
                          size=SIZE,
                          reset=TO_RESET)

In [None]:
data_shapes(dataset)    # print class count, per-class sample counts and image shape
plot_triplets(dataset)  # show one anchor / positive / negative example

In [None]:
# Hyper parameters
# alpha1 / alpha2 are only recorded in the wandb config in the visible code.
# NOTE(review): presumably loss margins — confirm against triplet_utils.
alpha1 = 1
alpha2 = 0.5
embeddingsize = 10   # passed to embedding_net() when the model is built
nb_test_class = 10   # not referenced elsewhere in the visible code

evaluate_every = 1000 # interval (in iterations) for the progress print and weight checkpoint
n_iter = 10000        # No. of training iterations
log_every = 50        # interval (in iterations) for logging the loss to wandb
sample_batch_size = 16  # recorded in the wandb config

# NOTE(review): `lr` is the legacy keyword for the learning rate; newer Keras
# releases expect `learning_rate` — confirm against the installed version.
optimizer = Adam(lr = 0.00006)

In [None]:
# defining the embedding and network
# embedding_net / build_triplet_model are not defined in this notebook
# (presumably provided by `from triplet_utils import *` — confirm).
embedding = embedding_net(embeddingsize, INPUT_SHAPE)
siamese_network3 = build_triplet_model(INPUT_SHAPE, embedding, margin=1)

# No loss is passed to compile().
# NOTE(review): presumably the triplet loss is attached inside
# build_triplet_model (e.g. via add_loss) — confirm.
siamese_network3.compile(loss=None,optimizer=optimizer)

# print the layer summary and write the architecture diagram to TRIPLETmodel.png
siamese_network3.summary()
plot_model(siamese_network3, show_shapes=True, show_layer_names=True, to_file='TRIPLETmodel.png')

In [None]:
# Name used for the wandb project and for the output directories.
projectName = "CEDAR_triplet_loss"

# wandb config
# Start a wandb run and record the hyperparameters of this experiment.
wandb.init(project=projectName)
wandb.config.alpha1 = alpha1
wandb.config.alpha2 = alpha2
wandb.config.sample_batch_size = sample_batch_size
# K.eval turns the optimizer's learning-rate variable into a plain value for logging
wandb.config.learningrate = K.eval(optimizer.lr) 

In [None]:
import time  # imported here because the notebook's import cell does not import it

print("Starting training process!")
print("-------------------------------------")

# Global step counter used for wandb steps and checkpoint file names.
# Keep an existing value (e.g. from a previous run of this cell) so steps keep
# increasing; otherwise start from zero.
try:
    n_iteration
except NameError:
    n_iteration = 0

t_start = time.time()
for i in range(1, n_iter+1):
    # time spent generating one batch of triplets
    microtask_start = time.time()
    triplets = dataset.generate_triplets(sample_batch_size)
    timetogetbatch = time.time()-microtask_start

    # time spent on one optimisation step; the model was compiled with
    # loss=None, so no target array is passed
    microtask_start = time.time()
    loss = siamese_network3.train_on_batch(triplets, None)
    timebatch3 = time.time()-microtask_start

    n_iteration += 1

    if i % log_every == 0:
        wandb.log({'loss3x': loss}, step=n_iteration)

    if i % evaluate_every == 0:
        # estimate the finish time from the average rate so far
        elapsed_minutes = (time.time()-t_start)/60.0
        rate = i/elapsed_minutes
        eta = datetime.now() + timedelta(minutes=(n_iter-i)/rate)
        eta = eta + timedelta(hours=0) #french time (zero offset: currently a no-op)

        print("[{3}] iteration {0}: {1:.1f} iter/min, Train Loss: {2} , eta : {4}".format(
            i, rate, loss, n_iteration, eta.strftime("%Y-%m-%d %H:%M:%S")
        ))

        # periodic checkpoint of the model being trained
        siamese_network3.save_weights('{1}3x-temp_weights_{0:08d}.h5'.format(n_iteration, model_path))

# Final save
siamese_network3.save_weights('{1}3x-temp_weights_{0:08d}.h5'.format(n_iteration, model_path))
print("Done !")

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero denominator."""
    return float(numerator) / float(denominator) if denominator else 0.0


def compute_accuracy_roc(predictions, labels):
    """Sweep a distance threshold and find the one with the best accuracy.

    A sample is treated as "same" (genuine) when its prediction is less than
    or equal to the threshold. Thresholds run from the smallest to the largest
    prediction in steps of 0.01. The accuracy maximised is the mean of the
    true-positive rate and the true-negative rate.

    Any rate whose denominator is zero (for example when ``labels`` contains
    only one class) is reported as 0.0 instead of raising.

    Args:
        predictions -- np.array : array of predicted distances.
        labels -- np.array : true labels (1 = similar, 0 = different).

    Returns:
        max_acc -- float : maximum accuracy of model.
        best_thresh -- float : threshold at which max_acc is first reached
            (-1 if no threshold gives an accuracy above 0).
        plot_metrics -- list : [thresholds, FAR, FRR, precision, recall],
            five lists with one entry per threshold.
    """
    # Flatten once so the boolean masks line up with the labels.
    predictions = np.asarray(predictions).ravel()
    labels = np.asarray(labels).ravel()

    dmax = np.max(predictions)
    dmin = np.min(predictions)

    nsame = np.sum(labels == 1)  # similar
    ndiff = np.sum(labels == 0)  # different

    step = 0.01
    max_acc = 0
    best_thresh = -1

    frr_plot = []
    far_plot = []
    pr_plot = []
    re_plot = []

    ds = []
    for d in np.arange(dmin, dmax+step, step):
        idx1 = predictions <= d  # guessed genuine
        idx2 = predictions > d   # guessed forged

        tp = float(np.sum(labels[idx1] == 1))
        tn = float(np.sum(labels[idx2] == 0))
        fp = float(np.sum(labels[idx1] == 0))
        fn = float(np.sum(labels[idx2] == 1))

        tpr = _safe_ratio(tp, nsame)
        tnr = _safe_ratio(tn, ndiff)

        acc = 0.5 * (tpr + tnr)
        pr = _safe_ratio(tp, tp + fp)
        re = _safe_ratio(tp, tp + fn)

        # strict '>' keeps the smallest threshold among ties
        if acc > max_acc:
            max_acc, best_thresh = acc, d

        far = _safe_ratio(fp, fp + tn)  # false acceptance rate
        frr = _safe_ratio(fn, fn + tp)  # false rejection rate
        frr_plot.append(frr)
        pr_plot.append(pr)
        re_plot.append(re)
        far_plot.append(far)
        ds.append(d)

    plot_metrics = [ds, far_plot, frr_plot, pr_plot, re_plot]

    return max_acc, best_thresh, plot_metrics

In [None]:
# Plot the training and validation loss curves of the first recorded history.
# NOTE(review): ALL_HISTORY is only filled by model_training(); if that
# function has not been called, ALL_HISTORY[0] raises IndexError.
losses = ['loss', 'val_loss']

fig = plt.figure()

# one curve per history key, drawn in the same order as the legend entries
for x in losses:
    plt.plot(ALL_HISTORY[0].history[x])

plt.title('Losses')
plt.legend(losses)
plt.grid(True)
plt.tight_layout()