## Define the Siamese Model, Train and Evaluate Model 

#### Load the Images datasets

In [None]:
# Load the libraries
import os
from siamese import Siamese
from L1Dist import L1Dist
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.metrics import Precision, Recall
    

# Create the constants for the data folders
# Each folder is later globbed for '*.jpg' files to build the path datasets.
ANC_PATH = os.path.join('data', 'anchor')
POS_PATH = os.path.join('data', 'positive')
NEG_PATH = os.path.join('data', 'negative')

# Constants
# BATCH_SIZE:     number of samples per batch for the train and test datasets
# PRE_FETCH_SIZE: buffer size handed to Dataset.prefetch() for both datasets
# LEARNING_RATE:  learning rate given to the Adam optimizer
# EPOCHS:         number of passes over the training dataset
BATCH_SIZE     = 256
PRE_FETCH_SIZE = 64
LEARNING_RATE  = 1e-4
EPOCHS = 50

#### Set GPU Growth 

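To avoid OutOfMemory errors, we enable memory growth on every available GPU, so that TensorFlow allocates GPU memory as it is needed instead of reserving all of it up front.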

In [None]:
# List the GPUs that TensorFlow can see
gpus = tf.config.experimental.list_physical_devices('GPU')

# Enable memory growth on each GPU so memory is allocated on demand
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Show the detected GPUs as the cell output (an empty list means no GPU was found)
gpus

#### Load the data for the training

In [None]:
# Load up to 5000 of the .jpg paths for each category (helps build a clean data pipeline)
# take(5000) only caps the count: a folder with fewer files yields fewer paths.
anchor_itt   = tf.data.Dataset.list_files(ANC_PATH + '/*.jpg').take(5000) # type: ignore
positive_itt = tf.data.Dataset.list_files(POS_PATH + '/*.jpg').take(5000) # type: ignore
negative_itt = tf.data.Dataset.list_files(NEG_PATH + '/*.jpg').take(5000) # type: ignore

#### Data Preprocessing  

In [None]:
# Normalize the image values from [0,255] to [0,1] for a better Gradient Descent process

def normalize(image_path):
    """Load a JPEG file and turn it into a 100x100x3 float image in [0, 1].

    Args:
        image_path: Path (string or scalar string tensor) of the JPEG file.

    Returns:
        The resized image with its pixel values scaled from [0, 255] to [0, 1].
    """
    # Load image
    byte_img = tf.io.read_file(image_path)
    # Force 3 channels so grayscale/CMYK JPEGs also produce an RGB tensor;
    # otherwise the channel count follows the file and may not match the model input.
    img = tf.io.decode_jpeg(byte_img, channels=3)

    # Normalize in [0,1] and resize to 100x100x3 for the model
    img = tf.image.resize(img, (100,100))
    img = img / 255.0

    return img

#### Create labeled dataset

In [None]:
# Build (anchor path, other path, label) triples:
#   label 1.0 -> anchor paired with a positive image
#   label 0.0 -> anchor paired with a negative image
positive = tf.data.Dataset.zip((
    anchor_itt,
    positive_itt,
    tf.data.Dataset.from_tensor_slices(tf.ones(len(anchor_itt))),
)) # type: ignore
negative = tf.data.Dataset.zip((
    anchor_itt,
    negative_itt,
    tf.data.Dataset.from_tensor_slices(tf.zeros(len(anchor_itt))),
)) # type: ignore

# All positive triples first, followed by all negative triples
dataset = positive.concatenate(negative)

#### Create the Train-Test datasets

In [None]:
# Create a normalize function for the dataset object that has type (path, path, label)
def data_normalize(input_img, val_image, label):
    """Preprocess both image paths of a (path, path, label) sample.

    Each path is passed through normalize(); the label is returned untouched.
    """
    anchor_image = normalize(input_img)
    compared_image = normalize(val_image)
    return anchor_image, compared_image, label

In [None]:
# Shuffle, normalize and cache the data of the dataset (data pipeline).
#
# The dataset holds every positive sample followed by every negative sample,
# and the train/test split below cuts it in order. A shuffle buffer smaller
# than the dataset cannot mix the two halves, which would leave the test split
# almost entirely negative. Shuffling is therefore done with a buffer covering
# the whole dataset, and on the (path, path, label) triples *before* the images
# are decoded, so the buffer only has to hold short strings.
dataset = dataset.shuffle(buffer_size=len(dataset))
dataset = dataset.map(data_normalize)
# Cache the decoded images so that later passes do not re-read the files
dataset = dataset.cache()

In [None]:
# Train-Test split: the first 80% of the samples go to train, the rest to test
train, test = tf.keras.utils.split_dataset(dataset, left_size=0.8)

# Group both splits into batches and prefetch upcoming batches
train = train.batch(BATCH_SIZE).prefetch(PRE_FETCH_SIZE)
test = test.batch(BATCH_SIZE).prefetch(PRE_FETCH_SIZE)

### Define the Siamese Model

In [None]:
# Create the Siamese model (built by the project's Siamese class) and print its summary
siamese_model = Siamese.siamese_model()
siamese_model.summary()

### Training

In [None]:
# Basic Info for training

# Define the loss and optimizer model
binary_cross_loss = tf.losses.BinaryCrossentropy()
opt = tf.optimizers.Adam(LEARNING_RATE)

# Create checkpoint directory
# exist_ok=True lets this cell be re-run without raising FileExistsError
checkpoints_dir = './training_checkpoints'
os.makedirs(checkpoints_dir, exist_ok=True)
checkpoint_prefix = os.path.join(checkpoints_dir, "checkpoint")
# The checkpoint tracks both the optimizer state and the model weights
checkpoint = tf.train.Checkpoint(opt=opt, siamese_model=siamese_model)


In [None]:
@tf.function
def train_step(batch):
    """Run one optimization step on a single batch and return its loss.

    Args:
        batch: Sequence whose first two items are the model inputs
            (anchor images and positive/negative images) and whose
            third item holds the labels.

    Returns:
        The binary cross-entropy loss computed for this batch.
    """
    # Split the batch into the image pair and the labels
    image_pair, labels = batch[:2], batch[2]

    # Record the forward pass so that gradients can be derived from it
    with tf.GradientTape() as tape:
        predictions = siamese_model(image_pair, training=True)
        loss = binary_cross_loss(labels, predictions)

    # Differentiate the loss w.r.t. the trainable weights and update them
    weights = siamese_model.trainable_variables
    gradients = tape.gradient(loss, weights)
    opt.apply_gradients(zip(gradients, weights))

    return loss

In [None]:
# Define the train function
def train_model(train_data, epochs):
    """Train the Siamese model and report the mean loss of every epoch.

    A checkpoint is saved after every 10th epoch.

    Args:
        train_data: Batched dataset of (anchor, positive/negative, label)
            samples; it must have a known length for the progress bar.
        epochs: Number of passes over train_data.
    """
    # The number of batches does not change between epochs
    num_batches = len(train_data)

    # Loop through epochs
    for epoch in range(1, epochs+1):
        loss_per_batch = []
        print('Results for Epoch: {}'.format(epoch))
        progress_bar = tf.keras.utils.Progbar(num_batches)

        # Loop through batches
        for idx, batch in enumerate(train_data):
            # Keep plain Python floats so the epoch mean is ordinary arithmetic
            loss_per_batch.append(float(train_step(batch)))
            progress_bar.update(idx+1)

        # Guard against an empty dataset (the mean would divide by zero)
        if loss_per_batch:
            mean_loss = sum(loss_per_batch) / len(loss_per_batch)
            print('The loss for the epoch: {0} is {1}'.format(epoch, mean_loss))
        else:
            print('No batches to train on for the epoch: {0}'.format(epoch))

        # Save the checkpoints
        if epoch % 10 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)


In [None]:
# Train the model on the batched training dataset for EPOCHS epochs
train_model(train_data=train, epochs=EPOCHS)

### Evaluate Model

In this part we have to take into consideration the goal of the model. The most important metric for a face verification system is precision, so we are going to use precision and recall as the metrics with which we evaluate the model.

In [None]:
# Evaluate the model on the test data
# The metric objects accumulate their state over update_state() calls, so
# feeding them every batch gives precision/recall over the whole test set
# instead of over the first batch only.

# Create the Precision object
p = Precision()

# Create the Recall object
r = Recall()

for test_input, test_val, y_true in test.as_numpy_iterator():
    y_pred = siamese_model.predict([test_input, test_val], verbose=0)

    # Turn labels into ints and threshold the predicted scores at 0.5
    y_true = [int(x) for x in y_true]
    y_pred = [1 if prediction > 0.5 else 0 for prediction in y_pred]
    print('{0}\n{1}'.format(y_true, y_pred))

    p.update_state(y_true, y_pred)
    r.update_state(y_true, y_pred)

precision = p.result().numpy()
recall = r.result().numpy()

print("Precision: {0}\nRecall: {1}".format(precision, recall))

### Save Model

In [None]:
# Save the trained model to a single .h5 file in the working directory
siamese_model.save('siameseModel.h5')

### Reload Model

In [None]:
# Reload the saved model; custom_objects passes the project classes (Siamese, L1Dist)
# under the names they are looked up by when the file is deserialized
model = tf.keras.models.load_model('siameseModel.h5', custom_objects={'siamese': Siamese, 'L1Dist': L1Dist})
# Compile the reloaded model with the Adam optimizer, binary cross-entropy loss and an accuracy metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])