In [None]:
import os

import torch
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Multiply
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import MaxPooling2D
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D, ReLU, Flatten

import dataloader as dset
from data_augmenter import enrich_and_shuffle_dataset

In [None]:
def configure_gpu():
    """Enable memory growth on every GPU that TensorFlow lists.

    Does nothing (and prints nothing) when no GPU is reported. A RuntimeError
    raised while applying the setting is printed instead of being propagated.
    """
    physical_gpus = tf.config.list_physical_devices('GPU')
    if not physical_gpus:
        return

    try:
        for device in physical_gpus:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        print("Failed to set memory growth: ", err)
    else:
        # Reached only when every device accepted the setting.
        print("Memory growth set successfully")

configure_gpu()

In [None]:
# Tell the OpenMP runtime to tolerate a duplicate copy of itself in this process.
# NOTE(review): torch / numpy / tensorflow are already imported when this line runs;
# if the duplicate-runtime abort happens at import time, this assignment has to move
# above the imports to take effect -- confirm.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Raw strings: "\d" and "\l" are not valid escape sequences, so the non-raw spelling
# triggers a SyntaxWarning on recent Python versions. The runtime values are unchanged.
LMDB_PATH_HOST = r"..\dataset\lmdb.hwr_40-1.0"
TRN_DATA = r"..\dataset\data\lines.filtered_max_width.tst.55.shuf"
# NOTE(review): unlike TRN_DATA, there is no separator between "data" and "lines" here,
# which looks like a typo. The value is deliberately left as it was -- confirm the path.
TST_DATA = r"..\dataset\datalines.filtered_max_width.tst.55"

In [None]:
def create_pairs(images, labels, rng=None):
    """Build positive and negative image pairs for siamese training.

    For every image whose class contains at least one other image, a positive
    pair (label ``[1]``) is emitted with a randomly chosen *other* image of the
    same class, immediately followed by a negative pair (label ``[0]``) with a
    random image from a randomly chosen different class. Images that are alone
    in their class are skipped entirely, and no negative pair is produced when
    only a single class exists.

    Args:
        images: Indexable collection of images; ``images[i]`` is sample ``i``.
        labels: Class label of each image. Any array-like is accepted (it is
            converted with ``np.asarray``), so plain Python lists work too.
        rng: Optional object exposing ``choice`` (e.g. ``np.random.Generator``
            or ``np.random.RandomState``) for reproducible sampling. When
            omitted, NumPy's global random state is used.

    Returns:
        Tuple ``(pairs, pair_labels)`` of NumPy arrays. ``pairs[k]`` holds the
        two images of pair ``k``; ``pair_labels[k]`` is ``[1]`` or ``[0]``.
    """
    # A plain list would make `labels == label` a scalar comparison (always
    # False), silently yielding no pairs at all -- force an ndarray first.
    labels = np.asarray(labels)
    sampler = np.random if rng is None else rng

    unique_labels = np.unique(labels)
    # Map each class label to the positions of its samples.
    idx = {label: np.where(labels == label)[0] for label in unique_labels}

    imagePairs = []
    labelPairs = []

    for ind in range(len(images)):
        currImage = images[ind]
        label = labels[ind]

        # Positive pair: any other sample of the same class.
        same_class = idx[label]
        partners = same_class[same_class != ind]
        if partners.size == 0:
            # Lone sample in its class: emit neither a positive nor a negative pair.
            continue
        imagePairs.append([currImage, images[sampler.choice(partners)]])
        labelPairs.append([1])

        # Negative pair: pick a different class first, then one of its samples.
        other_labels = unique_labels[unique_labels != label]
        if other_labels.size:
            diss_label = sampler.choice(other_labels)
            diss_idx = sampler.choice(idx[diss_label])
            imagePairs.append([currImage, images[diss_idx]])
            labelPairs.append([0])

    return (np.array(imagePairs), np.array(labelPairs))

In [None]:
# Open the LMDB-backed dataset described by the training label file.
train_set = dset.DatasetFromLMDB(lmdb_path=LMDB_PATH_HOST, labels_path=TRN_DATA)

# Each record is an (image, label, image_name) triple; the name is not used.
records = [train_set[position] for position in range(len(train_set))]
images, labels, _names = zip(*records)

# Stack the per-sample tensors into one batch, convert to NumPy and move
# axis 1 to the end (presumably channels-first -> channels-last; confirm).
X_tensor = torch.stack(images)
X = np.transpose(X_tensor.numpy(), (0, 2, 3, 1))

# .item() unwraps every single-element label into a plain Python scalar.
y = np.array([label.item() for label in labels])

In [None]:
# One seed for the splits and for NumPy's global RNG so this cell is repeatable.
SEED = 42

# 60/20/20 train/validation/test split: hold out 40%, then halve the held-out part.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=SEED)

# create_pairs samples with np.random.choice, i.e. from NumPy's global RNG. Without
# seeding it, the generated pairs -- and every metric computed from them -- change on
# each run even though the splits above are fixed.
# NOTE(review): enrich_and_shuffle_dataset is defined elsewhere; this seed only makes
# it repeatable if it also draws from NumPy's global RNG -- confirm.
np.random.seed(SEED)

# Only the training portion goes through enrich_and_shuffle_dataset.
X_train, y_train = enrich_and_shuffle_dataset(X_train, y_train)

# From here on X_* hold image pairs and y_* hold [1] (same class) / [0] (different class).
(X_train, y_train) = create_pairs(X_train, y_train)
(X_test, y_test) = create_pairs(X_test, y_test)
(X_val, y_val) = create_pairs(X_val, y_val)

# Creating and Training the Model

In [None]:
def cosine_distance(vecs):
    """Cosine distance (1 - cosine similarity) between two batches of vectors.

    Args:
        vecs: Pair ``(a, b)`` of tensors; all reductions run over axis 1.

    Returns:
        Tensor holding ``1 - cos(a, b)`` per row, with axis 1 kept at size 1.
    """
    first, second = vecs

    # Squared L2 norms, floored at K.epsilon() so the division never hits zero.
    first_sq_norm = K.maximum(K.sum(K.square(first), axis=1, keepdims=True), K.epsilon())
    second_sq_norm = K.maximum(K.sum(K.square(second), axis=1, keepdims=True), K.epsilon())

    first_unit = first / K.sqrt(first_sq_norm)
    second_unit = second / K.sqrt(second_sq_norm)

    # Dot product of unit vectors is the cosine similarity.
    similarity = K.sum(first_unit * second_unit, axis=1, keepdims=True)
    return 1 - similarity

def contrastiveLoss(y, y_preds, margin=1):
    """Contrastive loss on predicted pair distances.

    Args:
        y: Pair labels; rows with 1 are penalised by the squared distance,
            rows with 0 by the squared shortfall below ``margin``.
        y_preds: Predicted distances.
        margin: Distance beyond which a 0-labelled pair contributes no loss.

    Returns:
        Mean loss over all elements.
    """
    targets = tf.cast(y, y_preds.dtype)

    # 1-labelled pairs: pull the distance towards zero.
    pull_term = targets * K.square(y_preds)
    # 0-labelled pairs: push the distance out to at least `margin`.
    push_term = (1 - targets) * K.square(K.maximum(margin - y_preds, 0))

    return K.mean(pull_term + push_term)
    
def siamese_model(input_shape, embeddingDim=48):
    """Build the embedding network used for both branches of the siamese model.

    An ImageNet-initialised VGG16 (without its classifier head) is followed by
    global average pooling and a ReLU dense layer of size ``embeddingDim``.
    Only the last four VGG16 layers are left trainable.

    Args:
        input_shape: Shape of one input image, passed to VGG16 and ``Input``.
        embeddingDim: Number of units in the final dense layer.

    Returns:
        A Keras ``Model`` mapping an image to its embedding.
    """
    backbone = VGG16(include_top=False, weights='imagenet', input_shape=input_shape)

    # Freeze every layer except the final four.
    first_trainable = len(backbone.layers) - 4
    for position, layer in enumerate(backbone.layers):
        layer.trainable = position >= first_trainable

    image_in = Input(input_shape)
    feature_maps = backbone(image_in)
    pooled = GlobalAveragePooling2D()(feature_maps)
    embedding = Dense(embeddingDim, activation='relu')(pooled)

    return Model(image_in, embedding)

In [None]:
# Settings shared by the model definition and the training run.
image_shape, batch_size, epochs = (200, 50, 3), 32, 25

# One input per image of a pair.
imageA, imageB = Input(shape=image_shape), Input(shape=image_shape)

# The same embedding network is applied to both inputs, so its weights are shared.
model_build = siamese_model(image_shape)
modelA, modelB = model_build(imageA), model_build(imageB)

# The full model outputs the cosine distance between the two embeddings.
distance = Lambda(cosine_distance)([modelA, modelB])
model = Model(inputs=[imageA, imageB], outputs=distance)

In [None]:
model.compile(optimizer="adam", loss=contrastiveLoss)

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Column 0 / column 1 of each pair array feed the first / second model input.
train_inputs = [X_train[:, 0], X_train[:, 1]]
val_inputs = [X_val[:, 0], X_val[:, 1]]

history = model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(val_inputs, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping_callback],
)

# Evaluation

In [None]:
# roc_curve / auc are imported in the notebook's import cell; batch_size is the value
# from the configuration cell, so it is not redefined here.

# Predicted cosine distance for every test pair.
predictions = model.predict([X_test[:, 0], X_test[:, 1]], batch_size=batch_size)

# exp(-distance) decreases monotonically with distance, so a higher score means
# "more similar" -- the orientation roc_curve needs for the positive class (label 1).
similarity_scores = np.exp(-np.array(predictions))

# Labels and scores are column-shaped (N, 1) here; hand roc_curve explicit 1-D vectors.
fpr, tpr, _ = roc_curve(np.ravel(y_test), np.ravel(similarity_scores))
roc_auc = auc(fpr, tpr)

# ROC curve with the chance diagonal for reference.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()