In [46]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

**Model**

In [47]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model

def create_model(batch_size,epochs,X_train_pairs,y_train_pairs,X_val_pairs,y_val_pairs):
    """Build, compile and fit a two-input image-pair classifier.

    Both 64x64 inputs are passed through the *same* CNN encoder instance, the
    two resulting feature vectors are concatenated, and a small dense head
    with a sigmoid output scores the pair.

    Args:
        batch_size: Batch size forwarded to ``model.fit``.
        epochs: Maximum number of epochs (early stopping may end sooner).
        X_train_pairs: Training pairs, indexed as ``X[:, 0]`` / ``X[:, 1]``
            for the first / second image of every pair, each image 64x64.
        y_train_pairs: One binary target per training pair.
        X_val_pairs: Validation pairs, same layout as ``X_train_pairs``.
        y_val_pairs: One binary target per validation pair.

    Returns:
        The fitted Keras ``Model``. A model summary is printed as a side
        effect.
    """
    img_A_inp = Input((64, 64), name='img_A_inp')
    img_B_inp = Input((64, 64), name='img_B_inp')

    def get_cnn_block(depth):
        """Return a 3x3 stride-1 convolution followed by BatchNorm and ReLU."""
        return Sequential([
            Conv2D(depth, 3, 1),
            BatchNormalization(),
            ReLU()
        ])

    DEPTH = 64
    # A single encoder object is applied to both inputs below, so the two
    # branches share their weights.
    cnn = Sequential([
        Reshape((64, 64, 1)),  # add the channel axis expected by Conv2D
        get_cnn_block(DEPTH),
        get_cnn_block(DEPTH * 2),
        get_cnn_block(DEPTH * 4),
        GlobalAveragePooling2D(),
        Dense(64, activation='relu')
    ])

    feature_vector_A = cnn(img_A_inp)
    feature_vector_B = cnn(img_B_inp)

    concat = Concatenate()([feature_vector_A, feature_vector_B])

    dense = Dense(64, activation='relu')(concat)
    dropout = Dropout(0.5)(dense)  # dropout regularization on the head
    output = Dense(1, activation='sigmoid')(dropout)

    model = Model(inputs=[img_A_inp, img_B_inp], outputs=output)

    model.summary()

    # Without restore_best_weights the model handed back to the caller keeps
    # the weights of the final epoch, i.e. `patience` epochs *after* the best
    # validation loss was observed.
    es = EarlyStopping(patience=3, restore_best_weights=True)

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(
        x=[X_train_pairs[:, 0, :, :], X_train_pairs[:, 1, :, :]],
        y=y_train_pairs,
        validation_data=(
            [X_val_pairs[:, 0, :, :], X_val_pairs[:, 1, :, :]],
            y_val_pairs,
        ),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[es],
    )

    return model

**Function to create the paired-image dataset and the corresponding labels**

In [48]:
import itertools

def make_paired_dataset(X, y):
    """Build every ordered pair of samples and a same-label indicator.

    The samples ``(X[i], y[i])`` are combined with themselves as a full
    Cartesian product, so ``n`` inputs yield ``n * n`` pairs (self-pairs and
    both orderings included).

    Args:
        X: Iterable of images.
        y: Iterable of labels, aligned with ``X``.

    Returns:
        Tuple ``(X_pairs, y_pairs)`` of NumPy arrays: ``X_pairs[k]`` holds the
        two images of pair ``k`` and ``y_pairs[k]`` is 1 when their labels
        compare equal, otherwise 0.
    """
    samples = list(zip(X, y))

    image_pairs = []
    same_label_flags = []
    for (first_img, first_label), (second_img, second_label) in itertools.product(samples, repeat=2):
        image_pairs.append([first_img, second_img])
        same_label_flags.append(int(first_label == second_label))

    return np.array(image_pairs), np.array(same_label_flags)

**Creating the training and validation datasets**

In [49]:
import os
import random
import pandas as pd

def createDataset(train_data_dir,size):
    """Sample ``size`` random grayscale images from a folder-per-person dataset.

    Every iteration picks one sub-folder of ``train_data_dir`` uniformly at
    random, then one file inside it, loads that file as grayscale and resizes
    it to 64x64 with Lanczos interpolation. Iterations are independent, so
    the same image may be drawn more than once.

    Args:
        train_data_dir: Dataset root; each sub-directory is one person ID.
        size: Number of rows (images) to sample.

    Returns:
        pandas.DataFrame with one row per sample and two columns:
        ``img_1`` (the 64x64 image array) and ``label`` (the sampled image's
        path, built as ``train_data_dir + '/' + folder + '/' + file``).

    Raises:
        ValueError: If ``size`` is positive and the dataset root has no
            sub-folders, if a chosen folder contains no files, or if a chosen
            file cannot be read as an image.
    """
    dataset_path = train_data_dir

    # Only directories can be person folders; a stray file in the root would
    # otherwise crash the per-folder listing. Sorting makes the draw
    # reproducible when the `random` module is seeded.
    person_folders = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )
    if size > 0 and not person_folders:
        raise ValueError("No person folders found in " + str(dataset_path))

    dataset = []

    for _ in range(size):
        # Draw exactly one folder and one file; sampling several and using
        # only the first needlessly failed on small datasets.
        folder = random.choice(person_folders)
        folder_path = os.path.join(dataset_path, folder)

        image_files = sorted(
            name for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        )
        if not image_files:
            raise ValueError("No image files found in " + folder_path)
        image_name = random.choice(image_files)

        image_path = dataset_path+'/'+folder+'/'+image_name
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: " + image_path)

        resized_image = cv2.resize(image, (64,64), interpolation=cv2.INTER_LANCZOS4)

        # NOTE(review): the label is the full image path, so two different
        # images of the same person get different labels. If pairs are meant
        # to match on identity, `folder` is probably the intended label --
        # confirm before changing, since callers compare these values.
        dataset.append({
            "img_1": resized_image,
            "label": image_path,
        })

    return pd.DataFrame(dataset)


**Training the model**

In [50]:
def train_model(batch_size, epochs,dataset_path,test_size,val_size):
    """Sample training/validation data from one directory and fit a model.

    Two independent samples are drawn from ``dataset_path`` with
    ``createDataset``, each is expanded into image pairs with
    ``make_paired_dataset``, and the pairs are handed to ``create_model``.

    Args:
        batch_size: Batch size forwarded to ``create_model``.
        epochs: Epoch limit forwarded to ``create_model``.
        dataset_path: Directory both samples are drawn from.
        test_size: Number of images sampled for the *training* frame.
        val_size: Number of images sampled for the validation frame.

    Returns:
        The model returned by ``create_model``.
    """
    # Draw both frames first, then pair them, in train -> validation order.
    frames = [createDataset(dataset_path, n_rows) for n_rows in (test_size, val_size)]

    # Column 0 holds the images, column 1 the labels.
    (train_X, train_y), (val_X, val_y) = [
        make_paired_dataset(frame.iloc[:, 0], frame.iloc[:, 1]) for frame in frames
    ]

    return create_model(batch_size, epochs, train_X, train_y, val_X, val_y)



**Function to validate the model**

In [51]:
def validate_model(model,val_path,validation_images_file):
    """Score every image pair listed in a CSV and save the results.

    The CSV at ``val_path`` is read positionally: columns 0 and 1 are image
    file names relative to ``validation_images_file`` and column 2 is the
    pair's label. Each image is loaded as grayscale and resized to 64x64;
    all pairs are then scored with a single ``model.predict`` call.

    Args:
        model: Object whose ``predict`` accepts ``[batch_A, batch_B]`` and
            returns one score per pair.
        val_path: Path of the CSV describing the pairs.
        validation_images_file: Directory that contains the listed images.

    Returns:
        pandas.DataFrame with columns ``img1_name``, ``img2_name``, ``label``
        (1 if the CSV label is greater than 0.0001, else 0) and ``proba``
        (the model score). The same frame is written to ``result.csv`` in
        the current working directory.

    Raises:
        ValueError: If a listed image cannot be read.
    """
    val = pd.read_csv(val_path)

    def load_image(name):
        """Read one image as grayscale and resize it to 64x64."""
        path = validation_images_file+'/'+name
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: " + path)
        return cv2.resize(image, (64,64), interpolation=cv2.INTER_LANCZOS4)

    records = []
    images_a = []
    images_b = []

    for idx in range(len(val)):
        name_a = val.iloc[idx, 0]
        name_b = val.iloc[idx, 1]
        label = val.iloc[idx, 2]

        images_a.append(load_image(name_a))
        images_b.append(load_image(name_b))

        # NOTE(review): `label` is thresholded from the CSV's third column,
        # not from the model output -- confirm that is intended.
        records.append({
            "img1_name": name_a,
            'img2_name': name_b,
            "label": 1 if label > 0.0001 else 0,
        })

    if records:
        # One batched call instead of one predict() per row; all images are
        # held in memory at once (64*64 bytes per grayscale image).
        scores = np.asarray(model.predict([np.stack(images_a), np.stack(images_b)]))
        # Keep the first output value of every pair.
        first_scores = scores.reshape(len(records), -1)[:, 0]
        for record, score in zip(records, first_scores):
            record['proba'] = score

    result = pd.DataFrame(records)

    result.to_csv("result.csv", index=False)

    return result

In [53]:
def create_new_model(batch_size,epochs,train_dataset_path,validation_csv_path,validation_images_file,train_size,val_size):
    """Train a model, save it to ``saved_model`` and run validation on it.

    Args:
        batch_size: Batch size forwarded to ``train_model``.
        epochs: Epoch limit forwarded to ``train_model``.
        train_dataset_path: Directory the training data is sampled from.
        validation_csv_path: CSV of validation pairs for ``validate_model``.
        validation_images_file: Directory holding the validation images.
        train_size: Number of images sampled for training.
        val_size: Number of images sampled for in-training validation.

    Returns:
        None. The model is saved under ``saved_model``; ``validate_model`` is
        called for its side effects and its return value is discarded.
    """
    trained = train_model(batch_size, epochs, train_dataset_path, train_size, val_size)
    trained.save('saved_model')
    validate_model(trained, validation_csv_path, validation_images_file)

In [54]:
# model.save('saved_model')
# validation_results=validate_model(model,'/content/gdrive/MyDrive/dataset/val.csv')