In [None]:
import numpy as np
import pandas
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tqdm import tqdm

In [None]:
# Load every image under dir_path/<class folder>/ into `images` and record the
# integer index of its class folder in `nums`.
dir_path = r"/kaggle/input/celebrity-face-image-dataset/Celebrity Faces Dataset"
img_size = 64
images = []
nums = []
# os.listdir returns entries in arbitrary order; sorting makes the class index
# assigned to each folder reproducible across runs and machines. Entries that
# are not directories are ignored so the inner os.listdir cannot fail on them.
class_names = sorted(
    name for name in os.listdir(dir_path)
    if os.path.isdir(os.path.join(dir_path, name))
)
# Wrapping the list (not the enumerate object) lets tqdm know the total.
for i, name in enumerate(tqdm(class_names)):
    folder_path = os.path.join(dir_path, name)
    for img_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_name)
        img_array = cv2.imread(img_path)
        if img_array is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; skip it instead of crashing in resize.
            continue
        img_array = cv2.resize(img_array, (img_size, img_size))
        # Reverse the channel axis (OpenCV loads BGR) and scale to [0, 1].
        img_array = img_array[:, :, ::-1] / 255.0
        images.append(img_array)
        nums.append(i)
# The reshape keeps a 4-D (N, img_size, img_size, 3) array even when N == 0.
images = np.array(images, dtype = 'float32').reshape(-1, img_size, img_size, 3)
nums = np.array(nums, dtype = 'float32')
images.shape, nums.shape

In [None]:
class DistanceLayer(tf.keras.layers.Layer):
    """Element-wise squared difference between two tensors.

    The layer has no weights; it returns a tensor with the broadcast shape of
    its two inputs, not a single scalar distance.
    """
    def __init__(self, **kwargs):
        # Forward the standard Layer options (name, dtype, trainable, ...) to
        # the base class so the layer can be named and rebuilt from its config.
        # Calling DistanceLayer() with no arguments behaves as before.
        super().__init__(**kwargs)
    def call(self, vec1, vec2):
        """Return ``tf.square(vec1 - vec2)``."""
        return tf.square(vec1 - vec2)
    
class SiameseNetwork(tf.keras.Model):
    """Pair classifier built around one shared convolutional encoder.

    Both inputs are passed through the same ``encoder`` instance (so its
    weights are shared), the two 16-unit embeddings are combined by
    ``DistanceLayer`` and a single sigmoid unit turns the result into a score.
    """
    def __init__(self):
        super().__init__()
        # Three identical Conv -> BatchNorm -> MaxPool stages.
        feature_layers = []
        for _ in range(3):
            feature_layers += [
                layers.Conv2D(32, kernel_size = (3, 3), strides = 1, padding = 'same', activation = 'relu'),
                layers.BatchNormalization(),
                # NOTE(review): strides = 1 means this pooling barely reduces the
                # spatial size, so the Flatten output below is large — confirm
                # this is intended rather than the usual stride of 2.
                layers.MaxPooling2D(pool_size = (2, 2), strides = 1),
            ]
        # Dense head that projects the flattened feature map to the embedding.
        head_layers = [
            layers.Flatten(),
            layers.Dense(128, activation = 'relu'),
            layers.BatchNormalization(),
            layers.Dense(16),
        ]
        self.encoder = Sequential(feature_layers + head_layers)
        self.get_distance = DistanceLayer()
        self.output_layer = layers.Dense(1, activation = 'sigmoid')

    def call(self, args):
        """Score a pair of image batches.

        ``args`` is unpacked into exactly two items; each is encoded with the
        shared encoder and the sigmoid output for the pair is returned.
        """
        first, second = args
        emb_first = self.encoder(first)
        emb_second = self.encoder(second)
        return self.output_layer(self.get_distance(emb_first, emb_second))

In [None]:
def make_pairs(images, labels):
    """Build image pairs and label each pair from the class labels.

    For every index ``i`` in ``range(len(images) - 1)`` two pairs are emitted,
    in this order: ``(i, i + 1)`` and ``(i, len(images) - 1 - i)``. A pair is
    labelled 1.0 when both images carry the same class label and 0.0 otherwise,
    so pairs that straddle a class boundary, or mirrored pairs that fall inside
    one class, are labelled correctly. A pair of an image with itself is dropped.

    Returns ``(X1, X2, y)``: the two image arrays and a float32 label vector.
    """
    labels = np.asarray(labels)
    n_images = len(images)
    anchors = np.arange(n_images - 1)   # empty when there are fewer than 2 images
    left = np.repeat(anchors, 2)
    # Interleave neighbour and mirrored partners: n0, m0, n1, m1, ...
    right = np.stack([anchors + 1, n_images - 1 - anchors], axis = 1).reshape(-1)
    keep = left != right                # the centre image would pair with itself
    left, right = left[keep], right[keep]
    pair_labels = (labels[left] == labels[right]).astype('float32')
    return images[left], images[right], pair_labels

X1, X2, y = make_pairs(images, nums)
# Class balance of the generated pairs.
same = int(y.sum())
not_same = len(y) - same
X1.shape, X2.shape, y.shape, y.sum()

In [None]:
# Preview the first five pairs: X1 images on the top row, their X2 partners
# directly underneath.
n_preview = 5
for col in range(n_preview):
    for row, pair_side in enumerate((X1, X2)):
        plt.subplot(2, n_preview, row * n_preview + col + 1)
        plt.imshow(pair_side[col])
        # Hide the axis ticks; they carry no information for images.
        plt.xticks([])
        plt.yticks([])
plt.show()

In [None]:
# Instantiate the pair classifier and configure it for binary classification:
# the sigmoid output is trained with binary cross-entropy using Adam.
model = SiameseNetwork()
model.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [None]:
# Train on every generated pair for 25 epochs; no validation data is passed,
# so the reported accuracy is training accuracy only.
model.fit(x = [X1, X2], y = y, epochs = 25)

In [None]:
# Score every pair, then show the first seven: the top row is the X1 image
# titled with the ground-truth label, the bottom row is the X2 image titled
# with the model's predicted score.
y_pred = model.predict([X1, X2])
n_shown = 7
for col in range(n_shown):
    truth = 'Same' if y[col] == 1 else 'Not Same'
    score = f"{round(y_pred[col].item(), 3)}"
    panels = ((X1[col], truth), (X2[col], score))
    for row, (picture, caption) in enumerate(panels):
        plt.subplot(2, n_shown, row * n_shown + col + 1)
        plt.imshow(picture)
        plt.title(caption)
        plt.xticks([])
        plt.yticks([])
plt.tight_layout()
plt.show()