In [1]:
import cv2
import os
import numpy as np
import itertools
from matplotlib import pyplot as plt

In [None]:
pip install deepface

In [2]:
import tensorflow as tf
from tensorflow import keras
from deepface import DeepFace

2024-02-16 10:12:29.400986: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
tf.config.list_physical_devices()

2024-02-16 09:27:34.036196: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2024-02-16 09:27:34.036230: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: s4m-g531gt-ubuntu
2024-02-16 09:27:34.036237: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: s4m-g531gt-ubuntu
2024-02-16 09:27:34.036330: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 520.61.5
2024-02-16 09:27:34.036349: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 520.61.5
2024-02-16 09:27:34.036354: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 520.61.5


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

# 1. Dataset

In [4]:
train_path = os.path.join('dataset', 'train')
dev_path = os.path.join('dataset', 'dev')
test_path = os.path.join('dataset', 'test')

In [5]:
IMG_SIZE = 128
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
BUFFER_SIZE = 1234
BATCH_SIZE = 256

In [6]:
def create_positive_pairs(dataset_path):
    positive_pairs = []
    for person_folder in os.listdir(dataset_path):
        images = os.listdir(os.path.join(dataset_path, person_folder))
        pairs = list(itertools.combinations(images, 2))
        positive_pairs.extend([(person_folder, pair[0], person_folder, pair[1]) for pair in pairs])
    return np.array(positive_pairs)

In [7]:
def create_negative_pairs(dataset_path, positive_pairs, len_list):
    negative_pairs = []

    for i in range(len(positive_pairs)):
        person_1, img_1, _, _ = positive_pairs[i]

        person_2 = person_1
        while person_2 == person_1:
            person_2 = str(np.random.randint(0, 104))

        img_2 = str(np.random.randint(0, len_list[int(person_2)])) + '.jpg'
        negative_pairs.append((person_1, img_1, person_2, img_2))
    return np.array(negative_pairs)

In [8]:
def load_and_preprocess_image(image_path):
    byte_img = tf.io.read_file(image_path)
    img = tf.io.decode_jpeg(byte_img)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    return img

In [9]:
def preprocess_and_create_example(person_1, img_1, person_2, img_2, dataset_path):
    image_1 = load_and_preprocess_image(os.path.join(dataset_path, person_1, img_1))
    image_2 = load_and_preprocess_image(os.path.join(dataset_path, person_2, img_2))
    
    label = int(person_1 == person_2)
    
    return ((image_1, image_2), label)

In [10]:
def create_dataset(dataset_path):
    positive = create_positive_pairs(dataset_path)
    len_dataset = 2 * len(positive)
    
    len_list = []
    for i in range(104):
        len_list.append(len(os.listdir(os.path.join(dataset_path, str(i)))))

    def generator():
        negative = create_negative_pairs(dataset_path, positive, len_list)
        data_pairs = np.concatenate((positive, negative), axis=0)
        np.random.shuffle(data_pairs)
        
        for person_1, img_1, person_2, img_2 in data_pairs:
            yield preprocess_and_create_example(person_1, img_1, person_2, img_2, dataset_path)

    output_signature = (
        (tf.TensorSpec(shape=INPUT_SHAPE, dtype=tf.float32),
        tf.TensorSpec(shape=INPUT_SHAPE, dtype=tf.float32)),
        tf.TensorSpec(shape=(), dtype=tf.float32)
    )

    tf_dataset = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    return tf_dataset, len_dataset

In [11]:
train, train_size = create_dataset(train_path)
dev, dev_size = create_dataset(dev_path)
test, test_size = create_dataset(test_path)

2024-02-13 22:28:17.912031: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-13 22:28:17.912374: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-13 22:28:17.912626: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [12]:
for x in train.take(1):
    print(x)

((<tf.Tensor: shape=(128, 128, 3), dtype=float32, numpy=
array([[[3.45098048e-01, 3.21568638e-01, 2.74509817e-01],
        [3.30514699e-01, 3.03339452e-01, 2.63572305e-01],
        [3.32873762e-01, 3.05422783e-01, 2.73131132e-01],
        ...,
        [1.27787992e-01, 7.15073496e-02, 3.66727933e-02],
        [1.45649508e-01, 7.89828449e-02, 4.70588244e-02],
        [1.52941182e-01, 8.62745121e-02, 4.70588244e-02]],

       [[3.63327205e-01, 3.39797795e-01, 2.92738974e-01],
        [3.45354378e-01, 3.18179131e-01, 2.78411984e-01],
        [3.63550037e-01, 3.36099058e-01, 3.03807408e-01],
        ...,
        [1.62565827e-01, 1.06285185e-01, 7.05961362e-02],
        [1.10529877e-01, 4.38632146e-02, 6.69830898e-03],
        [8.73161778e-02, 2.06495095e-02, 3.30882357e-03]],

       [[4.16636020e-01, 3.89644593e-01, 3.49509805e-01],
        [3.98406863e-01, 3.70988190e-01, 3.31707895e-01],
        [3.07773232e-01, 2.89491177e-01, 2.54143208e-01],
        ...,
        [2.13474885e-01, 1.614

In [13]:
print('train size:', train_size)
print('dev size:', dev_size)
print('test size:', test_size)

train size: 163144
dev size: 1686
test size: 1686


In [14]:
train = train.shuffle(BUFFER_SIZE)
dev = dev.shuffle(BUFFER_SIZE)

In [15]:
train = train.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
dev = dev.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test = test.batch(BATCH_SIZE)

In [16]:
for x, y in dev.take(1):
    print(x)

(<tf.Tensor: shape=(256, 128, 128, 3), dtype=float32, numpy=
array([[[[0.00392157, 0.0484375 , 0.01135248],
         [0.02614477, 0.06801058, 0.03663803],
         [0.05882353, 0.0994179 , 0.07715993],
         ...,
         [0.10514706, 0.15813574, 0.19093981],
         [0.03286139, 0.08013174, 0.1128482 ],
         [0.07354335, 0.10883747, 0.12844531]],

        [[0.01441483, 0.06147365, 0.02225797],
         [0.03529412, 0.07843138, 0.04705882],
         [0.04705882, 0.09019608, 0.07450981],
         ...,
         [0.08461478, 0.14100264, 0.17332494],
         [0.05161791, 0.1025983 , 0.13407809],
         [0.04193827, 0.07723239, 0.09684023]],

        [[0.01849646, 0.05771215, 0.02241803],
         [0.03783701, 0.0770527 , 0.04568015],
         [0.05490196, 0.09019608, 0.07058824],
         ...,
         [0.06408159, 0.11506198, 0.14887019],
         [0.08786765, 0.12708333, 0.16248468],
         [0.06009497, 0.09538909, 0.11499694]],

        ...,

        [[0.19088541, 0.3154258

In [21]:
file_pth = os.path.join('dataset', 'train')
result = DeepFace.verify(img1_path=os.path.join(file_pth, '98', '2.jpg'), img2_path=os.path.join(file_pth, '98', '0.jpg'))

In [30]:
result['verified']

True

In [1]:
def make_mobile_model(input_shape, embeddingDim=64):
    vgg16 = keras.applications.vgg16.VGG16(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
    )
#     for layer in mobile_net.layers[:-30]:
#         layer.trainable = False
    vgg16.trainable = False
    flatten = keras.layers.Flatten()(mobile_net.output)
    dense = keras.layers.Dense(512, activation='relu')(flatten)
#     drop = keras.layers.Dropout(0.4)(dense)
    embed = keras.layers.Dense(embeddingDim, activation='relu')(dense)
    
    model = keras.models.Model(inputs=mobile_net.input, outputs=embed)
    return model

In [None]:
def create_siamese_model(input_shape):
    input_1 = keras.layers.Input(shape=input_shape)
    input_2 = keras.layers.Input(shape=input_shape)

    siamese_network = make_mobile_model(input_shape)
    encoded_1 = siamese_network(input_1)
    encoded_2 = siamese_network(input_2)

    distance = tf.abs(tf.subtract(encoded_1, encoded_2))

    output = keras.layers.Dense(1, activation='sigmoid')(distance)

    siamese_model = keras.models.Model(inputs=[input_1, input_2], outputs=output)

    return siamese_model

In [None]:
model = create_siamese_model(INPUT_SHAPE)
model.summary()

In [None]:
METRICS = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.F1Score(),
    keras.metrics.Precision(),
    keras.metrics.Recall()
]

In [None]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=METRICS
)

In [None]:
save_path = os.path.join('saved_model', 'model.h5')

checkpoints = keras.callbacks.ModelCheckpoint(
    save_path,
    monitor = 'val_loss',
    save_best_only = True,
    mode = 'min'
)

In [None]:
hist = model.fit(
    train,
    epochs=50,
    verbose=1,
    validation_data=dev,
    callbacks=[checkpoints]
)