In [None]:
# Get this working for a single person first, then verify it with multiple people.

In [30]:
import cv2
import numpy as np
import random
import os
from matplotlib import pyplot as plt

In [31]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import tensorflow as tf

In [32]:
# Image folders, all rooted at ./data: one each for positive, negative and anchor samples.
DATA_ROOT = 'data'
POS_PATH, NEG_PATH, ANC_PATH = (
    os.path.join(DATA_ROOT, leaf) for leaf in ('positive', 'negative', 'anchor')
)

In [None]:
# Create the three image folders. exist_ok=True keeps this cell re-runnable:
# without it a second execution raises FileExistsError.
for folder in (POS_PATH, NEG_PATH, ANC_PATH):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Unpack the lfw.tgz archive via the shell's tar. The next cell reads an 'lfw'
# directory, presumably produced by this extraction — confirm the archive layout.
!tar -xf lfw.tgz

In [None]:
# Flatten the extracted 'lfw' tree: every file found one level down is moved
# straight into the negative-sample folder (same-named files are overwritten).
for person_dir in os.listdir('lfw'):
    person_path = os.path.join('lfw', person_dir)
    for image_name in os.listdir(person_path):
        os.replace(
            os.path.join(person_path, image_name),
            os.path.join(NEG_PATH, image_name),
        )
        

In [35]:
# Build one dataset of .jpg file paths per folder, keeping at most 300 paths from each.
anchor, positive, negative = (
    tf.data.Dataset.list_files(folder + '/*.jpg').take(300)
    for folder in (ANC_PATH, POS_PATH, NEG_PATH)
)

In [58]:
def preprocess(file_path):
    """Load a JPEG from disk and turn it into a model-ready image tensor.

    Args:
        file_path: Path to a JPEG file (string or scalar string tensor).

    Returns:
        A float32 tensor of shape (100, 100, 3) with values scaled to [0, 1].
    """
    # Read the raw file contents.
    byte_img = tf.io.read_file(file_path)
    # Decode to pixels. channels=3 forces RGB so a grayscale or CMYK JPEG cannot
    # produce a tensor the 3-channel model rejects, and it fixes the channel
    # dimension statically when used inside Dataset.map.
    img = tf.io.decode_jpeg(byte_img, channels=3)
    # Resize to the network's input size (this also converts to float32).
    img = tf.image.resize(img, (100, 100))
    # Scale pixel values from [0, 255] down to [0, 1].
    img = img / 255.0
    return img

In [46]:
# Scratch check: tf.ones_like on the Python scalar 1 yields a scalar int32 tensor holding 1.
tf.ones_like(1)

<tf.Tensor: shape=(), dtype=int32, numpy=1>

In [47]:
# Pair every anchor path with a comparison path and a label: zip yields
# (anchor, positive, 1.0) tuples for matching pairs and (anchor, negative, 0.0)
# tuples for non-matching pairs.
pair_count = len(anchor)
match_labels = tf.data.Dataset.from_tensor_slices(tf.ones(pair_count))
mismatch_labels = tf.data.Dataset.from_tensor_slices(tf.zeros(pair_count))
positives = tf.data.Dataset.zip((anchor, positive, match_labels))
negatives = tf.data.Dataset.zip((anchor, negative, mismatch_labels))

In [48]:
def preprocess_twin(input_img, validation_img, label):
    """Preprocess both images of a pair and pass the label through untouched.

    Args:
        input_img: File path of the first image of the pair.
        validation_img: File path of the image it is compared against.
        label: Pair label, returned as-is.

    Returns:
        A tuple (preprocessed input image, preprocessed validation image, label).
    """
    anchor_tensor = preprocess(input_img)
    comparison_tensor = preprocess(validation_img)
    return anchor_tensor, comparison_tensor, label

In [68]:
# Append the negative pairs after the positive pairs to form one dataset.
data = positives.concatenate(negatives)
samples = data.as_numpy_iterator()
# Pull the first record: a 3-tuple of (anchor path, comparison path, label).
example = next(samples)
example

(b'data/anchor/31ed2ce7-5b00-11ee-b510-41c5767b88c9.jpg',
 b'data/positive/d40c8c53-5b00-11ee-b510-41c5767b88c9.jpg',
 1.0)

In [None]:
# Sanity check: run the sampled record through the pair preprocessor,
# show the comparison image and display the pair's label.
res = preprocess_twin(*example)
_, comparison_img, pair_label = res
plt.imshow(comparison_img)
pair_label

In [69]:
# Decode and resize every (anchor, comparison, label) record.
data = data.map(preprocess_twin)
# Cache the preprocessed records in memory so images are decoded only once.
data = data.cache()
# Shuffle ONCE and keep that order on every iteration. With the default
# reshuffle_each_iteration=True, each pass over the dataset produces a new order,
# so the take()/skip() train/test split further down would select different
# elements every epoch and training samples would leak into the test set.
data = data.shuffle(buffer_size=1024, reshuffle_each_iteration=False)

In [70]:
# Training partition: the first 70% of the records, grouped into batches of 16.
# prefetch(8) prepares upcoming batches in the background so data loading
# overlaps with model training.
train_size = round(len(data) * .7)
train_data = data.take(train_size).batch(16).prefetch(8)

In [74]:
# Peek at one training batch and report how many anchor images it holds.
train_samples = train_data.as_numpy_iterator()
first_batch = next(train_samples)
len(first_batch[0])

16

In [81]:
# Test partition: skip the first 70% of the records, keep the following 30%,
# then batch by 16 and prefetch 8 batches.
total_pairs = len(data)
test_data = (
    data.skip(round(total_pairs * .7))
    .take(round(total_pairs * .3))
    .batch(16)
    .prefetch(8)
)

In [86]:
def make_embedding():
    """Build the convolutional embedding network shared by both Siamese branches.

    The network takes a 100x100 RGB image and applies four ReLU convolution
    stages (the first three each followed by 2x2 max pooling), then flattens
    and projects to a 4096-dimensional sigmoid feature vector.

    Returns:
        A Keras ``Model`` named 'embedding' mapping (100, 100, 3) images to
        4096-dimensional embeddings.
    """
    inp = Input(shape=(100, 100, 3), name='input_image')

    # NOTE: MaxPooling2D's first positional argument is pool_size. The earlier
    # MaxPooling2D(64, (2, 2), ...) therefore pooled over a 64x64 window with
    # stride 2. The intended 2x2 window is now passed by keyword; the stride and
    # 'same' padding are unchanged, so every output shape stays the same.

    # first block
    c1 = Conv2D(64, (10, 10), activation='relu')(inp)
    m1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(c1)

    # second block
    c2 = Conv2D(128, (7, 7), activation='relu')(m1)
    m2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(c2)

    # third block
    c3 = Conv2D(128, (4, 4), activation='relu')(m2)
    m3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(c3)

    # final embedding block
    c4 = Conv2D(256, (4, 4), activation='relu')(m3)
    f1 = Flatten()(c4)
    d1 = Dense(4096, activation='sigmoid')(f1)

    return Model(inputs=[inp], outputs=[d1], name='embedding')

In [95]:
# Instantiate the embedding network once and print its layer-by-layer summary.
embedding = make_embedding()
embedding.summary()

Model: "embedding"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_image (InputLayer)    [(None, 100, 100, 3)]     0         
                                                                 
 conv2d_16 (Conv2D)          (None, 91, 91, 64)        19264     
                                                                 
 max_pooling2d_12 (MaxPooli  (None, 46, 46, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_17 (Conv2D)          (None, 40, 40, 128)       401536    
                                                                 
 max_pooling2d_13 (MaxPooli  (None, 20, 20, 128)       0         
 ng2D)                                                           
                                                                 
 conv2d_18 (Conv2D)          (None, 17, 17, 128)       26

In [90]:
class L1Dist(Layer):
    """Keras layer returning the element-wise absolute difference of two embeddings."""

    def __init__(self, **kwargs):
        # Forward kwargs (name, dtype, trainable, ...) to the base Layer. Dropping
        # them silently ignored L1Dist(name=...) and broke reconstruction of the
        # layer from its config when a saved model is reloaded.
        super().__init__(**kwargs)

    def call(self, input_embedding, validation_embedding):
        """Return |input_embedding - validation_embedding|, element by element.

        Args:
            input_embedding: Embedding tensor of the input (anchor) image.
            validation_embedding: Embedding tensor of the image compared against it.

        Returns:
            A tensor holding the absolute difference of the two embeddings.
        """
        return tf.math.abs(input_embedding - validation_embedding)

In [96]:
# Two symbolic 100x100 RGB inputs, each passed through the same embedding network.
input_image = Input(shape=(100, 100, 3), name='input_img')
validation_image = Input(shape=(100, 100, 3), name='validation_img')
inp_embedding, val_embedding = embedding(input_image), embedding(validation_image)

In [97]:
# Inspect the symbolic tensor the embedding network produces for the anchor input.
inp_embedding

<KerasTensor: shape=(None, 4096) dtype=float32 (created by layer 'embedding')>

In [93]:
def make_siamese_model():
    # anchor image input in the network
    input_image = Input(name = 'input_img', shape = (100, 100, 3))

    # validation image in the network
    validation_image = Input(name= 'validation_img', shape=(100, 100, 3))

    # combine siamese distance components
    siamese_layer = L1Dist()
    siamese_layer._name = 'distance'
    distances = siamese_layer(embedding(input_image), embedding(validation_image))