## Data Loading

In [None]:
import shutil
# Extract the uploaded archive into /content/faces.
shutil.unpack_archive("/content/faces.zip","/content/faces")

# Root folder used by the dataset loader and the embedding loop further down.
# NOTE(review): presumably the archive contains a top-level `faces/` folder,
# hence the doubled path component — confirm against the zip layout.
data_dir = "/content/faces/faces"

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt
import os

import numpy as np
import cv2 as cv
import sklearn
import pickle as pkl
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Record the versions of the core libraries this notebook was run with.
for _lib in (tf, np, sklearn):
    print(_lib.__version__)

In [None]:
# Both splits share every loader option except `subset`. Using the same seed
# and validation_split in both calls keeps the two subsets disjoint
# (80% training / 20% validation).
_split_options = dict(
    validation_split=0.2,
    image_size=(256, 256),
    batch_size=32,
    seed=123,
    shuffle=True,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_split_options
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_split_options
)

# Label index -> class name lookup exposed by the dataset.
class_names = train_ds.class_names
print(class_names)

## Visualization of a Batch

In [None]:
# Preview up to nine images from the first training batch in a 3x3 grid.
plt.figure(figsize=(10, 10))
for imgs, labels in train_ds.take(1):
    # Iterate over slices instead of indexing 0..8 so a batch holding fewer
    # than nine images is shown without raising an index error.
    for i, (sample, label) in enumerate(zip(imgs[:9], labels[:9])):
        ax = plt.subplot(3, 3, i + 1)
        # Cast to integers so imshow interprets the values on a 0-255 scale.
        plt.imshow(sample.numpy().astype("int"))
        plt.title(class_names[int(label)])
        plt.axis("off")

## Preprocessing and augmentation

In [None]:
# Side length (in pixels) of the square model input.
img_size = 256

# Input preprocessing function that ships with the ResNet50 application.
preprocess_input = tf.keras.applications.resnet50.preprocess_input

# Random augmentation pipeline: horizontal flip, slight rotation, zoom and
# contrast changes.
data_augmentation = tf.keras.Sequential(
    [
        layers.RandomFlip(mode="horizontal"),
        layers.RandomRotation(factor=0.05),
        layers.RandomZoom(height_factor=0.1),
        layers.RandomContrast(factor=0.2),
    ]
)

class L2NormLayer(layers.Layer):
    """Keras layer that L2-normalizes its input along a configurable axis.

    Args:
        axis: Axis handed to ``tf.nn.l2_normalize``. Defaults to 1.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, axis=1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        """Return ``inputs`` L2-normalized along ``self.axis``."""
        normalized = tf.nn.l2_normalize(inputs, axis=self.axis)
        return normalized

    def get_config(self):
        """Return the base layer config extended with the ``axis`` setting."""
        return {**super().get_config(), "axis": self.axis}

In [None]:
# Show nine passes of the augmentation pipeline over the same training image.
for image, _ in train_ds.take(1):
    plt.figure(figsize=(10, 10))
    first_image = image[0]
    # The pipeline is applied to batches, so add a leading batch axis once.
    single_image_batch = tf.expand_dims(first_image, 0)
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        augmented_image = data_augmentation(single_image_batch)
        # Divide by 255 so the float image is on the 0-1 scale imshow expects.
        plt.imshow(augmented_image[0] / 255)
        plt.axis('off')

## Model Building

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification top.
img_shape = (img_size, img_size, 3)
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=img_shape,
)

In [None]:
# Run one training batch through the backbone to inspect the output shape.
image_batch, label_batch = next(iter(train_ds))
feature_batch = base_model(image_batch)
print(*(t.shape for t in (feature_batch, image_batch, label_batch)))

In [None]:
# Freeze the backbone so its weights are not updated during the first
# training stage, then print its layer summary.
base_model.trainable=False
base_model.summary()

In [None]:
# Global average pooling layer; it is reused later when the full model is
# assembled. Applied here to the sample feature batch as a shape check.
global_avg_layer = layers.GlobalAveragePooling2D()
feature_batch_avg = global_avg_layer(feature_batch)

# Print the pooled shape.
print(feature_batch_avg.shape)

In [None]:
# Echo the model input shape (notebook cell output).
img_shape

In [None]:
# Input, augmentation and ResNet50 preprocessing (Keras functional API).
inputs = layers.Input(shape=img_shape)
x = preprocess_input(data_augmentation(inputs))

# Pretrained backbone, called with training=False, followed by global
# average pooling.
x = global_avg_layer(base_model(x, training=False))

# Dense head: 512 -> 256 -> 128 ReLU units, with dropout after the first two.
for _units, _drop_rate in ((512, 0.5), (256, 0.3), (128, None)):
    x = layers.Dense(_units, activation='relu')(x)
    if _drop_rate is not None:
        x = layers.Dropout(_drop_rate)(x)

x = layers.BatchNormalization()(x)

# L2-normalized 128-dimensional output; this is the face embedding.
embeddings = L2NormLayer()(x)

# Softmax classifier over the training classes, stacked on the embedding.
prediction_layer = layers.Dense(
    len(train_ds.class_names), activation='softmax'
)(embeddings)

# Two models over the same layers: one ends at the embedding, the other at
# the class probabilities.
embedding_model = tf.keras.Model(inputs, embeddings)
classification_model = tf.keras.Model(inputs, prediction_layer)

In [None]:
# Print the layer summary of the classifier.
classification_model.summary()

In [None]:
# Stage 1: train the classifier at the base learning rate.
lr = 0.0001

classification_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Stop once val_loss has not improved for 3 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = classification_model.fit(
    train_ds,
    epochs=10,
    validation_data=test_ds,
    callbacks=[early_stop],
)


In [None]:
# Plot per-epoch training and validation accuracy from the latest fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

plt.figure(figsize=(8, 5))
for _series, _label in ((acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy')):
    plt.plot(_series, label=_label)
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

# Fine-tuning

In [None]:
# Make the backbone trainable again for fine-tuning and report how many
# layers it contains.
base_model.trainable = True
print(f"no of layers {len(base_model.layers)}")

In [None]:
# Keep the first `st_from` backbone layers frozen; the remaining layers stay
# trainable.
st_from = 100
for _frozen_layer in base_model.layers[:st_from]:
    _frozen_layer.trainable = False

In [None]:
# Print the summary again after changing which backbone layers are trainable.
classification_model.summary()

In [None]:
# Stage 2: recompile with a learning rate ten times smaller than `lr` and
# continue training.
classification_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr/10),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Fresh early-stopping callback for this run: patience of 3 epochs on
# val_loss, restoring the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = classification_model.fit(
    train_ds,
    epochs=20,
    validation_data=test_ds,
    callbacks=[early_stop],
)

In [None]:
# Plot per-epoch training and validation accuracy from the latest fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

plt.figure(figsize=(8, 5))
for _series, _label in ((acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy')):
    plt.plot(_series, label=_label)
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

## Store Embeddings

In [None]:
# Echo the entries of data_dir in sorted order (notebook cell output).
sorted(os.listdir(data_dir))

In [None]:
# Build one reference embedding per sub-folder of data_dir: the mean of the
# embeddings of every readable image in that folder.
data_embedding = {}

# The model input has a fixed spatial size, so every image is resized to it.
# input_shape is (batch, height, width, channels); cv.resize wants (width, height).
_in_height, _in_width = embedding_model.input_shape[1:3]

# Sorted iteration keeps the dictionary order reproducible across runs.
for celeb in sorted(os.listdir(data_dir)):
    celeb_dir = os.path.join(data_dir, celeb)
    # Skip stray files that sit next to the class folders; listing them as
    # directories would raise.
    if not os.path.isdir(celeb_dir):
        continue

    faces = []
    for file_name in sorted(os.listdir(celeb_dir)):
        img = cv.imread(os.path.join(celeb_dir, file_name))
        # imread returns None for files it cannot decode; ignore those.
        if img is None:
            continue
        # OpenCV decodes to BGR; convert to RGB before feeding the model.
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        faces.append(cv.resize(img, (_in_width, _in_height)))

    if faces:
        # One batched predict per folder instead of one call per image.
        celeb_embeddings = embedding_model.predict(np.stack(faces), verbose=0)
        data_embedding[celeb] = np.mean(celeb_embeddings, axis=0)

## **Testing**

In [None]:
def find(embedding, database=None):
    """Return the stored identity most similar to ``embedding``.

    Args:
        embedding: 1-D embedding vector of the query face.
        database: Optional mapping of name -> reference embedding. When
            omitted, the module-level ``data_embedding`` is used.

    Returns:
        A ``(similarity, name)`` tuple for the reference with the highest
        cosine similarity; on ties the first entry in iteration order wins.
        Returns ``(-1, None)`` when there are no references, rather than a
        made-up name.
    """
    if database is None:
        database = data_embedding
    if not database:
        return -1, None

    names = list(database)
    # A single call scores the query against every reference at once.
    sims = cosine_similarity([embedding], [database[name] for name in names])[0]
    best = int(np.argmax(sims))
    return sims[best], names[best]

In [None]:
# Look up the closest stored identity for a single query image.
query_path = "/content/188_ebfc6465.jpg"
img = cv.imread(query_path)
# imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cvtColor.
if img is None:
    raise FileNotFoundError(f"could not read image: {query_path}")
# OpenCV decodes to BGR; convert to RGB before feeding the model.
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
# Resize to the model's fixed input size. input_shape is
# (batch, height, width, channels); cv.resize wants (width, height).
_in_height, _in_width = embedding_model.input_shape[1:3]
img = cv.resize(img, (_in_width, _in_height))
embedding = embedding_model.predict(np.expand_dims(img, axis=0), verbose=0)

print(find(embedding[0]))

## Model saving

In [None]:
# Persist the name -> mean-embedding dictionary so it can be reloaded later
# without recomputing it.
# NOTE(review): pickle files should only ever be loaded from trusted sources.
with open("data_embedding.pkl","wb") as f:
  pkl.dump(data_embedding,f)

In [None]:
# Save the embedding model (not the classifier) to a .keras file.
# NOTE(review): the model contains the custom L2NormLayer, so loading it from
# another script presumably requires that class to be supplied (e.g. through
# custom_objects) — confirm when writing the loader.
embedding_model.save("face_embedding_model.keras")