Import the necessary libraries
and mount Google Drive to access the data

In [None]:
# Mount Google Drive at '/content/drive'.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')
# Import the required libraries
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torch.utils.data.dataset import Subset
import torch.nn.functional as F
from PIL import Image
import zipfile
import requests
from io import BytesIO
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Layer, Flatten, Dense, Dropout, Input
from tensorflow.keras.applications import ResNet50V2
from sklearn.metrics import accuracy_score
from keras import regularizers


MessageError: ignored

Extract the data from the zip file stored on Google Drive and save it in the data folder

In [None]:
# Extract zip file
# Install the gdown library if not already installed
!pip install gdown

# Import required libraries
import os
import zipfile
import gdown

# Google Drive file ID from the sharing link
file_id = '1gwIcChfW5Zbe-Qw0Rqi2vOVWyQc2PAY1'

# URL to download the file from Google Drive
url = f'https://drive.google.com/uc?id={file_id}'

# Target directory for extraction
target_dir = '/content/data'  # Replace with your desired target directory

# Create the target directory if it doesn't exist
os.makedirs(target_dir, exist_ok=True)

# Name the temporary archive after the file ID. os.path.basename(url) would
# give 'uc?id=<file_id>', i.e. a file name containing '?' and '=' and no
# '.zip' extension.
file_name = f'{file_id}.zip'

# Construct the file path
zip_file_path = os.path.join(target_dir, file_name)

try:
    # Download the file using gdown.
    # NOTE(review): some gdown versions signal failure by returning None
    # instead of raising — the check below covers that case.
    downloaded_path = gdown.download(url, zip_file_path, quiet=False)
    if downloaded_path is None:
        raise RuntimeError(f'Download failed for {url}')

    # Extract the zip file
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(target_dir)
finally:
    # Delete the downloaded zip file, also when the download or the
    # extraction failed, so no partial archive is left in the data folder.
    if os.path.exists(zip_file_path):
        os.remove(zip_file_path)

print('File extracted successfully.')


Data Preprocessing

In [None]:
# Preprocessing pipeline applied to every image: resize to 56x56, convert to
# a tensor, then normalize each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(56, 56)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

Load Dataset

In [None]:
# Load dataset from extracted folder
# `transform` (defined above) is handed to ImageFolder as its per-image transform.
data_path = '/content/data/lfw-deepfunneled'  # Replace with the extracted data path
dataset = ImageFolder(root=data_path, transform=transform)

**L2** **Normalization**

In [None]:

# Define the l2_norm function
def l2_norm(input, axis=1, eps=1e-12):
    """Scale ``input`` to unit L2 norm along ``axis``.

    Args:
        input: Tensor to normalize. The name shadows the builtin but is kept
            so existing keyword callers keep working.
        axis: Dimension along which the norm is computed.
        eps: Lower bound applied to the norm before dividing, so that an
            all-zero slice yields zeros instead of NaN.

    Returns:
        A tensor with the same shape as ``input``.
    """
    norm = torch.norm(input, 2, axis, keepdim=True).clamp_min(eps)
    return torch.div(input, norm)

**SphereFace Loss Function**

In [None]:
# Define the SphereFace head
class SphereFace(nn.Module):
    """SphereFace head: multiplicative angular margin on the target class.

    Args:
        embedding_size: Length of each embedding vector.
        classnum: Number of classes (columns of ``kernel``).
        m: Angular multiplier applied to the target-class angle.
        s: Scale applied to every logit before the softmax loss.
    """

    def __init__(self, embedding_size=512, classnum=2, m=4, s=64.0):
        super(SphereFace, self).__init__()
        self.classnum = classnum
        self.kernel = nn.Parameter(torch.Tensor(embedding_size, classnum))

        # Initialize the kernel: uniform values, then each column rescaled
        # to (approximately) unit L2 norm.
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = m
        self.s = s

    def forward(self, embeddings, labels):
        """Return scaled logits of shape ``(len(embeddings), classnum)``.

        Args:
            embeddings: 2-D tensor of embeddings; assumed to have unit-norm
                rows so that the matrix product is a cosine (train_model
                normalizes them before calling).
            labels: Integer class index per row; only the matching column
                receives the margin.
        """
        # Unit-norm class centres (column-wise normalization of the kernel).
        kernel_norm = self.kernel / torch.norm(self.kernel, 2, 0, keepdim=True)
        cosine = torch.mm(embeddings, kernel_norm)

        index_mask = torch.zeros_like(cosine)
        index_mask.scatter_(1, labels.view(-1, 1), 1)

        theta = torch.acos(torch.clamp(cosine, -1 + 1e-7, 1 - 1e-7))

        # cos(m * theta) alone is not monotonic in theta. SphereFace uses
        # psi(theta) = (-1)^k * cos(m * theta) - 2k on [k*pi/m, (k+1)*pi/m],
        # which decreases monotonically over [0, pi].
        k = torch.floor(self.m * theta / math.pi)
        sign = 1.0 - 2.0 * torch.remainder(k, 2)
        psi = sign * torch.cos(self.m * theta) - 2.0 * k

        # Replace the target-class cosine by psi, then scale ALL logits by s.
        # Previously s multiplied only the margin difference and left the
        # non-target logits unscaled.
        sphereface_logits = self.s * (cosine + (psi - cosine) * index_mask)

        return sphereface_logits




**CosFace Loss Function**


In [None]:
# Define the CosFace head
class CosFace(nn.Module):
    """CosFace head: subtracts a fixed margin from the target-class cosine.

    Args:
        embedding_size: Length of each embedding vector.
        classnum: Number of classes (columns of ``kernel``).
        m: Margin subtracted from the target-class cosine.
        s: Scale applied to every logit.
    """

    def __init__(self, embedding_size=512, classnum=2, m=0.35, s=64.0):
        super().__init__()
        self.classnum = classnum

        # Class-centre matrix: uniform values, each column then rescaled to
        # (approximately) unit L2 norm before being wrapped as a parameter.
        weight = torch.Tensor(embedding_size, classnum)
        weight.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.kernel = nn.Parameter(weight)

        self.m = m
        self.s = s

    def forward(self, embeddings, labels):
        """Return ``s * (cosine - m * one_hot(labels))``."""
        unit_kernel = l2_norm(self.kernel, axis=0)
        cosine = torch.mm(embeddings, unit_kernel)

        # One-hot mask selecting the target-class column of every row.
        one_hot = torch.zeros_like(cosine).scatter_(1, labels.view(-1, 1), 1)

        return (cosine - self.m * one_hot) * self.s

**ArcFace Loss Function**

In [None]:
# Define the ArcFace head
class ArcFace(nn.Module):
    """ArcFace head: adds an angular margin to the target-class angle.

    Args:
        embedding_size: Length of each embedding vector.
        classnum: Number of classes (columns of ``kernel``).
        m: Margin (radians) added to the target-class angle.
        s: Scale applied to every logit before the softmax loss.
    """

    def __init__(self, embedding_size=512, classnum=2, m=0.5, s=64.0):
        super(ArcFace, self).__init__()
        self.classnum = classnum
        self.kernel = nn.Parameter(torch.Tensor(embedding_size, classnum))

        # Initial kernel: uniform values, then each column rescaled to
        # (approximately) unit L2 norm.
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = m
        self.eps = 1e-3
        self.s = s

    def forward(self, embeddings, labels):
        """Return scaled logits of shape ``(len(embeddings), classnum)``.

        Args:
            embeddings: 2-D tensor of embeddings; assumed to have unit-norm
                rows so that the matrix product is a cosine (train_model
                normalizes them before calling).
            labels: Integer class index per row; only the matching column
                receives the margin.
        """
        # Unit-norm class centres (column-wise normalization of the kernel).
        kernel_norm = self.kernel / torch.norm(self.kernel, 2, 0, keepdim=True)
        cosine = torch.mm(embeddings, kernel_norm)
        cosine = cosine.clamp(-1 + self.eps, 1 - self.eps)  # For stability

        index_mask = torch.zeros_like(cosine)
        index_mask.scatter_(1, labels.view(-1, 1), 1)

        theta = torch.acos(cosine)
        margin_cosine = torch.cos(theta + self.m)

        # Replace the target-class cosine by cos(theta + m), then scale ALL
        # logits by s. Previously s multiplied only the margin difference and
        # left the non-target logits unscaled, unlike CosFace and AdaFace.
        arcface_logits = self.s * (cosine + (margin_cosine - cosine) * index_mask)

        return arcface_logits

**AdaFace Loss Function**

In [None]:
# Define the AdaFace head
class AdaFace(nn.Module):
    """AdaFace head: margin adapted per sample from the embedding norm.

    Args:
        embedding_size: Length of each embedding vector.
        classnum: Number of classes (columns of ``kernel``).
        m: Base margin.
        h: Factor applied to the standardized norms before clipping to [-1, 1].
        s: Scale applied to every logit.
        t_alpha: Weight of the current batch when updating the stored
            ``batch_mean`` / ``batch_std`` buffers.
    """

    def __init__(self, embedding_size=512, classnum=2, m=0.4, h=0.333, s=64., t_alpha=1.0):
        super().__init__()
        self.classnum = classnum

        # Class-centre matrix: uniform values, each column then rescaled to
        # (approximately) unit L2 norm before being wrapped as a parameter.
        weight = torch.Tensor(embedding_size, classnum)
        weight.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.kernel = nn.Parameter(weight)

        self.m = m
        self.eps = 1e-3
        self.h = h
        self.s = s

        # Running statistics of the embedding norms (`t` is registered but
        # not read anywhere in this class).
        self.t_alpha = t_alpha
        self.register_buffer('t', torch.zeros(1))
        self.register_buffer('batch_mean', torch.full((1,), 20.0))
        self.register_buffer('batch_std', torch.full((1,), 100.0))

        print('\nAdaFace with the following property')
        print('self.m', self.m)
        print('self.h', self.h)
        print('self.s', self.s)
        print('self.t_alpha', self.t_alpha)

    def forward(self, embeddings, norms, label):
        """Return scaled logits with norm-dependent angular and additive margins.

        Args:
            embeddings: 2-D tensor of embeddings (train_model passes
                unit-norm rows).
            norms: Per-sample embedding norms; train_model passes them with
                shape (batch, 1).
            label: Integer class index per row.

        Side effect: reassigns the ``batch_mean`` and ``batch_std`` buffers
        on every call.
        """
        unit_kernel = l2_norm(self.kernel, axis=0)
        cosine = torch.mm(embeddings, unit_kernel).clamp(-1 + self.eps, 1 - self.eps)  # For stability

        # Bounded copy of the norms, cut off from the autograd graph.
        safe_norms = torch.clip(norms, min=0.001, max=100).clone().detach()

        # Blend the current batch statistics into the stored ones.
        with torch.no_grad():
            current_mean = safe_norms.mean().detach()
            current_std = safe_norms.std().detach()
            self.batch_mean = current_mean * self.t_alpha + (1 - self.t_alpha) * self.batch_mean
            self.batch_std = current_std * self.t_alpha + (1 - self.t_alpha) * self.batch_std

        # Standardized norm, scaled by h and clipped to [-1, 1].
        margin_scaler = (safe_norms - self.batch_mean) / (self.batch_std + self.eps)
        margin_scaler = torch.clip(margin_scaler * self.h, -1, 1)

        # One-hot mask of the target class; shared by both margin terms.
        one_hot = torch.zeros(label.size()[0], cosine.size()[1], device=cosine.device)
        one_hot.scatter_(1, label.reshape(-1, 1), 1.0)

        # Angular part: shift the target angle by -m * margin_scaler.
        g_angular = self.m * margin_scaler * -1
        theta_m = torch.clip(
            cosine.acos() + one_hot * g_angular,
            min=self.eps,
            max=math.pi - self.eps,
        )

        # Additive part: subtract m + m * margin_scaler from the target cosine.
        g_add = self.m + (self.m * margin_scaler)
        return (theta_m.cos() - one_hot * g_add) * self.s


**CNN Model**

In [None]:
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional encoder that maps an image batch to embeddings.

    Args:
        embedding_size: Number of output features of the final linear layer.
        num_classes: Accepted but not used by this class.
    """

    def __init__(self, embedding_size=512, num_classes=2):
        super().__init__()
        conv_stack = [
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        # The linear layer expects 64 feature maps of 14 x 14, which is what
        # the two stride-2 poolings leave of a 56 x 56 input.
        self.fc = nn.Linear(64 * 14 * 14, embedding_size)

    def forward(self, x):
        """Return embeddings of shape ``(batch, embedding_size)``."""
        feature_maps = self.features(x)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat)

In [None]:
# Define the training loop function
def train_model(model, optimizer, dataloader, criterion, device):
    """Run one training pass over ``dataloader``.

    Args:
        model: Network producing one embedding per image.
        optimizer: Optimizer stepped once per batch.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: An ArcFace, AdaFace, CosFace or SphereFace head that turns
            normalized embeddings (and labels) into logits.
        device: Device the batches are moved to.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch cross-entropy
        losses and the percentage of samples whose highest logit matches the
        label. The logits are the margin-adjusted ones returned by the head.

    Raises:
        ValueError: If ``criterion`` is none of the four supported heads.
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        embeddings = model(images)

        # Per-sample L2 norm and the unit-length embedding
        norms = torch.norm(embeddings, 2, dim=1, keepdim=True)
        unit_embeddings = embeddings / norms

        # AdaFace additionally needs the norms; the other heads share one call shape
        if isinstance(criterion, AdaFace):
            logits = criterion(unit_embeddings, norms, labels)
        elif isinstance(criterion, (ArcFace, CosFace, SphereFace)):
            logits = criterion(unit_embeddings, labels)
        else:
            raise ValueError("Invalid criterion")

        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

    return running_loss / len(dataloader), 100.0 * n_correct / n_seen


In [None]:
# Set random seed for reproducibility
torch.manual_seed(0)
# Split dataset into train, validation, and test sets
# (70% / 15% / remainder; the test set absorbs whatever the two int()
# truncations leave over)
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Two-stage split: first carve off the training subset, then divide what is
# left into validation and test subsets.
train_dataset, remaining_dataset = random_split(dataset, [train_size, len(dataset) - train_size])
val_dataset, test_dataset = random_split(remaining_dataset, [val_size, test_size])

# Create data loaders
# Only the training loader shuffles; validation and test keep a fixed order.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Get the actual number of classes from the dataset
# (used below as the class dimension of every margin head's kernel)
classnum = len(dataset.classes)
print(classnum)

In [None]:
# Embedding width shared by SimpleCNN's output layer and every margin head
embedding_size = 512

In [None]:
# Instantiate the SphereFace, CosFace, ArcFace and AdaFace heads
sphereface = SphereFace(embedding_size=embedding_size, classnum=classnum, m=4, s=64.0)
cosface = CosFace(embedding_size=embedding_size, classnum=classnum, m=0.35, s=64.0)
arcface = ArcFace(embedding_size=embedding_size, classnum=classnum, m=0.5, s=64.0)
adaface = AdaFace(embedding_size=embedding_size, classnum=classnum, m=0.4, h=0.333, s=64., t_alpha=0.01)

# Instantiate the CNN model
model = SimpleCNN(embedding_size=embedding_size, num_classes=classnum)

# Every head owns a learnable class-centre matrix (`kernel`). Passing only
# model.parameters() to the optimizer would leave those matrices frozen at
# their random initialisation, so they are optimized together with the model.
# NOTE(review): a head that is not used in a given pass should receive no
# update; this relies on optimizer.zero_grad() resetting gradients to None
# (the default in recent PyTorch releases) — confirm for the installed version.
trainable_params = list(model.parameters())
for head in (sphereface, cosface, arcface, adaface):
    trainable_params += list(head.parameters())
optimizer = optim.Adam(trainable_params, lr=0.001)

In [None]:
# Pick the GPU when one is available, otherwise the CPU, and move the four
# margin heads and the CNN there (the optimizer itself is not moved).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for margin_head in (sphereface, cosface, arcface, adaface):
    margin_head.to(device)
model.to(device)

In [None]:
# Training loop
# Each epoch runs one full pass over train_loader per head, in the order
# ArcFace, AdaFace, CosFace, SphereFace, always with the same `model` and
# the same `optimizer`.
# NOTE(review): because the model is shared, the four heads are not trained
# independently and the per-head numbers printed below are not a
# like-for-like comparison — confirm whether one model per head was intended.
num_epochs = 40
for epoch in range(num_epochs):
    arcface_loss, arcface_accuracy = train_model(model, optimizer, train_loader, arcface, device)
    adaface_loss, adaface_accuracy = train_model(model, optimizer, train_loader, adaface, device)
    cosface_loss, cosface_accuracy = train_model(model, optimizer, train_loader, cosface, device)
    sphereface_loss, sphereface_accuracy = train_model(model, optimizer, train_loader, sphereface, device)
    print(f'Epoch [{epoch + 1}/{num_epochs}]')

    # The SphereFace pass still runs above; only its report line is disabled.
    #print(f'SphereFace Loss: {sphereface_loss:.4f} | SphereFace Accuracy: {sphereface_accuracy:.2f}%')
    print(f'CosFace Loss: {cosface_loss:.4f} | CosFace Accuracy: {cosface_accuracy:.2f}%')
    print(f'ArcFace Loss: {arcface_loss:.4f} | ArcFace Accuracy: {arcface_accuracy:.2f}%')
    print(f'AdaFace Loss: {adaface_loss:.4f} | AdaFace Accuracy: {adaface_accuracy:.2f}%')

    print()


In [None]:
def evaluate_model(model, dataloader, criterion, device):
    """Return the accuracy (in percent) of ``model`` + ``criterion`` on ``dataloader``.

    The model is switched to eval mode and gradients are disabled. The head
    is called with the ground-truth labels, so the prediction is the argmax
    of the margin-adjusted logits.

    NOTE(review): AdaFace.forward reassigns its batch_mean / batch_std
    buffers on every call, so evaluating with an AdaFace criterion also
    changes those buffers — confirm this is acceptable.

    Raises:
        ValueError: If ``criterion`` is none of the four supported heads.
    """
    model.eval()
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            embeddings = model(images)

            # Per-sample L2 norm and the unit-length embedding
            norms = torch.norm(embeddings, 2, dim=1, keepdim=True)
            unit_embeddings = embeddings / norms

            # AdaFace additionally needs the norms; the other heads share one call shape
            if isinstance(criterion, AdaFace):
                logits = criterion(unit_embeddings, norms, labels)
            elif isinstance(criterion, (ArcFace, CosFace, SphereFace)):
                logits = criterion(unit_embeddings, labels)
            else:
                raise ValueError("Invalid criterion")

            predicted = logits.max(1)[1]
            n_correct += predicted.eq(labels).sum().item()
            n_seen += labels.size(0)

    return 100.0 * n_correct / n_seen


In [None]:
# Evaluate on validation set
# The same `model` is scored once per head; each call returns an accuracy in percent.
sphereface_val_accuracy = evaluate_model(model, val_loader, sphereface, device)
cosface_val_accuracy = evaluate_model(model, val_loader, cosface, device)
arcface_val_accuracy = evaluate_model(model, val_loader, arcface, device)
adaface_val_accuracy = evaluate_model(model, val_loader, adaface, device)
# SphereFace accuracy is computed above but its report line is disabled.
#print(f'Validation Accuracy - SphereFace: {sphereface_val_accuracy:.2f}%')
print(f'Validation Accuracy - CosFace: {cosface_val_accuracy:.2f}%')
print(f'Validation Accuracy - ArcFace: {arcface_val_accuracy:.2f}%')
print(f'Validation Accuracy - AdaFace: {adaface_val_accuracy:.2f}%')



In [None]:
# Evaluate on test set
# The same `model` is scored once per head; each call returns an accuracy in percent.
sphereface_test_accuracy = evaluate_model(model, test_loader, sphereface, device)
cosface_test_accuracy = evaluate_model(model, test_loader, cosface, device)
arcface_test_accuracy = evaluate_model(model, test_loader, arcface, device)
adaface_test_accuracy = evaluate_model(model, test_loader, adaface, device)

# SphereFace accuracy is computed above but its report line is disabled.
#print(f'Test Accuracy - SphereFace: {sphereface_test_accuracy:.2f}%')
print(f'Test Accuracy - CosFace: {cosface_test_accuracy:.2f}%')
print(f'Test Accuracy - ArcFace: {arcface_test_accuracy:.2f}%')
print(f'Test Accuracy - AdaFace: {adaface_test_accuracy:.2f}%')

In [None]:
# # Import necessary libraries
# from sklearn.model_selection import ParameterGrid
# import copy

# # Define the hyperparameter grid
# hyperparameters = {
#     'loss_function': ['AdaFace', 'CosFace',  'ArcFace'],
#     'embedding_size': [128, 256, 512],
#     'm_values': [0.3, 0.4, 0.5],
#     's_values': [32.0, 64.0, 128.0],
#     'h_values': [0.2, 0.333, 0.5],
#     't_alpha_values': [0.01, 0.1, 0.5],
#     'lr': [0.001, 0.0001]
# }

# # Create a list to store the results
# results = []

# # Iterate through the hyperparameter grid
# for params in ParameterGrid(hyperparameters):
#     loss_function = params['loss_function']
#     embedding_size = params['embedding_size']

#     # Instantiate the appropriate loss function
#     if loss_function == 'AdaFace':
#         loss_fn = AdaFace(embedding_size=embedding_size, classnum=classnum,
#                           m=params['m_values'], h=params['h_values'],
#                           s=params['s_values'], t_alpha=params['t_alpha_values'])
#     elif loss_function == 'CosFace':
#         loss_fn = CosFace(embedding_size=embedding_size, classnum=classnum,
#                           m=params['m_values'], s=params['s_values'])
#     elif loss_function == 'ArcFace':
#         loss_fn = ArcFace(embedding_size=embedding_size, classnum=classnum,
#                           m=params['m_values'], s=params['s_values'])
#     else:
#         raise ValueError("Invalid loss function")

#     # Create a new instance of the model for each set of hyperparameters
#     model = SimpleCNN(embedding_size=embedding_size, num_classes=classnum)
#     optimizer = optim.Adam(model.parameters(), lr=params['lr'])

#     # Move the model and loss function to the device
#     model.to(device)
#     loss_fn.to(device)

#     # Training loop
#     num_epochs = 10
#     for epoch in range(num_epochs):
#         train_loss, train_accuracy = train_model(model, optimizer, train_loader, loss_fn, device)

#     # Evaluate on the validation set
#     val_accuracy = evaluate_model(model, val_loader, loss_fn, device)

#     # Store the results
#     result = copy.deepcopy(params)
#     result['val_accuracy'] = val_accuracy
#     results.append(result)

# # Print the results
# for result in results:
#     print(result)


In [None]:

# Load and preprocess the dataset (modify the data_dir accordingly)
# Every sub-directory of data_dir is treated as one class; its name is the label.
data_dir = '/content/data/lfw-deepfunneled'
X = []
Y = []

# os.listdir returns entries in arbitrary order. Sorting both listings fixes
# the sample order, so the seeded train/test split below is reproducible
# from one machine or run to the next.
for class_name in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_path):
        continue
    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)
        image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
        X.append(np.array(image))
        Y.append(class_name)

# Class names -> integer ids -> one-hot rows
label_encoder = LabelEncoder()
Y_labelEnc = label_encoder.fit_transform(Y)
Y_onehot = tf.keras.utils.to_categorical(Y_labelEnc, num_classes=len(label_encoder.classes_))

# 75% / 25% shuffled split with a fixed seed
X_train, X_test, Y_train, Y_test = train_test_split(X, Y_onehot, test_size=0.25, random_state=104, shuffle=True)


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.layers import Flatten, Dense, Dropout
from sklearn.metrics import accuracy_score
import numpy as np

# Define the AdaFace loss function
class AdaFaceLoss(tf.keras.losses.Loss):
    """AdaFace-style loss: margin adapted per sample, applied to the true class.

    ``y_pred`` is interpreted as one row of per-class cosine similarities per
    sample and ``y_true`` as the matching one-hot rows. The previous version
    never read ``y_true``, used the batch-by-batch product ``y_pred @ y_pred^T``
    as logits, and compared two softmax outputs with each other, so the
    labels had no influence on the loss.

    NOTE(review): the loss only receives ``y_pred``, so the row norm of
    ``y_pred`` is used as the quality proxy (as before) rather than the norm
    of an embedding — confirm this is the intended signal. The model built
    later in this file ends in a softmax layer, so ``y_pred`` holds
    probabilities rather than cosines — confirm.

    Args:
        m: Base margin.
        h: Factor applied to the standardized norms before clipping to [-1, 1].
        s: Scale applied to every logit.
        t_alpha: Stored for interface compatibility. No running average is
            kept; the statistics of the current batch are used directly.
    """

    def __init__(self, m=0.4, h=0.333, s=64.0, t_alpha=1.0, **kwargs):
        super(AdaFaceLoss, self).__init__(**kwargs)
        self.m = m
        self.h = h
        self.s = s
        self.t_alpha = t_alpha

    def call(self, y_true, y_pred):
        """Return the per-sample cross-entropy of the margin-adjusted logits."""
        eps = 1e-6
        y_pred = tf.convert_to_tensor(y_pred)
        one_hot = tf.cast(y_true, y_pred.dtype)

        # Per-sample norm kept as a column so it broadcasts over the class
        # axis; it only steers the margin and carries no gradient.
        norms = tf.stop_gradient(tf.norm(y_pred, axis=1, keepdims=True))
        batch_mean = tf.reduce_mean(norms)
        batch_std = tf.math.reduce_std(norms)
        margin_scaler = (norms - batch_mean) / (batch_std + eps) * self.h
        margin_scaler = tf.clip_by_value(margin_scaler, -1.0, 1.0)

        cosine = tf.clip_by_value(y_pred, -1.0 + eps, 1.0 - eps)
        theta = tf.acos(cosine)

        # Angular part: shift only the true-class angle by -m * margin_scaler.
        g_angular = -self.m * margin_scaler
        theta_m = tf.clip_by_value(theta + one_hot * g_angular, eps, np.pi - eps)

        # Additive part: subtract m + m * margin_scaler from the true-class cosine.
        g_add = self.m + self.m * margin_scaler
        logits = self.s * (tf.cos(theta_m) - one_hot * g_add)

        return tf.keras.losses.categorical_crossentropy(one_hot, logits, from_logits=True)

class ArcFaceLoss(tf.keras.losses.Loss):
    """ArcFace loss: additive angular margin applied to the true class only.

    ``y_pred`` is interpreted as one row of per-class cosine similarities per
    sample and ``y_true`` as the matching one-hot rows. The previous version
    never read ``y_true``, added the margin to every class, and compared two
    softmax outputs with each other, so the labels had no influence on the
    loss.

    NOTE(review): the model built later in this file ends in a softmax
    layer, so ``y_pred`` holds probabilities rather than cosines — confirm.

    Args:
        m: Margin (radians) added to the true-class angle.
        s: Scale applied to every logit.
    """

    def __init__(self, m=0.5, s=64.0, **kwargs):
        super(ArcFaceLoss, self).__init__(**kwargs)
        self.m = m
        self.s = s

    def call(self, y_true, y_pred):
        """Return the per-sample cross-entropy of the margin-adjusted logits."""
        y_pred = tf.convert_to_tensor(y_pred)
        one_hot = tf.cast(y_true, y_pred.dtype)

        # Keep the cosine strictly inside (-1, 1) so acos stays finite.
        cosine_similarity = tf.clip_by_value(y_pred, -1.0 + 1e-6, 1.0 - 1e-6)
        theta = tf.acos(cosine_similarity)
        margin_cosine = tf.cos(theta + self.m)

        # True class gets cos(theta + m); every other class keeps its cosine.
        logits = self.s * (one_hot * margin_cosine + (1.0 - one_hot) * cosine_similarity)

        return tf.keras.losses.categorical_crossentropy(one_hot, logits, from_logits=True)



# Load and preprocess your data (X_train, Y_train, X_test, Y_test, label_encoder, etc.)

# Define the full face recognition model using ResNet50V2
def build_face_model(num_classes):
    """Return a new, uncompiled classifier on top of an ImageNet ResNet50V2 base."""
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    flatten_layer = Flatten()(base_model.output)
    dense_layer1 = Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(flatten_layer)
    dropout_layer1 = Dropout(0.5)(dense_layer1)
    dense_layer2 = Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(dropout_layer1)
    dropout_layer2 = Dropout(0.5)(dense_layer2)
    output_layer = Dense(num_classes, activation='softmax')(dropout_layer2)
    return tf.keras.Model(inputs=base_model.input, outputs=output_layer)


def train_and_evaluate(loss):
    """Train a fresh model with ``loss`` and return ``(model, test_accuracy)``."""
    fresh_model = build_face_model(len(label_encoder.classes_))
    fresh_model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
    fresh_model.fit(X_train_array, Y_train_array, batch_size=32, epochs=5, validation_split=0.1)

    predicted_indices = np.argmax(fresh_model.predict(X_test_array), axis=1)
    predicted_labels = label_encoder.inverse_transform(predicted_indices)
    return fresh_model, accuracy_score(Y_test_labels, predicted_labels)


# Convert the splits once instead of before every fit/predict call
X_train_array = np.array(X_train)
Y_train_array = np.array(Y_train)
X_test_array = np.array(X_test)
Y_test_indices = np.argmax(Y_test, axis=1)
Y_test_labels = label_encoder.inverse_transform(Y_test_indices)

# Train and evaluate a model with the ArcFace loss function
model, acc_arcface = train_and_evaluate(ArcFaceLoss())
print('Accuracy with ArcFace: %.3f' % acc_arcface)

# Train and evaluate a NEW model with the AdaFace loss function. Calling
# compile() again on the ArcFace-trained model would keep its weights, so the
# AdaFace run would start from an already trained network and the two
# accuracies would not be comparable.
model, acc_adaface = train_and_evaluate(AdaFaceLoss())
print('Accuracy with AdaFace: %.3f' % acc_adaface)

# Compare the performance of AdaFace and ArcFace
print('Accuracy comparison:')
print('AdaFace: %.3f' % acc_adaface)
print('ArcFace: %.3f' % acc_arcface)

# ... (rest of your code)

