# FaceNet的实现

## 参考 https://github.com/tamerthamoqa/facenet-pytorch-vggface2

## 修改部分：

1. 修复了变成ipynb后的bug。

2. 减少了无用的plot，并改善plot的画质。

3. 修正了loss的计算。

In [None]:
#基本的引入，按字母表排序
import argparse
import numpy as np
from numpy import random
import os
import random
#from random import shuffle
import sys
sys.path.append(r'dataloaders')
#torch部分的import
import torch
from torch.autograd import Function
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.distance import PairwiseDistance
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

from tqdm import tqdm

#从py文件中引入functions
from dataloaders.LFWDataset import LFWDataset
from dataloaders.triplet_loss_dataloader import TripletFaceDataset
from validate_on_LFW import evaluate_lfw
from plot import plot_roc_lfw, plot_accuracy_lfw, plot_triplet_losses


# 设置随机数种子

In [None]:
def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy and the file-level
    ``random`` module, then configures cuDNN for deterministic behaviour.

    Args:
        seed (int): Seed value shared by all generators.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select a different convolution algorithm on
    # each run, which defeats seeding; disable it together with
    # `deterministic`.
    torch.backends.cudnn.benchmark = False


# Set the random seed
setup_seed(20)

# 设立args

In [None]:
def _str2bool(value):
    """Convert a command-line token such as 'true' / 'False' / '0' to a bool.

    ``type=bool`` cannot be used with argparse because ``bool("False")`` is
    ``True`` (any non-empty string is truthy).
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got {!r}.".format(value))


def config(argv=None):
    """Build the argument parser and return the parsed training settings.

    Args:
        argv (list[str] | None): Argument tokens to parse. ``None`` (the
            default) parses an empty list, so every option takes its default
            value; this keeps the function usable inside a notebook, where
            ``sys.argv`` belongs to the kernel.

    Returns:
        argparse.Namespace: The parsed settings.
    """
    parser = argparse.ArgumentParser(description="Training a FaceNet facial recognition model using Triplet Loss.")
    # Dataset
    parser.add_argument('--dataroot', '-d', type=str, default="VGG",
                        help="(REQUIRED) Absolute path to the dataset folder"
                        )
    # LFW
    parser.add_argument('--lfw', type=str, default="Cropped_images",
                        help="(REQUIRED) Absolute path to the labeled faces in the wild dataset folder"
                        )
    parser.add_argument('--dataset_csv', type=str, default='datasets/lfw_train.csv',
                        help="Path to the csv file containing the image paths of the training dataset."
                        )
    parser.add_argument('--lfw_batch_size', default=64, type=int,
                        help="Batch size for LFW dataset (default: 64)"
                        )
    parser.add_argument('--lfw_validation_epoch_interval', default=1, type=int,
                        help="Perform LFW validation every n epoch interval (default: every 1 epoch)"
                        )
    # Training settings
    parser.add_argument('--model', type=str, default="resnet18", choices=["resnet18", "resnet34"],
                        help="The required model architecture for training: ('resnet18','resnet34'), "
                             "(default: 'resnet18')"
                        )
    parser.add_argument('--epochs', default=10, type=int,
                        help="Required training epochs (default: 10)"
                        )
    parser.add_argument('--training_triplets_path', default=None, type=str,
                        help="Path to training triplets numpy file in 'datasets/' folder to skip training triplet "
                             "generation step."
                        )
    parser.add_argument('--num_triplets_train', default=10000, type=int,
                        help="Number of triplets for training (default: 10000)"
                        )
    parser.add_argument('--resume_path', default='', type=str,
                        help='path to latest model checkpoint: '
                             '(Model_training_checkpoints/model_resnet34_epoch_0.pt file) (default: None)'
                        )
    parser.add_argument('--batch_size', default=64, type=int,
                        help="Batch size (default: 64)"
                        )
    parser.add_argument('--num_workers', default=0, type=int,
                        help="Number of workers for data loaders (default: 0)"
                        # A non-zero value raised an error for the author because shared
                        # memory was insufficient, and raising the shared-memory limit
                        # needs sudo. No other workaround was found.
                        )
    parser.add_argument('--embedding_dim', default=256, type=int,
                        help="Dimension of the embedding vector (default: 256)"
                        )
    # `_str2bool` instead of `type=bool`: with `type=bool`, "--pretrained False" parsed as True.
    parser.add_argument('--pretrained', default=True, type=_str2bool,
                        help="Download a model pretrained on the ImageNet dataset (default: True)"
                        )
    parser.add_argument('--optimizer', type=str, default="adam", choices=["sgd", "adagrad", "rmsprop", "adam"],
                        help="Required optimizer for training the model: ('sgd','adagrad','rmsprop','adam'), "
                             "(default: 'adam')"
                        )
    parser.add_argument('--lr', default=0.1, type=float,
                        help="Learning rate for the optimizer (default: 0.1)"
                        )
    parser.add_argument('--margin', default=0.5, type=float,
                        help='margin for triplet loss (default: 0.5)'
                        )
    args = parser.parse_args(args=[] if argv is None else argv)
    return args


args = config()

# 一些辅助functions

In [None]:
def set_model_architecture(model_architecture, pretrained, embedding_dimension):
    """Instantiate the embedding network named by ``model_architecture``.

    Args:
        model_architecture (str): Either "resnet18" or "resnet34".
        pretrained (bool): Forwarded to the model constructor.
        embedding_dimension (int): Forwarded to the model constructor.

    Returns:
        The constructed ``Resnet18Triplet`` or ``Resnet34Triplet`` instance.

    Raises:
        ValueError: If ``model_architecture`` is not one of the supported
            names (previously this surfaced as an ``UnboundLocalError``).
    """
    if model_architecture == "resnet18":
        model = Resnet18Triplet(
            embedding_dimension=embedding_dimension,
            pretrained=pretrained
        )
    elif model_architecture == "resnet34":
        model = Resnet34Triplet(
            embedding_dimension=embedding_dimension,
            pretrained=pretrained
        )
    else:
        raise ValueError(
            "Unsupported model architecture {!r}; expected 'resnet18' or 'resnet34'.".format(model_architecture)
        )
    print("Using {} model architecture.".format(model_architecture))

    return model

In [None]:
def set_model_gpu_mode(model):
    """Move ``model`` onto the available GPU(s), if any.

    With more than one CUDA device the model is wrapped in
    ``nn.DataParallel``; with exactly one it is moved to that device; without
    CUDA it is returned untouched.

    Args:
        model (nn.Module): The network to place.

    Returns:
        tuple: ``(model, flag_train_multi_gpu)`` where the flag is True only
        when the model was wrapped in ``nn.DataParallel``.
    """
    # Only query the device count when CUDA is usable at all.
    gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0

    if gpu_count > 1:
        model = nn.DataParallel(model)
        model.cuda()
        print('Using multi-gpu training.')
        return model, True

    if gpu_count == 1:
        model.cuda()
        print('Using single-gpu training.')

    return model, False

In [None]:
def set_optimizer(optimizer, model, learning_rate):
    """Create the optimizer named by ``optimizer`` for ``model``'s parameters.

    Args:
        optimizer (str): One of "sgd", "adagrad", "rmsprop" or "adam".
        model (nn.Module): Model whose parameters will be optimized.
        learning_rate (float): Learning rate passed to the optimizer.

    Returns:
        torch.optim.Optimizer: The configured optimizer. "adam" is created
        with ``amsgrad=True``.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if optimizer == "sgd":
        optimizer_model = torch.optim.SGD(model.parameters(), lr=learning_rate)

    elif optimizer == "adagrad":
        optimizer_model = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

    elif optimizer == "rmsprop":
        optimizer_model = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    elif optimizer == "adam":
        optimizer_model = torch.optim.Adam(
            model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, amsgrad=True
        )

    else:
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'sgd', 'adagrad', 'rmsprop' or 'adam'.".format(optimizer)
        )

    return optimizer_model


# Model-Resnet18

In [None]:
class Resnet18Triplet(nn.Module):
    """ResNet-18 backbone whose final layer outputs a FaceNet-style embedding.

    Args:
        embedding_dimension (int): Size of the embedding vector produced by
            the replaced fully connected layer. Defaults to 128.
        pretrained (bool): Forwarded to ``torch.hub.load`` as ``pretrained``.
            Defaults to False.
    """

    def __init__(self, embedding_dimension=128, pretrained=False):
        super().__init__()
        backbone = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=pretrained)
        # Swap the classification head for a linear layer that emits the embedding.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension)
        self.model = backbone

    def l2_norm(self, input):
        """Scale each row of ``input`` to (approximately) unit L2 norm.

        A small constant (1e-10) is added to the squared sum before the
        square root so an all-zero row does not divide by zero.
        Code adapted from liorshk's repository:
        https://github.com/liorshk/facenet_pytorch/blob/master/model.py
        """
        original_shape = input.size()
        squared_sum = torch.sum(torch.pow(input, 2), 1).add_(1e-10)
        row_norm = torch.sqrt(squared_sum)
        normalized = torch.div(input, row_norm.view(-1, 1).expand_as(input))
        return normalized.view(original_shape)

    def forward(self, images):
        """Return the l2-normalized embedding of ``images`` scaled by alpha."""
        unit_embedding = self.l2_norm(self.model(images))
        # Multiply by alpha = 10 as suggested in https://arxiv.org/pdf/1703.09507.pdf
        #   Equation 9: number of classes in VGGFace2 dataset = 9131
        #   lower bound on alpha = 5, multiply alpha by 2; alpha = 10
        alpha = 10
        return unit_embedding * alpha

# Model-Resnet34

In [None]:
class Resnet34Triplet(nn.Module):
    """ResNet-34 backbone whose final layer outputs a FaceNet-style embedding.

    Args:
        embedding_dimension (int): Size of the embedding vector produced by
            the replaced fully connected layer. Defaults to 128.
        pretrained (bool): Forwarded to ``torch.hub.load`` as ``pretrained``.
            Defaults to False.
    """

    def __init__(self, embedding_dimension=128, pretrained=False):
        super().__init__()
        # Fetch resnet34 from the official PyTorch vision hub repository.
        backbone = torch.hub.load('pytorch/vision:v0.6.0', 'resnet34', pretrained=pretrained)
        # Swap the classification head for a linear layer that emits the embedding.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension)
        self.model = backbone

    def l2_norm(self, input):
        """Scale each row of ``input`` to (approximately) unit L2 norm.

        A small constant (1e-10) is added to the squared sum before the
        square root so an all-zero row does not divide by zero.
        Code adapted from liorshk's repository:
        https://github.com/liorshk/facenet_pytorch/blob/master/model.py
        """
        original_shape = input.size()
        squared_sum = torch.sum(torch.pow(input, 2), 1).add_(1e-10)
        row_norm = torch.sqrt(squared_sum)
        normalized = torch.div(input, row_norm.view(-1, 1).expand_as(input))
        return normalized.view(original_shape)

    def forward(self, images):
        """Return the l2-normalized embedding of ``images`` scaled by alpha."""
        unit_embedding = self.l2_norm(self.model(images))
        # Multiply by alpha = 10 as suggested in https://arxiv.org/pdf/1703.09507.pdf
        #   Equation 9: number of classes in VGGFace2 dataset = 9131
        #   lower bound on alpha = 5, multiply alpha by 2; alpha = 10
        alpha = 10
        return unit_embedding * alpha

### 可以利用已经训练好的参数

In [None]:
# checkpoint = torch.load('model_resnet34_triplet.pt')
# model = Resnet34Triplet(embedding_dimension=checkpoint['embedding_dimension'])
# model.load_state_dict(checkpoint['model_state_dict'])
# best_distance_threshold = checkpoint['best_distance_threshold']

# TripletLoss

## 这里只是给出 TripletLoss 的参考实现；后面训练时实际使用的是 PyTorch 官方提供的 `nn.TripletMarginLoss`

In [None]:
class TripletLoss(nn.Module):
    """Reference triplet margin loss: mean(max(margin + d(a, p) - d(a, n), 0)).

    Implemented as an ``nn.Module`` rather than a ``torch.autograd.Function``
    subclass: the original used the legacy instance style (``__init__`` plus a
    non-static ``forward``), which current PyTorch no longer supports. All
    operations here are differentiable, so autograd derives the backward pass
    and no custom ``Function`` is needed.

    Args:
        margin (float): Minimum gap required between the anchor-negative and
            the anchor-positive distance.
    """

    def __init__(self, margin):
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.pdist = PairwiseDistance(2)

    def forward(self, anchor, positive, negative):
        """Return the mean hinge loss over the given triplets.

        Args:
            anchor, positive, negative: Embedding tensors accepted pairwise by
                ``PairwiseDistance``.

        Returns:
            torch.Tensor: Scalar tensor holding the mean loss.
        """
        pos_dist = self.pdist(anchor, positive)
        neg_dist = self.pdist(anchor, negative)

        # Only triplets that violate the margin contribute a positive value.
        hinge_dist = torch.clamp(self.margin + pos_dist - neg_dist, min=0.0)
        return torch.mean(hinge_dist)

# Train

In [None]:
def train_triplet(start_epoch, end_epoch, epochs, train_dataloader, lfw_dataloader, lfw_validation_epoch_interval,
                  model, model_architecture, optimizer_model, embedding_dimension, batch_size, margin,
                  flag_train_multi_gpu):
    """Train ``model`` with triplet loss, then log, validate and checkpoint after every epoch.

    Within each batch only the triplets that violate the margin
    (``neg_dist - pos_dist < margin``) are fed to ``nn.TripletMarginLoss``;
    batches without any such triplet are skipped for the optimizer step.
    Anchor-positive / anchor-negative distances of every batch are collected
    and passed to ``evaluate_lfw`` to report a training accuracy.

    Args:
        start_epoch (int): First (0-based) epoch index to run.
        end_epoch (int): Epoch index at which to stop (exclusive).
        epochs (int): Configured epoch count; used here only to force an LFW
            validation whenever ``(epoch + 1) % epochs == 0``.
        train_dataloader: Iterable yielding batches indexable by the keys
            'anc_img', 'pos_img' and 'neg_img'.
        lfw_dataloader: Passed through to ``validate_lfw``.
        lfw_validation_epoch_interval (int): Validate every this many epochs.
        model (nn.Module): Embedding network; batches are moved to the device
            of its first parameter.
        model_architecture (str): Used in log and checkpoint file names.
        optimizer_model: Optimizer stepping ``model``'s parameters.
        embedding_dimension (int): Stored in the checkpoint.
        batch_size (int): Stored in the checkpoint.
        margin (float): Triplet margin for both selection and loss.
        flag_train_multi_gpu (bool): If True, ``model.module.state_dict()`` is
            checkpointed instead of ``model.state_dict()``.
    """
    # Follow the model's device instead of hard-coding CUDA, so CPU-only runs work too.
    device = next(model.parameters()).device

    # Stateless helpers: build them once rather than once per epoch / batch.
    l2_distance = PairwiseDistance(2).to(device)
    triplet_loss = nn.TripletMarginLoss(margin=margin, p=2)

    # Logs and checkpoints are written after each epoch; make sure the target
    # folders exist so a finished epoch is not lost to a missing directory.
    os.makedirs('logs', exist_ok=True)
    os.makedirs('Model_training_checkpoints', exist_ok=True)

    for epoch in range(start_epoch, end_epoch):
        triplet_loss_sum = []
        flag_validate_lfw = (epoch + 1) % lfw_validation_epoch_interval == 0 or (epoch + 1) % epochs == 0
        num_valid_training_triplets = 0
        distances, labels = [], []

        # Training pass
        model.train()
        for batch_sample in tqdm(train_dataloader):

            anc_img = batch_sample['anc_img'].to(device)
            pos_img = batch_sample['pos_img'].to(device)
            neg_img = batch_sample['neg_img'].to(device)

            # Forward pass - compute embeddings
            anc_embedding, pos_embedding, neg_embedding = model(anc_img), model(pos_img), model(neg_img)

            pos_dist = l2_distance(anc_embedding, pos_embedding)
            neg_dist = l2_distance(anc_embedding, neg_embedding)

            # Track distances for the training-accuracy estimate (label 0 =
            # anchor/negative pair, label 1 = anchor/positive pair). This is
            # done for every batch, including those skipped below; otherwise
            # batches the model already separates perfectly would be excluded
            # from the statistic.
            distances.append(neg_dist.detach().cpu().numpy())
            labels.append(np.zeros(neg_dist.size(0)))
            distances.append(pos_dist.detach().cpu().numpy())
            labels.append(np.ones(pos_dist.size(0)))

            # Choose only the triplets that still violate the margin.
            hard_mask = neg_dist - pos_dist < margin
            if not hard_mask.any():
                continue

            anc_hard_embedding = anc_embedding[hard_mask]
            pos_hard_embedding = pos_embedding[hard_mask]
            neg_hard_embedding = neg_embedding[hard_mask]

            # Calculate triplet loss
            output = triplet_loss(
                anchor=anc_hard_embedding,
                positive=pos_hard_embedding,
                negative=neg_hard_embedding
            )

            triplet_loss_sum.append(output.item())
            num_valid_training_triplets += len(anc_hard_embedding)

            # Backward pass
            optimizer_model.zero_grad()
            output.backward()
            optimizer_model.step()

        labels = np.concatenate(labels) if labels else np.array([])
        distances = np.concatenate(distances) if distances else np.array([])

        true_positive_rate, false_positive_rate, precision, recall, accuracy, roc_auc, best_distances, \
            tar, far = evaluate_lfw(
                distances=distances,
                labels=labels
            )

        # The model only trains on hard triplets; the average is over the batches that had any.
        avg_triplet_loss = 0 if (num_valid_training_triplets == 0) else np.mean(triplet_loss_sum)

        # Print training statistics and add to log
        print('Epoch {}:\tAverage Triplet Loss: {:.4f}\t Training Accuracy: {:.4f}+-{:.4f}'.format(
                epoch + 1,
                avg_triplet_loss,
                np.mean(accuracy),
                np.std(accuracy)
            )
        )
        print("    Number of valid training triplets in epoch: {}".format(num_valid_training_triplets))
        with open('logs/{}_log_triplet.txt'.format(model_architecture), 'a') as f:
            val_list = [
                epoch + 1,
                avg_triplet_loss,
                num_valid_training_triplets
            ]
            log = '\t'.join(str(value) for value in val_list)
            f.write(log + '\n')

        # Evaluation pass on LFW dataset
        if flag_validate_lfw:
            best_distances = validate_lfw(
                model=model,
                lfw_dataloader=lfw_dataloader,
                model_architecture=model_architecture,
                epoch=epoch,
                epochs=epochs
            )

        # Save model checkpoint
        state = {
            'epoch': epoch + 1,
            'embedding_dimension': embedding_dimension,
            'batch_size_training': batch_size,
            'model_state_dict': model.state_dict(),
            'model_architecture': model_architecture,
            'optimizer_model_state_dict': optimizer_model.state_dict()
        }

        # For storing data parallel model's state dictionary without 'module' parameter
        if flag_train_multi_gpu:
            state['model_state_dict'] = model.module.state_dict()

        # For storing best euclidean distance threshold during LFW validation.
        # Stored as a plain Python float so the checkpoint does not embed a
        # NumPy scalar object.
        if flag_validate_lfw:
            state['best_distance_threshold'] = float(np.mean(best_distances))

        torch.save(state, 'Model_training_checkpoints/model_{}_triplet_epoch_{}.pt'.format(
                model_architecture,
                epoch + 1
            )
        )

# 在LFW数据集上validate测试

In [None]:
def validate_lfw(model, lfw_dataloader, model_architecture, epoch, epochs):
    """Evaluate ``model`` on the LFW pairs, log the metrics and plot the ROC curve.

    Args:
        model (nn.Module): Embedding network; it is switched to eval mode and
            batches are moved to the device of its first parameter.
        lfw_dataloader: Iterable yielding ``(data_a, data_b, label)`` batches.
        model_architecture (str): Used in the log and plot file names.
        epoch (int): 0-based epoch index; logged and used in file names as
            ``epoch + 1``.
        epochs (int): Accepted for interface compatibility; not used here.

    Returns:
        The ``best_distances`` value returned by ``evaluate_lfw``.
    """
    # Follow the model's device instead of hard-coding CUDA, so CPU-only runs work too.
    device = next(model.parameters()).device

    # Make sure the output folders for the log file and the ROC plot exist.
    os.makedirs('logs', exist_ok=True)
    os.makedirs('plots/roc_plots', exist_ok=True)

    model.eval()
    with torch.no_grad():
        l2_distance = PairwiseDistance(2).to(device)
        distances, labels = [], []

        print("Validating on LFW! ...")

        for data_a, data_b, label in tqdm(lfw_dataloader):
            data_a, data_b = data_a.to(device), data_b.to(device)

            output_a, output_b = model(data_a), model(data_b)
            distance = l2_distance(output_a, output_b)  # Euclidean distance

            distances.append(distance.cpu().detach().numpy())
            # Labels are only consumed as NumPy arrays, so they never need to visit the GPU.
            labels.append(label.cpu().detach().numpy())

        labels = np.concatenate(labels) if labels else np.array([])
        distances = np.concatenate(distances) if distances else np.array([])

        true_positive_rate, false_positive_rate, precision, recall, accuracy, roc_auc, best_distances, \
            tar, far = evaluate_lfw(
                distances=distances,
                labels=labels
            )
        # Print statistics and add to log
        print("Accuracy on LFW: {:.4f}+-{:.4f}\tPrecision {:.4f}+-{:.4f}\tRecall {:.4f}+-{:.4f}\t"
              "ROC Area Under Curve: {:.4f}\tBest distance threshold: {:.2f}+-{:.2f}\t"
              "TAR: {:.4f}+-{:.4f} @ FAR: {:.4f}".format(
                np.mean(accuracy),
                np.std(accuracy),
                np.mean(precision),
                np.std(precision),
                np.mean(recall),
                np.std(recall),
                roc_auc,
                np.mean(best_distances),
                np.std(best_distances),
                np.mean(tar),
                np.std(tar),
                np.mean(far)
            )
        )
        with open('logs/lfw_{}_log_triplet.txt'.format(model_architecture), 'a') as f:
            val_list = [
                epoch + 1,
                np.mean(accuracy),
                np.std(accuracy),
                np.mean(precision),
                np.std(precision),
                np.mean(recall),
                np.std(recall),
                roc_auc,
                np.mean(best_distances),
                np.std(best_distances),
                np.mean(tar)
            ]
            log = '\t'.join(str(value) for value in val_list)
            f.write(log + '\n')

    # Plotting is deliberately best-effort: a plotting failure is reported but
    # must not abort training.
    try:
        # Plot ROC curve
        plot_roc_lfw(
            false_positive_rate=false_positive_rate,
            true_positive_rate=true_positive_rate,
            figure_name="plots/roc_plots/roc_{}_epoch_{}_triplet.png".format(model_architecture, epoch + 1)
        )
    except Exception as e:
        print(e)

    return best_distances

# 设立dataloaders

In [None]:
# Unpack the parsed settings into the module-level names used by the cells below.
dataroot = args.dataroot
lfw_dataroot = args.lfw
dataset_csv = args.dataset_csv
lfw_batch_size = args.lfw_batch_size
lfw_validation_epoch_interval = args.lfw_validation_epoch_interval
num_triplets_train = args.num_triplets_train
model_architecture = args.model
epochs = args.epochs
embedding_dimension = args.embedding_dim
pretrained = args.pretrained
optimizer = args.optimizer
learning_rate = args.lr
margin = args.margin
start_epoch = 0
resume_path = args.resume_path

# training_triplets_path = args.training_triplets_path
# A pre-generated triplets file is used instead of args.training_triplets_path.
# (The original note mentioned training_triplets_1100000.npy; the path below
# points at the 10000-triplet file.)
training_triplets_path = "datasets/training_triplets_10000.npy"

batch_size = args.batch_size
num_workers = args.num_workers


# Define image data pre-processing transforms
#   ToTensor() normalizes pixel values between [0, 1]
#   Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) then
#   standardizes each channel (these look like the usual ImageNet statistics
#   for torchvision's pretrained models - confirm); the result is NOT
#   confined to [-1, 1].
data_transforms = transforms.Compose([
    transforms.Resize([160, 160]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])
# Size 160x160 RGB image; same preprocessing as training minus the random flip
lfw_transforms = transforms.Compose([
    transforms.Resize([160, 160]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Set dataloaders
lfw_train_dataset = TripletFaceDataset(
    root_dir=dataroot,
    csv_name=dataset_csv,
    num_triplets=num_triplets_train,
    training_triplets_path=training_triplets_path,
    transform=data_transforms
)

train_dataloader = DataLoader(
    lfw_train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True
)

lfw_valid_dataset = LFWDataset(
    dir=lfw_dataroot,
    pairs_path='datasets/LFW_pairs.txt',
    # pairs_path='txt/pairsDevTest.txt',
    transform=lfw_transforms
)
lfw_dataloader = DataLoader(
    lfw_valid_dataset,
    batch_size=lfw_batch_size,
    num_workers=num_workers,
    shuffle=False
)

# 初始化model，设定 optimizer，learning_rate

In [None]:
# Instantiate model
model = set_model_architecture(
    model_architecture=model_architecture,
    pretrained=pretrained,
    embedding_dimension=embedding_dimension
)

# Alternative: start from an already-trained model instead
# checkpoint = torch.load('model_resnet34_triplet.pt')
# model = Resnet34Triplet(embedding_dimension=checkpoint['embedding_dimension'])
# model.load_state_dict(checkpoint['model_state_dict'])
# best_distance_threshold = checkpoint['best_distance_threshold']

# Load model to GPU or multiple GPUs if available
model, flag_train_multi_gpu = set_model_gpu_mode(model)

# Set optimizer
optimizer_model = set_optimizer(
    optimizer=optimizer,
    model=model,
    learning_rate=learning_rate
)

# Resume from a model checkpoint
if resume_path:
    if os.path.isfile(resume_path):
        print("Loading checkpoint {} ...".format(resume_path))

        # map_location='cpu' lets a checkpoint saved on a GPU be read on any
        # machine; the load_state_dict calls below copy the tensors onto the
        # devices the model / optimizer state already live on.
        checkpoint = torch.load(resume_path, map_location='cpu')
        start_epoch = checkpoint['epoch']

        # In order to load state dict for optimizers correctly, model has to be loaded to gpu first
        if flag_train_multi_gpu:
            model.module.load_state_dict(checkpoint['model_state_dict'])
        else:
            model.load_state_dict(checkpoint['model_state_dict'])

        optimizer_model.load_state_dict(checkpoint['optimizer_model_state_dict'])

        # The training cell runs range(start_epoch, start_epoch + epochs), i.e.
        # `epochs` further epochs, so that is the number reported here.
        # NOTE(review): if `epochs` was meant as the total, change end_epoch instead.
        print("Checkpoint loaded: start epoch from checkpoint = {}\nRunning for {} epochs.\n".format(
                start_epoch,
                epochs
            )
        )
    else:
        print("WARNING: No checkpoint found at {}!\nTraining from scratch.".format(resume_path))

# 开始训练

In [None]:
# Start Training loop
# The loop covers range(start_epoch, start_epoch + epochs), i.e. `epochs`
# epochs even when resuming, so the message reports `epochs`.
# NOTE(review): if `epochs` was meant as the total, change end_epoch instead.
print("Training using triplet loss on {} triplets starting for {} epochs:\n".format(
        num_triplets_train,
        epochs
    )
)

end_epoch = start_epoch + epochs
# Start training model using Triplet Loss
train_triplet(
    start_epoch=start_epoch,
    end_epoch=end_epoch,
    epochs=epochs,
    train_dataloader=train_dataloader,
    lfw_dataloader=lfw_dataloader,
    lfw_validation_epoch_interval=lfw_validation_epoch_interval,
    model=model,
    model_architecture=model_architecture,
    optimizer_model=optimizer_model,
    embedding_dimension=embedding_dimension,
    batch_size=batch_size,
    margin=margin,
    flag_train_multi_gpu=flag_train_multi_gpu
)

# 可视化训练结果

## Triplet Loss

### Triplet loss 走高的原因可能是 hard triplet 的数量减少了？（待验证）

In [None]:
# Plot the triplet losses: pass the training log that train_triplet appends to
# (logs/<model_architecture>_log_triplet.txt) to plot_triplet_losses, with
# plots/triplet_losses_<model_architecture>.png as the figure name.
plot_triplet_losses(
    log_dir="logs/{}_log_triplet.txt".format(model_architecture),
    epochs=epochs,
    figure_name="plots/triplet_losses_{}.png".format(model_architecture)
)

## 可视化我的 model 在 LFW 上的 accuracy 表现

In [None]:
# Plot the LFW accuracies: pass the validation log that validate_lfw appends to
# (logs/lfw_<model_architecture>_log_triplet.txt) to plot_accuracy_lfw, with
# plots/lfw_accuracies_<model_architecture>_triplet.png as the figure name.
plot_accuracy_lfw(
    log_dir="logs/lfw_{}_log_triplet.txt".format(model_architecture),
    epochs=epochs,
    figure_name="plots/lfw_accuracies_{}_triplet.png".format(model_architecture)
)