# MultiTask Face Analysis Model Evaluation Notebook

In [8]:
import numpy as np, eval, matplotlib.pyplot as plt, os
import torch, torchvision, datasets
import torch.nn as nn
from torchvision.transforms import v2
from multitask.models import MultiTaskFaceAnalysisModel
from configs.train_multitask import config

In [2]:
ms1mv2 = datasets.MS1MV2()
num_classes = ms1mv2.number_of_classes()

In [3]:
model = MultiTaskFaceAnalysisModel(num_classes = num_classes, **config)

data/models/davit_t_adaface_MS1MV2_Dataset/backbone.pth
Loaded pretrained backbone from data/models/davit_t_adaface_MS1MV2_Dataset/backbone.pth.
Loaded pretrained face recognition subnet from data/models/davit_t_adaface_MS1MV2_Dataset/recognition_subnet.pth

AdaFace with the following property
self.m 0.4
self.h 0.333
self.s 64
self.t_alpha 0.99


In [4]:
model.load_state_dict(
    torch.load(os.path.join('data', 'models', 'multitask_davit_t_face_emotion_age_gender_race', 'model.pth'))
)

<All keys matched successfully>

In [5]:
face_recognition_model = nn.Sequential(
    model.backbone,
    model.face_recognition_embedding_subnet
)

In [6]:
metrics = eval.evaluate_face_recognition(face_recognition_model)

Processing dataset: LFW...
Finished processing LFW.
Processing dataset: CPLFW...
Finished processing CPLFW.
Processing dataset: CALFW...
Finished processing CALFW.
Processing dataset: CFP-FP...
Finished processing CFP-FP.
Processing dataset: CFP-FF...
Finished processing CFP-FF.
Processing dataset: AgeDB30...
Finished processing AgeDB30.
Processing dataset: VGG2FP...
Finished processing VGG2FP.


In [7]:
for key, db_metrics in metrics.items():
    accuracy, _, _, f1_score, _, _, _, _ = db_metrics
    print(f'Accuracy for {key} = {accuracy}.')
    print(f'F1 score for {key} = {f1_score}')

Accuracy for LFW = 0.9978333333333167.
F1 score for LFW = 0.9978257853705955
Accuracy for CPLFW = 0.9279999999999846.
F1 score for CPLFW = 0.9250478445250513
Accuracy for CALFW = 0.9584999999999839.
F1 score for CALFW = 0.9568860490254536
Accuracy for CFP-FP = 0.959714285714272.
F1 score for CFP-FP = 0.9583161672353014
Accuracy for CFP-FF = 0.9978571428571286.
F1 score for CFP-FF = 0.9978524170558153
Accuracy for AgeDB30 = 0.9763333333333171.
F1 score for AgeDB30 = 0.975873615691968
Accuracy for VGG2FP = 0.951599999999981.
F1 score for VGG2FP = 0.9500311586498981


In [13]:
model.to('cuda')

MultiTaskFaceAnalysisModel(
  (backbone): DaViT(
    (model): DaViT(
      (patch_embeds): ModuleList(
        (0): PatchEmbed(
          (proj): Conv2d(3, 96, kernel_size=(2, 2), stride=(2, 2))
          (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        )
        (1): PatchEmbed(
          (proj): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
          (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        )
        (2): PatchEmbed(
          (proj): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
          (norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
        )
        (3): PatchEmbed(
          (proj): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
          (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
      )
      (main_blocks): ModuleList(
        (0): ModuleList(
          (0): MySequential(
            (0): SpatialBlock(
              (cpe): ModuleList(
                (0-1): 2 x ConvPosE

In [14]:
test_transform = v2.Compose([ # for testing on datasets other than face recognition.
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale = True),
    v2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [15]:
rafdb_test = datasets.RAFDB(subset = 'test', transform = test_transform)

In [16]:
raf_test_dataloader = torch.utils.data.DataLoader(
    dataset = rafdb_test, 
    batch_size = 64,
    num_workers = 2,
)

In [17]:
emotion_recognition_accuracy, loss = eval.evaluate_emotion(model = model, dataloader = raf_test_dataloader)

                                                                   

In [18]:
emotion_recognition_accuracy

0.8888526727509778

In [19]:
utk_face_db = datasets.UTKFace(subset = 'test', transform = test_transform)
utk_face_dataloader = torch.utils.data.DataLoader(
    dataset = utk_face_db,
    batch_size = 64,
    num_workers = 2
)

In [20]:
age_mae = eval.evaluate_age(model, utk_face_dataloader)

                                                               

In [21]:
age_mae

4.375353720179083

In [22]:
gender_accuracy, loss = eval.evaluate_gender(model, utk_face_dataloader)

                                                                  

In [23]:
gender_accuracy

0.9636977627691009

In [25]:
race_accuracy, loss = eval.evalate_race(model, utk_face_dataloader, device = 'cuda')

                                                                

In [26]:
race_accuracy

0.879273955255382