# Model comparisons

In [1]:
from face_alignment import FaceAlignment, LandmarksType

from giskard_vision.landmark_detection.dataloaders.loaders import DataLoaderFFHQ, DataLoader300W
from giskard_vision.landmark_detection.dataloaders.wrappers import (
    CroppedDataLoader,
    ResizedDataLoader,
    HeadPoseDataLoader,
    EthnicityDataLoader,
)
from giskard_vision.core.dataloaders.wrappers import (
    CachedDataLoader,
    FilteredDataLoader,
    ColoredDataLoader,
    BlurredDataLoader,
)

from giskard_vision.landmark_detection.models.wrappers import OpenCVWrapper, FaceAlignmentWrapper
from giskard_vision.landmark_detection.tests.performance import NMEMean
from giskard_vision.landmark_detection.tests.base import Test, TestDiff
from giskard_vision.landmark_detection.marks.facial_parts import FacialParts

### Loading dataset

In [2]:
# Reference dataloader: every comparison in this notebook wraps `dl_ref` and also passes
# it as `dataloader_ref`. Alternative sources are kept commented out; enable exactly one
# assignment to `dl_ref`.
# dl_ref = DataLoaderFFHQ("../datasets/ffhq")
# dl_ref = DataLoader300W(dir_path="_300W_full/01_Indoor")
dl_ref = DataLoader300W(dir_path="../datasets/300W/sample")

### Loading landmark-detection models
- FaceAlignment
- OpenCV

In [3]:
# Landmark-detection models under comparison, keyed by a short name.
# The FaceAlignment network is configured for 2D landmarks on CPU with the
# "blazeface" face detector and no input flipping.
face_alignment_net = FaceAlignment(
    LandmarksType.TWO_D,
    device="cpu",
    flip_input=False,
    face_detector="blazeface",
)

models = {}
models["FaceAlignment"] = FaceAlignmentWrapper(model=face_alignment_net)
models["OpenCV"] = OpenCVWrapper()
# models.pop("FaceAlignment")  # takes a long time

loading data from : lbfmodel.yaml


In [4]:
# Accumulator for the per-test result dicts. Every case cell below appends to this list,
# so re-running a case cell without re-running this one adds duplicate rows to the
# final report.
results = []

### Case 1: Cropped Images

In [5]:
# Case 1: run every model on left-half and right-half crops and compare against the
# uncropped reference, restricting the test to the cropped facial part.
facial_parts = [FacialParts.LEFT_HALF.value, FacialParts.RIGHT_HALF.value]

for model in models.values():
    for part in facial_parts:
        cropped_dl = CroppedDataLoader(dl_ref, part=part)
        diff_test = TestDiff(metric=NMEMean, threshold=1)
        outcome = diff_test.run(
            model=model,
            dataloader=cropped_dl,
            dataloader_ref=dl_ref,
            facial_part=part,
        )
        results.append(outcome.to_dict())

FaceAlignmentWrapper: Face not detected in processed image of batch 3 and index 0.
FaceAlignmentWrapper: Face not detected in processed image of batch 5 and index 0.
  return np.nanmean(nes, axis=1)
OpenCVWrapper: Face not detected in processed image of batch 1 and index 0.
OpenCVWrapper: Face not detected in processed image of batch 5 and index 0.
OpenCVWrapper: Face not detected in processed image of batch 1 and index 0.
OpenCVWrapper: Face not detected in processed image of batch 2 and index 0.
OpenCVWrapper: Face not detected in processed image of batch 3 and index 0.
OpenCVWrapper: Face not detected in processed image of batch 4 and index 0.


### Case 2A: Resized Images

In [6]:
# Case 2A: run every model on images resized with scale 0.5 and compare against the
# unmodified reference. A fresh wrapper is built for each model.
for model in models.values():
    resized_dl = ResizedDataLoader(dl_ref, scales=0.5)
    diff_test = TestDiff(metric=NMEMean, threshold=1)
    outcome = diff_test.run(model=model, dataloader=resized_dl, dataloader_ref=dl_ref)
    results.append(outcome.to_dict())

### Case 2B: Recolored Images

In [7]:
# Case 2B: run every model on recolored images (ColoredDataLoader with its default
# settings) and compare against the unmodified reference.
for model in models.values():
    recolored_dl = ColoredDataLoader(dl_ref)
    diff_test = TestDiff(metric=NMEMean, threshold=1)
    outcome = diff_test.run(model=model, dataloader=recolored_dl, dataloader_ref=dl_ref)
    results.append(outcome.to_dict())

### Case 2C: Blurred Images

In [8]:
# Case 2C: run every model on blurred images (BlurredDataLoader with its default
# settings) and compare against the unmodified reference.
for model in models.values():
    blurred_dl = BlurredDataLoader(dl_ref)
    diff_test = TestDiff(metric=NMEMean, threshold=1)
    outcome = diff_test.run(model=model, dataloader=blurred_dl, dataloader_ref=dl_ref)
    results.append(outcome.to_dict())

### Case 3: Head Pose

In [9]:
# Wrap the reference loader with head-pose information, then with a cache so the filters
# below can share it. Image and label caching are switched off and no cache size is set.
head_pose_dl = HeadPoseDataLoader(dl_ref)
cached_dl = CachedDataLoader(
    head_pose_dl,
    cache_size=None,
    cache_img=False,
    cache_labels=False,
)


def positive_roll(elt):
    """Filter predicate: keep elements whose "roll" value is strictly positive.

    The value is read with ``elt[2].get_includes("roll")``; ``elt[2]`` is presumably
    the element's metadata object (TODO confirm against the dataloader's element layout).
    """
    roll = elt[2].get_includes("roll")
    return roll > 0


def negative_roll(elt):
    """Filter predicate: keep elements whose "roll" value is strictly negative.

    The value is read with ``elt[2].get_includes("roll")``; ``elt[2]`` is presumably
    the element's metadata object (TODO confirm against the dataloader's element layout).
    """
    roll = elt[2].get_includes("roll")
    return roll < 0


# Case 3: for each model, evaluate the subset selected by each roll predicate and
# compare it against the full reference loader.
head_poses = [positive_roll, negative_roll]

for model in models.values():
    for pose_filter in head_poses:
        pose_dl = FilteredDataLoader(cached_dl, pose_filter)
        diff_test = TestDiff(metric=NMEMean, threshold=1)
        outcome = diff_test.run(model=model, dataloader=pose_dl, dataloader_ref=dl_ref)
        results.append(outcome.to_dict())

### Case 4: Ethnicity

In [10]:
# Wrap the reference loader with ethnicity information, then with a cache (image and
# label caching switched off, no cache size set).
# NOTE(review): the map presumably relabels "indian" as "asian" — confirm against
# EthnicityDataLoader.
ethnicity_dl = EthnicityDataLoader(
    dl_ref,
    ethnicity_map={"indian": "asian"},
)
cached_dl = CachedDataLoader(
    ethnicity_dl,
    cache_size=None,
    cache_img=False,
    cache_labels=False,
)


def white_ethnicity(elt):
    """Filter predicate: keep elements whose "ethnicity" value equals "white".

    The value is read with ``elt[2].get_includes("ethnicity")``; ``elt[2]`` is presumably
    the element's metadata object (TODO confirm against the dataloader's element layout).
    """
    ethnicity = elt[2].get_includes("ethnicity")
    return ethnicity == "white"


# Case 4: for each model, evaluate the subset selected by each ethnicity predicate and
# compare it against the full reference loader.
ethnicities = [white_ethnicity]

for model in models.values():
    for ethnicity_filter in ethnicities:
        ethnicity_subset_dl = FilteredDataLoader(cached_dl, ethnicity_filter)
        diff_test = TestDiff(metric=NMEMean, threshold=1)
        outcome = diff_test.run(
            model=model,
            dataloader=ethnicity_subset_dl,
            dataloader_ref=dl_ref,
        )
        results.append(outcome.to_dict())

2024-01-16 14:42:22.545292: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
import pandas as pd

# Do not truncate long cell values (the dataloader names are long strings).
pd.set_option("display.max_colwidth", None)

# Columns shown in the report, in display order.
report_columns = [
    "model",
    "facial_part",
    "dataloader",
    "dataloader_ref",
    "test",
    "metric",
    "metric_value",
    "threshold",
    "passed",
    "prediction_time",
    "prediction_fail_rate",
]

# `reindex` selects and orders the columns like `[...]` indexing would, but it also
# tolerates an empty `results` list or a result dict lacking one of the keys: such
# columns come back as NaN instead of aborting the cell with a KeyError.
report = pd.DataFrame(results).reindex(columns=report_columns)
# report.groupby(["model"]).apply(display)  # display doesn't work in CI

# Bare last expression: rendered as the cell output in a notebook, and a harmless no-op
# when the code is executed as a plain script.
report

Unnamed: 0,model,facial_part,dataloader,dataloader_ref,test,metric,metric_value,threshold,passed,prediction_time,prediction_fail_rate
0,FaceAlignment,left half,300W cropped on left half,300W,TestDiff,NME_mean,-0.656253,1,True,59.517539,0.564706
1,FaceAlignment,right half,300W cropped on right half,300W,TestDiff,NME_mean,-0.341428,1,True,71.563052,0.544118
4,FaceAlignment,entire face,300W resizing with ratios: 0.5,300W,TestDiff,NME_mean,-0.011428,1,True,49.050097,0.0
6,FaceAlignment,entire face,300W altered with color mode 7,300W,TestDiff,NME_mean,-0.006728,1,True,63.233154,0.0
8,FaceAlignment,entire face,300W blurred,300W,TestDiff,NME_mean,-0.009508,1,True,67.527268,0.0
10,FaceAlignment,entire face,(Cached (300W) with head-pose) filtered using 'positive_roll',300W,TestDiff,NME_mean,0.146944,1,True,38.808692,0.0
11,FaceAlignment,entire face,(Cached (300W) with head-pose) filtered using 'negative_roll',300W,TestDiff,NME_mean,-0.036736,1,True,58.33623,0.0
14,FaceAlignment,entire face,(Cached (300W) with ethnicity) filtered using 'white_ethnicity',300W,TestDiff,NME_mean,0.033779,1,True,59.879113,0.0


Unnamed: 0,model,facial_part,dataloader,dataloader_ref,test,metric,metric_value,threshold,passed,prediction_time,prediction_fail_rate
2,OpenCV,left half,300W cropped on left half,300W,TestDiff,NME_mean,-0.644057,1,True,1.20354,0.564706
3,OpenCV,right half,300W cropped on right half,300W,TestDiff,NME_mean,-0.390821,1,True,1.059936,0.726471
5,OpenCV,entire face,300W resizing with ratios: 0.5,300W,TestDiff,NME_mean,-0.079876,1,True,0.762448,0.0
7,OpenCV,entire face,300W altered with color mode 7,300W,TestDiff,NME_mean,0.001347,1,True,1.118775,0.0
9,OpenCV,entire face,300W blurred,300W,TestDiff,NME_mean,-0.103017,1,True,0.89821,0.0
12,OpenCV,entire face,(Cached (300W) with head-pose) filtered using 'positive_roll',300W,TestDiff,NME_mean,0.077927,1,True,0.524891,0.0
13,OpenCV,entire face,(Cached (300W) with head-pose) filtered using 'negative_roll',300W,TestDiff,NME_mean,-0.019482,1,True,0.905345,0.0
15,OpenCV,entire face,(Cached (300W) with ethnicity) filtered using 'white_ethnicity',300W,TestDiff,NME_mean,0.168421,1,True,0.823915,0.0
