# Analysis of DeepFace Gender Detector

In [None]:
import itertools
import json
from pathlib import Path
from pprint import pprint
import shutil


import cv2 as cv
import joblib
import numpy as np
import pandas as pd
from sklearn import base, calibration, metrics, model_selection
from tqdm import tqdm

from deepface import DeepFace
from deepface.commons import functions
from deepface.extendedmodels import Gender

import matplotlib
from matplotlib import pyplot as plt

In [None]:
from saac.utils import cv_imshow, color_show, quadrant_bboxes, crop_bbox
from saac.models import IdentityClassifier, CalibratedGenderModel
from saac.process import MidJourneyProcessor

## Labeled Midjourney dataset

In [None]:
# Root folder of the labeled images; the code below treats every
# sub-directory as one class and every *.png inside it as one example.
midjourney_root = Path('../../data/images/test/raw')

# Only directories are classes: a stray file (e.g. .DS_Store, a README) would
# otherwise become a bogus label and shift the integer index of every class
# sorted after it. `name` (not `stem`) keeps directory names containing a dot
# intact so the path built below still resolves.
labels = sorted(p.name for p in midjourney_root.glob('*') if p.is_dir())
print(f'Labels: {labels}')

# (image_path, label_index) pairs; label_index is the position of the class in
# the sorted `labels` list. Image paths are sorted so the dataset order does
# not depend on the file system's directory listing order.
midjourney_ds = [
    (image_path, idx)
    for idx, label in enumerate(labels)
    for image_path in sorted((midjourney_root / label).glob('*.png'))
]

print('Midjourney Dataset:')
for idx, label in enumerate(labels):
    count = sum(1 for _, example_label in midjourney_ds if example_label == idx)
    print(f'{label}: {count}')

### Load desired gender detector

In [None]:
# Remove any cached gender weights before loading the model, so a previously
# swapped-in weight file (see the commented alternative below) is not reused.
# NOTE(review): presumably Gender.loadModel() re-downloads the stock weights
# when the cache file is missing - confirm against the installed deepface.
deepface_home = Path(functions.get_deepface_home())
df_weight_cache = deepface_home / '.deepface' / 'weights' / 'gender_model_weights.h5'
df_weight_cache.unlink(missing_ok=True)

gender_model = Gender.loadModel()

# Alternative: copy a custom weight file into the cache and load that instead.
# df_model_version = Path('../../models/gender_model_transfer.h5')
# shutil.copy(df_model_version, df_weight_cache)
# gender_model = Gender.loadModel()

### Collect predictions

In [None]:
# Keyword arguments forwarded unchanged to processor.image_predictions()
# for every image.
kwargs = {
    'equalizer': True,
    'detector_backend': 'mtcnn',
    'actions': ('gender',),
    'models': {'gender': gender_model},
}

processor = MidJourneyProcessor()

# One dict per dataset example, in dataset order.
all_predictions = []

for image_path, label in tqdm(midjourney_ds):
    image = cv.imread(str(image_path))
    # cv.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on `.shape`.
    if image is None:
        raise OSError(f'Could not read image: {image_path}')
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if image.shape != (256, 256, 3):
        raise ValueError(
            f'Unexpected image shape {image.shape} for {image_path}; expected (256, 256, 3)'
        )

    predictions = processor.image_predictions(image, **kwargs)
    # The processor may return None; keep a row for the image anyway so it
    # still shows up (without bbox / gender fields) in the results.
    if predictions is None:
        predictions = {}
    predictions['image'] = image_path
    predictions['label'] = label
    all_predictions.append(predictions)

In [None]:
predictions_path = Path('./analysis_deepface_default_equalized.csv')

# The CSV is written only once. NOTE(review): when the file already exists the
# predictions held in `all_predictions` are not saved - delete the file to
# refresh it.
if not predictions_path.exists():
    csv_columns = ['image', 'label', 'bbox', 'gender.Woman', 'gender.Man']
    # json_normalize flattens nested dicts into dotted column names;
    # reindex fixes the column set and order.
    df = pd.json_normalize(all_predictions).reindex(columns=csv_columns)
    df.to_csv(str(predictions_path), index=False)

### Analyze face detections

In [None]:
# Reload the saved predictions as a list of plain dicts (one per CSV row).
# The round trip through JSON is presumably there so that empty cells come
# back as None, which the detection analysis below tests for.
records_json = pd.read_csv(str(predictions_path)).to_json(orient='records')
all_predictions = json.loads(records_json)
all_labels = [record['label'] for record in all_predictions]

In [None]:
# 1 when a bounding box was recorded for the image, 0 when none was.
predictions_detect = [0 if p['bbox'] is None else 1 for p in all_predictions]
# Ground truth is "detect" for every row (assumes every dataset image
# contains a face - TODO confirm).
labels_detect = [1] * len(all_labels)

# `labels=[0, 1]` pins both classes: without it the report raises ValueError
# whenever only one class occurs (e.g. a face was found in every image),
# because two target_names are supplied. `zero_division=0` keeps the default
# score of 0 for the empty class while silencing the warning.
print(metrics.classification_report(
    labels_detect,
    predictions_detect,
    labels=[0, 1],
    target_names=['no detect', 'detect'],
    zero_division=0,
))

In [None]:
# Confusion matrix for face detection. `labels=[0, 1]` forces a 2x2 matrix
# even when only one class occurs in the data, so it always matches the two
# display labels below.
cm = metrics.confusion_matrix(labels_detect, predictions_detect, labels=[0, 1])
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['no detect', 'detect'])
disp.plot()

### Analyze gender detections

In [None]:
# Decision threshold applied to the "Man" score.
threshold = 0.5

# Per image: (score, hard_prediction), where score is gender.Man / 100 and
# hard_prediction is 1 when score >= threshold; None when no bbox was recorded.
predictions_gender = [
    (p['gender.Man'] / 100, int(p['gender.Man'] / 100 >= threshold)) if p['bbox'] else None
    for p in all_predictions
]

# Keep only rows that have both a label and a prediction.
kept_pairs = [
    (label, prediction)
    for label, prediction in zip(all_labels, predictions_gender)
    if label is not None and prediction is not None
]
labels_gender, predictions_gender = zip(*kept_pairs)
labels_gender = list(labels_gender)

In [None]:
# Report on the thresholded predictions (second element of each pair).
# Assumes label index 0 is "Woman" and 1 is "Man" - TODO confirm against the
# sorted dataset labels.
print(
    metrics.classification_report(
        labels_gender,
        [hard for _, hard in predictions_gender],
        target_names=['Woman', 'Man'],
    )
)

In [None]:
# Confusion matrix of the thresholded gender predictions.
hard_predictions = [hard for _, hard in predictions_gender]
cm = metrics.confusion_matrix(labels_gender, hard_predictions)
disp = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['Woman', 'Man'],
)
disp.plot()

In [None]:
# ROC curve built from the continuous "Man" scores (first element of each pair).
man_scores = [score for score, _ in predictions_gender]
disp = metrics.RocCurveDisplay.from_predictions(labels_gender, man_scores)
disp

In [None]:
# Distribution of the "Man" scores over all images with a detected face.
plt.hist([score for score, _ in predictions_gender], bins=20)
plt.title('Gender Probabilities')

## Gender detector calibration

In [None]:
# Reliability diagram of the uncalibrated "Man" scores, using 5 bins.
uncalibrated_scores = [score for score, _ in predictions_gender]
disp = calibration.CalibrationDisplay.from_predictions(
    labels_gender,
    uncalibrated_scores,
    n_bins=5,
)
disp

In [None]:
# Stratified 67/33 split of (prediction, label) pairs with a fixed seed:
# the train part fits the calibrator, the test part evaluates it.
split_parts = model_selection.train_test_split(
    predictions_gender,
    labels_gender,
    test_size=0.33,
    stratify=labels_gender,
    random_state=42,
)
(
    predictions_gender_train,
    predictions_gender_test,
    labels_gender_train,
    labels_gender_test,
) = split_parts

### Fit calibrated classifier

In [None]:
# Base estimator handed to the calibrator below.
base_clf = IdentityClassifier(threshold=0.5)

# Sanity check: print the first ten classifier outputs next to the first ten
# raw training scores.
train_score_list = [score for score, _ in predictions_gender_train]
pprint(base_clf.predict_proba(np.array(train_score_list))[:10])
pprint(np.array(train_score_list)[:10])

In [None]:
# Isotonic calibration on top of the already-usable base classifier
# (cv='prefit' means the base estimator itself is not refit).
# The estimator is passed positionally because scikit-learn renamed the
# keyword from `base_estimator` to `estimator`; the positional form works
# with both spellings.
# NOTE(review): recent scikit-learn releases also deprecate cv='prefit' -
# confirm against the installed version.
calibrated_clf = calibration.CalibratedClassifierCV(base_clf, method='isotonic', cv='prefit')

# Fit on the continuous "Man" scores of the training split.
train_scores = np.array([score for score, _ in predictions_gender_train])
calibrated_clf.fit(train_scores, np.array(labels_gender_train))

### Evaluate calibrated classifier

In [None]:
# Classification report of the calibrated classifier on the held-out split.
test_scores = np.array([score for score, _ in predictions_gender_test])
calibrated_hard_predictions = calibrated_clf.predict(test_scores)
print(
    metrics.classification_report(
        labels_gender_test,
        calibrated_hard_predictions,
        target_names=['Woman', 'Man'],
    )
)

In [None]:
# Confusion matrix of the calibrated classifier on the held-out split.
test_scores = np.array([score for score, _ in predictions_gender_test])
cm = metrics.confusion_matrix(labels_gender_test, calibrated_clf.predict(test_scores))
disp = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['Woman', 'Man'],
)
disp.plot()

In [None]:
# ROC curve from the calibrated probability of class 1 on the held-out split.
test_scores = np.array([score for score, _ in predictions_gender_test])
calibrated_positive_proba = calibrated_clf.predict_proba(test_scores)[:, 1]
disp = metrics.RocCurveDisplay.from_predictions(labels_gender_test, calibrated_positive_proba)
disp

In [None]:
# Reliability diagram of the calibrated class-1 probabilities (5 bins) on the
# held-out split.
test_scores = np.array([score for score, _ in predictions_gender_test])
calibrated_positive_proba = calibrated_clf.predict_proba(test_scores)[:, 1]
disp = calibration.CalibrationDisplay.from_predictions(
    labels_gender_test,
    calibrated_positive_proba,
    n_bins=5,
)
disp

### Save model

In [None]:
# Persist the fitted calibrator next to the other model files.
calibrated_model_path = Path('../../models') / 'gender_model_default_calibrated.joblib'

joblib.dump(calibrated_clf, calibrated_model_path)

In [None]:
# Reload the calibrator from disk so the cells below use the saved artifact.
# NOTE: joblib.load unpickles the file - only load model files you trust.
calibrated_clf = joblib.load(calibrated_model_path)

### Run calibrated model on test data

In [None]:
# Build the DeepFace models, keyed by the lower-case action name.
# Only the 'gender' entry is used in this cell.
df_default_models = {
    action: DeepFace.build_model(model_name)
    for action, model_name in (
        ('age', 'Age'),
        ('gender', 'Gender'),
        ('emotion', 'Emotion'),
        ('race', 'Race'),
    )
}

# Pair the DeepFace gender model with the calibrator loaded above.
calibrated_gender_model = CalibratedGenderModel(
    gender_model=df_default_models['gender'],
    calibrator=calibrated_clf,
)

In [None]:
# Same options as the uncalibrated run, but with the calibrated gender model.
kwargs = {
    'equalizer': True,
    'detector_backend': 'mtcnn',
    'actions': ('gender',),
    'models': {'gender': calibrated_gender_model},
}

# One dict per dataset example, in dataset order.
all_predictions = []

for image_path, label in tqdm(midjourney_ds):
    image = cv.imread(str(image_path))
    # cv.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on `.shape`.
    if image is None:
        raise OSError(f'Could not read image: {image_path}')
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if image.shape != (256, 256, 3):
        raise ValueError(
            f'Unexpected image shape {image.shape} for {image_path}; expected (256, 256, 3)'
        )

    predictions = processor.image_predictions(image, **kwargs)
    # The processor may return None; keep a row for the image anyway so it
    # still shows up (without bbox / gender fields) in the results.
    if predictions is None:
        predictions = {}
    predictions['image'] = image_path
    predictions['label'] = label
    all_predictions.append(predictions)

In [None]:
predictions_path = Path('./analysis_deepface_calibrated_equalized.csv')

# The CSV is written only once. NOTE(review): when the file already exists the
# predictions held in `all_predictions` are not saved - delete the file to
# refresh it.
if not predictions_path.exists():
    csv_columns = ['image', 'label', 'bbox', 'gender.Woman', 'gender.Man']
    # json_normalize flattens nested dicts into dotted column names;
    # reindex fixes the column set and order.
    df = pd.json_normalize(all_predictions).reindex(columns=csv_columns)
    df.to_csv(str(predictions_path), index=False)

In [None]:
# Reload the calibrated-run predictions as a list of plain dicts (one per CSV
# row). The round trip through JSON is presumably there so that empty cells
# come back as None.
records_json = pd.read_csv(str(predictions_path)).to_json(orient='records')
all_predictions = json.loads(records_json)
all_labels = [record['label'] for record in all_predictions]

### Evaluate calibrated model

In [None]:
# Decision threshold applied to the "Man" score.
threshold = 0.5

# Per image: (score, hard_prediction), where score is gender.Man / 100 and
# hard_prediction is 1 when score >= threshold; None when no bbox was recorded.
predictions_gender = [
    (p['gender.Man'] / 100, int(p['gender.Man'] / 100 >= threshold)) if p['bbox'] else None
    for p in all_predictions
]

# Keep only rows that have both a label and a prediction.
kept_pairs = [
    (label, prediction)
    for label, prediction in zip(all_labels, predictions_gender)
    if label is not None and prediction is not None
]
labels_gender, predictions_gender = zip(*kept_pairs)
labels_gender = list(labels_gender)

In [None]:
# Report on the thresholded predictions (second element of each pair).
# Assumes label index 0 is "Woman" and 1 is "Man" - TODO confirm against the
# sorted dataset labels.
print(
    metrics.classification_report(
        labels_gender,
        [hard for _, hard in predictions_gender],
        target_names=['Woman', 'Man'],
    )
)

In [None]:
# Confusion matrix of the thresholded predictions from the calibrated run.
hard_predictions = [hard for _, hard in predictions_gender]
cm = metrics.confusion_matrix(labels_gender, hard_predictions)
disp = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['Woman', 'Man'],
)
disp.plot()

In [None]:
# ROC curve built from the continuous "Man" scores of the calibrated run.
man_scores = [score for score, _ in predictions_gender]
disp = metrics.RocCurveDisplay.from_predictions(labels_gender, man_scores)
disp