In [1]:
#source https://github.com/HimariO/FairFace/blob/master/inference.py

In [2]:
# The first two installation methods did not work; only this one did:
#sudo apt-get install cmake
#wget https://files.pythonhosted.org/packages/05/57/e8a8caa3c89a27f80bc78da39c423e2553f482a3705adc619176a3a24b36/dlib-19.17.0.tar.gz
#tar -xvzf dlib-19.17.0.tar.gz
#cd dlib-19.17.0/
#sudo python3 setup.py install

In [3]:
import os
import glob
import json
import fire
import random
from multiprocessing import Pool
from collections import Counter

import dlib
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import datasets, models, transforms
from tqdm.notebook import tqdm

In [4]:
# Root of the local HatefulMemesChallenge checkout; every data path below is
# resolved relative to it.
repo_dir = '/media/alex/Storage/coding/HimariO/HatefulMemesChallenge'

# Box annotations, the cleaned meme image directory, and the per-image face
# race/gender boxes, in that order.
gqa_box_anno, meme_img_dir, face_race_boxes = (
    os.path.join(repo_dir, relative_path)
    for relative_path in (
        'data/hateful_memes/box_annos.json',
        'data/hateful_memes/img_clean',
        'data/hateful_memes/face_race_boxes.json',
    )
)

# Echo the resolved paths as the cell output.
(gqa_box_anno, meme_img_dir, face_race_boxes)

('/media/alex/Storage/coding/HimariO/HatefulMemesChallenge/data/hateful_memes/box_annos.json',
 '/media/alex/Storage/coding/HimariO/HatefulMemesChallenge/data/hateful_memes/img_clean',
 '/media/alex/Storage/coding/HimariO/HatefulMemesChallenge/data/hateful_memes/face_race_boxes.json')

In [8]:
def box_coverage(box_a, box_b):
    """Return the fraction of the smaller box's area that overlaps the larger box.

    Both boxes are indexed as ``[x_min, y_min, x_max, y_max]``. "Smaller" and
    "larger" are decided by area; on a tie ``box_a`` is treated as the smaller
    one.

    Args:
        box_a: First box, a sequence of at least four numbers.
        box_b: Second box, same layout as ``box_a``.

    Returns:
        ``intersection_area / smaller_box_area``. ``0.0`` is returned when the
        smaller box has no (or negative) area, instead of dividing by zero.

    Raises:
        AssertionError: if one box has ``x_max > 1`` and the other ``x_max <= 1``
            (used as a guard against mixing two coordinate scales).
    """
    assert (box_a[2] > 1 and box_b[2] > 1) or (box_a[2] <= 1 and box_b[2] <= 1), \
        'box_a and box_b must use the same coordinate scale'
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])

    # Strict comparison: with equal areas box_b plays the "larger" role.
    larger, small = (box_a, box_b) if area_a > area_b else (box_b, box_a)

    w = small[2] - small[0]
    h = small[3] - small[1]
    small_area = w * h
    if small_area <= 0:
        # Degenerate box: nothing to cover, and the ratio would be undefined.
        return 0.0

    # Plain interval intersection on each axis. This stays correct even when
    # the larger-area box is narrower (or shorter) than the smaller-area box
    # along one axis, e.g. a tall thin box crossing a wide flat one.
    cover_w = max(min(small[2], larger[2]) - max(small[0], larger[0]), 0)
    cover_h = max(min(small[3], larger[3]) - max(small[1], larger[1]), 0)
    return (cover_h * cover_w) / small_area

In [7]:
def converage_nms(primary_set, sec_set, indies=False, drop=True, threshold=0.4):
    """Filter ``sec_set`` boxes by how strongly ``primary_set`` boxes cover them.

    For every box in ``sec_set`` the highest ``box_coverage`` against any box
    in ``primary_set`` is computed (0 when ``primary_set`` is empty).

    Args:
        primary_set: Reference boxes.
        sec_set: Candidate boxes to filter.
        indies: When true, also return the positions of the kept boxes
            within ``sec_set``.
        drop: When true, keep candidates whose best coverage is below
            ``threshold``; otherwise keep those at or above it.
        threshold: Coverage cut-off.

    Returns:
        The kept boxes, or ``(kept_boxes, kept_positions)`` if ``indies`` is set.
    """
    kept_boxes, kept_positions = [], []
    for position, candidate in enumerate(sec_set):
        # The leading 0 is the baseline used when there are no reference boxes.
        best = max([0] + [box_coverage(reference, candidate) for reference in primary_set])
        keep_it = best < threshold if drop else best >= threshold
        if keep_it:
            kept_boxes.append(candidate)
            kept_positions.append(position)

    return (kept_boxes, kept_positions) if indies else kept_boxes

In [10]:
def map_race_to_person_box(img_dir, boxes_json, face_race_json, detector='oid'):
    """Tag person detections with the race/gender of a face they contain.

    Every detection record gets two new keys, ``'race'`` and ``'gender'``.
    They are ``None`` unless the detection's class is a person class and at
    least one face box is covered by the detection box with coverage >= 0.8;
    in that case the values of the largest such face are used.

    The tagged records are written next to ``boxes_json`` with ``.race``
    inserted before the file extension (``box_annos.json`` ->
    ``box_annos.race.json``). Nothing is returned.

    Args:
        img_dir: Directory holding the images named by each record's
            ``'img_name'``.
        boxes_json: Path to a JSON list of records, each with ``'img_name'``
            and ``'boxes_and_score'`` (a list of dicts with ``'class_name'``,
            ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``). The box
            coordinates are multiplied by the image width/height here.
        face_race_json: Path to a JSON object mapping image name to a dict
            with parallel lists ``'face_boxes'``, ``'face_race'`` and
            ``'face_gender'``.
            NOTE(review): face boxes are presumably absolute pixel
            ``[x_min, y_min, x_max, y_max]`` -- confirm against the producer.
        detector: ``'oid'`` or ``'gqa'``; selects the set of person class names.
            NOTE(review): for ``'gqa'`` only the empty class name matches,
            which looks like a placeholder -- confirm.

    Raises:
        AssertionError: if ``detector`` is not one of the supported values.
        KeyError: if an image from ``boxes_json`` has no entry in
            ``face_race_json``.
    """
    assert detector in ['oid', 'gqa']

    person_cls = [
        'Woman',
        'Person',
        'Human body',
        'Man',
        'Girl',
        'Boy',
    ] if detector == 'oid' else [
        ''
    ]
    person_cls = {c.lower() for c in person_cls}

    with open(boxes_json, 'r') as f:
        det_boxes = json.load(f)
    with open(face_race_json, 'r') as f:
        face_det_boxes = json.load(f)

    def face_area(face):
        # face is a (box, race, gender) tuple; rank by the box area.
        box = face[0]
        return (box[2] - box[0]) * (box[3] - box[1])

    match_cnt = []
    for img_boxes in tqdm(det_boxes):
        dets = img_boxes['boxes_and_score']

        img_name = img_boxes['img_name']
        img_path = os.path.join(img_dir, img_name)
        # The image is loaded only to learn its size for rescaling the boxes.
        img = dlib.load_rgb_image(img_path)
        h, w = img.shape[:2]

        face_dets = face_det_boxes[img_name]
        # Largest face first, so index 0 of any match list is the biggest face.
        faces_by_area = sorted(
            zip(face_dets['face_boxes'], face_dets['face_race'], face_dets['face_gender']),
            key=face_area,
            reverse=True)
        face_box = [face[0] for face in faces_by_area]
        face_race = [face[1] for face in faces_by_area]
        face_gender = [face[2] for face in faces_by_area]

        for det in dets:
            race = gender = None
            if det['class_name'].lower() in person_cls:
                detector_box = [
                    det['xmin'] * w, det['ymin'] * h,
                    det['xmax'] * w, det['ymax'] * h
                ]
                # drop=False keeps the faces that ARE covered by the person box.
                _, keep_idx = converage_nms(
                    [detector_box],
                    face_box,
                    indies=True,
                    drop=False,
                    threshold=0.8,
                )

                if keep_idx:
                    race = face_race[keep_idx[0]]
                    gender = face_gender[keep_idx[0]]
                    match_cnt.append(len(keep_idx))
            det['race'] = race
            det['gender'] = gender

    print('Match cnt freq: ', Counter(match_cnt))
    # Insert '.race' before the extension of the file name only. A plain
    # str.replace('.json', ...) would also rewrite directory names and would
    # leave the path unchanged (overwriting the input) for non-'.json' inputs.
    path_stem, path_ext = os.path.splitext(boxes_json)
    taged_box_anno_path = path_stem + '.race' + path_ext
    with open(taged_box_anno_path, 'w') as f:
        json.dump(det_boxes, f)

In [11]:
map_race_to_person_box(meme_img_dir, gqa_box_anno, face_race_boxes)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))


Match cnt freq:  Counter({1: 17806, 2: 2801, 3: 434, 4: 76, 5: 29, 6: 3, 7: 2})


In [None]:
# The result is saved to box_annos.race.json (the boxes_json path with '.race' inserted before '.json').

In [12]:
# Example of one record from the resulting file (note the added "race" and "gender" keys):
#{"img_name": "01235.png", "boxes_and_score": [
#{"ymin": 0.2707633674144745, "xmin": 0.39449542760849, "ymax": 0.618645429611206, "xmax": 0.5975348353385925, "score": 0.9788097739219666, "class_name": "Human face", "class_id": 502, "race": null, "gender": null},
#{"ymin": 0.1195806935429573, "xmin": 0.050002746284008026, "ymax": 0.9922588467597961, "xmax": 0.9625630378723145, "score": 0.9190647006034851, "class_name": "Man", "class_id": 308, "race": "Middle Eastern", "gender": "Male"},
#{"ymin": 0.49269676208496094, "xmin": 0.07834305614233017, "ymax": 0.9805120229721069, "xmax": 0.9698421359062195, "score": 0.7085372805595398, "class_name": "Clothing", "class_id": 433, "race": null, "gender": null},
#{"ymin": 0.45603710412979126, "xmin": 0.020553115755319595, "ymax": 0.9691803455352783, "xmax": 0.3579131066799164, "score": 0.21738770604133606, "class_name": "Human arm", "class_id": 503, "race": null, "gender": null}]}, 