### CLI command for face detection with RetinaFace

In [1]:
# default_exp face_detection.generate_detections

In [2]:
#export
from fastscript import call_parse, Param
from fastai.imports import *
from tqdm import tqdm

In [3]:
#export
from dfdc.core.video_core import *
from dfdc.face_detection.bbox_utils import *
from dfdc.face_detection.retinaface import *
from fastai.data_block import get_files

### `generate_face_detections()`

In [4]:
#export
@call_parse
def generate_face_detections(video_directory:Param("Directory of videos", type=str),
                            dest_fname:Param("Destination to save the detection results dataframe", type=str),
                            freq:Param("Sample frequency for reading videos", type=int),
                            modelname:Param("Detection model backbone", type=str),
                            confidence_threshold:Param("confidence_threshold", type=float) = 0.5,
                            top_k:Param("top_k before nms", type=int) = 5,
                            nms_threshold:Param("nms_threshold", type=float) = 0.5,
                            keep_top_k:Param("keep_top_k after nms", type=int) = 5):
    """Detect faces in every `.mp4` under `video_directory` and save the results to `dest_fname` as CSV.

    Each video is read with `get_decord_video_batch_cpu` using sample frequency
    `freq`, passed through the model returned by `get_model(modelname)`, and the
    predicted boxes and landmarks are rescaled to the original `(H, W)` of the video.

    Args:
        video_directory: directory searched for `.mp4` files.
        dest_fname: path of the CSV file to write; missing parent directories are created.
        freq: sample frequency handed to the video reader; must be >= 1.
        modelname: backbone name forwarded to `get_model`.
        confidence_threshold, top_k, nms_threshold, keep_top_k: forwarded unchanged to `predict`.

    Returns:
        A `pd.DataFrame` with one row per video and the columns `fname`, `size`,
        `face_detections`, `n_frames`, `sample_freq` and `len_video`. Each
        `face_detections` cell is a list of dicts with the keys `frame_no`,
        `detections` and `landmarks`.

    Raises:
        ValueError: if `freq` is smaller than 1.
    """
    import os

    # Fail before any model or video is loaded: a zero step would only blow up inside
    # `range()` later, and a negative one would silently yield no detections at all.
    if freq < 1:
        raise ValueError(f"freq must be a positive integer, got {freq}")

    # Make sure the output location exists up front, so a long detection run cannot
    # be lost to a missing directory when the CSV is finally written.
    os.makedirs(os.path.dirname(os.path.abspath(dest_fname)), exist_ok=True)

    # retinaface model arguments
    model_args = dict(confidence_threshold = confidence_threshold,
                      top_k = top_k,
                      nms_threshold = nms_threshold,
                      keep_top_k = keep_top_k)

    # load model
    model, cfg = get_model(modelname)
    sz = cfg['image_size']  # does not change between videos, so look it up once

    # get all video files under dir
    video_files = get_files(video_directory, extensions=['.mp4'])

    # get face detections
    columns = ["fname", "size", "face_detections", "n_frames", "sample_freq", "len_video"]
    res = []
    for fname in tqdm(video_files):
        # get face detections per video
        t, t_raw, (H,W), len_video = get_decord_video_batch_cpu(fname, freq, sz, retinaface_stats)
        bboxes, landmarks = predict(model, t, sz, cfg, **model_args)
        orig_bboxes = [o.tolist() for o in bboxes_to_original_scale(bboxes, H, W, sz)]
        orig_landmarks = [o.tolist() for o in landmarks_to_original_scale(landmarks, H, W, sz)]

        # Pair every prediction with its frame number.
        # NOTE(review): assumes the reader sampled frames 0, freq, 2*freq, ... in this
        # order -- confirm against get_decord_video_batch_cpu.
        frame_numbers = range(0, len_video, freq)
        face_detections = [{"frame_no":frame_no, "detections":frame_bboxes, "landmarks":frame_landmarks}
                           for frame_no, frame_bboxes, frame_landmarks in
                           zip(frame_numbers, orig_bboxes, orig_landmarks)]

        # generate structured output
        res.append({"fname": fname.name,
                    "size": (H, W),
                    "face_detections": face_detections,
                    "n_frames": t.shape[0],
                    "sample_freq": freq,
                    "len_video": len_video})

    # save results; explicit columns keep the header even when no video was found
    df = pd.DataFrame(res, columns=columns)
    df.to_csv(dest_fname, index=False)
    return df

### Test command

In [5]:
# Inputs for a manual run of the detection command on a single directory of videos.
freq = 10
modelname = "mobilenet"
video_directory = "/home/ubuntu/data/dfdc/dfdc_train/dfdc_train_part_48/"
dest_fname = "/home/ubuntu/data/dfdc/dfdc_face_detections/part_48_retina_detections.csv"

# Detector settings; the values are the same as the defaults of generate_face_detections.
model_args = {
    "confidence_threshold": 0.5,
    "top_k": 5,
    "nms_threshold": 0.5,
    "keep_top_k": 5,
}

In [None]:
# Pass the `modelname` and `model_args` configured in the previous cell rather than
# repeating literals here, so editing that cell actually changes this run.
df = generate_face_detections(video_directory, dest_fname, freq, modelname, **model_args)

### Visualize Results

In [6]:
import ast

# `face_detections` was written to the CSV as the Python repr of a list of dicts,
# so parse it back as a Python literal. Swapping quotes and calling json.loads
# breaks on any value that is not also valid JSON (tuples, None, quoted strings).
df = pd.read_csv(dest_fname)
df["face_detections"] = df["face_detections"].apply(ast.literal_eval)

In [7]:
# Parsed detections of the first video (row label 0).
df.loc[0, "face_detections"]

[{'frame_no': 0, 'detections': [[809, 211, 943, 317]]},
 {'frame_no': 10, 'detections': [[808, 211, 943, 317]]},
 {'frame_no': 20, 'detections': []},
 {'frame_no': 30, 'detections': [[796, 208, 907, 297]]},
 {'frame_no': 40, 'detections': []},
 {'frame_no': 50, 'detections': []},
 {'frame_no': 60, 'detections': []},
 {'frame_no': 70, 'detections': []},
 {'frame_no': 80, 'detections': [[815, 221, 934, 314]]},
 {'frame_no': 90, 'detections': []},
 {'frame_no': 100, 'detections': [[815, 219, 932, 310]]},
 {'frame_no': 110, 'detections': [[807, 213, 942, 319]]},
 {'frame_no': 120, 'detections': [[812, 207, 936, 305]]},
 {'frame_no': 130, 'detections': []},
 {'frame_no': 140, 'detections': [[784, 220, 926, 331]]},
 {'frame_no': 150, 'detections': []},
 {'frame_no': 160, 'detections': []},
 {'frame_no': 170, 'detections': []},
 {'frame_no': 180, 'detections': []},
 {'frame_no': 190, 'detections': [[818, 209, 970, 324]]},
 {'frame_no': 200, 'detections': [[797, 208, 948, 324]]},
 {'frame_no':

### Export

In [8]:
from nbdev.export import notebook2script

In [9]:
# Convert the project's notebooks with nbdev's notebook2script; the output lists
# every notebook that was processed.
notebook2script()

Converted 001 - extract_faces.ipynb.
Converted 002 - face_detection_retinaface.ipynb.
Converted 003 - save_face_crops.ipynb.
Converted 004 - tl_baseline.ipynb.
Converted 00_core.ipynb.
Converted 01_video_core.ipynb.
Converted 10_bbox_utils.ipynb.
Converted 11_retinaface_detection.ipynb.
Converted 12_generate_face_detections.ipynb.
Converted 13_save_cropped_faces.ipynb.
Converted 21_baseline_model.ipynb.
Converted index.ipynb.
