### Crop faces from videos using the detections in `face_detections_df`

In [131]:
import ast

from fastai.vision import *
from tqdm import tqdm

In [2]:
data_path = Path("/home/ubuntu/data/")

In [3]:
video_path = data_path/"dfdc_train_part_49"

In [4]:
# metadata json file(s) shipped alongside the videos; reuse `video_path` instead of repeating the absolute path
metadata = get_files(video_path, extensions=['.json'])

In [5]:
# all mp4 videos of this part; reuse `video_path` instead of repeating the absolute path
video_files = get_files(video_path, extensions=['.mp4'])

In [6]:
def read_metadata(fn):
    "read the metadata json `fn` into a frame with columns fname, label, split, original"
    # the json is keyed by filename, so transpose to get one row per file
    per_file = pd.read_json(fn).transpose()
    # move the filename index into a regular column
    flat = per_file.reset_index()
    flat.columns = ['fname', 'label', 'split', 'original']
    return flat

In [7]:
metadf = read_metadata(metadata[0])

In [8]:
metadf.head()

Unnamed: 0,fname,label,split,original
0,dwwytkheyx.mp4,FAKE,train,nlcqykqsdp.mp4
1,bdsxxaamze.mp4,FAKE,train,xzmplldajk.mp4
2,tfceaqvefa.mp4,FAKE,train,zkksmnscsf.mp4
3,lurozpmsqd.mp4,FAKE,train,urbulrzowx.mp4
4,jvlvkijuwa.mp4,FAKE,train,wvnjcwevzo.mp4


In [9]:
metadf.groupby("label")[["fname",'original']].count()

Unnamed: 0_level_0,fname,original
label,Unnamed: 1_level_1,Unnamed: 2_level_1
FAKE,2619,2619
REAL,515,0


### face detections

In [None]:
face_detections_df = pd.read_csv(data_path/'dfdc_faces/part_49_retina_detections.csv')

In [50]:
# `face_detections` is stored in the csv as the string form of a python list of dicts;
# parse it with ast.literal_eval rather than quote-swapping into json, and leave rows that
# are already parsed untouched so the cell can be re-run safely
face_detections_df.face_detections = (face_detections_df.face_detections
                                          .apply(lambda o: ast.literal_eval(o) if isinstance(o, str) else o))

In [51]:
face_detections_df.head()

Unnamed: 0,fname,size,face_detections,n_frames,sample_freq,len_video
0,yraysfvtgv.mp4,"(1280, 720)","[{'frame_no': 0, 'detections': [[290, 268, 398...",31,10,301
1,jidkzoagws.mp4,"(1080, 1920)","[{'frame_no': 0, 'detections': [[895, 172, 101...",30,10,300
2,uowiocuqqt.mp4,"(1080, 1920)","[{'frame_no': 0, 'detections': [[845, 265, 105...",30,10,300
3,sigtxuuutc.mp4,"(1080, 1920)","[{'frame_no': 0, 'detections': [[899, 139, 104...",30,10,300
4,uzawooqxrq.mp4,"(1080, 1920)","[{'frame_no': 0, 'detections': [[818, 146, 978...",30,10,300


In [54]:
Counter(face_detections_df['size'])

Counter({'(1280, 720)': 715, '(1080, 1920)': 2234, '(1920, 1080)': 185})

### video utils

In [66]:
from decord import VideoReader
from decord import cpu
from decord.bridge import set_bridge
set_bridge('torch')

In [67]:
def get_decord_video_batch_cpu(fname, sz, freq=10, stats=None):
    "decode every `freq`-th frame of `fname` on cpu; returns (batch, (H, W)), resized to `sz` if given"
    reader = VideoReader(str(fname), ctx=cpu())
    sampled_idxs = range(0, len(reader), freq)
    batch = reader.get_batch(sampled_idxs)
    # NOTE(review): reading (H, W) from shape[2:] assumes an (N, C, H, W) batch — confirm decord's output layout
    H, W = batch.shape[2:]
    if sz:
        # resize to a square `sz` x `sz` float batch and move it to `device` (defined outside this function)
        batch = F.interpolate(batch.to(torch.float32), (sz, sz)).to(device)
    if stats is not None:
        # in-place subtraction of the provided stats
        batch -= stats
    return (batch, (H, W))

In [115]:
def convert_bboxes(bboxes, H, W, sz):
    """
    rescale bbox predictions to original image sz

    bboxes: per-frame collections of 4-value bboxes predicted on an image of side `sz`
    H, W: height and width of the original image
    sz: side length the predictions refer to
    returns: list with one int array per frame; frames without detections give an empty (0, 4) array
    """
    # the scale is identical for every frame, so build it once;
    # coordinates are multiplied by (w, h, w, h), i.e. x-like values first
    h_scale, w_scale = H/sz, W/sz
    scale = array([w_scale, h_scale, w_scale, h_scale])[None, ...]
    res = []
    for bb in bboxes:
        bb = array(bb)
        if bb.size == 0:
            # no detection for this frame: an empty list cannot broadcast against the scale
            res.append(bb.reshape(0, 4).astype(int))
            continue
        res.append((bb*scale).astype(int))
    return res

In [18]:
def rescale_bbox(bb, bb_scale, H,W):
    "scale a (left, top, right, bottom) bbox about its center by `bb_scale`, clipped to the H x W frame"
    x0, y0, x1, y1 = bb

    # integer center of the box
    mid_y = (y0 + y1)//2
    mid_x = (x0 + x1)//2
    # scaled height / width, truncated to int
    new_h = int((y1 - y0)*bb_scale)
    new_w = int((x1 - x0)*bb_scale)
    half_h, half_w = new_h//2, new_w//2

    # grow symmetrically around the center, then clamp to the frame
    new_left = max(0, mid_x - half_w)
    new_top = max(0, mid_y - half_h)
    new_right = min(W, mid_x + half_w)
    new_bottom = min(H, mid_y + half_h)
    return (new_left, new_top, new_right, new_bottom)

### save cropped faces

In [124]:
# root directory that receives the crops of every video in this part
crop_path = data_path/"cropped_faces"/video_path.name
crop_path.mkdir(parents=True, exist_ok=True)
crop_path

PosixPath('/home/ubuntu/data/cropped_faces/dfdc_train_part_49')

In [129]:
video_path.ls()[:3]

[PosixPath('/home/ubuntu/data/dfdc_train_part_49/yraysfvtgv.mp4'),
 PosixPath('/home/ubuntu/data/dfdc_train_part_49/jidkzoagws.mp4'),
 PosixPath('/home/ubuntu/data/dfdc_train_part_49/uowiocuqqt.mp4')]

In [126]:
def save(self, fn:PathOrStr, mult=False):
    "Write the image data to `fn` as uint8; multiply by 255 first when `mult` is set."
    pixels = self.data
    if mult: pixels = pixels*255
    arr = image2np(pixels).astype(np.uint8)
    PIL.Image.fromarray(arr).save(fn)
Image.save = save # monkey patch

def crop_and_save(path:PathOrStr, fname:PathOrStr, crop_path:PathOrStr, bboxes:List[List[List]],
                  freq, total_frames, bb_scale=1.3):
    """
    Crop the first detected face of every sampled frame of a video and save each crop as a jpg.

    path: directory which has the video with fname
    fname: filename of video "xxxxx.mp4"
    crop_path: destination directory to save cropped images for video
    bboxes: list of bbox coordinates for each sampled frame for video
    freq: sample frequency used in bbox detection
    total_frames: total number of frames sampled from video during detection
    bb_scale: factor used to enlarge each bbox around its center before cropping
    """

    # read sampled raw video (no resize, no stats)
    t, (H, W) = get_decord_video_batch_cpu(path/fname, None, freq, None)
    # create directory to save crops
    video_dir = crop_path/Path(fname).stem
    os.makedirs(video_dir, exist_ok=True)
    # check if # of face detections are same as # of sampled frames
    assert len(bboxes) == t.shape[0], (
        f"{fname}: {len(bboxes)} detection entries for {t.shape[0]} sampled frames")

    for frame_no, (_frame, _bb) in enumerate(zip(t, bboxes)):
        # don't try cropping if no detection is available for the frame
        # (explicit check instead of a bare except, which would also swallow KeyboardInterrupt)
        if _bb is None or len(_bb) == 0: continue
        # naive: get first bbox, optionally rescale
        left, top, right, bottom = rescale_bbox(_bb[0], bb_scale, H, W)
        # crop and save; the slicing assumes a (C, H, W) frame — TODO confirm
        face_crop = Image(_frame[:, top:bottom, left:right])
        # save with frame index (start from 1) and sequence length (total available frames)
        save_path = video_dir/f"frame_{frame_no+1}_{total_frames}.jpg"
        face_crop.save(save_path)

In [132]:
# crop and save faces for every video listed in `face_detections_df`;
# `iterrows()` has no length, so pass `total` to get a percentage and ETA in the progress bar
for _, row in tqdm(face_detections_df.iterrows(), total=len(face_detections_df)):
    fname = row['fname']
    face_detections = row['face_detections']
    # one list of bboxes per sampled frame
    bboxes = [o['detections'] for o in face_detections]
    freq = row['sample_freq']
    total_frames = row['n_frames']
    crop_and_save(video_path, fname, crop_path, bboxes, freq, total_frames)

3134it [37:16,  1.40it/s]
