## load the necessary packages and define the functions

In [38]:
import random
from glob import glob
import os
import json
from imutils.video import FileVideoStream
import cv2
from PIL import Image, ImageFont, ImageDraw
import numpy as np

def yield_images_from_video(video_path):
    """Yield the frames of a video one at a time as RGB images.

    The video is read through an imutils ``FileVideoStream``; the number of
    read attempts is taken from the underlying capture's
    ``cv2.CAP_PROP_FRAME_COUNT``.

    Args:
        video_path: path of the video file, passed to ``FileVideoStream``.

    Yields:
        Each non-``None`` frame, converted with ``cv2.COLOR_BGR2RGB``.
    """
    v_cap = FileVideoStream(video_path).start()
    try:
        v_len = int(v_cap.stream.get(cv2.CAP_PROP_FRAME_COUNT))

        for _ in range(v_len):
            # get video frame
            frame = v_cap.read()

            # The reported frame count is only used as an upper bound on the
            # number of reads; reads that return None are skipped.
            if frame is not None:
                yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Shut the stream down even if the consumer stops iterating early or
        # an error occurs, so the reader is not left running.
        v_cap.stop()


## Load a random video and the predictions made for it by `cltl-facedetection`

In [47]:

# Pick one training clip at random.
random_vid = random.choice(glob('/home/tk/datasets/MELD/MELD.Raw/train/train_splits/*.mp4'))

# The prediction file has the same base name as the clip, with a .json
# extension. os.path swaps only the trailing extension and copes with the
# platform's path separator, unlike split('/') + replace('.mp4', ...).
vid_stem = os.path.splitext(os.path.basename(random_vid))[0]
prediction_ = os.path.join(
    '/home/tk/datasets/MELD/visual-features/box-probs-coarse-landmarks/train',
    vid_stem + '.json')

with open(prediction_, 'r') as stream:
    pred = json.load(stream)

# Decode every frame up front so the counts can be compared with the predictions.
images_all = list(yield_images_from_video(random_vid))

# Sanity check: each prediction entry should have one item per video frame.
print(f'number of frames in the vid: {len(images_all)}')
print(f'number of frames in the boxes: {len(pred["boxes"])}')
# NOTE: this line reports the size of pred["probs"], despite its label.
print(f'number of frames in the frames: {len(pred["probs"])}')
print(f'number of frames in the landmarks: {len(pred["landmarks"])}')

number of frames in the vid: 56
number of frames in the boxes: 56
number of frames in the frames: 56
number of frames in the landmarks: 56


## Let's annotate the video frame by frame with the predicted boxes, probabilities, and landmarks

In [48]:
# Load the fonts once: they are identical for every frame and every face, so
# re-reading the font file inside the loops is wasted work.
prob_font = ImageFont.truetype("SansSerif.ttf", 25)
counter_font = ImageFont.truetype("SansSerif.ttf", 50)

try:
    # Frames are paired with the prediction entries by position.
    # NOTE(review): this assumes the values of pred['boxes'], pred['probs'] and
    # pred['landmarks'] are stored in frame order -- confirm against the
    # prediction file format.
    for (idx, img), boxes, probs, landmarks in \
            zip(enumerate(images_all), pred['boxes'].values(),
                pred['probs'].values(), pred['landmarks'].values()):

        # One (box, prob, landmark) triple per entry for this frame.
        for box, prob, landmark in zip(boxes, probs, landmarks):
            x1, y1, x2, y2 = (round(box[k]) for k in range(4))

            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # The text is drawn with PIL (TrueType font), so the array makes a
            # round trip through a PIL image.
            img_PIL = Image.fromarray(img)
            d = ImageDraw.Draw(img_PIL)
            d.text((x1, y1), str(round(prob, 3)),
                   font=prob_font, fill=(255, 255, 0))
            img = np.array(img_PIL)

            # Mark each landmark point with a small filled circle.
            for (x, y) in landmark:
                cv2.circle(img, (round(x), round(y)), 2, (0, 0, 255), -1)

        # Frame counter ("current/total") in the top-left corner.
        img_PIL = Image.fromarray(img)
        d = ImageDraw.Draw(img_PIL)
        d.text((0, 0), str(idx+1) + f'/{len(images_all)}',
               font=counter_font, fill=(255, 255, 255))
        img = np.array(img_PIL)

        # The frames are RGB; convert to BGR for display with OpenCV.
        cv2.imshow("annotated", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        # Block until a key is pressed before moving on to the next frame.
        cv2.waitKey(-1)
finally:
    # Close the display window even if annotation fails part-way through.
    cv2.destroyAllWindows()