In [2]:
%cd /content/drive/MyDrive/Colab Notebooks/OxHack20
!pip install yolov5
!pip install face_detection

/content/drive/MyDrive/Colab Notebooks/OxHack20
Collecting yolov5
  Downloading yolov5-6.0.6-py36.py37.py38-none-any.whl (837 kB)
[K     |████████████████████████████████| 837 kB 5.0 MB/s 
Collecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 6.3 MB/s 
Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Collecting boto3>=1.19.1
  Downloading boto3-1.21.8-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 46.4 MB/s 
[?25hCollecting sahi>=0.8.9
  Downloading sahi-0.9.0-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 6.6 MB/s 
[?25hCollecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 39.8 MB/s 
[?25hCollecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.2-py3-none-any.whl (79 kB)
[K     |█████

Collecting face_detection
  Downloading face_detection-0.2.2.tar.gz (20 kB)
Building wheels for collected packages: face-detection
  Building wheel for face-detection (setup.py) ... [?25l[?25hdone
  Created wheel for face-detection: filename=face_detection-0.2.2-py3-none-any.whl size=25574 sha256=31068a30d51428ed5338f9eaf3a127d66363566ce5084dbd1dff6a49cc5a18b6
  Stored in directory: /root/.cache/pip/wheels/dd/2d/84/c7a9ce1760b00f8b1d431c9564b108acfb98f588c18d4a1a9c
Successfully built face-detection
Installing collected packages: face-detection
Successfully installed face-detection-0.2.2


This function uses two pre-trained models: YOLOv5 for person detection and RetinaNetMobileNetV1 for face detection.

The **input** of the function is any lecture video, preferably one from Oxford Mathematics Institute Lecture Theatre 1.

The **output** of the function is a JSON file that records, for each sampled time stamp, the detected lecturer and his/her face:

```
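file name: json_data.json
{
  time_gap_per_stamp: seconds between two consecutive time stamps
  resolution: [width, height] of the video
  data: [
    {
      time_stamp: index i of the sampled frame
      boss_pos: [xmin, xmax, ymin, ymax] of the lecturer, [-1,-1,-1,-1] if none
      face_pos: [-1,-1,-1,-1] if none, otherwise [xmin, xmax, ymin, ymax]
      face_front: True/False. True indicates that a face was found inside the lecturer's box
    },
    ...
  ]
}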


```



In [3]:
import yolov5
import cv2
import json
import face_detection
import warnings

def check_inside(box1,box2,epsilon = 3):
  """Return True if box1 lies inside box2, allowing a slack of `epsilon`.

  Args:
    box1: inner candidate box in the form [x1, x2, y1, y2].
    box2: outer box in the form [x1, x2, y1, y2].
    epsilon: tolerance (same units as the coordinates) by which box1 may
      stick out of box2 on each side and still count as inside.

  Returns:
    bool: True when every edge of box1 is within box2 grown by `epsilon`.
  """
  ax1,ax2,ay1,ay2 = box1
  bx1,bx2,by1,by2 = box2
  # Use the caller-supplied tolerance; it was previously ignored in favour
  # of a hard-coded 3.
  return (ax1+epsilon >= bx1 and ax2 <= bx2+epsilon
          and ay1+epsilon >= by1 and ay2 <= by2+epsilon)

def transform_face_box(box_pre):
  """Reorder a face detection into the [x1, x2, y1, y2] layout.

  Args:
    box_pre: five values in the order (x1, y1, x2, y2, conf).

  Returns:
    list: [x1, x2, y1, y2]; the confidence value is dropped.
  """
  left, top, right, bottom, _confidence = box_pre
  reordered = [left, right, top, bottom]
  return reordered

def transform_man_box(box_pre):
  """Reorder a person detection into the [x1, x2, y1, y2] layout.

  Args:
    box_pre: four values in the order (x1, y1, x2, y2).

  Returns:
    list: [x1, x2, y1, y2].
  """
  left, top, right, bottom = box_pre
  reordered = [left, right, top, bottom]
  return reordered

def _detect_boss_and_face(img, model, detector):
  """Find the lecturer ("boss") and a face inside their box in one frame.

  Args:
    img: BGR frame as returned by cv2.VideoCapture.read().
    model: person detector loaded with yolov5.load.
    detector: face detector built with face_detection.build_detector.

  Returns:
    tuple: (man_bbox, face_bbox, face_front) where both boxes are in the
    form [x1, x2, y1, y2] ([-1, -1, -1, -1] when nothing was found) and
    face_front is True when a face lies inside the lecturer's box.
  """
  # OpenCV decodes frames as BGR, whereas both YOLOv5 (numpy input) and
  # face_detection document RGB input, so convert once for both models.
  rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  predictions = model(rgb).pred[0]
  boxes = (predictions[:, :4]).tolist()  # x1, y1, x2, y2
  scores = (predictions[:, 4]).tolist()
  categories = (predictions[:, 5]).tolist()

  man_bbox = [-1,-1,-1,-1]
  face_bbox = [-1,-1,-1,-1]
  face_front = False

  # Category 0.0 is treated as the person class of the detector.
  person_indices = [i for i, c in enumerate(categories) if c == 0.0]
  if not person_indices:
    return man_bbox, face_bbox, face_front

  # Pick the most confident person. Searching only among person indices
  # avoids returning a non-person box that happens to share the same score.
  man_index = max(person_indices, key=lambda i: scores[i])
  man_bbox = transform_man_box(boxes[man_index])  # now [x1, x2, y1, y2]

  # Each face detection is (x1, y1, x2, y2, conf).
  face_bboxes_pre = detector.detect(rgb).tolist()
  for item in face_bboxes_pre:
    candidate = transform_face_box(item)
    if check_inside(candidate, man_bbox):
      # The first face found inside the lecturer's box wins.
      face_bbox = candidate
      face_front = True
      break
  return man_bbox, face_bbox, face_front


def generate_game_file(video_path, output_path='json_data.json', gap=2):
  """Analyse a lecture video and write the per-time-stamp game file as JSON.

  The video is sampled `gap` times per second. For every sampled frame the
  lecturer and a face inside the lecturer's box are detected, and the result
  is written to `output_path` in the form

    {
      "time_gap_per_stamp": seconds between two time stamps,
      "resolution": [width, height],
      "data": [
        {
          "time_stamp": index i of the sampled frame,
          "boss_pos": [x1, x2, y1, y2], or [-1, -1, -1, -1] if none,
          "face_pos": [x1, x2, y1, y2], or [-1, -1, -1, -1] if none,
          "face_front": True if a face lies inside the lecturer's box
        },
        ...
      ]
    }

  Args:
    video_path: path of the video file to analyse.
    output_path: path of the JSON file to write.
    gap: number of sampled frames per second of video; must be positive.

  Raises:
    ValueError: if `gap` is not positive.
    IOError: if the video cannot be opened.
  """
  if gap <= 0:
    raise ValueError("gap must be a positive number of samples per second")

  # load model of human detection
  model = yolov5.load('yolov5s.pt')
  # load the pre-trained model of face detection
  detector = face_detection.build_detector(
    "RetinaNetMobileNetV1", confidence_threshold=.5, nms_iou_threshold=.3)

  videoCap = cv2.VideoCapture(video_path)
  if not videoCap.isOpened():
    videoCap.release()
    # Fail loudly instead of silently writing an empty game file.
    raise IOError("cannot open video: {}".format(video_path))

  jsonList = []
  try:
    fps = videoCap.get(cv2.CAP_PROP_FPS)
    if not (fps > 0):
      # Some containers report 0 or NaN; fall back to the usual 30 fps.
      fps = 30
    # Number of frames between two samples (15 for a 30 fps video, gap=2).
    frames_per_sample = max(1, round(fps / gap))

    resol_width = videoCap.get(cv2.CAP_PROP_FRAME_WIDTH)
    # Query the capture for the height; the bare CAP_PROP_FRAME_HEIGHT
    # constant is only the property id, not the frame height.
    resol_height = videoCap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    jsonResolu = (resol_width, resol_height)

    frame_count = 0
    time_stamp = 0
    while True:
      ret, img = videoCap.read()
      if not ret:
        # End of stream (or read failure): stop sampling.
        break
      frame_count += 1
      if frame_count % frames_per_sample != 0:
        continue

      man_bbox, face_bbox, face_front = _detect_boss_and_face(
        img, model, detector)
      jsonList.append({
          'time_stamp': time_stamp,
          'boss_pos': man_bbox,
          'face_pos': face_bbox,
          'face_front': face_front
      })
      time_stamp += 1
  finally:
    # Always free the decoder, even if detection raised.
    videoCap.release()

  jsonfile = {
      "time_gap_per_stamp": round(1/gap,2),
      "resolution": jsonResolu,
      "data": jsonList
  }
  # Open the output only once all data is ready, so a failure during
  # processing does not leave a truncated file behind.
  with open(output_path, 'w') as outfile:
    json.dump(jsonfile, outfile)
  print("game file generated")
if __name__ == "__main__":
  # Silence library warnings so the cell output only shows the progress
  # message, then build the game file for the sample lecture.
  warnings.filterwarnings(action="ignore")
  generate_game_file(video_path='lec_sample.mp4')

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...


Downloading: "https://folk.ntnu.no/haakohu/RetinaFace_mobilenet025.pth" to /root/.cache/torch/hub/checkpoints/RetinaFace_mobilenet025.pth


  0%|          | 0.00/1.71M [00:00<?, ?B/s]

game file generated
