# Face Recognition.

## Implementation steps:

1. Obtain a video (youtube-dl)
2. Process the video 
  - detect faces
  - compute embeddings
  - recognize faces
  - prepare a table {PERSON_ID, TIME_INTERVALS, FACE_SAMPLES}
  - draw bounding boxes + names
3. Record output file 

## TODO:
- submit python code file + link to the output video


## Used technical stack

- **For face detection and recognition** - face_recognition library
- **For all other video processing** (reading frames, drawing, writing the output file) - cv2 (OpenCV)

## MVP specifications

1. The accuracy of face detection and recognition has room for improvement: in some cases several `PersonId`s are assigned to the same person.


## Acknowledgements
- [How to use face_recognition library](https://github.com/ageitgey/face_recognition/blob/master/examples/facerec_from_webcam_faster.py)

### A note:
For some reason, running the program in Colab took more than 40 minutes, so I interrupted it.

In [1]:
!pip install youtube-dl
!pip install opencv-contrib-python==3.4.2.17
!pip install cmake
!pip install face_recognition

Collecting youtube-dl
  Downloading youtube_dl-2021.6.6-py2.py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 5.1 MB/s 
[?25hInstalling collected packages: youtube-dl
Successfully installed youtube-dl-2021.6.6
Collecting opencv-contrib-python==3.4.2.17
  Downloading opencv_contrib_python-3.4.2.17-cp37-cp37m-manylinux1_x86_64.whl (30.6 MB)
[K     |████████████████████████████████| 30.6 MB 25 kB/s 
Installing collected packages: opencv-contrib-python
  Attempting uninstall: opencv-contrib-python
    Found existing installation: opencv-contrib-python 4.1.2.30
    Uninstalling opencv-contrib-python-4.1.2.30:
      Successfully uninstalled opencv-contrib-python-4.1.2.30
Successfully installed opencv-contrib-python-3.4.2.17
Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Collecting face-recognition-models>=0.3.0
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[K     |████████████████████████████████| 100

In [12]:
import youtube_dl
import cv2
import matplotlib.pyplot as plt
import face_recognition
import copy

In [13]:
def print_time_interval(time_start, time_end):
    """Print a time interval as ``[min:sec - min:sec]``.

    Args:
        time_start: beginning of the interval, in seconds.
        time_end: end of the interval, in seconds.
    """
    fields = []
    for moment in (time_start, time_end):
        # Split the moment into whole minutes and the leftover seconds.
        whole_minutes, leftover_seconds = divmod(moment, 60)
        fields.extend((int(whole_minutes), leftover_seconds))
    print('[{}:{:.2f} - {}:{:.2f}]'.format(*fields))

In [14]:
def print_table(table):
    """Print every tracked person's ID, time intervals and face sample.

    Args:
        table: dict mapping a person ID to a record (dict) with the keys
            'TIME_START' and 'TIME_END' (lists of seconds) and
            'FACE_SAMPLE' (an image array displayed with matplotlib).

    A start time without a matching end time (the person was still on
    screen when processing stopped) is printed as an open interval instead
    of raising IndexError. A record without 'FACE_SAMPLE' is reported
    instead of raising KeyError.
    """
    for key, value in table.items():
        print('\nPerson ID: {}'.format(key))
        print('Time intervals:')
        starts = value['TIME_START']
        ends = value['TIME_END']
        for start, end in zip(starts, ends):
            print_time_interval(start, end)
        # Starts beyond the number of recorded ends are still-open intervals.
        for start in starts[len(ends):]:
            minutes, seconds = divmod(start, 60)
            print('[{}:{:.2f} - ...]'.format(int(minutes), seconds))
        print('Face sample:')
        sample = value.get('FACE_SAMPLE')
        if sample is None:
            print('(no sample captured)')
        else:
            plt.imshow(sample)
            plt.show()

In [15]:
# Fetch the source clip with youtube-dl, using an empty options dict
ydl_opts = {}
video_urls = ['https://www.youtube.com/watch?v=gOFt-sz1T2A']
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(video_urls)

[youtube] gOFt-sz1T2A: Downloading webpage
[download] YOLO - We Are One ft. Культура Небес (Official Music Video)-gOFt-sz1T2A.webm has already been downloaded and merged


In [16]:
# Open the video
filename = 'YOLO - We Are One ft. Культура Небес (Official Music Video)-gOFt-sz1T2A.webm'
cap = cv2.VideoCapture(filename)
if not cap.isOpened():
    # Fail fast with a clear message: continuing with an unopened capture
    # would only surface later as a less helpful error.
    raise IOError("Error opening video stream or file: {}".format(filename))

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_size = (frame_width, frame_height)  # (3840, 2160)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Keep the frame rate as a float: truncating a fractional rate (e.g. 29.97)
# would make the output play at the wrong speed and skew every timestamp.
fps = cap.get(cv2.CAP_PROP_FPS)  # 25
if fps <= 0:
    raise ValueError("Could not read a valid frame rate from {}".format(filename))
duration = frame_count / fps  # 229.08 sec

# Configure output video (same frame rate and frame size as the input)
output_filename = 'video_tracking.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output = cv2.VideoWriter(output_filename, fourcc, fps, frame_size)
if not output.isOpened():
    # Otherwise the whole (long) processing run would produce no video.
    cap.release()
    raise IOError("Error opening output video file: {}".format(output_filename))

In [None]:
# Process the video frame by frame: detect and recognize faces, record the
# time intervals in which each person is visible, draw labelled bounding
# boxes and write every frame to the output video.
known_face_encodings = []
known_face_names = []

face_locations = []
face_encodings = []
current_face_names = []
previous_face_names = []

process_this_frame = True
personID = 0
frames_counter = 0
table = {}
# PERSON_ID: key in the dictionary (e.g. "person0")
# {FACE_ENCODING: encoding returned by face_recognition.face_encodings,
# TIME_START: list of seconds with the beginning of each time interval,
# TIME_END: list of seconds with the ending of each time interval,
# FACE_SAMPLE: RGB crop of the face as an array (ready for plt.imshow)
# }

font = cv2.FONT_HERSHEY_DUPLEX

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        # Count only frames that were actually read.
        frames_counter += 1

        # Face detection runs on every other frame; the frames in between
        # reuse the most recent locations and names for drawing.
        if process_this_frame:
            # Convert the image from BGR color (which OpenCV uses) to RGB color
            # (which face_recognition uses). cvtColor makes a separate array, so
            # boxes drawn on `frame` below never leak into the face samples.
            # NOTE(review): the negative-stride view frame[:, :, ::-1] is
            # reportedly rejected by some dlib builds - confirm if reverting.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Find all the faces and face encodings in the current frame of video
            face_locations = face_recognition.face_locations(rgb_frame)
            face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

            current_face_names = []
            # left=x, top=y, right=x+w, bottom=y+h
            for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
                # See if the face is a match for the known face(s)
                matches = face_recognition.compare_faces(known_face_encodings, face_encoding)

                if True in matches:
                    # If a match was found in known_face_encodings, just use the first one.
                    name = known_face_names[matches.index(True)]
                else:
                    # Unknown face: register a new person.
                    name = "person" + str(personID)
                    personID += 1
                    known_face_names.append(name)
                    known_face_encodings.append(face_encoding)
                    table[name] = {
                        'FACE_ENCODING': face_encoding,
                        'TIME_START': [],
                        'TIME_END': [],
                        # Stored in RGB because the sample is shown with
                        # matplotlib, which expects RGB rather than OpenCV's BGR.
                        'FACE_SAMPLE': rgb_frame[top:bottom, left:right].copy(),
                    }

                current_face_names.append(name)

            # Save time intervals
            # compare current_face_names and previous_face_names:
            # if someone (in current)&(not in previous) -> person appeared
            # if someone (not in current)&(in previous) -> person disappeared
            timestamp = frames_counter / fps  # seconds
            current_set = set(current_face_names)
            previous_set = set(previous_face_names)
            for face_name in current_set - previous_set:
                table[face_name]['TIME_START'].append(timestamp)
            for face_name in previous_set - current_set:
                table[face_name]['TIME_END'].append(timestamp)

            previous_face_names = current_face_names

        process_this_frame = not process_this_frame

        # Draw bounding boxes and names
        for (top, right, bottom, left), name in zip(face_locations, current_face_names):
            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            cv2.putText(frame, name, (left + 6, bottom + 30), font, 1.0, (0, 0, 255), 1)

        output.write(frame)
finally:
    # Always release the capture and the writer, even when the run is
    # interrupted or fails.
    cap.release()
    output.release()

    # Close the interval of everyone still on screen when processing stopped,
    # so every TIME_START has a matching TIME_END.
    for record in table.values():
        if len(record['TIME_START']) > len(record['TIME_END']):
            record['TIME_END'].append(frames_counter / fps)

print_table(table)
print('The number of people detected and recognized on the video is {}.'.format(len(table)))