Classify 5+ different people in a video, add boxes and name labels and output a new video

In [2]:
# import the necessary packages
from imutils import paths
import face_recognition
import pickle
import cv2
import os

In [3]:
# Configuration for building the known-face database.
# The person's name is taken from the directory that directly contains each
# image, i.e. the expected layout is <dataset_path>/<person name>/<image>.
dataset_path = 'dataset'
encodings_path = 'encodings.pickle'
# face detection model passed to face_recognition.face_locations
detection_method = 'cnn'

# collect the paths of every image in the dataset
print("[INFO] quantifying faces...")
imagePaths = list(paths.list_images(dataset_path))

# parallel lists: knownEncodings[k] is an embedding of the person knownNames[k]
knownEncodings = []
knownNames = []

for (i, imagePath) in enumerate(imagePaths):
    print("[INFO] processing image {}/{}".format(i + 1,
                                                 len(imagePaths)))
    # the label is the name of the image's parent directory
    name = imagePath.split(os.path.sep)[-2]

    # cv2.imread returns None (instead of raising) when the file cannot be
    # decoded; skip it so one bad file does not abort the whole run
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARNING] could not read image {}, skipping".format(imagePath))
        continue

    # OpenCV loads images as BGR, face_recognition (dlib) expects RGB
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # bounding box of every face found in the image
    boxes = face_recognition.face_locations(rgb, model=detection_method)
    # one embedding per detected face
    encodings = face_recognition.face_encodings(rgb, boxes)

    # every face found in this image is stored under the same name
    for encoding in encodings:
        knownEncodings.append(encoding)
        knownNames.append(name)

# dump the facial encodings + names to disk
print("[INFO] serializing encodings...")
data = {"encodings": knownEncodings, "names": knownNames}
# the context manager closes the file even if pickling fails
with open(encodings_path, "wb") as f:
    pickle.dump(data, f)

[INFO] quantifying faces...
[INFO] processing image 1/14
[INFO] processing image 2/14
[INFO] processing image 3/14
[INFO] processing image 4/14
[INFO] processing image 5/14
[INFO] processing image 6/14
[INFO] processing image 7/14
[INFO] processing image 8/14
[INFO] processing image 9/14
[INFO] processing image 10/14
[INFO] processing image 11/14
[INFO] processing image 12/14
[INFO] processing image 13/14
[INFO] processing image 14/14
[INFO] serializing encodings...


In [4]:
# import the necessary packages
from imutils.video import VideoStream
import face_recognition
import imutils
import pickle
import time
import cv2

# Configuration (hard-coded here rather than parsed from the command line).
encodings_path = 'encodings.pickle'
# path of the annotated output video; set to None to disable writing
output = 'test_output.avi'
# values > 0 show each annotated frame in a window while processing
display_type = 0
# face detection model passed to face_recognition.face_locations
detection_method = 'cnn'

# load the known faces and embeddings
print("[INFO] loading encodings...")
# NOTE: unpickling can execute arbitrary code - only load a trusted file
with open(encodings_path, "rb") as f:
    data = pickle.loads(f.read())

print("[INFO] starting video stream...")
# input video; reading from a file needs no camera warm-up delay
vs = cv2.VideoCapture('test2.mp4')
writer = None
if not vs.isOpened():
    print("[ERROR] could not open the input video")

try:
    # loop over frames from the video file
    while True:
        ret, frame = vs.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Convert BGR (OpenCV) to RGB (dlib), then shrink the RGB copy to
        # 750 pixels wide so detection is faster. The resize must be applied
        # to `rgb`, not `frame`, or the colour conversion is thrown away.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb = imutils.resize(rgb, width=750)
        # scale factor to map boxes found on `rgb` back onto `frame`
        r = frame.shape[1] / float(rgb.shape[1])

        boxes = face_recognition.face_locations(rgb,
                                                model=detection_method)
        encodings = face_recognition.face_encodings(rgb, boxes)
        names = []

        # match every face in the frame against the known embeddings
        for encoding in encodings:
            matches = face_recognition.compare_faces(data["encodings"],
                                                     encoding)
            name = "Unknown"
            if True in matches:
                # count how many known embeddings of each person matched
                counts = {}
                for (idx, matched) in enumerate(matches):
                    if matched:
                        candidate = data["names"][idx]
                        counts[candidate] = counts.get(candidate, 0) + 1
                # the person with the most matching embeddings wins
                name = max(counts, key=counts.get)
            names.append(name)

        # draw a box and a name label for every recognised face
        for ((top, right, bottom, left), name) in zip(boxes, names):
            # rescale the coordinates to the full-size frame
            top = int(top * r)
            right = int(right * r)
            bottom = int(bottom * r)
            left = int(left * r)
            cv2.rectangle(frame, (left, top), (right, bottom),
                          (0, 255, 0), 2)
            # put the label above the box, or inside it near the top edge
            y = top - 15 if top - 15 > 15 else top + 15
            cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX,
                        0.75, (0, 255, 0), 2)

        # Everything below runs once per FRAME (not once per face), so frames
        # without faces are still written and no frame is written twice.
        if writer is None and output is not None:
            # created lazily because the frame size is only known now
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output, fourcc, 20,
                                     (frame.shape[1], frame.shape[0]), True)
        if writer is not None:
            writer.write(frame)

        # check whether we should display the output frame to the screen
        if display_type > 0:
            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF
            # if the `q` key was pressed, stop processing the video
            if key == ord("q"):
                break
finally:
    # release resources even if processing is interrupted, so the output
    # file written so far is finalised
    cv2.destroyAllWindows()
    vs.release()
    if writer is not None:
        writer.release()

[INFO] loading encodings...
[INFO] starting video stream...
Can't receive frame (stream end?). Exiting ...
