- Write a Python script that captures images from your webcam video stream
- Extract all faces from each image frame (using a Haar cascade classifier)
- Store the face information in NumPy arrays
1. Read and show the video stream, capture images
2. Detect faces and show a bounding box (Haar cascade)
3. Flatten the largest face image (grayscale) and save it in a NumPy array
4. Repeat the above for multiple people to generate training data

In [1]:
# first Step is to read and show the images
# Code to read the image in BGR format
import cv2
import numpy as np

# Initialize the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# Number of consecutive failed reads tolerated before giving up. Without a
# limit, an unavailable camera makes the loop spin forever: no window is ever
# shown, so the 'q' key can never be received.
MAX_FAILED_READS = 30
failed_reads = 0

try:
    camera_ready = cap.isOpened()
    if not camera_ready:
        print("Could not open the webcam (device 0).")

    while camera_ready:
        ret, frame = cap.read()
        if not ret:
            # Tolerate a few dropped frames, but stop on persistent failure.
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("No frames received from the webcam; stopping.")
                break
            continue
        failed_reads = 0

        cv2.imshow("Frame", frame)

        # Only the low byte of the key code is compared against 'q'.
        key_pressed = cv2.waitKey(1) & 0xff
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

In [2]:
# Code to read the image in Grey Scale image format
import cv2
import numpy as np

# Initialize the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# Number of consecutive failed reads tolerated before giving up. Without a
# limit, an unavailable camera makes the loop spin forever: no window is ever
# shown, so the 'q' key can never be received.
MAX_FAILED_READS = 30
failed_reads = 0

try:
    camera_ready = cap.isOpened()
    if not camera_ready:
        print("Could not open the webcam (device 0).")

    while camera_ready:
        ret, frame = cap.read()
        if not ret:
            # Tolerate a few dropped frames, but stop on persistent failure.
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("No frames received from the webcam; stopping.")
                break
            continue
        failed_reads = 0

        # Frames arrive in BGR order; convert to a single-channel image.
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Show the colour and the grayscale stream side by side.
        cv2.imshow("Frame", frame)
        cv2.imshow("grey_frame", gray_frame)

        # Only the low byte of the key code is compared against 'q'.
        key_pressed = cv2.waitKey(1) & 0xff
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

In [3]:
# Second step: draw a bounding box around the face while capturing
# Faces are detected with a Haar cascade classifier
# The largest face in each frame is cropped (grayscale) and stored as training data
import cv2
import numpy as np

import os

# Initialize the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# Face detection: the Haar cascade XML is expected in the working directory.
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
if face_cascade.empty():
    # Fail early with a clear message instead of a cryptic detection error.
    cap.release()
    raise IOError("Could not load haarcascade_frontalface_alt.xml")

face_data = []
dataset_path = "./data/"
file_name = input("Enter the name of the person : ")

# Padding (in pixels) added around the detected box before cropping.
offset = 10

# Number of consecutive failed reads tolerated before giving up; without a
# limit an unavailable camera makes the loop spin forever.
MAX_FAILED_READS = 30
failed_reads = 0

try:
    camera_ready = cap.isOpened()
    if not camera_ready:
        print("Could not open the webcam (device 0).")

    while camera_ready:
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("No frames received from the webcam; stopping.")
                break
            continue
        failed_reads = 0

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)

        if len(faces) > 0:
            # Sort by area (w * h) and keep the last, i.e. largest, detection.
            x, y, w, h = sorted(faces, key=lambda f: f[2] * f[3])[-1]

            # Clamp the padded box to the frame: a negative start index would
            # wrap around and yield an empty crop that cv2.resize rejects.
            top = max(0, y - offset)
            left = max(0, x - offset)

            # Crop BEFORE drawing so the rectangle is not baked into the
            # stored training sample.
            face_section = gray_frame[top:y + h + offset, left:x + w + offset]
            face_section = cv2.resize(face_section, (100, 100))
            face_data.append(face_section)
            print(len(face_section))

            # Draw the bounding box for display only.
            cv2.rectangle(gray_frame, (x, y), (x + w, y + h), (0, 255, 255), 2)

        # Show the frame and poll the keyboard on every iteration, including
        # frames without a face, so the window stays live and 'q' still works.
        cv2.imshow("gray_frame", gray_frame)
        key_pressed = cv2.waitKey(1) & 0xff
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

if face_data:
    # Convert the list of 100x100 crops into one row per sample.
    face_data = np.asarray(face_data)
    face_data = face_data.reshape((face_data.shape[0], -1))
    print(face_data.shape)

    # Save this data to the file system, creating the folder if needed.
    os.makedirs(dataset_path, exist_ok=True)
    np.save(dataset_path + file_name + '.npy', face_data)
    print("Data Saved Successfully!! :)")
else:
    # Reshaping an empty array with -1 would raise; report it instead.
    print("No faces were captured; nothing was saved.")

Enter the name of the person : Ajay Sai
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
(108, 10000)
Data Saved Successfully!! :)


- Recognize faces using a classification algorithm such as logistic regression, KNN, SVM, etc.
- Load the training data (NumPy arrays of all the persons)
        1. x-values are stored in the NumPy arrays
        2. y-values must be assigned for each person
- Read a video stream using OpenCV
- Extract the faces from it
- Use KNN to predict the id (int) of each face
- Map the predicted id to the name of the user
- Display the predictions on the screen: bounding box and name.

In [4]:
import cv2
import numpy as np
import os

In [5]:
########## KNN code #########
def distance(v1, v2):
    """Return the Euclidean distance between two vectors.

    Both inputs are converted to float64 before subtracting. With unsigned
    integer inputs (e.g. uint8 pixel values) the difference and its square
    would otherwise wrap around and produce a wrong distance.

    Args:
        v1: array-like of numbers.
        v2: array-like of numbers, broadcast-compatible with ``v1``.

    Returns:
        The distance as a NumPy float64 scalar.
    """
    diff = np.asarray(v1, dtype=np.float64) - np.asarray(v2, dtype=np.float64)
    return np.sqrt((diff ** 2).sum())

In [6]:
def knn(train, test, k=5):
    """Classify ``test`` by majority vote among its ``k`` nearest neighbours.

    Args:
        train: 2-D array; every row holds the feature values followed by the
            class label in the last column.
        test: feature vector to classify.
        k: number of nearest training rows that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` closest rows. On a
        tie the smallest label wins, because ``np.unique`` returns the labels
        sorted and ``np.argmax`` picks the first maximum.
    """
    features = train[:, :-1]
    targets = train[:, -1]

    # Pair every training row's distance to the query with its label.
    scored = [
        (distance(test, row), label)
        for row, label in zip(features, targets)
    ]

    # Stable sort by distance, then keep the k closest entries.
    scored.sort(key=lambda pair: pair[0])
    nearest = np.array(scored[:k])

    # Vote: count each label among the neighbours and return the most common.
    candidates, votes = np.unique(nearest[:, -1], return_counts=True)
    return candidates[np.argmax(votes)]

In [7]:
import cv2
import numpy as np
# Face detection: the Haar cascade XML is expected in the working directory.
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
if face_cascade.empty():
    # Fail early with a clear message instead of a cryptic detection error.
    raise IOError("Could not load haarcascade_frontalface_alt.xml")

# Data preparation
class_id = 0  # label assigned to the file currently being loaded
names = {}    # mapping from class id to person name
dataset_path = "./data/"
face_data = []
labels = []
# Sorted so that class ids are assigned in the same order on every run.
for fx in sorted(os.listdir(dataset_path)):
    # Match the full '.npy' extension; fx[:-4] below strips exactly that.
    if fx.endswith('.npy'):
        # Create a mapping between class_id and name.
        names[class_id] = fx[:-4]
        data_item = np.load(dataset_path + fx)
        face_data.append(data_item)

        # One label per sample row of this person.
        target = class_id * np.ones((data_item.shape[0],))
        class_id += 1
        labels.append(target)

if not face_data:
    # np.concatenate on an empty list raises an unhelpful error.
    raise ValueError("No .npy training files found in " + dataset_path)

face_dataset = np.concatenate(face_data, axis=0)
face_labels = np.concatenate(labels, axis=0).reshape((-1, 1))
# Each row: flattened face features followed by the class label.
train_set = np.concatenate((face_dataset, face_labels), axis=1)
print(train_set.shape)

# Initialize the default webcam (device index 0) once the data is ready.
cap = cv2.VideoCapture(0)

# Padding (in pixels) added around the detected box before cropping.
offset = 10

# Number of consecutive failed reads tolerated before giving up; without a
# limit an unavailable camera makes the loop spin forever.
MAX_FAILED_READS = 30
failed_reads = 0

# Test our algorithm on the live stream.
try:
    camera_ready = cap.isOpened()
    if not camera_ready:
        print("Could not open the webcam (device 0).")

    while camera_ready:
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("No frames received from the webcam; stopping.")
                break
            continue
        failed_reads = 0

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)

        # Draw on a copy so labels and boxes of one face never leak into the
        # crop of another (overlapping) face.
        display_frame = gray_frame.copy()

        # Classify every detected face in the frame.
        for face in faces:
            x, y, w, h = face

            # Clamp the padded box to the frame: a negative start index would
            # wrap around and yield an empty crop that cv2.resize rejects.
            top = max(0, y - offset)
            left = max(0, x - offset)
            face_section = gray_frame[top:y + h + offset, left:x + w + offset]
            face_section = cv2.resize(face_section, (100, 100))

            # Predict the class id of the flattened face.
            out = knn(train_set, face_section.flatten())

            # Display the predicted name and the bounding box.
            pred_name = names[int(out)]
            cv2.putText(display_frame, pred_name, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
            cv2.rectangle(display_frame, (x, y), (x + w, y + h), (0, 255, 255), 2)

        # Show the frame and poll the keyboard on every iteration, including
        # frames without a face, so the window stays live and 'q' still works.
        cv2.imshow("gray_frame", display_frame)
        key_pressed = cv2.waitKey(1) & 0xff
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

(475, 10001)
