In [1]:
# Recognise Faces using some classification algorithm - like Logistic, KNN, SVM etc.


# 1. load the training data (numpy arrays of all the persons)
		# x- values are stored in the numpy arrays
		# y-values we need to assign for each person
# 2. Read a video stream using opencv
# 3. extract faces out of it
# 4. use knn to find the prediction of face (int)
# 5. map the predicted id to name of the user 
# 6. Display the predictions on the screen - bounding box and name

import cv2
import numpy as np 
import os 

########## KNN CODE ############
def distance(v1, v2):
	"""Return the Euclidean (L2) distance between two equally shaped vectors.

	Both inputs are converted to float64 before subtracting. Without this,
	two unsigned-integer arrays (e.g. raw uint8 pixel data) would wrap around
	on subtraction and squaring, and silently give a wrong distance.

	Parameters:
		v1, v2: array-likes that broadcast against each other.

	Returns:
		The square root of the sum of squared element-wise differences,
		as a NumPy float64 scalar.
	"""
	diff = np.asarray(v1, dtype=np.float64) - np.asarray(v2, dtype=np.float64)
	return np.sqrt((diff ** 2).sum())

def knn(train, test, k=5):
	"""Classify ``test`` by majority vote among its ``k`` nearest training rows.

	Parameters:
		train: 2-D array-like; every row is a feature vector followed by its
			label in the last column.
		test: feature vector of the query point (flattened before use); its
			length must equal ``train.shape[1] - 1``.
		k: number of nearest neighbours that vote. Values larger than the
			number of training rows simply use every row.

	Returns:
		The winning label as a NumPy float64 scalar. When several labels are
		tied for the highest count, the smallest label wins.

	Raises:
		ValueError: if ``train`` is not a non-empty 2-D array or ``k < 1``.
	"""
	# Work in float64 so integer inputs (e.g. uint8 pixels) cannot wrap around.
	train = np.asarray(train, dtype=np.float64)
	if train.ndim != 2 or train.shape[0] == 0:
		raise ValueError("train must be a non-empty 2-D array of [features..., label] rows")
	if k < 1:
		raise ValueError("k must be a positive integer")

	features = train[:, :-1]
	targets = train[:, -1]
	query = np.asarray(test, dtype=np.float64).ravel()

	# Euclidean distance from the query to every training row in one shot.
	dists = np.sqrt(((features - query) ** 2).sum(axis=1))

	# A stable sort keeps equally distant rows in their original order.
	nearest = np.argsort(dists, kind="stable")[:k]

	# Count the votes; np.unique returns labels sorted ascending, so argmax
	# resolves ties in favour of the smallest label.
	values, counts = np.unique(targets[nearest], return_counts=True)
	return values[np.argmax(counts)]
################################


In [2]:
# Face Detection
# Load the Haar cascade first and fail early: an unloaded classifier would
# otherwise only surface later as an error inside detectMultiScale.
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
if face_cascade.empty():
    raise RuntimeError("Could not load 'haarcascade_frontalface_alt.xml'")

# Data Preparation
class_id = 0  # integer label given to the next .npy file found
names = {}  # maps class_id -> name (the .npy file name without its extension)
dataset_path = './data/'

face_data = []  # one array of samples per .npy file
labels = []  # one label vector per .npy file, aligned with face_data

# Sorted so that the class_id assigned to each file is the same on every run
# (os.listdir returns entries in arbitrary order).
for fx in sorted(os.listdir(dataset_path)):
    if not fx.endswith('.npy'):
        continue

    # Create a mapping between class_id and name
    names[class_id] = os.path.splitext(fx)[0]
    data_item = np.load(os.path.join(dataset_path, fx))
    face_data.append(data_item)

    # Every sample (row) of this file gets the same class_id as its label
    target = class_id * np.ones((data_item.shape[0],))
    class_id += 1
    labels.append(target)

if not face_data:
    raise ValueError("No .npy training files found in " + repr(dataset_path))

# X: the samples of all classes stacked row-wise
face_dataset = np.concatenate(face_data, axis=0)
# Y: the matching labels as a single column
face_labels = np.concatenate(labels, axis=0).reshape((-1, 1))
# Training matrix in the layout knn() expects: features first, label last
train_set = np.concatenate((face_dataset, face_labels), axis=1)

print(train_set.shape)

# Init Camera
# Opened only after the data has loaded, so a failure above does not leave
# the capture device open.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open camera 0")

#testing
# Live recognition loop: grab frames from the camera, classify the largest
# detected face with knn() and display the annotated grayscale frame until
# 'q' is pressed.

offset = 10  # extra pixels kept around the detected face when cropping
max_failed_reads = 100  # consecutive failed grabs tolerated before giving up
failed_reads = 0

try:
    while True:
        # ret is False when no frame could be captured; frame holds the image
        ret, frame = cap.read()
        if not ret:
            # Do not spin forever on a camera that stopped delivering frames.
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print("Camera stopped delivering frames; stopping.")
                break
            continue
        failed_reads = 0

        # Convert the BGR frame to grayscale for detection and recognition
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Each detection is (x, y, w, h): top-left corner plus width and height
        # (scaleFactor=1.3, minNeighbors=5)
        faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)

        # With no face in view we still fall through to imshow/waitKey below,
        # so the window keeps updating and 'q' keeps working.
        if len(faces) > 0:
            # Explicitly pick the detection with the largest area; the order
            # of the detections is not relied upon.
            x, y, w, h = (int(v) for v in max(faces, key=lambda f: f[2] * f[3]))

            # Region of interest: rows (y) first, then columns (x). The start
            # indices are clamped at 0 because a negative start would wrap
            # around and could produce an empty crop.
            top = max(y - offset, 0)
            left = max(x - offset, 0)
            face_section = gray_frame[top:y + h + offset, left:x + w + offset]
            # 100*100 pixels -> 10000 features once flattened
            face_section = cv2.resize(face_section, (100, 100))

            # Predict the class id with knn and map it back to a name
            out = knn(train_set, face_section.flatten())
            pred_name = names[int(out)]

            # Draw the name and the bounding box on the displayed frame
            cv2.putText(gray_frame, pred_name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
            cv2.rectangle(gray_frame, (x, y), (x + w, y + h), (0, 255, 255), 2)

        #cv2.imshow("Frame",frame)#shows the captured frame
        cv2.imshow("gray_frame", gray_frame)

        # waitKey returns an integer; mask to the low 8 bits to get the
        # ASCII code of the pressed key
        key_pressed = cv2.waitKey(1) & 0xFF
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even when the loop is
    # left through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

(55, 10001)
