# Assignment 1: Track 2

The video used to test this algorithm and to tune the rectangle sizes comes from the following repository: https://github.com/intel-iot-devkit/sample-videos?tab=readme-ov-file

File name used: `head-pose-face-detection-male.mp4`

In [1]:
import cv2
import numpy as np

In [None]:
#Open the sample video through OpenCV; the capture object is reused by the cells below
video_path = 'C:\\Users\\sofyc\\OneDrive\\Desktop\\UPEC\\CV II\\assignment - recognition and detection\\head-pose-face-detection-male.mp4'
videoReader = cv2.VideoCapture(video_path)

#Report whether OpenCV managed to open the file
if videoReader.isOpened():
    print("Okay, read well!")
else:
    print("Error: the video can't be loaded.")

Okay, read well!


### Alternative 1 with CAMShift
In this algorithm we have a continuous detection, i.e. dynamic detection.
- Use of **CAMShift (Continuously Adaptive Mean Shift)**:
CAMShift is an algorithm used to track objects in a video, and it is based on an algorithm called Mean shift.
  - **Mean Shift**: it iteratively searches for the local maximum of a probability distribution, computed over the pixels of a given area. It identifies the centroid, i.e. the center of mass of the pixels with the highest probability.

CAMShift is an extension of Mean Shift: it is more flexible because it adapts the window size at each step of the tracking. It takes a color distribution based on the histogram of the HSV color space and returns the tracked position over time. In our case, since we are tracking a face, the color of the skin and the hair stays constant over time, so it can be tracked easily.

- **Region of Interest (ROI)**:
it is a part of the image (face) that is isolated to calculate the color histogram HSV used by the CAMShift detection.

- **HSV (Hue Saturation Value)**:
it is a color space different from RGB (Red, Green, Blue).
  - **Hue**: it is the color component between 0 and 180 in OpenCV.
  - **Saturation**: it is the intensity of color and the higher the saturation, the higher the intensity.
  - **Value**: it is the brightness of the color and the higher the value, the higher the brightness.
It is useful because it allows us to track the color independently of the brightness, which makes it easier to follow a face. We convert the ROI to HSV in order to build a histogram that represents the color distribution of the face.

In [3]:
#Show the directory from which the Haar cascade XML files are loaded below
cascade_dir = cv2.data.haarcascades
print(cascade_dir)

c:\Users\sofyc\OneDrive\Desktop\environment for NN and ML\myenv\Lib\site-packages\cv2\data\


In [None]:
#Load three pre-trained Haar cascade classifiers from OpenCV
#Face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
#Eye detection
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
#Nose detection
nose_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_mcs_nose.xml')

#A cascade whose XML file could not be loaded is empty, and detectMultiScale raises
#on an empty cascade. In that case nose detection is skipped instead of crashing.
nose_available = not nose_cascade.empty()
if not nose_available:
    print("Warning: 'haarcascade_mcs_nose.xml' could not be loaded, nose detection is disabled.")

#Termination criteria for CAMShift: 10 iterations or accuracy (epsilon) of 1.
#They never change, so they are built once, outside the frame loop.
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

try:
    #Read only the first frame: the face is detected once here and tracked afterwards
    ret, frame = videoReader.read()
    if not ret or frame is None:
        print("Error: it was not possible to read the video.")
        #Stop execution as exit() would, without relying on the exit() helper
        #that the site module provides for interactive sessions.
        raise SystemExit

    #Converting the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    #Detection of face as a rectangle
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(120, 120))

    #Checking to see if faces were detected
    if len(faces) > 0:
        #Taking the first face detected, as plain Python ints
        x, y, w, h = (int(v) for v in faces[0])

        #Extracting the face Region of Interest (ROI)
        roi = frame[y:y + h, x:x + w]
        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)  #Converting the ROI in HSV color space.

        #Creating a histogram of the hue channel of the ROI, scaled to 0-255
        roi_hist = cv2.calcHist([hsv_roi], [0], None, [256], [0, 180])
        cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

        #Set up the tracking window for CAMShift
        track_window = (x, y, w, h)

        #Loop through the remaining video frames
        while True:
            #Reading next frame
            ret, frame = videoReader.read()
            if not ret:
                break  #Exit if there are no frames anymore

            #Calculating the back projection of the hue histogram on the new frame
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

            #Applying CAMShift to get the new location of the face.
            #The first returned value (the rotated box) is not used here.
            _, new_window = cv2.CamShift(dst, track_window, criteria)

            #CAMShift can return a window with zero width or height when it loses the
            #target. Slicing the frame with it would give an empty image and cvtColor
            #would raise, so the last valid window is kept and the frame is shown as is.
            if new_window[2] > 0 and new_window[3] > 0:
                track_window = new_window
                x, y, w, h = track_window  #Updating the coordinates of the tracked face

                #Updating the ROI for eyes and nose detection in the new tracked location
                face_roi_gray = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)

                #Detect eyes within the updated face ROI
                eyes = eye_cascade.detectMultiScale(face_roi_gray, scaleFactor=1.1, minNeighbors=10, minSize=(20, 20))

                #Drawing rectangles for the eyes (only the first two).
                #Detections are relative to the ROI, hence the x/y offsets.
                for (ex, ey, ew, eh) in eyes[:2]:  #Limit to two eyes
                    cv2.rectangle(frame, (x + ex, y + ey), (x + ex + ew, y + ey + eh), (0, 255, 0), 2)  #Green Rectangle

                if nose_available:
                    #Detect the nose within the updated face ROI
                    nose = nose_cascade.detectMultiScale(face_roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

                    #Drawing a rectangle for the nose if detected (shifted 5 pixels up)
                    if len(nose) > 0:
                        (nx, ny, nw, nh) = nose[0]  #Taking the first nose detected
                        cv2.rectangle(frame, (x + nx, y + ny - 5), (x + nx + nw, y + ny + nh - 5), (0, 0, 255), 2)  #Red Rectangle

            #Show the frames
            cv2.imshow("Face Tracking", frame)

            if cv2.waitKey(30) & 0xFF == ord('q'):  #Click 'q' to exit loop.
                break
    else:
        print("No face detected in the first frame, nothing to track.")
finally:
    #Release resources, even when an error interrupts the tracking loop
    videoReader.release()
    cv2.destroyAllWindows()


### Alternative 2
Static detection. We run the OpenCV Haar cascades from scratch on every single frame to detect the face, eyes and nose. There is no continuity between one frame and the next: the detection result of a frame is not carried over to the following one.

In [6]:
#Load three pre-trained Haar cascade classifiers from OpenCV
#Face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
#Eye detection
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
#Nose detection
nose_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_mcs_nose.xml')

#A cascade whose XML file could not be loaded is empty, and detectMultiScale raises
#on an empty cascade. In that case nose detection is skipped instead of crashing.
nose_available = not nose_cascade.empty()
if not nose_available:
    print("Warning: 'haarcascade_mcs_nose.xml' could not be loaded, nose detection is disabled.")

#Number of frames successfully read, used to tell a failed start from a normal end of video
frames_read = 0

try:
    #Loop to read and process each frame of the video
    while True:
        ret, frame = videoReader.read()
        if not ret:
            if frames_read == 0:
                print("Error: it was not possible to read the video.")
                #The CAMShift cell releases videoReader when it finishes, so a read
                #that fails on the very first frame usually means the capture is closed.
                if not videoReader.isOpened():
                    print("Hint: the video capture is closed; reload the video and run this cell again.")
            else:
                print("End of the video.")
            break

        if frame is None: #Empty frame
            print("Empty frame, going to the next one.")
            continue

        frames_read += 1

        #Converting the frame to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        #Detection of face as a rectangle
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(120, 120))

        #Checking to see if faces were detected
        if len(faces) > 0:
            (face_x, face_y, face_w, face_h) = faces[0]  #Taking the first face detected

            #Detecting eyes within the face region
            face_roi_gray = gray[face_y:face_y + face_h, face_x:face_x + face_w]
            eyes = eye_cascade.detectMultiScale(face_roi_gray, scaleFactor=1.1, minNeighbors=10, minSize=(20, 20))

            #Draw a rectangle for each detected eye in green.
            #Detections are relative to the face ROI, hence the face_x/face_y offsets.
            for (ex, ey, ew, eh) in eyes[:2]:  #We limit at 2 eyes
                #Reducing the rectangle dimensions by 4 pixels on each side
                cv2.rectangle(frame, (face_x + ex + 4, face_y + ey + 4),
                              (face_x + ex + ew - 4, face_y + ey + eh - 4), (0, 255, 0), 2)

            if nose_available:
                #Detecting the nose within the face region in red
                nose = nose_cascade.detectMultiScale(face_roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
                if len(nose) > 0:
                    (nx, ny, nw, nh) = nose[0]  #Taking the first nose detected
                    vertical_offset = 4  #Value to shift the rectangle up or down in vertical sense
                    cv2.rectangle(frame, (face_x + nx + 6, face_y + ny - vertical_offset),
                                  (face_x + nx + nw - 6, face_y + ny + nh - 5 - vertical_offset), (0, 0, 255), 2)

        #Show the frames
        cv2.imshow("Face Detection with Eyes and Nose", frame)

        if cv2.waitKey(30) & 0xFF == ord('q'): #Click 'q' to exit loop.
            break
finally:
    #Release resources, even when an error interrupts the loop
    videoReader.release()
    cv2.destroyAllWindows()