# 1. Viola-Jones Algorithm for Face Detection

In [None]:
# Viola - Jones Algorithm 
# 1. Training and Detection
# Designed to detect frontal faces - not faces turned sideways or tilted up
# We use grayscale images - they are easier to work with than colour images

# Boxes of different sizes are tried over different patches of the image. A patch (area of the image) where every feature
# required for a face is present is a strong candidate for a face.
# Paul Viola and Michael Jones (2001)

In [2]:
# Haar-like Features
# edge features (eyebrows, can be nose line), line features (mouth, nose line, eyes), four-rectangle features
# Above features should be typically present in a face 

# We might find places with a Haar-like feature that resembles, say, a nose; the other features then help
# decide whether that patch is a face or not

# To calculate a Haar-like feature, we average the pixel intensities in each of the two types of region and take the
# difference of the averages; a threshold on that difference then classifies the patch as showing the feature or not

In [3]:
# Integral Image - of same size of original image - USED TO CALCULATE HAAR LIKE FEATURES EFFICIENTLY
# Each cell A(i, j) holds the sum of all pixel intensities in the sub-matrix of dimension (i, j), with A(i, j) at the
# bottom-right corner of that sub-matrix

# Once the integral image is calculated, we only need four lookups to get the sum over any rectangle of a Haar-like feature
# This speeds up the Viola-Jones algorithm a lot

In [4]:
# TRAINING IN VIOLA - JONES IMAGE detection - 
# The image is shrunk to 24x24, then the features are looked for
# In the actual paper, 4916 face images were used to train the algorithm
# 9544 non-face images - these were bigger than 24x24 - gave the 350 million subwindows used in the VJ algorithm
# constantine p. papageorgiou (1998)

In [5]:
# ADAPTIVE BOOSTING (ADABOOST)
# even a small image of 24x24 will have almost 180k different features, which is very huge
# this is a very big problem while training and also while detecting faces in real time
# ENSEMBLE - POWER OF THE CROWD 
# Boosting Image Retrieval - Kinh Tieu and Paul Viola (2000)

In [6]:
# CASCADING 
# take a subwindow, take top 5 features and see if those features are there in that subwindow, if not present reject 
# the subwindow
# if all of them are present then check for second set of features. if they are not present, then reject 
# and so on 

In [7]:
import cv2

In [17]:
# Loading the cascades (pre-trained Haar cascade XML files stored under ./data)

# How do we create a cascade?
face_cascade = cv2.CascadeClassifier("./data/haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier("./data/haarcascade_eye.xml")

# CascadeClassifier does not raise when the XML file is missing or unreadable; it
# silently yields an empty classifier that only fails later inside detectMultiScale.
# Fail fast here so a wrong path is reported where it is made.
for _label, _cascade in (("face", face_cascade), ("eye", eye_cascade)):
    if _cascade.empty():
        raise IOError("Could not load the {} Haar cascade from ./data".format(_label))

In [50]:
# The cascade works on black-and-white (grayscale) images
# Function that performs the detections
def detect(gray, frame):
    """Draw rectangles around detected faces and eyes on ``frame`` and return it.

    Args:
        gray: Image handed to the cascades for detection (the caller passes the
            grayscale version of ``frame``).
        frame: Image the rectangles are drawn on; it is modified in place.

    Returns:
        The same ``frame`` object, with a (255, 0, 0) box per face and a
        (0, 255, 0) box per eye found inside a face.
    """
    # Detects objects of different sizes in the input image. Each detection is a
    # rectangle (x, y, w, h): (x, y) is the top-left point, w and h the width and height.
    faces = face_cascade.detectMultiScale(image=gray, scaleFactor=1.3, minNeighbors=5)

    for (x, y, w, h) in faces:
        # Draw a rectangle around the face.
        cv2.rectangle(img=frame, pt1=(x, y), pt2=(x+w, y+h), color=(255, 0, 0), thickness=2)

        # Region of interest for the eye search: rows span the face height,
        # columns span the face WIDTH (previously sliced with h by mistake).
        roi_gray = gray[y:y+h, x:x+w]
        roi_frame = frame[y:y+h, x:x+w]

        # Eyes are searched only inside the face region.
        eyes = eye_cascade.detectMultiScale(image=roi_gray, scaleFactor=1.1, minNeighbors=3)

        for (ex, ey, ew, eh) in eyes:
            # Eye coordinates are relative to the ROI; roi_frame is a slice of
            # frame, so drawing on it marks the full frame as well.
            cv2.rectangle(img=roi_frame, pt1=(ex, ey), pt2=(ex+ew, ey+eh), color=(0, 255, 0), thickness=2)

    return frame

In [54]:
# Live face/eye detection on the default webcam (device 0); press 'q' to quit.
video_capture = cv2.VideoCapture(0)
if not video_capture.isOpened():
    raise IOError("Could not open the webcam (device 0)")

try:
    while True:
        # Get the latest frame from the webcam; `grabbed` is False when no frame was read.
        grabbed, frame = video_capture.read()
        if not grabbed:
            # Without a frame, cvtColor would fail on None, so stop cleanly instead.
            break

        # Black-and-white version of the image, which the cascades work on.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Get the output of our detect function.
        canvas = detect(gray, frame)

        # Display the output.
        cv2.imshow('Video', canvas)

        # Stop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always run the cleanup, even if detection raised.
    video_capture.release()  # We turn the webcam off.
    cv2.destroyAllWindows()  # We destroy all the windows inside which the images were displayed.

# 2. Detecting emotions from the images

In [33]:
# haarcascades from the opencv repo itself
# https://github.com/opencv/opencv/tree/master/data/haarcascades

In [65]:
smile_cascade = cv2.CascadeClassifier("./data/haarcascade_smile.xml")

# CascadeClassifier does not raise when the XML file cannot be loaded; it yields an
# empty classifier that only fails later inside detectMultiScale. Fail fast instead.
if smile_cascade.empty():
    raise IOError("Could not load the smile Haar cascade from ./data")

def detect_smile(gray, frame):
    """Draw rectangles around detected faces, eyes and smiles on ``frame`` and return it.

    Args:
        gray: Image handed to the cascades for detection (the caller passes the
            grayscale version of ``frame``).
        frame: Image the rectangles are drawn on; it is modified in place.

    Returns:
        The same ``frame`` object, with a (255, 0, 0) box per face, a (0, 255, 0)
        box per eye and a (0, 0, 255) box per smile found inside a face.
    """
    # Faces must come from the face cascade (the smile cascade was used here by
    # mistake, so the "face" boxes were really smile detections).
    faces = face_cascade.detectMultiScale(image=gray, scaleFactor=1.3, minNeighbors=5)

    for (x, y, w, h) in faces:
        cv2.rectangle(img=frame, pt1=(x, y), pt2=(x+w, y+h), color=(255, 0, 0), thickness=2)

        # Eyes and smiles are searched only inside the face region.
        roi_gray = gray[y:y+h, x:x+w]
        roi_frame = frame[y:y+h, x:x+w]

        eyes = eye_cascade.detectMultiScale(image=roi_gray, scaleFactor=1.1, minNeighbors=22)

        for (ex, ey, ew, eh) in eyes:
            # Coordinates are relative to the ROI; roi_frame is a slice of frame,
            # so drawing on it marks the full frame as well.
            cv2.rectangle(img=roi_frame, pt1=(ex, ey), pt2=(ex+ew, ey+eh), color=(0, 255, 0), thickness=2)

        smiles = smile_cascade.detectMultiScale(image=roi_gray, scaleFactor=1.7, minNeighbors=22)

        for (sx, sy, sw, sh) in smiles:
            cv2.rectangle(img=roi_frame, pt1=(sx, sy), pt2=(sx+sw, sy+sh), color=(0, 0, 255), thickness=2)

    return frame

In [66]:
# Live face/eye/smile detection on the default webcam (device 0); press 'q' to quit.
video_capture = cv2.VideoCapture(0)
if not video_capture.isOpened():
    raise IOError("Could not open the webcam (device 0)")

try:
    while True:
        # Get the latest frame from the webcam; `grabbed` is False when no frame was read.
        grabbed, frame = video_capture.read()
        if not grabbed:
            # Without a frame, cvtColor would fail on None, so stop cleanly instead.
            break

        # Black-and-white version of the image, which the cascades work on.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Get the output of our detect_smile function.
        canvas = detect_smile(gray, frame)

        # Display the output.
        cv2.imshow('Video', canvas)

        # Stop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always run the cleanup, even if detection raised.
    video_capture.release()  # We turn the webcam off.
    cv2.destroyAllWindows()  # We destroy all the windows inside which the images were displayed.

# 3. Single Shot Multibox Detection (SSD) Algorithm

In [None]:
# How SSD is different ?
# The image is seen (i.e. input) only once. It is not searched iteratively with patches of different sizes.

# SSD will break the image down into segments