### Set up the project

In [1]:
# Import standard libraries
import os, uuid, shutil
import cv2

# Load the Haar cascade classifier used for face detection.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the XML file is missing or unreadable;
# it just stays empty and detectMultiScale fails later with an opaque
# assertion error. Fail here instead, with a message that names the file.
if face_cascade.empty():
    raise IOError(
        "Could not load 'haarcascade_frontalface_default.xml'; "
        "make sure the file is in the notebook's working directory."
    )

### Set up the directories

In [2]:
# Sub-folders of data/ used by the rest of the notebook. The anchor folder
# holds the reference images that the positive images are compared against.
dirs = ['positive', 'negative', 'anchor']

# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders exist from a previous run.
for folder_name in dirs:
    os.makedirs(os.path.join('data', folder_name), exist_ok=True)

### Collect the data for the Anchor, Positive and Negative Folders

##### Negative data folder
[Labeled Faces in the Wild Dataset](http://vis-www.cs.umass.edu/lfw/lfw.tgz)

In [3]:
# Extract the negative dataset archive (lfw.tgz) into the working directory.
# NOTE: the LFW images are 250x250 pixels, which means the positive and
# anchor images need to be the same size.
!tar -xf lfw.tgz

In [4]:
# Flatten the extracted tree (lfw/<person>/<image>) into the data/negative folder.
negative_path = os.path.join('data', dirs[1])

# Make sure the destination exists so os.replace cannot fail on a missing folder.
os.makedirs(negative_path, exist_ok=True)

for person_folder in os.listdir('lfw'):
    person_path = os.path.join('lfw', person_folder)

    # Skip anything at the top level that is not a folder; calling
    # os.listdir on a plain file would raise NotADirectoryError.
    if not os.path.isdir(person_path):
        continue

    for image_file in os.listdir(person_path):
        old_path_file = os.path.join(person_path, image_file)
        new_path_file = os.path.join(negative_path, image_file)
        # os.replace overwrites an existing destination file of the same name.
        os.replace(old_path_file, new_path_file)


# The images have been moved out, so the lfw folder can be deleted.
shutil.rmtree('lfw')

#### Positive and Anchor Data Folder

In this part of the project we ran into an interesting OpenCV issue: when a single VideoCapture loop handled both the anchor and the positive folder,
the second if-case was noticeably delayed by the first if-statement and we were only getting a fraction of the images taken. This led us to create a separate cell for each case, anchor and positive.

In [5]:
# Collect the anchor face images from the webcam using OpenCV.
# Controls (with the "Collect Data" window focused):
#   'a' - save every face detected in the current frame to the anchor folder
#   'q' - stop collecting
anchor_dir = os.path.join('data', dirs[2])

# Device index 0 selects the webcam.
capture_obj = cv2.VideoCapture(0)

try:
    while capture_obj.isOpened():
        ret, frame = capture_obj.read()

        # A failed grab returns ret=False with no usable frame; stop instead
        # of letting cvtColor raise on it.
        if not ret:
            break

        # Convert frame to grayscale for face detection
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Perform face detection
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=4, minSize=(60, 60))

        # Draw the rectangles on a copy so that the crops saved below come
        # from the untouched frame and do not contain the blue box edges.
        preview = frame.copy()
        for (x, y, w, h) in faces:
            cv2.rectangle(preview, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # Show the annotated frame, in order to be sure about the detected area
        cv2.imshow("Collect Data", preview)

        # Poll the keyboard exactly once per frame. Each waitKey call consumes
        # the pending key press, so polling separately for 'a' and 'q' makes
        # one check swallow the key meant for the other.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('a'):
            for (x, y, w, h) in faces:
                # Crop the detected face region and store it at 250x250
                face_roi = frame[y:y + h, x:x + w]
                img_file_a = os.path.join(anchor_dir, '{}.jpg'.format(uuid.uuid1()))
                # imwrite reports failure through its return value
                # instead of raising, so surface it explicitly.
                if not cv2.imwrite(img_file_a, cv2.resize(face_roi, (250, 250))):
                    print('Could not write {}'.format(img_file_a))
        elif key == ord('q'):
            # Quit the procedure of data collection
            break
finally:
    # Free the webcam driver and close the OpenCV window, even if the loop
    # above raised, so the camera is not left locked.
    capture_obj.release()
    cv2.destroyAllWindows()



In [8]:
# Collect the positive face images from the webcam using OpenCV.
# Controls (with the "Collect Data" window focused):
#   'p' - save every face detected in the current frame to the positive folder
#   'q' - stop collecting
positive_dir = os.path.join('data', dirs[0])

# Device index 0 selects the webcam.
capture_obj = cv2.VideoCapture(0)

try:
    while capture_obj.isOpened():
        ret, frame = capture_obj.read()

        # A failed grab returns ret=False with no usable frame; stop instead
        # of letting cvtColor raise on it.
        if not ret:
            break

        # Convert frame to grayscale for face detection
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Perform face detection; more than one face may be returned
        # (e.g. two people in case of double verification).
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=4, minSize=(60, 60))

        # Draw the rectangles on a copy so that the crops saved below come
        # from the untouched frame and do not contain the blue box edges.
        preview = frame.copy()
        for (x, y, w, h) in faces:
            cv2.rectangle(preview, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # Show the annotated frame, in order to be sure about the detected area
        cv2.imshow("Collect Data", preview)

        # Poll the keyboard exactly once per frame. Each waitKey call consumes
        # the pending key press, so polling separately for 'p' and 'q' makes
        # one check swallow the key meant for the other.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('p'):
            for (x, y, w, h) in faces:
                # Crop the detected face region and store it at 250x250
                face_roi = frame[y:y + h, x:x + w]
                img_file_p = os.path.join(positive_dir, '{}.jpg'.format(uuid.uuid1()))
                # imwrite reports failure through its return value
                # instead of raising, so surface it explicitly.
                if not cv2.imwrite(img_file_p, cv2.resize(face_roi, (250, 250))):
                    print('Could not write {}'.format(img_file_p))
        elif key == ord('q'):
            # Quit the procedure of data collection
            break
finally:
    # Free the webcam driver and close the OpenCV window, even if the loop
    # above raised, so the camera is not left locked.
    capture_obj.release()
    cv2.destroyAllWindows()



: 