### Set up the project

In [5]:
# Import standard libraries
import random, os, uuid, shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt

### Set up the directories

In [6]:
# The anchor folder holds the reference images that the positive images are compared against
dirs = ['positive', 'negative', 'anchor']

# Create data/<name> for every dataset folder.
# `dir_name` is used instead of `dir` so the builtin dir() is not shadowed.
for dir_name in dirs:
    # exist_ok=True keeps this cell re-runnable: without it a second run
    # raises FileExistsError because the folders are already there.
    os.makedirs(os.path.join('data', dir_name), exist_ok=True)

### Collect the data for the Anchor, Positive and Negative Folders

##### Negative data folder
[Labeled Faces in the Wild Dataset](http://vis-www.cs.umass.edu/lfw/lfw.tgz)

In [7]:
# Extract the negative dataset archive (lfw.tgz must be in the working directory).
# NOTE: the images in this dataset are 250x250, which means the positive and
# anchor images need to be the same size.
!tar -xf lfw.tgz

In [8]:
# Move the uncompressed data to the data/negative folder.
# The archive is laid out as lfw/<person>/<image>; every image is moved
# directly into data/negative (flattened), then the empty lfw tree is removed.

lfw_root = 'lfw'
negative_path = os.path.join('data', dirs[1])

# Make sure the destination exists even if the setup cell was skipped
os.makedirs(negative_path, exist_ok=True)

# Guard on the source folder so the cell can be re-run: after a successful
# run 'lfw' has been deleted and os.listdir('lfw') would raise FileNotFoundError.
if os.path.isdir(lfw_root):
    for person_folder in os.listdir(lfw_root):
        person_path = os.path.join(lfw_root, person_folder)

        # Skip stray files at the top level of the archive; calling
        # os.listdir on a regular file raises NotADirectoryError.
        if not os.path.isdir(person_path):
            continue

        for image_file in os.listdir(person_path):
            old_path_file = os.path.join(person_path, image_file)
            new_path_file = os.path.join(negative_path, image_file)
            os.replace(old_path_file, new_path_file)

    # Now we can delete the lfw folder
    shutil.rmtree(lfw_root)
else:
    print("'lfw' folder not found - nothing to move (already processed?)")

#### Positive and Anchor Data Folder

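In this part of the project we ran into an interesting OpenCV issue: when a single VideoCapture loop handled both the anchor and the positive folder, the second if-branch was noticeably delayed by the first one and we only captured a fraction of the images we tried to take.
(A likely cause is that a key press returned by one `cv2.waitKey` call is no longer available to the next call, so several `waitKey` calls in the same iteration compete for the same key.) This led us to create a separate cell for each of the two cases, anchor and positive.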

In [9]:
# Center-crop helper used to shrink the webcam frames before saving them
def crop_img(img, scale=1.0):
    """Return the centered crop of ``img`` covering ``scale`` of each dimension.

    Parameters
    ----------
    img : array-like image indexed as ``img[row, column, ...]``
        Only ``img.shape[0]`` (height) and ``img.shape[1]`` (width) are used;
        any further axes (e.g. color channels) are kept untouched.
    scale : float, default 1.0
        Fraction of the width and height to keep around the image center.
        ``1.0`` returns the full image, ``0.5`` the central half of each axis.

    Returns
    -------
    The cropped view ``img[top:bottom, left:right]``.

    Notes
    -----
    The output size is ``scale`` times the input size, not a fixed size:
    a 250x250 result is only produced when the input is e.g. 500x500 with
    ``scale=0.5``.
    """
    height, width = img.shape[0], img.shape[1]
    center_x, center_y = width / 2, height / 2
    width_scaled, height_scaled = width * scale, height * scale

    # Clamp the crop window to the image. For scale > 1 the raw bounds fall
    # outside the image, and a negative start index would be interpreted as
    # "count from the end", silently returning the wrong region.
    left_x = max(int(center_x - width_scaled / 2), 0)
    right_x = min(int(center_x + width_scaled / 2), width)
    top_y = max(int(center_y - height_scaled / 2), 0)
    bottom_y = min(int(center_y + height_scaled / 2), height)

    img_cropped = img[top_y:bottom_y, left_x:right_x]
    return img_cropped

In [10]:
# In this cell we collect the anchor images from the webcam using OpenCV.
# Press 'a' to save the current (cropped) frame, press 'q' to quit.
# With VideoCapture we choose the webcam device (on this PC it is index 0)
anchor_dir   = os.path.join('data', dirs[2])

capture_obj = cv2.VideoCapture(0)

# try/finally guarantees the webcam is released and the window closed even
# when the cell is interrupted (e.g. KeyboardInterrupt from the kernel).
try:
    while capture_obj.isOpened():
        ret, frame = capture_obj.read()

        # Stop if the camera did not deliver a frame; `frame` is not usable then
        if not ret:
            break

        # Show the live frame, in order to be sure about the captured area
        cv2.imshow("Collect Data", frame)

        # Poll the keyboard exactly once per frame. With two waitKey calls in
        # the same iteration, a key returned by one call is not seen by the
        # other, so presses were randomly dropped.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('a'):
            # Add image to the anchor image folder
            img_file_a = os.path.join(anchor_dir, '{}.jpg'.format(uuid.uuid1()))
            cv2.imwrite(img_file_a, crop_img(frame, 0.5))
        elif key == ord('q'):
            # Quit the procedure of data collection
            break
finally:
    # Free the webcam driver and close the python window
    capture_obj.release()
    cv2.destroyAllWindows()



KeyboardInterrupt: 

In [None]:
# In this cell we collect the positive images from the webcam using OpenCV.
# Press 'p' to save the current (cropped) frame, press 'q' to quit.
# With VideoCapture we choose the webcam device (on this PC it is index 0)
positive_dir = os.path.join('data', dirs[0])

capture_obj = cv2.VideoCapture(0)

# try/finally guarantees the webcam is released and the window closed even
# when the cell is interrupted (e.g. KeyboardInterrupt from the kernel).
try:
    while capture_obj.isOpened():
        ret, frame = capture_obj.read()

        # Stop if the camera did not deliver a frame; `frame` is not usable then
        if not ret:
            break

        # Show the live frame, in order to be sure about the captured area
        cv2.imshow("Collect Data", frame)

        # Poll the keyboard exactly once per frame. With two waitKey calls in
        # the same iteration, a key returned by one call is not seen by the
        # other, so presses were randomly dropped.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('p'):
            # Add image to the positive image folder
            img_file_p = os.path.join(positive_dir, '{}.jpg'.format(uuid.uuid1()))
            cv2.imwrite(img_file_p, crop_img(frame, 0.5))
        elif key == ord('q'):
            # Quit the procedure of data collection
            break
finally:
    # Free the webcam driver and close the python window
    capture_obj.release()
    cv2.destroyAllWindows()