# Image Collection, Labelling & Data Augmentation

## Setup

In [None]:
# Provide operating system dependent functionality
import os
# Provides some common string operations
import string
# OpenCV packages for python
!pip install opencv-python
import cv2
# Package used to create unique IDs
import uuid
# Python Image Library
from PIL import Image, ImageEnhance
# Python Library for arrays
import numpy as np
# XML parser
from xml.dom import minidom
# Offers high level operations on files
import shutil

In [None]:
# Creates the file hierarchy for the project.
# Maps a short name to the relative directory used for each kind of data.
paths = {
    'collected_images' : os.path.join('Tensorflow', 'Data', 'Images', 'Collected'),
    'background_images' : os.path.join('Tensorflow', 'Data', 'Images', 'Background'),
    'image_labelling_tool' : os.path.join('Image-Labelling-Tool', 'labelImg'),
    'augmented_images' : os.path.join('Tensorflow', 'Data', 'Images', 'Augmented'),
    'training_images' : os.path.join('Tensorflow', 'Data', 'Images', 'train'),
    'testing_images' : os.path.join('Tensorflow', 'Data', 'Images', 'test'),
}

# os.makedirs creates missing parent directories and does not depend on the
# shell, unlike a plain `mkdir`, which fails on nested paths on POSIX systems.
# exist_ok=True makes re-running the cell harmless.
for directory in paths.values():
    os.makedirs(directory, exist_ok=True)

## 1. Creating the dataset

### A. Image Collection

In [None]:
# Creates a directory for each label for which images need to be captured.
# The labels are 'Accept', 'Delete' and every uppercase letter except J and Z
# (presumably because those two signs involve motion - TODO confirm).
classes = ['Accept', 'Delete'] + [
    letter for letter in string.ascii_uppercase if letter not in ('J', 'Z')
]

# os.makedirs is used instead of a shell `mkdir` so the cell behaves the same
# on every platform; exist_ok=True makes re-running the cell harmless.
for label in classes:
    os.makedirs(os.path.join(paths['collected_images'], label), exist_ok=True)

In [None]:
# Used to capture images for the dataset.
# Press E to save the current frame, Q to stop collecting altogether.

# Number of Images to be taken for each class
number_of_images = 20

# Open the video stream once and reuse it for every class; opening a new
# capture per class without releasing the previous one leaks camera handles.
camera = cv2.VideoCapture(0)
stop_capture = False

try:
    for label in classes:
        print(f'Collecting images for {label}')
        for number in range(number_of_images):
            print(f'Collecting image number {number}')
            # Path to save image at. uuid ensures each image has a unique name
            image_path = os.path.join(paths['collected_images'], label, f'collected.{uuid.uuid1()}.jpg')
            while True:
                # Read each frame from the video stream
                ret, frame = camera.read()
                if not ret:
                    # No frame available (camera missing or disconnected);
                    # stop instead of passing an empty frame to imshow.
                    print('Failed to read a frame from the camera')
                    stop_capture = True
                    break
                # Display the frame from the video stream
                cv2.imshow('Frame', frame)
                # Poll the keyboard once per frame: a second waitKey call
                # would miss a key press already consumed by the first.
                key = cv2.waitKey(1) & 0xFF
                # Take picture when the E key is pressed
                if key == ord('e'):
                    cv2.imwrite(image_path, frame)
                    break
                # Exit if the Q key is pressed
                if key == ord('q'):
                    stop_capture = True
                    print('Exited')
                    break
            if stop_capture:
                break
        if stop_capture:
            break
finally:
    # Release all resources, even if an error interrupted the capture
    camera.release()
    # Close all cv2 windows
    cv2.destroyAllWindows()

In [None]:
# Used to capture background images for data augmentation.
# Press E to save the current frame as a background, Q to stop.

camera = cv2.VideoCapture(0)
print("Camera Ready")

try:
    while True:
        ret, frame = camera.read()
        if not ret:
            # No frame available (camera missing or disconnected);
            # stop instead of passing an empty frame to imshow.
            print('Failed to read a frame from the camera')
            break
        cv2.imshow('frame', frame)
        # Poll the keyboard once per frame: a second waitKey call would miss
        # a key press already consumed by the first.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('e'):
            # uuid ensures each background image has a unique name
            background_image_path = os.path.join(paths['background_images'], f'background.{uuid.uuid1()}.jpg')
            cv2.imwrite(background_image_path, frame)
            print('Image Saved')
        elif key == ord('q'):
            print('Exited')
            break
finally:
    # Release the camera and close the preview window even on error
    camera.release()
    cv2.destroyAllWindows()

### B. Installing LabelImg

In [None]:
# Installs XML processing library required by LabelImg
!pip install --upgrade pyqt5 lxml
# Clones and installs LabelImg (Tzutalin. LabelImg. Git code (2015))
!git clone https://github.com/tzutalin/labelImg {paths['image_labelling_tool']}
!cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

### C. Image Labelling

In [None]:
# Opens LabelImg by running labelImg.py from inside the cloned repository
!cd {paths['image_labelling_tool']} && python labelImg.py

## 2. Data Augmentation

### A. Creating the mask

In [None]:
# Create a mask of input image to isolate the shape of the hand sign
def create_mask(input_image, fill = True, show_mask = False):
    """Build a soft-edged mask that excludes pixels inside a fixed colour range.

    Pixels whose channel values fall inside the threshold range below are
    treated as background (black in the mask); everything else is kept
    (white in the mask).

    Args:
        input_image: PIL image of the hand sign. Images that are not in
            'RGB' mode are converted to 'RGB' first.
        fill: When True, every outer contour found in the thresholded mask
            is redrawn completely filled, which closes holes inside it.
        show_mask: Debug aid. When True, the mask is displayed in an OpenCV
            window and the call blocks until a key is pressed.

    Returns:
        The mask as a single-channel ('L' mode) PIL image.
    """

    # Normalise the colour mode so the channel swap below always receives a
    # three-channel image (grayscale or palette images would otherwise fail).
    if isinstance(input_image, Image.Image) and input_image.mode != 'RGB':
        input_image = input_image.convert('RGB')

    # Convert PIL Image to numpy array. PIL yields channels in R, G, B order;
    # the swap below leaves the array in B, G, R order.
    input_image_array = np.asarray(input_image)
    input_image_array = cv2.cvtColor(input_image_array, cv2.COLOR_BGR2RGB)

    # Upper and lower bounds of the colour range treated as background.
    # They are applied per channel to the B, G, R array above: B and G must
    # lie in [30, 250] and R in [0, 100].
    # NOTE(review): these were originally described as HSV bounds for green,
    # but no HSV conversion is performed - confirm the intended colour space.
    u_green = np.array([250, 250, 100])
    l_green = np.array([30, 30, 0])

    # Create a mask by thresholding against the colour range
    mask = cv2.inRange(input_image_array, l_green, u_green)
    # Invert Mask so that the pixels outside the range become white
    mask = 255 - mask

    # Fill erroneous holes in the mask
    if fill:
        # Find outer contours
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # findContours returns either 2 or 3 values depending on the OpenCV
        # version; pick the element that holds the contour list.
        contours = contours[0] if len(contours) == 2 else contours[1]
        # Set the mask to be completely black
        mask = np.zeros_like(input_image_array)
        # Draw the contours on the mask and fill the contour with white
        for contour in contours:
            cv2.drawContours(mask, [contour], 0, (255,255,255), -1)

    # Anti-alias the mask, blur then stretch
    # Apply blur
    mask = cv2.GaussianBlur(mask, (0,0), sigmaX=2, sigmaY=2, borderType = cv2.BORDER_DEFAULT)

    # Apply linear stretch so that 127.5 goes to 0, but 255 stays 255
    mask = (2*(mask.astype(np.float32))-255.0).clip(0,255).astype(np.uint8)

    # Debug tool that displays the mask generated
    if show_mask:
        cv2.imshow('Mask', mask)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Convert mask to a single-channel PIL Image from a numpy array
    mask = (Image.fromarray(mask)).convert('L')

    return mask

### B. Background replacement and rescaling

In [None]:
# Solid white and solid black 640x480 backgrounds, each stored with the
# name that identifies it in the augmented file names.
white_rgb_background = dict(
    rgb_image_name=f'{255}.{255}.{255}',
    rgb_image=Image.new('RGB', (640, 480), (255, 255, 255)),
)

black_rgb_background = dict(
    rgb_image_name=f'{0}.{0}.{0}',
    rgb_image=Image.new('RGB', (640, 480), (0, 0, 0)),
)

# White first, then black
rgb_backgrounds = [white_rgb_background, black_rgb_background]

In [None]:
# Scale factors used for rescaling data augmentation on the training dataset
Scale_Factors = [0.8,0.9,1]
# Scale factors used for rescaling data augmentation on the testing dataset
# Scale_Factors = [1]


def _set_xml_text(document, tag_name, value):
    """Overwrite the text of the first <tag_name> element of a minidom document."""
    document.getElementsByTagName(tag_name)[0].firstChild.data = str(value)


def _save_augmented_sample(background_image, background_name, scaled_image, scaled_mask,
                           scaled_box, file_xml, file_name, scale):
    """Composite one scaled image onto one background and save the image/XML pair.

    Args:
        background_image: PIL image used as the new background (not modified).
        background_name: Name of the background, used in the output file name.
        scaled_image: The collected image, already resized.
        scaled_mask: The mask for scaled_image, already resized.
        scaled_box: (x_offset, y_offset, width, height) of the bounding box
            inside scaled_image.
        file_xml: Parsed annotation of the collected image. Its filename and
            bounding box fields are overwritten before it is written out.
        file_name: Name of the collected image without extension.
        scale: Scale factor applied, used in the output file name.
    """
    # Copy the background image so the shared original stays untouched
    resulting_file_image = background_image.copy()

    # Centre the scaled image on the background actually being used
    background_width, background_height = resulting_file_image.size
    paste_position_x = int((background_width - scaled_image.size[0]) / 2)
    paste_position_y = int((background_height - scaled_image.size[1]) / 2)

    # Paste the collected image on top of the background image using the mask
    # Essentially replacing the background of the collected image
    resulting_file_image.paste(scaled_image, (paste_position_x, paste_position_y), scaled_mask)

    # Generate name for resulting augmented files
    resulting_file_name = f'augmented-{file_name}-{background_name}-scale-{scale}'
    resulting_file_image_name = f'{resulting_file_name}.jpg'

    # The parsed document is reused for every output. Each field written here
    # is an absolute value derived from the numbers read before augmentation
    # started, so earlier writes never leak into later ones.
    box_x_offset, box_y_offset, box_width, box_height = scaled_box
    x_min = paste_position_x + box_x_offset
    y_min = paste_position_y + box_y_offset
    _set_xml_text(file_xml, 'filename', resulting_file_image_name)
    _set_xml_text(file_xml, 'xmin', x_min)
    _set_xml_text(file_xml, 'ymin', y_min)
    _set_xml_text(file_xml, 'xmax', x_min + box_width)
    _set_xml_text(file_xml, 'ymax', y_min + box_height)

    # Save the new augmented image file
    resulting_file_image.save(os.path.join(paths['augmented_images'], resulting_file_image_name))

    # Write the augmented XML. toxml() declares no encoding, which XML readers
    # interpret as UTF-8, so the file is written as UTF-8 explicitly.
    resulting_file_xml_path = os.path.join(paths['augmented_images'], f'{resulting_file_name}.xml')
    with open(resulting_file_xml_path, 'w', encoding='utf-8') as new_document:
        new_document.write(file_xml.toxml())


# Load every background once instead of re-opening the files for each
# collected image and scale. Captured backgrounds come first, followed by the
# solid-colour ones, which is the order in which outputs are produced.
backgrounds = []
for background_file in os.listdir(paths['background_images']):
    if background_file.endswith('.jpg'):
        background_file_path = os.path.join(paths['background_images'], background_file)
        with Image.open(background_file_path) as opened_background:
            # copy() loads the pixels so the file can be closed right away
            backgrounds.append((os.path.splitext(background_file)[0], opened_background.copy()))
for rgb_background in rgb_backgrounds:
    backgrounds.append((rgb_background['rgb_image_name'], rgb_background['rgb_image']))

# Iterate through each class folder
for label in classes:
    label_path = os.path.join(paths['collected_images'], label)
    # The masks for 'O' and 'F' are created without contour fill
    # (presumably because these signs enclose a gap that filling would
    # close - TODO confirm).
    fill_contours = label not in ('O', 'F')

    # Iterate through each collected image
    for file in os.listdir(label_path):
        if not file.endswith('.jpg'):
            continue
        file_path = os.path.join(label_path, file)
        file_name = os.path.splitext(file)[0]

        # The annotation is expected next to the image, with the same name
        file_xml_path = os.path.join(label_path, f'{file_name}.xml')
        if not os.path.isfile(file_xml_path):
            # An unlabelled image cannot be augmented; skip it rather than
            # aborting the whole run.
            print(f'Skipping {file_path}: no annotation file found')
            continue

        # Open image
        with Image.open(file_path) as opened_image:
            file_image = opened_image.copy()

        # Open the corresponding XML
        file_xml = minidom.parse(file_xml_path)

        # Read Bounding Box Dimensions (first bounding box in the file only)
        bounding_box_x_min = int(file_xml.getElementsByTagName('xmin')[0].firstChild.data)
        bounding_box_y_min = int(file_xml.getElementsByTagName('ymin')[0].firstChild.data)
        bounding_box_x_max = int(file_xml.getElementsByTagName('xmax')[0].firstChild.data)
        bounding_box_y_max = int(file_xml.getElementsByTagName('ymax')[0].firstChild.data)
        bounding_box_width = bounding_box_x_max - bounding_box_x_min
        bounding_box_height = bounding_box_y_max - bounding_box_y_min

        # Create a mask for the image
        mask = create_mask(file_image, fill = fill_contours)

        for scale in Scale_Factors:
            # Scale image and mask
            new_size = int(round(file_image.size[0] * scale)), int(round(file_image.size[1] * scale))
            scaled_image = file_image.resize(new_size)
            scaled_mask = mask.resize(new_size)

            # Calculate scaled bounding box dimensions and offset
            scaled_box = (
                int(round(bounding_box_x_min * scale)),
                int(round(bounding_box_y_min * scale)),
                int(bounding_box_width * scale),
                int(bounding_box_height * scale),
            )

            # Produce one augmented image/XML pair per background
            for background_name, background_image in backgrounds:
                _save_augmented_sample(background_image, background_name, scaled_image, scaled_mask,
                                       scaled_box, file_xml, file_name, scale)

## Move all augmented images and XML files into the training or testing folder

In [None]:
# Destination for the files currently in the augmented folder.
# Moving empties the augmented folder, so one run can only fill one
# destination: keep 'training_images' after augmenting the training set, and
# switch to 'testing_images' after augmenting the testing set.
destination = paths['training_images']
# destination = paths['testing_images']

# Move every augmented image and XML file into the chosen folder
for file in os.listdir(paths['augmented_images']):
    shutil.move(os.path.join(paths['augmented_images'], file), os.path.join(destination, file))

## Citations

Tzutalin. LabelImg. Git code (2015). https://github.com/tzutalin/labelImg