In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import xml.etree.ElementTree as ET
import zipfile

## Unzip the dataset exported from CVAT
Set `zipfile_name` to the name of the CVAT export archive, without the `.zip` extension.

In [None]:
# Parameters Setting

# Base name of the CVAT export: '<zipfile_name>.zip' is read and its
# contents are extracted into a directory named '<zipfile_name>'.
zipfile_name = 'Img8_13_15'

# ==================================================

with zipfile.ZipFile(zipfile_name + '.zip', mode='r') as archive:
    # The destination is only created once the archive has been opened.
    if not os.path.exists(zipfile_name):
        os.makedirs(zipfile_name)

    archive.extractall(path=zipfile_name)

## Label Box Extraction
- `image_names_parser(path)` -> returns the names of all images listed in the annotations
- `labels_parser(path, image_name)` -> returns the label boxes of the given image, grouped by label

Test: print the boxes of one image to check that they are in the right place.


In [None]:
def image_names_parser(path):
    """Return the names of all images listed in a CVAT annotation file.

    Args:
        path: Directory that contains ``annotations.xml``.

    Returns:
        list: The ``name`` attribute of every ``<image>`` element that is a
        direct child of the XML root, in document order. An ``<image>``
        without a ``name`` attribute contributes ``None``.
    """
    annotation_root = ET.parse(f'{path}/annotations.xml').getroot()
    return [image_node.get('name') for image_node in annotation_root.findall('image')]

def labels_parser(path, image_name):
    """Return the annotated boxes of one image, grouped by label.

    Args:
        path: Directory that contains ``annotations.xml``.
        image_name: Value of the ``name`` attribute of the wanted ``<image>``.

    Returns:
        list: One list per label, indexed in the order in which the label
        names first appear in ``<label>`` elements of the file. Each inner
        list holds ``[xtl, ytl, xbr, ybr]`` boxes whose coordinates are
        truncated to integers.

    Raises:
        ValueError: If no ``<image>`` named ``image_name`` exists.
        KeyError: If a box refers to a label that is not declared in the file.
    """
    root = ET.parse(f'{path}/annotations.xml').getroot()

    # Map label name -> index. Numbering by first appearance keeps every index
    # below len(labels) even when a label name is declared more than once.
    labels = {}
    for label in root.iter('label'):
        name_node = label.find('name')
        if name_node is None:
            continue
        labels.setdefault(name_node.text, len(labels))

    # If several images share the name, the last one wins (as before).
    image_node = None
    for image in root.findall('image'):
        if image.get('name') == image_name:
            image_node = image
    if image_node is None:
        raise ValueError(f'Image {image_name!r} not found in {path}/annotations.xml')

    coordinates = ('xtl', 'ytl', 'xbr', 'ybr')
    boxs = [[] for _ in labels]
    for box in image_node.findall('box'):
        # int(float(...)) truncates toward zero, like dropping the decimals.
        points = [int(float(box.get(coordinate))) for coordinate in coordinates]
        boxs[labels[box.get('label')]].append(points)

    return boxs
# Smoke test: parse one known image and print its boxes, one line per label.
boxs = labels_parser(
    zipfile_name,
    'WLT_350_210926 P_2111131 WLT_350_210926__008 P_2111131_processed.JPG',
)
for label_boxes in boxs:
    print(label_boxes)

## Check that the annotations are displayed as they appear in CVAT
Set the index of the image and the feature type (`flag`) to display from the CVAT annotations.

Flag values of the feature types:
- strong hyperbola: 0
- weak hyperbola: 1
- strong reflector: 2
- weak reflector: 3
- other: 4
- no feature: 5

In [None]:
# Parameters Setting

index = 0   # position of the image in the annotation file
flag = 2    # feature type whose boxes are drawn

# ==================================================

test_image = image_names_parser(zipfile_name)[index]
boxs = labels_parser(zipfile_name, test_image)

print(test_image)
image_path = f'processed/{test_image}'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f'Cannot read image: {image_path}')
# OpenCV loads images as BGR while matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

box_width = 3
box_color = (0, 90, 180)  # same per-channel values as before (90 * channel index)
for box in boxs[flag]:
    # cv2.rectangle clips to the image, so boxes touching the border are
    # drawn correctly instead of wrapping around through negative indices.
    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), box_color, thickness=2 * box_width)

plt.imshow(img)
plt.title(test_image)
plt.show()

In [None]:
def check_out_of_box(sliding_window, box):
    """Tell whether a sliding window and a box are completely separated.

    Both rectangles are given as ``[x_left, y_top, x_right, y_bottom]``.
    Rectangles that merely touch along an edge or at a corner are NOT
    considered separated.

    Returns:
        bool: ``True`` if the rectangles share no point, ``False`` otherwise.
    """
    return not (
        box[0] <= sliding_window[2]
        and sliding_window[0] <= box[2]
        and box[1] <= sliding_window[3]
        and sliding_window[1] <= box[3]
    )

def check_box(sliding_window, box, overlapping):
    """Tell whether a sliding window overlaps a box strongly enough.

    Both rectangles are given as ``[x_left, y_top, x_right, y_bottom]``.
    The overlap area is compared with the smaller of the two rectangle areas.

    Args:
        sliding_window: Rectangle of the sliding window.
        box: Rectangle of the annotated feature.
        overlapping: Minimum accepted value of
            ``overlap_area / min(box_area, sliding_window_area)``.

    Returns:
        bool: ``True`` if the ratio reaches ``overlapping``. ``False`` if the
        rectangles are separated, if the ratio is too small, or if either
        rectangle has no area (the ratio is undefined in that case).
    """
    overlap_width = min(sliding_window[2], box[2]) - max(sliding_window[0], box[0])
    overlap_height = min(sliding_window[3], box[3]) - max(sliding_window[1], box[1])
    # A negative extent means the rectangles are separated along that axis.
    # Checking both axes also prevents two negative extents from multiplying
    # into a positive "area".
    if overlap_width < 0 or overlap_height < 0:
        return False

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    sliding_window_area = (sliding_window[2] - sliding_window[0]) * (sliding_window[3] - sliding_window[1])
    smaller_area = min(box_area, sliding_window_area)
    # Degenerate rectangles (e.g. a box collapsed by integer truncation)
    # would otherwise cause a division by zero.
    if smaller_area <= 0:
        return False

    return overlap_width * overlap_height / smaller_area >= overlapping

In [None]:
def crop_image(save_path, image_name, boxs, flag, overlapping, sliding_window_size=200, step=100):
    """Cut one image into square crops showing (or avoiding) annotated features.

    The image ``{save_path}/{image_name}`` is trimmed on its top and left
    sides, then scanned with a square sliding window. Selected windows are
    written to ``{save_path}/cropped/{sliding_window_size}_{step}/{flag}/``
    as ``{row}_{n}.jpg``, where ``row`` is the window's top coordinate in the
    trimmed image and ``n`` continues after the files already in that
    directory for the same row.

    Args:
        save_path: Directory holding the source image; crops go beneath it.
        image_name: File name of the source image inside ``save_path``.
        boxs: Boxes grouped by label index, each ``[xtl, ytl, xbr, ybr]`` in
            coordinates of the untrimmed image.
        flag: Feature type, 0-5. For 0-4 a window is kept when ``check_box``
            accepts it for at least one box of ``boxs[flag]``. For 5
            ('no feature') a window is kept when it touches no box at all.
        overlapping: Minimum overlap ratio forwarded to ``check_box``.
        sliding_window_size: Side length of the window, in pixels.
        step: Stride of the window in both directions, in pixels.

    Returns:
        int: Number of crops written by this call.

    Raises:
        ValueError: If ``flag`` is not a valid feature type.
        FileNotFoundError: If the source image cannot be read.
    """
    feature_type = ['strong hyperbola', 'weak hyperbola', 'strong reflector', 'weak reflector', 'other', 'no feature']
    if not 0 <= flag < len(feature_type):
        raise ValueError(f'flag must be between 0 and {len(feature_type) - 1}, got {flag}')

    # Boundary constants of the images: exclude the left axis and the data
    # above 0 ms.
    IMAGE_START = 64
    AXIS_ZERO = 121

    image_path = f'{save_path}/{image_name}'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError(f'Cannot read image: {image_path}')
    img = img[AXIS_ZERO:, IMAGE_START:]

    output_dir = f'{save_path}/cropped/{sliding_window_size}_{step}/{flag}'
    os.makedirs(output_dir, exist_ok=True)
    # Files written below for one row never match another row's '{row}_'
    # prefix, so a single listing taken up front is enough for numbering.
    existing_files = os.listdir(output_dir)

    all_boxs = [box for label_type in boxs for box in label_type]
    last_row = img.shape[0] - 1
    last_column = img.shape[1] - 1
    saved = 0

    for j in range(0, last_row, step):
        if j + sliding_window_size > last_row:
            break  # this window and all lower ones leave the image

        row_prefix = f'{j}_'
        count = sum(1 for file_name in existing_files if file_name.startswith(row_prefix))

        for i in range(0, last_column, step):
            if i + sliding_window_size > last_column:
                break  # this window and all further ones leave the image

            # The boxes use coordinates of the untrimmed image, so the window
            # is shifted back into that frame before the two are compared.
            left = i + IMAGE_START
            top = j + AXIS_ZERO
            sliding_window = [left, top, left + sliding_window_size, top + sliding_window_size]

            if flag < 5:
                # Keep the window if it overlaps enough with any box of this type.
                keep = any(check_box(sliding_window, box, overlapping) for box in boxs[flag])
            else:
                # 'no feature': keep the window only if it touches no box at all.
                keep = all(check_out_of_box(sliding_window, box) for box in all_boxs)

            if keep:
                sub_img = img[j:j + sliding_window_size, i:i + sliding_window_size]
                cv2.imwrite(f'{output_dir}/{j}_{count}.jpg', sub_img)
                count += 1
                saved += 1

    print(f'Finished generating images of {feature_type[flag]}')
    return saved

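## Set the save path of the cropped images
By default, the cropped images are saved under the `processed` directory, in `<save_path>/cropped/<sliding_window_size>_<step>/<flag>/`.

### Parameters
- save_path: directory that contains the source images and receives the crops. `'processed'` by default
- flag: feature type. If all kinds of features are required, run the cell once for each flag from 0 to 5
- sliding_window_size: side length of the square cropped images. 200 x 200 pixels by default
- step: step of the sliding window. 100 pixels by default in `crop_image`; the cell below sets it to 40
- overlapping: minimum accepted overlap between the sliding window and a feature box, measured against the smaller of the two areas. 0.8 in the cell below, which means that the overlap must cover at least 80% of the feature box (or of the sliding window, if that is the smaller one)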

Flag of the feature type
- strong hyperbola: 0
- weak hyperbola: 1
- strong reflector: 2
- weak reflector: 3
- other: 4
- no feature: 5

In [None]:
# Parameters Setting

save_path = 'processed'      # directory of the source images; crops are written beneath it
flag = 0                     # feature type to export (0-5)
sliding_window_size = 200    # side length of the square crops, in pixels
step = 40                    # stride of the sliding window, in pixels
overlapping = 0.8            # minimum overlap ratio handed to crop_image

# ==================================================

# Crop every image listed in the CVAT annotations for the selected feature type.
image_names = image_names_parser(zipfile_name)
for image_name in image_names:
    boxs = labels_parser(zipfile_name, image_name)
    count = crop_image(
        save_path,
        image_name,
        boxs,
        flag,
        overlapping,
        sliding_window_size=sliding_window_size,
        step=step,
    )