# Read the Data We Got

In [1]:
import skimage.io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
import shutil
import os
import numpy as np
import xml.etree.ElementTree as ET
import pathlib
import cv2 as cv

In [2]:
def parse_file(filename, compact=False):
    """Read the object annotations from a PASCAL VOC xml file.

    Every ``<object>`` element yields a box ``[xmin, ymin, width, height]``.
    Each coordinate is truncated to an int first; width and height are then
    computed from the truncated values.

    Parameters
    ----------
    filename : path or file object
        Passed straight to ``xml.etree.ElementTree.parse``.
    compact : bool, optional
        If False (default), return a list of dicts with the keys ``'name'``
        and ``'bbox'``. If True, return a list of the bare boxes.

    Returns
    -------
    list
        One entry per ``<object>`` element, in document order.
    """
    def _coord(box_node, tag):
        # Coordinates may be written as floats in the xml; truncate to int.
        return int(float(box_node.find(tag).text))

    parsed = []
    for node in ET.parse(filename).findall('object'):
        label = node.find('name').text
        box_node = node.find('bndbox')
        xmin, ymin = _coord(box_node, 'xmin'), _coord(box_node, 'ymin')
        xmax, ymax = _coord(box_node, 'xmax'), _coord(box_node, 'ymax')
        box = [xmin, ymin, xmax - xmin, ymax - ymin]
        parsed.append(box if compact else {'name': label, 'bbox': box})

    return parsed

In [3]:
# Plot images
def plot_varroa_(i, image_paths=None, boxes=None):
    """Show image ``i`` with its bounding boxes outlined in red.

    Parameters
    ----------
    i : int
        Index into the image-path and annotation sequences.
    image_paths : sequence of str, optional
        Image file paths. Defaults to the notebook-level ``image_names``.
    boxes : sequence, optional
        Per-image sequences of ``[x, y, w, h]`` boxes (the layout returned by
        ``parse_file(..., compact=True)``). Defaults to the notebook-level
        ``annotations``.

    Raises
    ------
    NameError
        If an argument is omitted and the corresponding notebook-level name
        (``image_names`` / ``annotations``) has not been defined.
    """
    # Fall back to the globals the function originally relied on, so existing
    # ``plot_varroa_(i)`` calls keep working wherever those names exist.
    if image_paths is None:
        image_paths = image_names
    if boxes is None:
        boxes = annotations

    fig, ax = plt.subplots(1, 1, figsize=(12, 12))

    im = skimage.io.imread(image_paths[i])
    for anno in boxes[i]:
        x, y, w, h = anno[0], anno[1], anno[2], anno[3]
        ax.add_patch(
            patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
        )

    ax.imshow(im)
    ax.axis('off')
    plt.show()

In [10]:
def xywh2contour(rectangle):
    """Turn an ``[x, y, w, h]`` rectangle into a 4-point contour array.

    The corners are listed in the order ``(x, y)``, ``(x+w, y)``,
    ``(x+w, y+h)``, ``(x, y+h)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(4, 1, 2)`` and dtype ``int32``.
    """
    left, top, width, height = rectangle
    right, bottom = left + width, top + height
    corners = np.array(
        [[left, top], [right, top], [right, bottom], [left, bottom]],
        dtype=np.float64,  # float intermediate; truncated by the int32 cast below
    )
    return corners.reshape((4, 1, 2)).astype(np.int32)
#xywh2contour([931, 1074, 24, 25])    

In [27]:
def contour2xywh(contour):
    """Return the axis-aligned bounding box ``[x, y, w, h]`` of a contour.

    Parameters
    ----------
    contour : numpy.ndarray
        Indexed as ``contour[:, 0, 0]`` for x values and ``contour[:, 0, 1]``
        for y values, i.e. the ``(N, 1, 2)`` layout that ``xywh2contour``
        produces.

    Returns
    -------
    list
        ``[x, y, w, h]`` where ``x``/``y`` are the minimum coordinates and
        ``w``/``h`` the extents (max - min). The entries are NumPy scalars.
    """
    xs = contour[:, 0, 0]
    ys = contour[:, 0, 1]
    x_min, x_max = np.min(xs), np.max(xs)
    y_min, y_max = np.min(ys), np.max(ys)

    # max - min is never negative, so abs() is unnecessary. The earlier
    # abs(min - max) form wrapped around for unsigned integer dtypes.
    return [x_min, y_min, x_max - x_min, y_max - y_min]

# Round-trip check: converting a box to a contour and back should return the
# same [x, y, w, h] values (the cell displays the result).
x = xywh2contour([931, 1074, 24, 25])    
contour2xywh(x)

[931, 1074, 24, 25]

In [5]:
def prepare_images_and_masks(label, dataset_name):
    """Export one split of the project data as per-image image/mask folders.

    Reads the image ids listed (one per line) in
    ``<ROOT_DIR>/datasets/project-data/<label>.txt`` and, for every id, writes::

        <ROOT_DIR>/datasets/<dataset_name>/<id>/images/<id>.jpg
        <ROOT_DIR>/datasets/<dataset_name>/<id>/masks/<j>.jpg

    The image is a grayscale copy of the source image. Each mask is a black
    canvas of the same size with annotation ``j`` drawn as a filled white
    rectangle. An image without annotations gets a single all-black
    ``masks/0.jpg``.

    Parameters
    ----------
    label : str
        Split name; selects ``annotations/<label>``, ``images/<label>`` and
        ``<label>.txt`` under ``datasets/project-data``.
    dataset_name : str
        Name of the output directory created under ``<ROOT_DIR>/datasets``.

    Raises
    ------
    FileNotFoundError
        If the reference file is missing or a listed image cannot be read.
    """
    source_dir = os.path.join(ROOT_DIR, 'datasets', 'project-data')
    annotations_path = os.path.join(source_dir, 'annotations', label)
    images_path = os.path.join(source_dir, 'images', label)
    reference_file = os.path.join(source_dir, f'{label}.txt')

    with open(reference_file, 'r') as f:
        stripped = [line.strip() for line in f]
    # Skip blank lines (e.g. a trailing newline) so no empty id is processed.
    ids = [token for token in stripped if token]

    # Parse every annotation up front so a missing or broken xml file is
    # reported before anything is written.
    annotations = [
        parse_file(os.path.join(annotations_path, token) + '.xml', compact=True)
        for token in ids
    ]
    image_names = [os.path.join(images_path, token) + '.jpg' for token in ids]

    # Root every output path at ROOT_DIR so directories, images and masks all
    # land in the same place regardless of the current working directory.
    output_root = pathlib.Path(ROOT_DIR, 'datasets', dataset_name)
    output_root.mkdir(parents=True, exist_ok=True)

    for token, image_name, boxes in zip(ids, image_names, annotations):
        images_dir = output_root / token / 'images'
        masks_dir = output_root / token / 'masks'
        images_dir.mkdir(parents=True, exist_ok=True)
        masks_dir.mkdir(parents=True, exist_ok=True)

        # cv.imread returns None instead of raising when the file is unreadable.
        img = cv.imread(image_name, cv.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f'Could not read image {image_name!r}')
        cv.imwrite(str(images_dir / f'{token}.jpg'), img)

        # NOTE(review): masks are stored as JPEG, which is lossy, so reloaded
        # masks may not be strictly 0/255 near box edges. The extension is kept
        # because downstream readers may expect .jpg; consider PNG.
        blank = np.zeros(img.shape, dtype=np.uint8)
        for j, anno in enumerate(boxes):
            img_mask = cv.drawContours(blank.copy(), [xywh2contour(anno)], 0, (255, 255, 255), -1)
            cv.imwrite(str(masks_dir / f'{j}.jpg'), img_mask)

        if not boxes:
            cv.imwrite(str(masks_dir / '0.jpg'), blank)
    

In [6]:
# Base directory for all dataset paths: the working directory at the moment
# this cell runs, stored as a plain string.
ROOT_DIR = str(pathlib.Path.cwd())

## Dealing with Train Data

In [7]:
# Export the 'train' split into datasets/stage1_train/<id>/{images,masks}.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# last recorded run did not finish — confirm stage1_train is complete.
prepare_images_and_masks(label='train', dataset_name='stage1_train')

KeyboardInterrupt: 

In [12]:
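# NOTE: disabled preview cell. In the cells shown here, `image_names`, `ids`
# and `annotations` are only assigned inside prepare_images_and_masks, so they
# must be defined at notebook level before these lines can be re-enabled.
# i = 4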
# print(f"Images: {image_names[i]}")
# print(f"Dataset called {ids[i]}")
# print(f'Number of varroas: {len(annotations[i])}')
# print(f'Varroa in {annotations[i]}')
# plot_varroa_(i)

## Dealing with Test Data

In [8]:
# Export the 'test' split (ids from datasets/project-data/test.txt) into
# datasets/stage1_test/<id>/{images,masks}.
prepare_images_and_masks(label='test', dataset_name='stage1_test')

## Dealing with Validation Data

In [9]:
# Export the 'validation' split (ids from datasets/project-data/validation.txt)
# into datasets/stage1_validation/<id>/{images,masks}.
prepare_images_and_masks(label='validation', dataset_name='stage1_validation')