In [None]:
from jupyter_bbox_widget import BBoxWidget
import ipywidgets as widgets
import os
import json
import base64

In [None]:
def encode_image(filepath):
    """Return the contents of the file at `filepath` as a base64 data URI.

    The file is read in binary mode and base64-encoded. The URI prefix is
    always ``data:image/jpg;base64,`` regardless of what the file contains.
    """
    with open(filepath, 'rb') as image_file:
        payload = base64.b64encode(image_file.read()).decode('utf-8')
    return f"data:image/jpg;base64,{payload}"

In [None]:
# Class names offered by the annotation widget.
CLASS_LABELS = ['apple', 'orange', 'pear']

# Output files for the generated train / test annotation rows
# (relative to the notebook's working directory).
TRAIN_DATA_ANNOTATIONS_PATH = './train_data.txt'
TEST_DATA_ANNOTATIONS_PATH = './test_data.txt'

# Dataset root, the sub-directory holding the images to annotate, and the
# JSON file (inside that sub-directory) where the annotations are saved.
CUSTOM_DATASET_DIR = '/nvdli-nano/data/objectdetection/images/custom'
UNLABELLED_DATA_DIR = CUSTOM_DATASET_DIR + '/raw'
ANNOTATIONS_PATH = UNLABELLED_DATA_DIR + '/annotations.json'


In [None]:
# Files to annotate, in a stable (sorted) order.
# The annotations JSON is saved into this same directory, so it is excluded
# here; otherwise a re-run would try to load it as an image. Sub-directories
# (e.g. Jupyter's .ipynb_checkpoints) are skipped for the same reason.
files = sorted(
    name
    for name in os.listdir(UNLABELLED_DATA_DIR)
    if name != os.path.basename(ANNOTATIONS_PATH)
    and os.path.isfile(os.path.join(UNLABELLED_DATA_DIR, name))
)
# Maps image file name -> list of bounding boxes drawn for that image.
annotations = {}

In [None]:
# progress indicator: one step per file in the listing
w_progress = widgets.IntProgress(value=0, max=len(files), description='Progress')

# annotation widget, preloaded with the first file and the class list
first_image_path = os.path.join(UNLABELLED_DATA_DIR, files[0])
w_bbox = BBoxWidget(image=encode_image(first_image_path), classes=CLASS_LABELS)

# container holding the progress bar followed by the annotation widget
w_container = widgets.VBox([w_progress, w_bbox])

In [None]:
# when Skip button is pressed we move on to the next file
@w_bbox.on_skip
def skip():
    """Advance the progress bar and show the next image, if there is one.

    After the last image there is nothing left to load, so the handler only
    updates the progress bar and returns instead of indexing past the end of
    `files`.
    """
    next_index = w_progress.value + 1
    w_progress.value = next_index
    if next_index >= len(files):
        # every file has been handled; keep the current image on screen
        return
    # open new image in the widget
    image_file = files[next_index]
    w_bbox.image = encode_image(os.path.join(UNLABELLED_DATA_DIR, image_file))
    # here we assign an empty list to bboxes but
    # we could also run a detection model on the file
    # and use its output for creating initial bboxes
    w_bbox.bboxes = []

# when Submit button is pressed we save current annotations
# and then move on to the next file
@w_bbox.on_submit
def submit():
    """Save the boxes drawn on the current image, then move to the next one.

    The whole `annotations` dict is rewritten to ANNOTATIONS_PATH on every
    submit. Once all files have been handled, further submits are ignored.
    """
    if w_progress.value >= len(files):
        # already past the last file: there is no current image to save
        return
    image_file = files[w_progress.value]
    # save annotations for current image
    annotations[image_file] = w_bbox.bboxes
    with open(ANNOTATIONS_PATH, 'w') as f:
        json.dump(annotations, f, indent=4)
    # move on to the next file
    skip()

In [None]:
# bare expression as the last line of the cell: renders the annotation UI
w_container

In [None]:
def data_row_from_file(file, annotations_data):
    """Build one text row describing `file` and its bounding boxes.

    The row starts with ``UNLABELLED_DATA_DIR/file`` and is followed, for each
    entry in ``annotations_data[file]``, by ``label x_min y_min x_max y_max``
    separated by single spaces. The max corner is the entry's ``x``/``y`` plus
    its ``width``/``height``. The returned string ends with a newline.
    """
    parts = [f'{UNLABELLED_DATA_DIR}/{file}']
    for box in annotations_data[file]:
        left, top = box['x'], box['y']
        box_width, box_height = box['width'], box['height']
        label = box['label']
        right = left + box_width
        bottom = top + box_height
        parts.append(f'{label} {left} {top} {right} {bottom}')
    return ' '.join(parts) + '\n'

In [None]:
import json
import random

# Load the saved annotations and collect the names of the annotated images.
# These are kept under their own name on purpose: the annotation widget's
# Skip/Submit handlers index the module-level `files` listing, so rebinding
# `files` here would make them point at the wrong images if the widget is
# used again after this cell has run.
with open(ANNOTATIONS_PATH) as json_data:
    annotations_data = json.load(json_data)
annotated_files = list(annotations_data.keys())

# NOTE: the shuffle is unseeded, so the split differs on every run.
random.shuffle(annotated_files)

# The first ~80% of the shuffled names go to the training set and the
# remainder to the test set.
split_index = int((len(annotated_files) + 1) * .80)
train_data = annotated_files[:split_index]
test_data = annotated_files[split_index:]

# Write one row per image into each output file.
for output_path, subset in (
    (TRAIN_DATA_ANNOTATIONS_PATH, train_data),
    (TEST_DATA_ANNOTATIONS_PATH, test_data),
):
    with open(output_path, 'w') as f:
        f.writelines(data_row_from_file(file, annotations_data) for file in subset)

In [None]:
from pascal_voc.pascal_voc import PASCALVOC07
from easydict import EasyDict as edict
import os

# Metadata passed to the PASCAL VOC 2007 builder.
config = edict({
    "author": "anonymous",
    "root": "annotation",
    "folder": "VOC2007",
    "annotation": "PASCAL VOC2007",
    "segmented": "0",
    "difficult": "0",
    "truncated": "0",
    "pose": "Unspecified",
    "database": "Custom",
    "depth": "3",
})

# Annotation row files written by the train/test split step.
trainval_anno = TRAIN_DATA_ANNOTATIONS_PATH
test_anno = TEST_DATA_ANNOTATIONS_PATH

# Validation ratio handed to the builder; 0 here.
val_ratio = 0

p = PASCALVOC07(trainval_anno, test_anno, val_ratio, CUSTOM_DATASET_DIR, config)
p.build(True)