In [1]:
import sys
sys.path.append('../')

import os
import cv2
import argparse
import shutil
import numpy as np
from lxml import etree
import tqdm
import matplotlib.pyplot as plt
import glob
from tools.draw_utils import draw_inference
from tools.image_utils import is_image


In [2]:
def get_coordinates(box_element):
    """Read the four corner attributes of a box element as integers.

    Args:
        box_element: object exposing ``.get(key)`` for the keys ``'xtl'``,
            ``'ytl'``, ``'xbr'`` and ``'ybr'``; each value must be parseable
            by ``float``.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)``. Each value is parsed as a float
        and then truncated towards zero by ``int``.
    """
    corner_keys = ('xtl', 'ytl', 'xbr', 'ybr')
    xmin, ymin, xmax, ymax = (int(float(box_element.get(key))) for key in corner_keys)
    return xmin, ymin, xmax, ymax


def _child_text(parent, tag):
    """Return the text of `parent`'s first `tag` child, or None if either is missing."""
    child = parent.find(tag) if parent is not None else None
    return child.text if child is not None else None


def _attribute_named(box_tag, name):
    """Return the first child of `box_tag` whose `name` attribute equals `name`, else None."""
    for child in box_tag:
        if child.get('name') == name:
            return child
    return None


def parse_cvat(xml_path):
    """Parse an annotation XML file into one dict per ``<image>`` element.

    Args:
        xml_path: path of the XML file to parse.

    Returns:
        List of dicts, one per direct ``image`` child of the root, with keys:
          - ``'filename'``: basename of the image's ``name`` attribute
          - ``'task_id'`` / ``'task_name'``: text of the first ``task`` element's
            ``id`` / ``name`` children (None when the element is absent)
          - ``'symbols'``: ``[value, xmin, ymin, xmax, ymax]`` rows, where
            ``value`` is the text of the box's first child as a string
          - ``'rubs'``, ``'kops'``, ``'price_areas'``: ``[xmin, ymin, xmax, ymax]`` rows
          - ``'discount_areas'``: ``[orientation, xmin, ymin, xmax, ymax]`` rows,
            where ``orientation`` is the text of the ``Orientation`` attribute

    Raises:
        Exception: if an image contains a non-``box`` child, a box has an
            unknown label, a ``Symbol`` box has no child element, or a
            ``Discount Area`` box has no ``Orientation`` attribute.
    """
    root = etree.parse(xml_path).getroot()

    # The task metadata is optional here: a file without it yields None values
    # instead of failing with an AttributeError on None.
    task = root.find('.//task')
    task_id = _child_text(task, 'id')
    task_name = _child_text(task, 'name')

    anns = []
    for image_tag in root.iterfind('image'):
        image_info = {
            'filename': os.path.basename(image_tag.get('name')),
            'task_id': task_id,
            'task_name': task_name,
            'symbols': [],
            'rubs': [],
            'kops': [],
            'price_areas': [],
            'discount_areas': [],
        }

        for box_tag in image_tag:
            if box_tag.tag != 'box':
                raise Exception(f'Not box! box: {box_tag.tag}, xml_path: {xml_path}')

            label = box_tag.get('label')
            box = list(get_coordinates(box_tag))

            if label == 'Symbol':
                if len(box_tag) == 0:
                    # Report the offending file instead of a bare IndexError.
                    raise Exception(f'Symbol box has no value attribute, xml_path: {xml_path}')
                image_info['symbols'].append([str(box_tag[0].text), *box])
            elif label == 'Roubles':
                image_info['rubs'].append(box)
            elif label == 'kopecks':
                image_info['kops'].append(box)
            elif label == 'Price Area':
                image_info['price_areas'].append(box)
            elif label == 'Discount Area':
                # Look the attribute up by name so the result does not depend
                # on the order in which attributes were written to the file.
                orientation = _attribute_named(box_tag, 'Orientation')
                if orientation is None:
                    first_name = box_tag[0].get('name') if len(box_tag) else None
                    raise Exception(f'Unknown attribute name: {first_name}, xml_path: {xml_path}')
                image_info['discount_areas'].append([orientation.text, *box])
            else:
                raise Exception(f'Unknown label! label: \'{label}\', xml_path: {xml_path}')

        anns.append(image_info)
    return anns


### COLLECT ANNOTATIONS AND IMAGES

In [3]:

# # dataset_dir = '/home/ml/datasets/price_tags/DANONE'
# # dataset_dir = '/home/ml/datasets/price_tags/EFKO'
# # dataset_dir = '/home/ml/datasets/price_tags/FERRERO'
# # dataset_dir = '/home/ml/datasets/price_tags/NESTLE'
# # dataset_dir = '/home/ml/datasets/price_tags/SHWARZ'



# ann_dir = os.path.join(dataset_dir, 'anns')
# img_dir = os.path.join(dataset_dir, 'imgs')


# # move annotations
# paths = glob.glob(os.path.join(dataset_dir, '**', '*.xml'), recursive=True)
# paths = [i for i in paths if not i.startswith(ann_dir)]
# paths = [i for i in paths if not i.startswith(img_dir)]
# print(f'annotations: {len(paths)}')
# for i, p in enumerate(paths, 1):
#     os.makedirs(ann_dir, exist_ok=True)
#     save_p = os.path.join(ann_dir, f'annotations_{i}.xml')
#     os.rename(p, save_p)


# # move images
# paths = glob.glob(os.path.join(dataset_dir, '**'), recursive=True)
# paths = [i for i in paths if is_image(i)]
# paths = [i for i in paths if not i.startswith(ann_dir)]
# paths = [i for i in paths if not i.startswith(img_dir)]
# print(f'images: {len(paths)}')
# for p in paths:
#     os.makedirs(img_dir, exist_ok=True)
#     img_name = os.path.basename(p)
#     save_p = os.path.join(img_dir, img_name)
#     os.rename(p, save_p)


# # delete everything except 'ann_dir' and 'img_dir'
# paths = glob.glob(os.path.join(dataset_dir, '*'))
# paths.remove(ann_dir)
# paths.remove(img_dir)
# print(f'delete: {len(paths)}')
# for p in paths:
#     if os.path.isfile(p):
#         os.remove(p)
#     else:
#         shutil.rmtree(p)



#### READ AND SAVE DATA

In [4]:

# Dataset to process: keep exactly one `dataset_dir` assignment active.
# dataset_dir = '/home/ml/datasets/price_tags/DANONE'
# dataset_dir = '/home/ml/datasets/price_tags/EFKO'
# dataset_dir = '/home/ml/datasets/price_tags/FERRERO'
# dataset_dir = '/home/ml/datasets/price_tags/NESTLE'
dataset_dir = '/home/ml/datasets/price_tags/SHWARZ'


ann_dir = os.path.join(dataset_dir, 'anns')
img_dir = os.path.join(dataset_dir, 'imgs')

# Class ids stored as the first item of every annotation row
# `[class_id, xmin, ymin, xmax, ymax]`. Symbols other than '%' are converted
# with int(), so they keep their own numeric value as the class id.
PERCENT_ID = 10
ROUBLES_ID = 11
KOPECKS_ID = 12
PRICE_AREA_ID = 13
DISCOUNT_AREA_IDS = {'Horizontal': 14, 'Vertical': 15}


def to_object_array(items):
    """Pack `items` into a 1-D object array holding exactly one element per item.

    `np.array(items)` is not used because it raises on ragged input in recent
    NumPy versions and silently returns a higher-rank array when all items
    happen to share a shape.
    """
    packed = np.empty(len(items), dtype=object)
    for idx, item in enumerate(items):
        packed[idx] = item
    return packed


# Sorted so that the order of processing (and therefore which duplicate of an
# image wins in the "Make unique!" step) is the same on every run.
ann_paths = sorted(glob.glob(os.path.join(ann_dir, 'annotations_*.xml')))

img_paths = []
imgs = []
anns = []
files_not_found = []
files_unreadable = []

for ann_path in ann_paths:

    ann_info_list = parse_cvat(ann_path)

    for ann_info in ann_info_list:
        img_path = os.path.join(img_dir, ann_info['filename'])

        if not os.path.exists(img_path):
            files_not_found.append(img_path)
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an undecodable file by returning None rather
            # than raising; do not let that placeholder into the dataset.
            files_unreadable.append(img_path)
            continue

        ann = []
        for symbol_value, *box in ann_info['symbols']:
            class_id = PERCENT_ID if symbol_value == '%' else int(symbol_value)
            ann.append([class_id, *box])
        for box in ann_info['rubs']:
            ann.append([ROUBLES_ID, *box])
        for box in ann_info['kops']:
            ann.append([KOPECKS_ID, *box])
        for box in ann_info['price_areas']:
            ann.append([PRICE_AREA_ID, *box])
        for orientation, *box in ann_info['discount_areas']:
            if orientation not in DISCOUNT_AREA_IDS:
                # Fail loudly instead of storing a string where a class id is expected.
                raise ValueError(f'Unknown discount orientation: {orientation!r}, xml_path: {ann_path}')
            ann.append([DISCOUNT_AREA_IDS[orientation], *box])

        img_paths.append(img_path)
        imgs.append(img)
        anns.append(ann)


img_paths = np.array(img_paths, dtype=str)
imgs = to_object_array(imgs)
anns = to_object_array(anns)
print(f'img_paths: {len(img_paths)}')
print(f'imgs: {len(imgs)}')
print(f'anns: {len(anns)}')
print(f'\nFILES NOT FOUND: {len(files_not_found)}')
if files_unreadable:
    print(f'FILES UNREADABLE: {len(files_unreadable)}')

# ****************************************************
# Keep a single entry per image path (np.unique returns the index of the
# first occurrence of each path, and the paths come back sorted).
print(f'\n\nMake unique!')
img_paths, unique_img_idxs = np.unique(img_paths, return_index=True)
imgs = imgs[unique_img_idxs]
anns = anns[unique_img_idxs]
print(f'unique img_paths: {len(img_paths)}')
print(f'unique imgs: {len(imgs)}')
print(f'unique anns: {len(anns)}')

if len(imgs) != len(anns):
    print('The lengths dont match !!!!!!!')

# ****************************************************
# Drop images that ended up without a single annotation row.
print(f'\n\nFilter empty anns!')
# Explicit bool dtype: an empty list would otherwise become a float array,
# which cannot be inverted with `~`.
empty_anns = np.array([len(a) == 0 for a in anns], dtype=bool)
print(f'empty anns: {np.count_nonzero(empty_anns)}')
keep_mask = ~empty_anns
img_paths = img_paths[keep_mask]
imgs = imgs[keep_mask]
anns = anns[keep_mask]
print(f'img_paths: {len(img_paths)}')
print(f'imgs: {len(imgs)}')
print(f'anns: {len(anns)}')
# ****************************************************


img_paths: 1245
imgs: 1245
anns: 1245

FILES NOT FOUND: 0


Make unique!
unique img_paths: 1245
unique imgs: 1245
unique anns: 1245


Filter empty anns!
empty anns: 7
img_paths: 1238
imgs: 1238
anns: 1238


In [None]:
# np.save(dataset_dir + '_filenames.npy', img_paths)
# np.save(dataset_dir + '_images.npy', imgs)
# np.save(dataset_dir + '_annotations.npy', anns)


#### PLOT SAMPLES

In [None]:
# Which samples to draw. One of the two assignments must be active, otherwise
# the loop below fails with a NameError on a fresh kernel.
start_stop = slice(0, 200)
# start_stop = np.random.randint(len(img_paths), size=200)

for img_path, img, ann in zip(img_paths[start_stop], imgs[start_stop], anns[start_stop]):
    print(img_path)

    # Each figure is titled with the image's file name.
    draw_inference(img,
         products_list=[ann],
#          titles=['gt'],
         titles=[os.path.basename(img_path)],
         show=True,
         plot_size=500,
         dpi=75)