In [101]:
import os
from pathlib import Path
import shutil
from distutils.dir_util import copy_tree

import pandas as pd
import json
import time
import datetime
from PIL import Image

In [102]:
import numpy as np
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import scipy.stats
import itertools

# https://github.com/qwirky-yuzu/COCO-to-YOLO/blob/main/coco_to_yolo.py
# https://github.com/pylabel-project/samples/blob/main/coco2yolov5.ipynb
try:
    import pylabel
except:
    !pip install pylabel
finally:
    from pylabel import importer

import matplotlib.patches as patches
import matplotlib
import matplotlib.colors as mcolors

# Use a large default font for every matplotlib figure in this notebook.
plt.rcParams.update({'font.size': 22})
# Name of a matplotlib style sheet (see plt.style.available);
# it is only stored here and is not applied in the visible cells.
style_label = 'fivethirtyeight' #plt.style.available 

In [103]:
import labelutilits as utl

In [104]:
try:
    import dsatools 
except:
    !pip install dsatools
finally:
    import dsatools.utilits as ut

In [105]:
# The notebook works relative to the current working directory.
path = os.getcwd() 
# Show the sub-directories found next to the notebook.
print(utl.list_dirs(path))
# Path of the annotation file of the 'train' sub-directory
# (echoed as the cell output).
anno_path = utl.get_anno_path(path,'train')
anno_path

['labelutilits', 'part1', 'part2', 'part3', 'part4', 'part5', 'part6', 'part7', 'part8', 'part9', 'train']


'D:\\ASBEST!\\ALLVEINS\\train\\annotation.json'

In [106]:
# Summary of the train annotation; the returned dict is shown as the cell output.
utl.anno_info(anno_path)

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!


{'name': 'train',
 'anno_path': 'D:\\ASBEST!\\ALLVEINS\\train\\annotation.json',
 'anno_fname': 'annotation.json',
 'image_dir_path': 'D:\\ASBEST!\\ALLVEINS\\train',
 'length': 249,
 'anno_number': 4237,
 'class_id': [1, 2],
 'class_names': ['asbest', 'stone'],
 'height': [2048, 3456],
 'width': [2592, 5184],
 'COCO_obj': <pycocotools.coco.COCO at 0x22cee2af8d0>,
 'image_fname_example': 'D:\\ASBEST!\\ALLVEINS\\train\\14-02-21_5_1_9_part1.bmp'}

In [113]:
# Build the annotation that keeps only category id 2:
# rename the categories by position, drop the other categories,
# strip the directory from the image file names, renumber all ids,
# write the result to 'veins_anno.json' and keep the dict for inspection.
data = (
    Annotation(anno_path)
    .set_cat_names(new_names=['stones', 'asbest'])
    .filter_cat(cat_ids=[2])
    .new_image_dir(new_dir='')
    .rest_ids()
    .save('veins_anno.json')
    .data_dict()
)

In [114]:
# Build the annotation that keeps only category id 1:
# rename the categories by position, drop the other categories,
# strip the directory from the image file names, renumber all ids,
# write the result to 'stone_anno.json' and keep the dict for inspection.
data = (
    Annotation(anno_path)
    .set_cat_names(new_names=['stones', 'asbest'])
    .filter_cat(cat_ids=[1])
    .new_image_dir(new_dir='')
    .rest_ids()
    .save('stone_anno.json')
    .data_dict()
)

In [112]:
# Inspect the first annotation record of the dict currently held in `data`.
data['annotations'][0]

{'id': 1,
 'image_id': 1,
 'category_id': 1,
 'segmentation': [[526.01,
   1634.26,
   688.99,
   1517.85,
   847.31,
   1317.62,
   935.78,
   1238.46,
   912.5,
   1075.48,
   800.75,
   903.19,
   702.96,
   800.75,
   437.54,
   758.84,
   232.65,
   810.06,
   88.3,
   959.07,
   55.71,
   1159.3,
   51.05,
   1308.31,
   214.03,
   1443.34,
   418.91,
   1527.16]],
 'area': 535795.0,
 'bbox': [51.05, 758.84, 884.73, 875.42],
 'iscrowd': 0,
 'attributes': {'occluded': False}}

In [97]:
class Annotation:
    ''' 
    Class for processing an annotation in json coco format.

    All processing methods return ``self``, so calls can be chained:
    ``Annotation(path).filter_cat([1]).rest_ids().save('out.json')``.
    The loaded dictionary is modified in place by the processing methods.
    
    Parameters
    ----------
    anno_path: string, 
      path for annotation file
    image_dir_path: string,
      path for image directory;
      if None the directory of anno_path is used.
    
    Methods
    ---------
    open_data: Open data in json format
    set_cat_names: New class (categories) names
    filter_cat: Keep only selected categories
    info: Return summarized information from annotation
    new_image_dir: Replace image dir path
    add_image_path_2_anno: Prefix image file names with image_dir_path
    save: Save data in json format
    data_dict: Return data in format dict[list[dict]]
    rest_ids: Reset category ids; image ids; anno_ids
    get_anno_path: Return current annotation path
    '''
 
    def __init__(self, anno_path, image_dir_path = None):
        self.anno_path = anno_path
        self.image_dir_path = image_dir_path
        if self.image_dir_path is None:
            # By default images are expected next to the annotation file.
            self.image_dir_path = os.path.split(anno_path)[0]
        self.open_data(self.anno_path)

    def open_data(self,anno_path):
        ''' Open data in json format; anno_path becomes the current path.'''
        self.anno_path = anno_path
        with open(self.anno_path) as json_file:
            self.data = json.load(json_file)
        return self

    def set_cat_names(self,new_names = ['']):
        ''' New class (categories) names,
            assigned by position; works only if the length of 
            new_names is the same as the number of categories,
            otherwise ValueError is raised.'''
        self.data = _set_cat_names(self.data, new_names = new_names)
        return self
    
    def filter_cat(self, cat_ids = None):
        '''Keep only selected categories, their annotations and 
           the images referenced by those annotations;
           if None only images without any annotation are dropped.'''
        self.data = _filter_cat(self.data, cat_ids = cat_ids)
        return self
    
    def new_image_dir(self, new_dir = ''):
        ''' Replace image dir path in all image file names'''
        self.data = _replace_image_dir(self.data, new_dir = new_dir)
        self.image_dir_path = new_dir
        return self
    
    def rest_ids(self):
        ''' Reset category ids; image ids; anno_ids'''
        self.data = _reset_ids(self.data)
        return self
        
    def add_image_path_2_anno(self):
        ''' Prefix image file names with the current image_dir_path'''
        return self.new_image_dir(self.image_dir_path)
    
    def data_dict(self):
        ''' Return data in format dict[list[dict]]'''
        return self.data
    
    def info(self):
        '''
        Return summarized information from annotation
        
        Returns
        -----------
        dict['string':[int, string]].   

        Notes
        -------------
        output includes: 
        * dataset_name: name of the dataset;
        * anno_path: path to annotation;
        * anno_fname: annotation file name;
        * image_dir_path: path to image directory;
        * length: length of dataset (images number);
        * anno_number: number of instances for all images;
        * cat_ids: identification number of each class;
        * class_names: names of classes;
        * supercategory: names of supercategories;        
        * height: image heights;
        * width: image widths;
        * fname_example: image file name example.
        '''
        info =  _get_data_info(self.data)
        info['image_dir_path']  = self.image_dir_path
        info = {**info, 
                'anno_path':self.anno_path,
                'anno_fname':os.path.split(self.anno_path)[1]}
        return info
    
    def save(self, new_path = None, replace_path = False):
        ''' Save data in json format.

            Parameters
            ----------
            new_path: string,
              output file; if None the current anno_path is utilized.
            replace_path: bool,
              if True new_path becomes the current anno_path;
              in this case new_path without a directory part 
              is placed into the directory of the current anno_path.
        '''
        if new_path is None:
            new_path = self.anno_path
        # Resolve the target before writing, so that the stored anno_path
        # always points to the file that was actually written.
        if replace_path and os.path.split(new_path)[0] == '':
            new_path = os.path.join(
                            os.path.split(self.anno_path)[0],
                            new_path)
        with open(new_path, 'w') as f:
            json.dump(self.data, f)
        if replace_path:
            self.anno_path = new_path
            
        return self
    
    def get_anno_path(self) :
        ''' Return current annotation path'''
        return self.anno_path
    
    
#---------------------------------------
def _open(anno_path):
    ''' Open data in json format
    Parameters
    ----------
    anno_path: string, 
      path annotation file.
    
    Returns
    ----------
    dict[list[dict]],
      coco format dict for json save.
    '''
    with open(anno_path) as json_file:
        data = json.load(json_file)
        json_file.close()
    return data
#---------------------------------------
def _set_cat_names(data, new_names):
    '''Set categories (class) names
    
    Parameters
    ----------
    data: dict[list[dict]], 
      coco format dict from json.
    new_names: list[string],
      new names for classes (categories).
      
    Returns
    ----------
    dict[list[dict]],
      coco format dict for json save.
    '''
    new_names = list(np.atleast_1d(new_names))
    if len(new_names) != len(data['categories']):
        raise ValueError('''len(new_names) != len(data['categories'])''')
    for i,name in enumerate(new_names):
        data['categories'][i]['name'] = name
    return data
#---------------------------------------
def _cat_ids(data, cat_ids = None):
    '''Set categories (class) ids
    
    Parameters
    ----------
    data: dict[list[dict]], 
      coco format dict from json.
    cat_ids: list[string],
      new category id, all for classes (categories).
      
    Returns
    ----------
    dict[list[dict]],
      coco format dict for json save.
    '''
    if cat_ids == None: 
        cat_ids = [id_['id'] for id_ in data['categories']]
    else: 
        cat_ids = list(np.atleast_1d(cat_ids))
        data['categories'] = [x for x in data['categories'] if x['id'] in cat_ids]
    return cat_ids, data
#---------------------------------------
def _filter_cat(data, cat_ids = None):
    '''Keep only selected categories
    
    Annotations of other categories are removed, then images 
    that are not referenced by any remaining annotation are removed.
    data is modified in place.
    
    Parameters
    ----------
    data: dict[list[dict]], 
      coco format dict from json.
    cat_ids: list[int] or int,
      category ids to keep; if None all categories are kept
      and only images without annotations are removed.
      
    Returns
    ----------
    dict[list[dict]],
      coco format dict for json save.
    '''
    cat_ids, data = _cat_ids(data = data, cat_ids = cat_ids)

    # Sets give constant-time membership tests for large datasets.
    keep_cat_ids = set(cat_ids)
    data['annotations'] = [anno for anno in data['annotations'] 
                           if anno['category_id'] in keep_cat_ids]
    keep_image_ids = {anno['image_id'] for anno in data['annotations']}
    data['images'] = [image for image in data['images'] 
                      if image['id'] in keep_image_ids]
    return data

#---------------------------------------
def _replace_image_dir(data, new_dir=''):
    
    for i in range(len(data['images'])):
#         fname = os.path.basename(data['images'][i]['file_name']).split('\\')[-1]
        fname = os.path.split(data['images'][0]['file_name'])[-1]
        data['images'][i]['file_name'] = os.path.join(new_dir, fname)
    return data

#---------------------------------------
def _get_data_info(data):
    desc = dict()
    desc['cat_ids']   = [x['id'] for x in data['categories']]
    desc['class_names'] = [x['name'] for x in data['categories']]
    desc['supercategory'] = [x['supercategory'] for x in data['categories']]
    desc['width']  = list({x['width'] for x in data['images']})
    desc['height'] = list({x['height'] for x in data['images']})
    desc['length']      = len(data['images'])
    desc['anno_number'] = len(data['annotations'])
    desc['fname_example']  = data['images'][0]['file_name'] 
    image_dir_path = os.path.split(desc['fname_example'])[0]
    name_dataset = os.path.split(os.path.split(image_dir_path)[0])[1]
    desc['dataset_name']  = name_dataset

    return desc

#---------------------------------------
def _reset_ids(data):
    tmp_cat_id = dict()
    for i in range(len(data['categories'])):
        tmp_cat_id.update({data['categories'][i]['id']: i+1})#  = data['categories'][i]['id']
        data['categories'][i]['id'] = i+1
    #---------------------------------

    #---------------------------------
    tmp_img_id = dict()
    for i in range(len(data['images'])):
        tmp_img_id.update({data['images'][i]['id']:i+1})
        data['images'][i]['id'] = i+1
    #---------------------------------    

    #---------------------------------
    tmp_anno_id = np.zeros(len(data['annotations']), dtype = int)

    for i in range(len(data['annotations'])):
        data['annotations'][i]['category_id'] =\
            tmp_cat_id[data['annotations'][i]['category_id']]
        data['annotations'][i]['image_id'] =\
            tmp_img_id[data['annotations'][i]['image_id']]
        data['annotations'][i]['id'] = i+1
    return data

In [116]:
# Scratch value: a bare file name without a directory part.
# NOTE(review): the recorded output is a (head, tail) pair that this line
# alone cannot produce - presumably left from an os.path.split check; confirm.
newpath = 'rere.json'


('', 'rere.json')

In [99]:
# Same processing as for the veins export, but without saving:
# rename the categories by position, keep only category id 2,
# strip the directory from the image file names and renumber all ids.
data = (
    Annotation(anno_path)
    .set_cat_names(new_names=['stones', 'asbest'])
    .filter_cat(cat_ids=[2])
    .new_image_dir(new_dir='')
    .rest_ids()
    .data_dict()
)

In [100]:
# Check which categories remain after filtering and renumbering.
data['categories']

[{'id': 1, 'name': 'asbest', 'supercategory': ''}]

In [17]:
def _correct_anno_img_path(data, image_dir_path=None):
    '''
    Correct image pathers in annotation file 
    in correspondance with real image path.
    
    Parameters
    ----------
    anno_path: string, 
      path annotation file.
    image_dir_path: string,
      path iamge directory; 
      if none anno file and images are in the same directory. 

    Returns
    ----------
    dict[string:string],
      old and new entries in annotation.
    '''
#     coco_anno_dict,  coco, image_dir_path = anno2coco(anno_path, image_dir_path)
    df = pd.DataFrame(columns = ['old_path', 'new_path'])

    for i in range(len(data['images'])):  
        fpth = data['images'][i]['file_name'].split('/')[-1]

        if image_dir_path !=None:
            fpth = os.path.join(image_dir_path, fpth)
            
        if image_id == None or data['images'][i]['id'] in image_id:
            data['images'][i]['file_name'] = fpth
        
        dict_ = {'old_path': data['images'][i]['file_name'],                        
                 'new_path': fpth},
        
        df = pd.concat([df, pd.DataFrame(dict_)])

    return df

In [31]:
# Same steps with the module-level helpers instead of the Annotation class.
data = _open(anno_path)
# Names are assigned by position in data['categories'].
data = _set_cat_names(data, new_names = ['stones', 'asbest'])
# A single id is accepted as well as a list of ids.
data = _filter_cat(data, cat_ids = 2)

In [None]:
def correct_anno_img_names(anno_path, image_dir_path=None, image_id = None):
    '''
    Correct image paths in annotation file 
    in correspondence with real image path.
    
    The directory part of each image file name is dropped and, 
    if given, replaced with image_dir_path. 
    The annotation file is overwritten with the result.
    
    Parameters
    ----------
    anno_path: string, 
      path annotation file.
    image_dir_path: string,
      path of image directory; 
      if None only bare file names are left. 
    image_id: list[int],
      images to correct, all if None.

    Returns
    ----------
    pandas.DataFrame,
      one row per image with columns 'old_path' and 'new_path';
      rows are reported for all images, but the annotation entry 
      is changed only for the selected ones.
    '''
    # The context manager closes the file; no explicit close() is needed.
    with open(anno_path) as json_file:
        data = json.load(json_file)

    records = []
    for image in data['images']:  
        # Remember the entry before it is overwritten.
        old_path = image['file_name']
        # Both '/' and '\\' are treated as path separators.
        new_path = old_path.replace('\\', '/').rsplit('/', 1)[-1]

        if image_dir_path is not None:
            new_path = os.path.join(image_dir_path, new_path)
            
        if image_id is None or image['id'] in image_id:
            image['file_name'] = new_path
        
        records.append({'old_path': old_path, 'new_path': new_path})

    with open(anno_path, 'w') as f:
        json.dump(data, f)
    
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns = ['old_path', 'new_path'])


In [7]:
# Load the raw annotation dict; the context manager closes the file,
# so no explicit close() is needed.
with open(anno_path) as json_file:
    data = json.load(json_file)