In [1]:
import os
from pathlib import Path
import shutil
from distutils.dir_util import copy_tree

import pandas as pd
import json
import time
import datetime
from PIL import Image

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import scipy.stats
import itertools

import matplotlib.patches as patches
import matplotlib
import matplotlib.colors as mcolors

# Default text size for every figure drawn after this cell.
plt.rcParams['font.size'] = 22
# Style-sheet name (candidates are listed in plt.style.available); it is only stored here, not applied.
style_label = 'fivethirtyeight'

In [3]:
import labelutilits as utl

In [4]:
try:
    import pylabel
except:
    !pip install pylabel
finally:
    from pylabel import importer

In [5]:
try:
    import dsatools 
except:
    !pip install dsatools
finally:
    import dsatools.utilits as ut

### SEARCH FOR DATABASE PARTS

In [6]:
# Unfiltered look at the project folder: the current working directory is taken
# as the project root and handed to utl.list_dirs.
# NOTE(review): utl.list_dirs presumably returns sub-directory names (the recorded
# output shows folder names only) - confirm against labelutilits.
path = os.getcwd() # PROJECT PATH
dir_names = utl.list_dirs(path)
print(dir_names)

['labelutilits', 'part1', 'part2', 'part3', 'part4', 'part5', 'part6', 'part7', 'part8', 'part9']


In [7]:
path = os.getcwd() # PROJECT PATH
# Select the data parts by name rather than by position: slicing with [1:] only
# works while the helper package happens to sort first and no other folder
# (for example an output directory created later in the project root) exists.
# Assumes every data part folder is named 'part<N>', as in the recorded listing.
dir_names = [name for name in utl.list_dirs(path) if name.startswith('part')] #VALID DATABASE
print('DATABASE PARTS:', dir_names)

DATABASE PARTS: ['part1', 'part2', 'part3', 'part4', 'part5', 'part6', 'part7', 'part8', 'part9']


In [8]:
# Inspect one data part (position 1 in dir_names) as a worked example.
dir_id = 1
part_name = dir_names[dir_id]

anno_path = utl.get_anno_path(path, part_name)
print('Example of data part', anno_path)

print('\nData part content')
utl.print_dir_description(part_name)

print('\nSearch for labeled objects in the data dir')
labeld_image_ids = utl.check_anno_labels(anno_path, cat_ids=None)
# Bare last expression so the notebook displays the result.
labeld_image_ids

Example of data part D:\ASBEST!\ALLVEINS\part2\part2.json

Data part content
dirs []
content {'images': 225, 'dirs': 0, 'json': 1, 'xml': 1, 'csv': 0, 'txt': 0, 'other': 0}
not content []
anno cvat ['part2.json']
anno xml ['annotations.xml']
cnt img content 225

Search for labeled objects in the data dir
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


[143, 170, 178, 191, 193, 219, 224]

### CREATE DATA WITH LABELED ANNOTATIONS

In [9]:
path = os.getcwd()
# Pick the data parts by name, not by position: with [1:] any extra folder in the
# project root (e.g. an output directory from a previous run) would be collected
# as if it were a data part. Assumes data part folders are named 'part<N>'.
dir_names = [name for name in utl.list_dirs(path) if name.startswith('part')]

# Collect the annotations of all selected parts into one table.
annodf = utl.collec_newanno(path, dir_names)


part1
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
images:69.0, instances:1736

part2
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images:7.0, instances:120

part3
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images:53.0, instances:501

part4
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
No labeled data

part5
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
No labeled data

part6
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images:18.0, instances:108

part7
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images:13.0, instances:64

part8
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
images:33.0, instances:259

part9
loading annotations into memory...
Done (t=0.05s)
creating index...
index create

In [10]:
# Preview one row; the fixed random_state makes the displayed row reproducible on re-run.
annodf.sample(1, random_state=0)

Unnamed: 0,anno,class_id,class_names,img_desc,new_anno_ids,new_file_name,new_image_id,old_anno_ids,old_file_name,old_image_id
189,"[{'id': 2757, 'image_id': 190, 'category_id': ...","[1, 2]","[stone, asbest]","{'id': 190, 'width': 2592, 'height': 2048, 'fi...","[2757, 2758, 2759, 2760, 2761, 2762, 2763, 276...",D:\ASBEST!\ALLVEINS\part8\ManualDetailedImage_...,190.0,"[228, 229, 230, 231, 232, 233, 234, 235, 236, ...",ManualDetailedImage_12_52_28_150.bmp,30.0


### CREATE NEW JSON FILE WITH ANNOTATION

In [11]:
# Build 'annotation.json' from annodf via utl.create_json and show the returned path.
new_anno_path = utl.create_json(annodf, new_anno_name='annotation.json')
print(new_anno_path)

D:\ASBEST!\ALLVEINS\annotation.json


In [12]:
# Summarise the annotation file created above; image_dir_path is explicitly left as None.
utl.anno_info(new_anno_path, image_dir_path=None)

loading annotations into memory...
Done (t=0.10s)
creating index...
index created!


{'name': 'ALLVEINS',
 'anno_path': 'D:\\ASBEST!\\ALLVEINS\\annotation.json',
 'anno_fname': 'annotation.json',
 'image_dir_path': 'D:\\ASBEST!\\ALLVEINS',
 'length': 249,
 'anno_number': 4237,
 'class_id': [1, 2],
 'class_names': ['asbest', 'stone'],
 'height': [2048, 3456],
 'width': [2592, 5184],
 'COCO_obj': <pycocotools.coco.COCO at 0x187f13944a8>,
 'image_fname_example': 'D:\\ASBEST!\\ALLVEINS\\part1\\14-02-21_5_1_9.bmp'}

### COPY ALL LABELED DATA INTO ONE DIRECTORY

In [13]:
# Run utl.copy2train for the merged annotation file with 'train' as the image directory.
# Note: anno_path is rebound here; an earlier cell used the same name for the
# annotation file of a single data part.
df, anno_path = utl.copy2train(
    new_anno_path,
    new_img_dir='train',
    project_path=None,
    copy_anno=True,
)

Directory D:\ASBEST!\ALLVEINS\train created


In [14]:
# Preview one row of the copy report; the fixed random_state keeps the displayed row reproducible.
df.sample(1, random_state=0)

Unnamed: 0,copied,new file name,new path,old path
68,True,15-05-06_26_2_8_part1.bmp,D:\ASBEST!\ALLVEINS\train\15-05-06_26_2_8_part...,D:\ASBEST!\ALLVEINS\part1\15-05-06_26_2_8.bmp


In [15]:
# Summarise the annotation file referenced by anno_path.
# NOTE(review): anno_path is expected to be the value returned by utl.copy2train
# (the recorded output points into the 'train' folder) - re-run that cell first,
# since an earlier cell binds the same name to a single part's annotation file.
utl.anno_info(anno_path)

loading annotations into memory...
Done (t=0.11s)
creating index...
index created!


{'name': 'train',
 'anno_path': 'D:\\ASBEST!\\ALLVEINS\\train\\annotation.json',
 'anno_fname': 'annotation.json',
 'image_dir_path': 'D:\\ASBEST!\\ALLVEINS\\train',
 'length': 249,
 'anno_number': 4237,
 'class_id': [1, 2],
 'class_names': ['asbest', 'stone'],
 'height': [2048, 3456],
 'width': [2592, 5184],
 'COCO_obj': <pycocotools.coco.COCO at 0x187f0d6eb38>,
 'image_fname_example': 'D:\\ASBEST!\\ALLVEINS\\train\\14-02-21_5_1_9_part1.bmp'}