In [1]:
# Tool to crawl all files with a specific extension from a directory and subdirectories

# Load dependencies
from tkinter import filedialog
from tkinter import * # File dialog
import os
import matplotlib.pyplot as plt # For plotting
import numpy as np
from shutil import copyfile # Copy file
import cv2
from skimage.color import rgb2gray
from skimage import img_as_ubyte
from skimage import measure
from skimage.measure import approximate_polygon
import glob # For importing multiple files
import json

In [2]:
# ---- Settings the user may edit ----

# Glob pattern that picks the files to import
select_ext = "*.png"
# Extension for the output file
output_ext = ".JSON"
# Suffix string
suffix = "_label"

# Object profile: attribute name and its value
prof, obj_prof = "PROFILE", "hollow"

# Quality: attribute name and its value.
# 'good' corresponds to 1; everything else counts as bad (0).
qual, obj_qual = "QUALITY", "good"

# Label value used for the binary class
binary_class = 1

In [3]:
# Get user-specific folder selection
%gui tk
root = Tk()
root.withdraw()
root.update()
folder_selected = filedialog.askdirectory()
root.destroy() # Destroy root window

In [4]:
# Create a list with all files in the selected folder that match the import pattern.
# Only the folder itself is searched; the pattern is not applied recursively.
# glob.glob() returns matches in arbitrary order, so the list is sorted to keep
# listFiles[0] and the order of entries in the generated JSON reproducible.
listFiles = sorted(glob.glob(os.path.join(folder_selected, select_ext)))

In [38]:
# Create a mask only with 1 class
# If multiple classes, loop through 1 class first (all contours in 1 class)
# then over images





In [6]:
# Inspect the first image of the list to derive the label colours and class count.
if not listFiles:
    raise FileNotFoundError('listFiles is empty: no image available to inspect.')
first_path = listFiles[0]
f_img = cv2.imread(first_path)
# cv2.imread() reports failure by returning None instead of raising
if f_img is None:
    raise IOError('Could not read image: %s' % first_path)

# Unique colour triplets, one row per colour.
# The image comes straight from cv2.imread(), so the channel order is BGR here.
uniques = np.unique(f_img.reshape(-1, f_img.shape[-1]), axis=0)

if len(uniques) == 256:
    # NOTE(review): exactly 256 unique colours is treated as the binary case
    # (presumably a full grey ramp) - confirm this is the intended test.
    num_classes = 1
    # Use a single pseudo colour (white) for the binary class;
    # black is reserved for the background.
    colors_trunc = [(255, 255, 255)]
else:
    # Number of classes: counts every unique colour, background included if present
    num_classes = len(uniques)
    # Remove black (reserved for background). Filter on the actual colour value
    # rather than dropping row 0, so a real class is not lost when the image
    # contains no black pixels.
    colors_trunc = uniques[np.any(uniques != 0, axis=1)]
    # Convert array rows to tuples of plain Python ints
    colors_trunc = [tuple(x) for x in colors_trunc.tolist()]

In [7]:
# Display the current value of num_classes as the cell output
num_classes

2

In [90]:
# FINAL
#
# For every image in listFiles: binarise it, discard tiny blobs, trace the outer
# contours and store each contour as a polygon region. The per-image entries are
# collected in main_dict, which is finally written to 'data_full.json'.
# NOTE(review): the dict layout looks like the VGG Image Annotator (VIA) polygon
# format - confirm against whatever consumes the JSON file.

# Minimum size of particles we want to keep (number of pixels).
# Fixed value here; defined once because it does not change between images.
min_size = 5

# Get number of images from list
n_img = len(listFiles)
main_dict = {}
for counter, filename in enumerate(listFiles, start=1):
    # Read image; cv2.imread() returns None (no exception) when it cannot decode the file
    img = cv2.imread(filename)
    if img is None:
        raise IOError('Could not read image: %s' % filename)
    # Get file size - needed for JSON
    f_size = os.path.getsize(filename)
    # Get filename without path
    th_fname = os.path.basename(filename)
    # Swap BGR to RGB
    t_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Find unique RGB values for classes.
    # Not used further in this cell; kept because num_classes is a notebook-level
    # variable that is displayed elsewhere.
    uniques = np.unique(t_img.reshape(-1, t_img.shape[-1]), axis=0)
    # Number of classes
    num_classes = len(uniques)

    # Obtain binary image: every pixel with a grey value above 0 becomes 255
    ret, thres_img = cv2.threshold(cv2.cvtColor(t_img, cv2.COLOR_RGB2GRAY),
                                   0, 255, cv2.THRESH_BINARY)

    # Find all connected components (white blobs) together with their statistics
    nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(
        thres_img, connectivity=8)
    # Label 0 is the background, which is also reported as a component: skip it
    sizes = stats[1:, cv2.CC_STAT_AREA]
    nb_components = nb_components - 1

    # Create new image excluding small particles.
    # Component labels start at 1, hence the +1 on the indices into sizes.
    keep_labels = np.flatnonzero(sizes >= min_size) + 1
    clean_bw = np.zeros(output.shape, np.uint8)
    clean_bw[np.isin(output, keep_labels)] = 255

    # Take external contours only.
    # [-2:] picks (contours, hierarchy) whether findContours returns 2 or 3 values.
    contours, hier = cv2.findContours(
        clean_bw, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Get number of contours
    n_contour = len(contours)

    # Regions of this image, keyed by the contour index as a string
    region_dict = {}

    # Loop through contours in this image
    for jj, th_cont in enumerate(contours):
        # Flatten to (n_points, 2). Unlike np.squeeze this keeps two
        # dimensions even when a contour consists of a single point.
        th_cont = th_cont.reshape(-1, 2)
        x_coord = th_cont[:, 0].tolist()
        y_coord = th_cont[:, 1].tolist()
        # (truncated) dict for regions which will be added to the main JSON file under 'regions'
        region_dict[str(jj)] = {
            "shape_attributes": {
                "name": "polygon",
                "all_points_x": x_coord,
                "all_points_y": y_coord,
            },
            "region_attributes": {}
        }

    # Main dict for JSON file for this image; the key is file name + file size
    annotation = {
        th_fname + str(f_size): {
            "fileref": "",
            "size": f_size,
            "filename": th_fname,
            "base64_img_data": "",
            "file_attributes": {},
            "regions": region_dict
        }
    }
    # Add main dict with all 'regions' (with x-y coordinates for polygons)
    main_dict.update(annotation)
    ### Print output
    print('Output: ##### Create JSON-dict: %d out of %d images. #####' % (counter, n_img))

# Write the collected annotations first, then report completion
with open('data_full.json', 'w', encoding='utf-8') as output_file:
    json.dump(main_dict, output_file, ensure_ascii=False, indent=4)

print('Output: ##### JSON-file completed. #####')


xxxxx %d 0
xxxxx %d 1
xxxxx %d 2
xxxxx %d 3
xxxxx %d 4
xxxxx %d 5
xxxxx %d 6
xxxxx %d 7
xxxxx %d 8
xxxxx %d 9
xxxxx %d 10
xxxxx %d 11
xxxxx %d 12
xxxxx %d 13
xxxxx %d 14
xxxxx %d 15
xxxxx %d 16
xxxxx %d 17
xxxxx %d 18
xxxxx %d 19
xxxxx %d 20
xxxxx %d 21
xxxxx %d 22
xxxxx %d 23
xxxxx %d 24
xxxxx %d 25
xxxxx %d 26
xxxxx %d 27
Output: ##### Create JSON-dict: 1 out of 10 images. #####
xxxxx %d 0
xxxxx %d 1
xxxxx %d 2
xxxxx %d 3
xxxxx %d 4
xxxxx %d 5
xxxxx %d 6
xxxxx %d 7
xxxxx %d 8
xxxxx %d 9
xxxxx %d 10
xxxxx %d 11
xxxxx %d 12
xxxxx %d 13
xxxxx %d 14
xxxxx %d 15
xxxxx %d 16
xxxxx %d 17
xxxxx %d 18
xxxxx %d 19
xxxxx %d 20
xxxxx %d 21
xxxxx %d 22
xxxxx %d 23
xxxxx %d 24
xxxxx %d 25
xxxxx %d 26
xxxxx %d 27
xxxxx %d 28
xxxxx %d 29
Output: ##### Create JSON-dict: 2 out of 10 images. #####
xxxxx %d 0
xxxxx %d 1
xxxxx %d 2
xxxxx %d 3
xxxxx %d 4
xxxxx %d 5
xxxxx %d 6
xxxxx %d 7
xxxxx %d 8
xxxxx %d 9
xxxxx %d 10
xxxxx %d 11
xxxxx %d 12
xxxxx %d 13
xxxxx %d 14
xxxxx %d 15
xxxxx %d 16
xxxxx %d 17
xx