In [192]:
import cv2
import os
import json

In [194]:
def load_image(path):
    """
    Reads the image file at the given location using OpenCV.

    :param path: Filesystem path of the image to read, as a string.
    :return: The value produced by ``cv2.imread`` for that path.
    """
    loaded = cv2.imread(path)
    return loaded

In [195]:
def save_image(image, output_directory, filename):
    """
    Saves an image to ``output_directory/filename``.

    :param image: The image to save.
    :param output_directory: The directory to write the file to.
    :param filename: The filename of the output image.
    :return: The result of ``cv2.imwrite`` for the joined path, so that callers
        can detect a write that did not succeed instead of it being discarded.
    """
    path = os.path.join(output_directory, filename)
    # Hand the writer's status back to the caller rather than dropping it.
    return cv2.imwrite(path, image)

In [196]:
def crop_image(image, x, y, width, height):
    """
    Crops an image to a rectangle.

    Coordinates that fall before the start of the image are clamped to 0, so a
    box that partially hangs off the top/left edge yields the visible part of
    the box rather than being interpreted as a from-the-end slice index.

    :param image: The image to crop; indexed as ``image[rows, columns]``
        (assumes a 2-D sliceable array such as the one OpenCV returns).
    :param x: The x-coordinate of the bounding box to crop.
    :param y: The y-coordinate of the bounding box to crop.
    :param width: The width of the box to crop.
    :param height: The height of the box to crop.
    :return: The cropped image.
    """
    # Negative slice bounds would count from the end of the axis, so clamp
    # both the start and the end of each axis to zero before slicing.
    left = max(x, 0)
    top = max(y, 0)
    right = max(x + width, 0)
    bottom = max(y + height, 0)
    return image[top:bottom, left:right]

In [197]:
def load_json(json_file_path):
    """
    Loads a json file.

    :param json_file_path: The path to the json file.
    :return: The loaded json file object.
    """
    # Open explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(json_file_path, encoding="utf-8") as json_file:
        return json.load(json_file)

In [198]:
def get_filename_and_regions_from_via_json(via_json_file_path):
    """
    Parses the output via json to get a dictionary where the key is the filename and the value is an array of
    json objects that describe the regions to crop.

    :param via_json_file_path: The path to the via json file.
    :return: A dictionary where the keys are the filenames and the values are the regions associated with the file.
    """
    # Named so that it does not shadow the imported ``json`` module.
    via_project = load_json(via_json_file_path)
    image_metadata = via_project["_via_img_metadata"]
    # Only the entries themselves are needed; the keys of the metadata mapping are not used.
    return {entry["filename"]: entry["regions"] for entry in image_metadata.values()}

In [199]:
def get_shape(region):
    """
    Reads the shape name stored on a region.

    :param region: The region whose ``shape_attributes`` are inspected.
    :return: The value stored under ``shape_attributes -> name``.
    """
    attributes = region["shape_attributes"]
    return attributes["name"]
    

In [200]:
def get_rect_bbox(region):
    """
    Extracts the rectangle that bounds a region.

    :param region: The region to read the bounding box from.
    :return: ``[x, y, width, height]`` when the region's shape is ``"rect"``, otherwise ``None``.
    """
    attrs = region["shape_attributes"]
    # Anything that is not a rectangle has no bounding box here.
    if get_shape(region) != "rect":
        return None

    return [attrs[key] for key in ("x", "y", "width", "height")]

In [201]:
def get_region_type(region):
    """
    Looks up the ``type`` attribute of a region.

    :param region: The region whose ``region_attributes`` are inspected.
    :return: The value stored under ``type``, or ``None`` when that key is absent.
    """
    attributes = region["region_attributes"]
    return attributes["type"] if "type" in attributes else None

In [202]:
def create_directory_name_from_filename(filename):
    """
    Creates a directory name from a filename by dropping its extension.

    Only the final extension is removed, so filenames that contain extra dots
    (e.g. ``scan.v2.jpg``) keep their full stem and do not collide with each other.

    :param filename: The name of the file to use to create a directory name from.
    :return: Directory name.
    """
    stem, _extension = os.path.splitext(filename)
    return stem

In [203]:
def create_directory(path, directory_name):
    """
    Creates a directory if it doesn't already exist.

    :param path: The folder the directory will be made in.
    :param directory_name: The name of the directory to make; it is passed
        through ``clean_directory_name`` before being joined to ``path``.
    :return: The filepath to the directory.
    """
    cleaned_directory_name = clean_directory_name(directory_name)
    full_path = os.path.join(path, cleaned_directory_name)
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(full_path, exist_ok=True)

    return full_path
    

In [209]:
def clean_directory_name(directory_name):
    """
    Normalises a directory name.

    Forward slashes and spaces become hyphens and the result is lower-cased.

    :param directory_name: The raw directory name.
    :return: The normalised string.
    """
    hyphenate = str.maketrans({"/": "-", " ": "-"})
    return directory_name.translate(hyphenate).lower()

In [230]:
def main(via_json_path, image_directory, output_directory, file_name_regions_dict):
    """
    Main entry point for the program.

    For every file in ``file_name_regions_dict`` the image is loaded, each rectangular region is cropped out,
    and the crop is written to ``<output_directory>/<file directory>/<region type>/<region index>.jpg``.

    :param via_json_path: The path to the json file that describes the regions in the images.
        Not read by this function; kept so existing callers keep working.
    :param image_directory: Directory path which contains the images the json file describes.
    :param output_directory: The path to the output directory to write all the data to.
    :param file_name_regions_dict: A dictionary where the keys are the filenames and the values are a list of regions.
    """
    for filename, regions in file_name_regions_dict.items():
        image = load_image(os.path.join(image_directory, filename))
        if image is None:
            print("file {} could not be found".format(filename))
            continue

        current_file_output_directory = create_directory(
            output_directory, create_directory_name_from_filename(filename)
        )
        for i, region in enumerate(regions):
            bbox = get_rect_bbox(region)
            if bbox is None:
                # get_rect_bbox returns None for shapes other than "rect";
                # unpacking None below would raise a TypeError, so skip the region.
                print("file {} had a non-rectangular region that was skipped".format(filename))
                continue

            region_type = get_region_type(region)
            if region_type is None:
                print("file {} had an uncategorized box".format(filename))
                region_type = "uncategorized"

            cropped_image = crop_image(image, *bbox)
            region_type_output_directory = create_directory(current_file_output_directory, region_type)
            # The region index is used as the filename so crops of the same type do not overwrite each other.
            save_image(cropped_image, region_type_output_directory, "{}.jpg".format(i))

In [236]:
# Locations of the input data, the VIA project file, and the output folder.
DATA_DIRECTORY = "../data"
# Built from the DATA_DIRECTORY constant defined above (the lowercase name was never defined).
IMAGE_DIRECTORY = os.path.join(DATA_DIRECTORY, "test-2")
VIA_JSON_PATH = "../data/test-2/via_project_23Nov2021_2h12m (1).json"
OUTPUT_DIRECTORY = "../output/test-2"

In [237]:
# Map each image filename in the VIA project to its list of annotated regions.
file_name_regions_dict = get_filename_and_regions_from_via_json(VIA_JSON_PATH)

In [238]:
# Crop every annotated region and write the results under OUTPUT_DIRECTORY.
main(
    via_json_path=VIA_JSON_PATH,
    image_directory=IMAGE_DIRECTORY,
    output_directory=OUTPUT_DIRECTORY,
    file_name_regions_dict=file_name_regions_dict,
)

file user1855_4444.jpg had an uncategorized box
file user2571_1476.jpg could not be found
file user2572_3323.jpg could not be found
file user2573_3958.jpg could not be found
file user2574_4577.jpg could not be found
file user2575_1477.jpg could not be found
file user2576_94.jpg could not be found
file user2577_2098.jpg could not be found
file user2578_3959.jpg could not be found
file user2578_3960.jpg could not be found
file user2579_3324.jpg could not be found
file user2580_2099.jpg could not be found
file user2583_97.jpg could not be found
file user2584_1478.jpg could not be found
file user2586_2721.jpg could not be found
file user2587_2101.jpg could not be found
file user2588_3325.jpg could not be found
file user2591_2725.jpg could not be found
file user2592_3326.jpg could not be found
file user2593_98.jpg could not be found
file user2594_1482.jpg could not be found
file user1848_1922.jpg could not be found
file user1849_4885.jpg could not be found
file user1853_666.jpg could not be