## Data imports

In [1]:
import datetime
import json
import re
import fnmatch
from utils.preprocess import pycococreatortools
from utils.preprocess.tif_process import *
from utils.preprocess.slice_dataset import slice

Specify the dataset parameters

In [6]:
# Directory holding the source rasters (tif) and the vector markup (shp);
# img_path and shp_path are passed to clip_from_file together with ROOT.
ROOT = r'/home/ari/data/ZU/fixed_20220222/train'
img_path = 'Raster'
shp_path = 'Razmetka2'

# Clip size handed to clip_from_file (presumably the tile side in pixels --
# TODO confirm against clip_from_file).
clip_size = 1256

# Directory under ROOT that receives the generated dataset (tiles and masks).
ROOT_DIR = '/'.join([ROOT, 'dataset_1256'])
# NOTE(review): main() passes "uchastok_2021" as the image directory name and
# from_mask_to_coco builds its own local IMAGE_DIR / ANNOTATION_DIR, so these
# two module-level values look unused in the visible code -- confirm before
# relying on them.
IMAGE_DIR = os.path.join(ROOT_DIR, "uchastok_2019")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "annotations")

## Configs (do not change)

In [7]:
# Dataset-level metadata; from_mask_to_coco stores it under the "info" key of
# every COCO file it writes.
INFO = {
    "description": "ZemleUchastki",
    "url": "",
    "version": "0.1.1",
    "year": 2021,
    "contributor": "Ibragim, Shamil",
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the stored string keeps the same naive
    # "date time" form (no "+00:00" suffix) as before.
    "date_created": datetime.datetime.now(datetime.timezone.utc)
                            .replace(tzinfo=None)
                            .isoformat(' ')
}

# Stored under the "licenses" key of the COCO output; a single placeholder entry.
LICENSES = [
    {
        "id": 1,
        "name": "",
        "url": ""
    }
]

# Stored under the "categories" key. from_mask_to_coco also looks for each
# 'name' as a substring of the annotation file path to pick the class id.
CATEGORIES = [
    {
        'id': 1,
        'name': 'uchastok',
        'supercategory': 'land',
    },
]

## Code (do not change)

In [8]:
def filter_for_jpeg(root, files):
    """Return the joined paths of the entries in ``files`` that are TIFF images.

    The name is historical: the function selects ``*.tiff`` / ``*.tif``
    files, not JPEGs. Matching is case-sensitive and is applied to the full
    path produced by ``os.path.join(root, name)``.

    Args:
        root: Directory the file names belong to.
        files: Iterable of file names inside ``root``.

    Returns:
        List of ``root``-joined paths, in the order of ``files``.
    """
    patterns = ('*.tiff', '*.tif')
    joined_paths = (os.path.join(root, name) for name in files)
    return [
        path
        for path in joined_paths
        if any(fnmatch.fnmatchcase(path, pattern) for pattern in patterns)
    ]

def filter_for_annotations(root, files, image_filename):
    """Return the ``*.tif`` annotation paths that belong to ``image_filename``.

    An annotation belongs to the image when the part of its base name
    (extension removed) before the first underscore equals the image's base
    name (extension removed). Because of that rule, an image whose base name
    itself contains an underscore never matches any annotation.

    Args:
        root: Directory the annotation file names belong to.
        files: Iterable of file names inside ``root``.
        image_filename: Path of the image whose annotations are wanted.

    Returns:
        List of ``root``-joined annotation paths, in the order of ``files``.
    """
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]

    matching_paths = []
    for name in files:
        path = os.path.join(root, name)
        # Case-sensitive extension check on the full path.
        if not fnmatch.fnmatchcase(path, '*.tif'):
            continue
        annotation_stem = os.path.splitext(os.path.basename(path))[0]
        if annotation_stem.split('_', 1)[0] == image_stem:
            matching_paths.append(path)

    return matching_paths

def from_mask_to_coco(root, MARK, IMAGE, ANNOTATION):
    """Write a COCO-style JSON file for one dataset split.

    Every TIFF found under ``<root>/<MARK>/<IMAGE>`` becomes an entry in
    ``images``; every mask under ``<root>/<MARK>/<ANNOTATION>`` that
    ``filter_for_annotations`` associates with that image is converted to a
    binary mask and passed to ``pycococreatortools.create_annotation_info``.
    The result is dumped to ``<root>/<MARK>/uchastok_<MARK>2021.json``.

    Args:
        root: Dataset directory that contains one sub-directory per split.
        MARK: Split sub-directory name; also embedded in the output file name.
        IMAGE: Name of the image sub-directory inside the split.
        ANNOTATION: Name of the mask sub-directory inside the split.

    Returns:
        None. If the split directory does not exist, a message is printed and
        nothing is written.

    Raises:
        IndexError: If an annotation path contains none of the names listed
            in ``CATEGORIES``.
    """
    ROOT_DIR = root + '/' + MARK
    IMAGE_DIR = ROOT_DIR + '/' + IMAGE
    ANNOTATION_DIR = ROOT_DIR + '/' + ANNOTATION

    if not os.path.exists(ROOT_DIR):
        print(ROOT_DIR + ' does not exist!')
        return

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    # Nothing is written into the annotation tree while this function runs,
    # so list it once instead of re-walking the directory for every image.
    annotation_dirs = [
        (ann_root, ann_files)
        for ann_root, _, ann_files in os.walk(ANNOTATION_DIR)
    ]

    image_id = 1
    segmentation_id = 1

    # Distinct loop names keep the `root` parameter from being shadowed.
    for image_root, _, image_names in os.walk(IMAGE_DIR):
        for image_filename in filter_for_jpeg(image_root, image_names):
            # Only the size is needed; close the file handle right away so
            # large datasets do not exhaust file descriptors.
            with Image.open(image_filename) as image:
                image_size = image.size

            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(image_filename), image_size)
            coco_output["images"].append(image_info)

            for ann_root, ann_files in annotation_dirs:
                annotation_files = filter_for_annotations(
                    ann_root, ann_files, image_filename)

                for annotation_filename in annotation_files:
                    print(annotation_filename)
                    # The category is chosen by substring match of its name
                    # against the full annotation path.
                    class_id = [x['id'] for x in CATEGORIES
                                if x['name'] in annotation_filename][0]

                    category_info = {'id': class_id,
                                     'is_crowd': 'crowd' in image_filename}

                    # convert() returns a new in-memory image and astype()
                    # copies, so the mask stays valid after the file closes.
                    with Image.open(annotation_filename) as mask_image:
                        binary_mask = np.asarray(
                            mask_image.convert('1')).astype(np.uint8)

                    annotation_info = pycococreatortools.create_annotation_info(
                        segmentation_id, image_id, category_info, binary_mask,
                        image_size, tolerance=2)

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    # The id advances even when no annotation was produced.
                    segmentation_id = segmentation_id + 1

            image_id = image_id + 1

    with open('{}/uchastok_{}2021.json'.format(ROOT_DIR, MARK), 'w') as output_json_file:
        json.dump(coco_output, output_json_file)

## Launching code

In [9]:
def main():
    """Run the whole dataset-preparation pipeline.

    Calls ``clip_from_file`` with the configured clip size and source paths,
    then ``slice`` with the 0.8 / 0.15 / 0.05 train / eval / test fractions,
    and finally writes one COCO JSON per split via ``from_mask_to_coco``.
    ``slice`` here is the function imported from
    ``utils.preprocess.slice_dataset``, not the Python builtin.
    """
    clip_from_file(clip_size, ROOT, img_path, shp_path, ROOT_DIR)
    slice(ROOT_DIR, train=0.8, eval=0.15, test=0.05)

    # Same image / annotation directory names for every split.
    for split_name in ('train', 'eval', 'test'):
        from_mask_to_coco(ROOT_DIR, split_name, "uchastok_2021", "annotations")


if __name__ == "__main__":
    main()

create dir /home/ari/data/ZU/fixed_20220222/train/dataset_1256/annotations
create dir /home/ari/data/ZU/fixed_20220222/train/dataset_1256/uchastok_2021


KeyboardInterrupt: 