In [2]:
from matplotlib import pyplot as plt
import numpy as np
from pprint import pprint

import warnings

import rasterio
from rasterio.windows import Window
import geopandas as gpd
from rasterio.features import rasterize

from collections import defaultdict
import random

from PIL import Image

from pystac import (Catalog, CatalogType, Item, Asset, LabelItem, Collection)
from shapely.geometry import box, Point, Polygon, MultiPolygon

from skimage.util.shape import view_as_windows
from skimage import measure

import cv2

from pycocotools import mask as pycoco_Mask

import secrets
import os

In [3]:
import imageio
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
from imgaug.augmentables.batches import UnnormalizedBatch

In [69]:
!pip install cython
!pip install pycocotools

/bin/sh: 1: sudo: not found
Collecting pycocotools
  Downloading pycocotools-2.0.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 23.5 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25ldone
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0.0-cp36-cp36m-linux_x86_64.whl size=275976 sha256=1dac38bb895ace89ea5fdb420647c78d10eb379184fde31142accb1ef5ef14ce
  Stored in directory: /root/.cache/pip/wheels/64/7a/c0/ac8f633d11a5f1a6902c72acb9fa828a2bb3639afba4e94a6c
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.0


In [4]:
train1_cat = Catalog.from_file('/storage/Open_Cities_AI_Challenge/Data/train_tier_1/catalog.json')

In [5]:
collections = {cols.id:cols for cols in train1_cat.get_children()}
collections

{'acc': <Collection id=acc>,
 'mon': <Collection id=mon>,
 'ptn': <Collection id=ptn>,
 'kam': <Collection id=kam>,
 'dar': <Collection id=dar>,
 'znz': <Collection id=znz>,
 'nia': <Collection id=nia>}

In [6]:
def get_dict_of_collections():
    ids = defaultdict(list)
    ids_labels = defaultdict(list)
    for a in collections:
        for i in collections[a].get_all_items():
            if 'label' not in i.id:
                ids[a].append(i.id)
            else:
                ids_labels[a].append(i.id)
    return ids, ids_labels

In [7]:
ids, ids_labels = get_dict_of_collections()

In [8]:
for a in ids:
    print(a, ids[a])
for a in ids_labels:
    print(a, ids_labels[a])

acc ['665946', 'a42435', 'ca041a', 'd41d81']
mon ['401175', '493701', '207cc7', 'f15272']
ptn ['abe1a3', 'f49f31']
kam ['4e7c7f']
dar ['a017f9', 'b15fce', '353093', 'f883a0', '42f235', '0a4c40']
znz ['33cae6', '3b20d4', '076995', '75cdfa', '9b8638', '06f252', 'c7415c', 'aee7fd', '3f8360', '425403', 'bd5c14', 'e52478', 'bc32f1']
nia ['825a50']
acc ['665946-labels', 'a42435-labels', 'ca041a-labels', 'd41d81-labels']
mon ['401175-labels', '493701-labels', '207cc7-labels', 'f15272-labels']
ptn ['abe1a3-labels', 'f49f31-labels']
kam ['4e7c7f-labels']
dar ['a017f9-labels', 'b15fce-labels', '353093-labels', 'f883a0-labels', '42f235-labels', '0a4c40-labels']
znz ['33cae6-labels', '3b20d4-labels', '076995-labels', '75cdfa-labels', '9b8638-labels', '06f252-labels', 'c7415c-labels', 'aee7fd-labels', '3f8360-labels', '425403-labels', 'bd5c14-labels', 'e52478-labels', 'bc32f1-labels']
nia ['825a50-labels']


In [9]:
def get_list_of_collections_no_labels():
    ids_list = []
    for a in collections:
        for i in collections[a].get_all_items():
            if 'label' not in i.id:
                ids_list.append((a, i.id))
    return ids_list

In [10]:
def get_list_of_collections_full():
    ids_list = []
    for a in collections:
        for i in collections[a].get_all_items():
            ids_list.append((a, i.id))
    return ids_list

In [11]:
ids_list = get_list_of_collections_no_labels()

In [12]:
def build_out_href_ids():
    for a in ids:
        for i in collections[str(a)].get_all_items():
            print(i.id)
            #pprint(i.properties)
            if 'label' in i.id:
                gpd.read_file(i.make_asset_hrefs_absolute().assets['labels'].href)
                pass
            else: 
                #print('raster metadata:')
                rasterio.open(i.make_asset_hrefs_absolute().assets['image'].href).meta

In [13]:
build_out_href_ids()

665946
665946-labels
a42435
a42435-labels
ca041a
ca041a-labels
d41d81
d41d81-labels
401175
401175-labels
493701
493701-labels
207cc7
207cc7-labels
f15272
f15272-labels
abe1a3
abe1a3-labels
f49f31
f49f31-labels
4e7c7f
4e7c7f-labels
a017f9
a017f9-labels
b15fce
b15fce-labels
353093
353093-labels
f883a0
f883a0-labels
42f235
42f235-labels
0a4c40
0a4c40-labels
33cae6
33cae6-labels
3b20d4
3b20d4-labels
076995
076995-labels
75cdfa
75cdfa-labels
9b8638
9b8638-labels
06f252
06f252-labels
c7415c
c7415c-labels
aee7fd
aee7fd-labels
3f8360
3f8360-labels
425403
425403-labels
bd5c14
bd5c14-labels
e52478
e52478-labels
bc32f1
bc32f1-labels
825a50
825a50-labels


In [14]:
def get_rasters_labels_and_image():
    raster_and_label_and_images_list = defaultdict(list)
    for a in ids:
        for b in ids[a]:
            image = collections[str(a)].get_item(id=str(b))
            labels = collections[str(a)].get_item(id=str(b)+'-labels')
            #labels_gdf = gpd.read_file(labels.assets['labels'].href)
            raster = rasterio.open(image.assets['image'].href)
            
            #print(raster.res)
            raster_and_label_and_images_list[a].append([b, raster, labels, image])
            
    return raster_and_label_and_images_list

In [15]:
def center(minx, miny, maxx, maxy):
    center_x = (maxx+minx)/2
    center_y = (maxy+miny)/2
    return(Point(center_x, center_y))

def set_random_center_within_poly(x_max, y_max, s, image_poly, raster, y_labels_gdf):
    is_center = False
    mask_threshold_met = False
    mask_ratio_met = False
    while not is_center:
        while not mask_threshold_met:
            x_sample, y_sample = random.randrange(0, x_max), random.randrange(0, y_max)
            test_window = Window(x_sample, y_sample, s, s)
            test_box = box(*rasterio.windows.bounds(test_window, raster.meta['transform']))
            test_box_gdf = gpd.GeoDataFrame(geometry=[test_box], crs=raster.meta['crs'])
            with warnings.catch_warnings():
            # ignore all caught warnings
                warnings.filterwarnings("ignore")
            # execute code that will generate warnings
                test_box_gdf = test_box_gdf.to_crs({'init':'epsg:4326'})
            test_chip = gpd.sjoin(y_labels_gdf, test_box_gdf, how='inner', op='intersects')
            test_chip_shapes = [(geom, 255) for geom in test_chip.geometry]
            if len(test_chip_shapes) > 3:
                mask_threshold_met = True
            else:
                break
            minx, miny, maxx, maxy = [test_box_gdf.bounds[a][0] for a in test_box_gdf.bounds]
            center_point = center(minx, miny, maxx, maxy)
            is_center = center_point.within(image_poly)
            if mask_threshold_met and not is_center:
                mask_threshold_met = False
    return(x_sample, y_sample)

In [16]:
def aug_seq():
    seq = iaa.Sequential([
        iaa.Fliplr(.5), # horizontal flips
        iaa.Flipud(.5), # vertical flips
        # Apply affine transformations to each image.
        # Scale/zoom them, translate/move them, rotate them and shear them.
        iaa.Affine(
        #scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        #translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-35, 35),
            shear=(-10, 10)),

        
    ], random_order=True) # apply augmenters in random order
    return seq

In [17]:
def window_creation(chips_x, chips_y, ISZ):
    dim_Max = chips_x.shape[0]
    windows = dim_Max // ISZ
    x_windows = view_as_windows(chips_x, (ISZ,ISZ,3), ISZ)[:windows,:windows,0,:,:]
    y_windows = view_as_windows(chips_y, (ISZ,ISZ), ISZ)[:windows,:windows,:,:]
    return x_windows, y_windows, windows

def chip_verification(x_windows, y_windows, dim_Max, ISZ, min_label_coverage, max_label_coverage):
    images_aug = aug_seq()
    
    x_chips_verified = []
    y_chips_verified = []
    for x in range(dim_Max):
        for y in range(dim_Max):
            label_coverage = np.count_nonzero(y_windows[x][y])/(ISZ*ISZ)
            if label_coverage >= min_label_coverage and label_coverage <= max_label_coverage:
                
                x_aug, y_aug = images_aug(image = x_windows[x][y], segmentation_maps = 
                                          np.expand_dims(np.expand_dims(
                                              np.array(y_windows[x][y]), axis=3), axis = 0))
                
                #print(x_aug.shape, y_aug.shape)
                
                x_chips_verified.append(x_aug)
                x_chips_verified.append(x_windows[x][y])
                y_chips_verified.append(np.squeeze(y_aug))
                y_chips_verified.append(y_windows[x][y])
    
                #x_chips_verified.append(x_windows[x][y])
                #y_chips_verified.append(y_windows[x][y])
    
    return np.array(x_chips_verified), np.expand_dims(np.array(y_chips_verified), axis = 3)

In [18]:
def mask_to_polygons(mask, epsilon=.01, min_area=1.):
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    # image, (remove from original) 
    contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    if not contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons

def group_of_polygons(polygon_group, im_size):
    polygon_mask_group = []
    polygons = [polygon_group.geoms[a] for a in range(len(polygon_group.geoms))]
    #polygons = list(polygons)
    for _ in polygons:
        img_mask = np.zeros([im_size, im_size], np.uint8)
        if not polygons:
            return img_mask
        int_coords = lambda x: np.array(x).round().astype(np.int32)
        exterior = int_coords(_.exterior.coords)
        interior = [int_coords(pi.coords) for pi in _.interiors]
        if len(interior) == 0:
            cv2.fillConvexPoly(img_mask, exterior, 1)
            cv2.fillPoly(img_mask, interior, 0)
        else:
            cv2.fillConvexPoly(img_mask, exterior, 1)
            cv2.fillPoly(img_mask, interior, 0)
        polygon_mask_group.append(img_mask)
    return polygon_mask_group

In [19]:
def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation
    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)
    contours = np.subtract(contours, 1)
    for contour in contours:
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation 
        segmentation = [0.5 if i <= 0 else i for i in segmentation]
        #print('segmentation:', segmentation)
        polygons.append([segmentation]) # x,y pairs in sequence, no sub-grouping, [] for each polygon mask 

    return polygons

def close_contour(contour):
    if not np.array_equal(contour[0], contour[-1]):
        contour = np.vstack((contour, contour[0]))
    return contour

def resize_binary_mask(array, new_size):
    image = Image.fromarray(array.astype(np.uint8)*255)
    image = image.resize(new_size)
    return np.asarray(image).astype(np.bool_)

In [24]:
folder_dir = '/storage/Open_Cities_AI_Challenge/Data/train_tier_1/Mask_RCNN Dataset/'

In [20]:
def create_meta_image(S = 4096, test = True, ISZ = 512):#, coco_output_train = {}, coco_output_val = {}):
    
    #coco_output_train = coco_output_train
    #coco_output_val = coco_output_val
    
    s = S

    raster_labels_and_images_list = get_rasters_labels_and_image()
    
    #x_out = np.zeros((meta_x * s, meta_x * s, 3), dtype='uint8')
    #y_out = np.zeros((meta_x * s, meta_x * s), dtype='uint8')
 
    added_round_2 = [
                     ('znz', 'aee7fd')]

    
    
    #JSON under 100mb in size
    already_added = [('znz', 'bd5c14'),('nia', '825a50'),('znz', '06f252'),('znz', '076995'),('znz', '33cae6'),
                     ('kam', '4e7c7f'),('znz', 'bc32f1'),('znz', 'c7415c'),('znz', '75cdfa'),
                     ('mon', 'f15272'),('acc', 'd41d81'),('acc', 'ca041a'),('znz', '3f8360'),('mon', '493701'),
                     ('dar', 'b15fce'),('acc', 'a42435'),('dar', '42f235')]
    
    
    no_initial_results = [('znz', 'aee7fd')]
    

    broken_window_size = [('znz', '9b8638')] # This is the broken window size dataset
    
    
    # This is a collection of very few buildings, or has sub-sets which are very poorly labeled
    ids_maybe_round_two = [('znz', 'e52478'), ('dar', '353093'),('znz', '425403'),('znz', '3b20d4')]
    ids_need_manual_selection = [('dar', '0a4c40')]
    ids_high_zoom = [('acc', '665946')] # Not included
    ids_low_zoom = [('ptn', 'abe1a3'),('ptn', 'f49f31')] # Also not included
    
    
    
    
    
    image_id = 0
    segmentation_id = 0
    
    INFO = {
    "description": "Building Dataset - DrivenData Open Cities AI Challenge:\
    Segmenting Buildings for Disaster Resilience",
    "url": "",
    "version": "0.2.0",
    "year": 2020,
    "contributor": "risuo",
    }

    LICENSES = [
        {
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
        }
    ]

    CATEGORIES = [
        {
            'id': 1,
            'name': 'buiding',
            'supercategory': 'buildings',
        },
    ]

    coco_output_val = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }
    
    coco_output_trn = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    for a in added_round_2:
        print(a)
        for img_indx, item in enumerate(raster_labels_and_images_list[str(a[0])]):
            if item[0] == a[1]:

                raster   = raster_labels_and_images_list[str(a[0])][img_indx][1]
                y_labels = raster_labels_and_images_list[str(a[0])][img_indx][2]
                image    = raster_labels_and_images_list[str(a[0])][img_indx][3]
                y_labels_gdf = gpd.read_file(y_labels.assets['labels'].href)
                x_max = raster.meta['width']
                y_max = raster.meta['height']
                print('max dimensions:', x_max, y_max)
                x_min = 0
                y_min = 0
                strides_x = (x_max // s) + 1
                strides_y = (y_max // s) + 1
                print('strides:', strides_x, strides_y)

                
                for i in range(strides_x):
                    for j in range(strides_y):
                        #print('stride:', i, j)
                        x_sample, y_sample = (x_min + (s * i)), (y_min + (s * j))
                        test_window = Window(x_sample, y_sample, s, s)
                        test_box = box(*rasterio.windows.bounds(test_window, raster.meta['transform']))
                        test_box_gdf = gpd.GeoDataFrame(geometry=[test_box], crs=raster.meta['crs'])
                        with warnings.catch_warnings():
                            warnings.filterwarnings("ignore")
                            test_box_gdf = test_box_gdf.to_crs({'init':'epsg:4326'})
                        test_chip = gpd.sjoin(y_labels_gdf, test_box_gdf, how='inner', op='intersects')
                        test_chip_shapes = [(geom, 255) for geom in test_chip.geometry]

                        
                        if len(test_chip_shapes) == 0:
                            break
                        else:
                            x_offset, y_offset = x_sample, y_sample
                            window = Window(x_offset, y_offset, s, s)
                            win_box = box(*rasterio.windows.bounds(window, raster.meta['transform']))
                            win_box_gdf = gpd.GeoDataFrame(geometry=[win_box], crs=raster.meta['crs'])
                            

                            with warnings.catch_warnings():
                                # ignore all caught warnings
                                warnings.filterwarnings("ignore")
                                # execute code that will generate warnings
                                win_box_gdf = win_box_gdf.to_crs({'init':'epsg:4326'})


                            win_arr = raster.read(window=window)
                            win_arr = np.moveaxis(win_arr, 0, 2)

                            x_out = win_arr[:s, :s, 0:3]

                            gdf_chip = gpd.sjoin(y_labels_gdf, win_box_gdf, how='inner', op='intersects')
                            burn_val = 255
                            shapes = [(geom, burn_val) for geom in gdf_chip.geometry]
                            chip_tfm = rasterio.transform.from_bounds(*win_box_gdf.bounds.values[0], s, s)

                            labels_array_stacked = rasterize(shapes, (s, s), transform=chip_tfm, dtype='uint8')
                                          
                            y_out = labels_array_stacked[:s, :s]
                            
                            x_windows, y_windows, dim_Max = window_creation(x_out, y_out, ISZ)
                            del x_out, y_out
                            

                            x_chips_verified, y_chips_verified = chip_verification(x_windows, y_windows, dim_Max, ISZ, 
                                                                                   min_label_coverage = .10,
                                                                                   max_label_coverage = .90)
                            
                            print(x_chips_verified.shape, y_chips_verified.shape)
                            
                            #for _ in range(5):
                            #    fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5))
                            #    ax1.imshow(x_chips_verified[_])
                            #    ax2.imshow(x_chips_verified[_])
                            #    ax2.imshow(np.squeeze(y_chips_verified[_]), alpha=0.5)
                            #    plt.show()
                            #    plt.close()
                            #return
                            del x_windows, y_windows
                            
                            if len(x_chips_verified) < 1:
                                break

                            print('final shape x_chips_verified, y_chips_verified:', x_chips_verified.shape, y_chips_verified.shape)
                            print(len(x_chips_verified))
                                
                            coco_output_trn, coco_output_val, image_id, segmentation_id = convert_dataset_to_files(x_chips_verified, y_chips_verified, 
                                                                                                                   coco_output_trn, coco_output_val, 
                                                                                                                   image_id, segmentation_id, test, ISZ)
                                #print(image_id)
                                #print(coco_output["annotations"])
                            with open('/storage/coco json files/instances_shape_train_' + str(a[0]) + '_' + str(a[1]) + '.json', 'w') as output_json_file_1:
                                json.dump(coco_output_trn, output_json_file_1)
                            with open('/storage/coco json files/instances_shape_val_' + str(a[0]) + '_' + str(a[1]) + '.json', 'w') as output_json_file_2:
                                json.dump(coco_output_val, output_json_file_2)


In [21]:
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np


In [22]:
def create_image_info(image_id, file_name, image_size):

    image_info = {
            "id": image_id,
            "file_name": file_name,
            "width": image_size[0],
            "height": image_size[1]
    }

    return image_info

In [23]:
def train_val_split(x_chips_verified, y_chips_verified):
    threshold = int((len(x_chips_verified) * .85) // 1)
    x_trn, y_trn = x_chips_verified[:threshold], y_chips_verified[:threshold]
    x_val, y_val = x_chips_verified[threshold:], y_chips_verified[threshold:]
    return x_trn, y_trn, x_val, y_val

In [24]:
def convert_dataset_to_files(x_chips_verified, y_chips_verified, coco_output_trn, coco_output_val, image_id, segmentation_id, test, ISZ):
    image_id = image_id
    segmentation_id = segmentation_id
    coco_output_trn = coco_output_trn
    coco_output_val = coco_output_val
    ISZ = ISZ
    print('Converting:', len(x_chips_verified), ':files.')
    
    x_trn, y_trn, x_val, y_val = train_val_split(x_chips_verified, y_chips_verified)
    
    for _ in range(len(x_trn)):
        x_file = x_trn[_]
        y_file = y_trn[_]
        name = secrets.token_urlsafe(32)

        x_file = Image.fromarray(x_file)
        # Formatted for pre-set Google Drive Folder Structure
        image_loc_id = '/content/drive/My Drive/Detectron2 Datasets/OCAI Building Segmentation/' + 'train' + '/' + name + '.png'
        x_file.save('/storage/output_train_files/' + name + '.png')

        
        image_info = create_image_info(image_id, image_loc_id, (ISZ, ISZ))
        coco_output_trn["images"].append(image_info)

        #mask_coordinates = binary_mask_to_polygon(np.squeeze(y_file), tolerance=0)

        msk = y_file.clip(max=1)
        msk = mask_to_polygons(msk)
        msk = group_of_polygons(msk, 512)

        for mask in msk:

            mask_coordinates = binary_mask_to_polygon(np.squeeze(mask), tolerance=0)
            segmentation_id = segmentation_id
            class_id = 1
            is_crowd = 0
            binary_mask = resize_binary_mask(mask, (ISZ, ISZ))
            segmentation = mask_coordinates[0]
            binary_mask_encoded = pycoco_Mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
            area = pycoco_Mask.area(binary_mask_encoded)
            bounding_box = pycoco_Mask.toBbox(binary_mask_encoded)

            annotation_info = {
                "id": segmentation_id,
                "image_id": image_id,
                "category_id": class_id,
                "iscrowd": is_crowd,
                "area": area.tolist(),
                "bbox": bounding_box.tolist(),
                "segmentation": segmentation,
                "width": binary_mask.shape[1],
                "height": binary_mask.shape[0],
            }

            coco_output_trn["annotations"].append(annotation_info)

            segmentation_id += 1
        image_id += 1
    
    for _ in range(len(x_val)):
        x_file = x_val[_]
        y_file = y_val[_]
        name = secrets.token_urlsafe(32)


        x_file = Image.fromarray(x_file)
        # Formatted for pre-set Google Drive Folder Structure
        image_loc_id = '/content/drive/My Drive/Detectron2 Datasets/OCAI Building Segmentation/' + 'val' + '/' + name + '.png'
        x_file.save('/storage/output_val_files/' + name + '.png')

        image_info = create_image_info(image_id, image_loc_id, (ISZ, ISZ))
        coco_output_val["images"].append(image_info)

        msk = y_file.clip(max=1)
        msk = mask_to_polygons(msk)
        msk = group_of_polygons(msk, 512)


        for mask in msk:
            
            mask_coordinates = binary_mask_to_polygon(np.squeeze(mask), tolerance=0)
            segmentation_id = segmentation_id
            class_id = 1
            is_crowd = 0
            binary_mask = resize_binary_mask(mask, (ISZ, ISZ))
            segmentation = mask_coordinates[0]
            binary_mask_encoded = pycoco_Mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
            area = pycoco_Mask.area(binary_mask_encoded)
            bounding_box = pycoco_Mask.toBbox(binary_mask_encoded)

            annotation_info = {
                "id": segmentation_id,
                "image_id": image_id,
                "category_id": class_id,
                "iscrowd": is_crowd,
                "area": area.tolist(),
                "bbox": bounding_box.tolist(),
                "segmentation": segmentation,
                "width": binary_mask.shape[1],
                "height": binary_mask.shape[0],
            }

            coco_output_val["annotations"].append(annotation_info)

            segmentation_id += 1
        image_id += 1
    
    
    
    return coco_output_trn, coco_output_val, image_id, segmentation_id

In [30]:
import json

with open('/storage/coco json files/instances_shape_train.json') as f:
    coco_output_trn = json.load(f)

with open('/storage/coco json files/instances_shape_val.json') as g:
    coco_output_val = json.load(g)


In [25]:
%whos

Variable                            Type           Data/Info
------------------------------------------------------------
Asset                               type           <class 'pystac.item.Asset'>
Catalog                             ABCMeta        <class 'pystac.catalog.Catalog'>
CatalogType                         type           <class 'pystac.catalog.CatalogType'>
Collection                          ABCMeta        <class 'pystac.collection.Collection'>
Image                               module         <module 'PIL.Image' from <...>t-packages/PIL/Image.py'>
Item                                ABCMeta        <class 'pystac.item.Item'>
LabelItem                           ABCMeta        <class 'pystac.label.LabelItem'>
MultiPolygon                        type           <class 'shapely.geometry.<...>ltipolygon.MultiPolygon'>
Point                               type           <class 'shapely.geometry.point.Point'>
Polygon                             type           <class 'shapely.geom

In [26]:
create_meta_image(S = 4096, test = True, ISZ = 512)

('mon', 'f15272')
max dimensions: 20418 17953
strides: 5 5




(34, 512, 512, 3) (34, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (34, 512, 512, 3) (34, 512, 512, 1)
34
Converting: 34 :files.




(72, 512, 512, 3) (72, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (72, 512, 512, 3) (72, 512, 512, 1)
72
Converting: 72 :files.




(78, 512, 512, 3) (78, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (78, 512, 512, 3) (78, 512, 512, 1)
78
Converting: 78 :files.




(50, 512, 512, 3) (50, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (50, 512, 512, 3) (50, 512, 512, 1)
50
Converting: 50 :files.




(60, 512, 512, 3) (60, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (60, 512, 512, 3) (60, 512, 512, 1)
60
Converting: 60 :files.




(104, 512, 512, 3) (104, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (104, 512, 512, 3) (104, 512, 512, 1)
104
Converting: 104 :files.




(114, 512, 512, 3) (114, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (114, 512, 512, 3) (114, 512, 512, 1)
114
Converting: 114 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(2, 512, 512, 3) (2, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (2, 512, 512, 3) (2, 512, 512, 1)
2
Converting: 2 :files.




(0,) (0, 1)
('acc', 'd41d81')
max dimensions: 40868 42719
strides: 10 11




(26, 512, 512, 3) (26, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (26, 512, 512, 3) (26, 512, 512, 1)
26
Converting: 26 :files.




(118, 512, 512, 3) (118, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (118, 512, 512, 3) (118, 512, 512, 1)
118
Converting: 118 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(80, 512, 512, 3) (80, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (80, 512, 512, 3) (80, 512, 512, 1)
80
Converting: 80 :files.




(6, 512, 512, 3) (6, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (6, 512, 512, 3) (6, 512, 512, 1)
6
Converting: 6 :files.




(100, 512, 512, 3) (100, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (100, 512, 512, 3) (100, 512, 512, 1)
100
Converting: 100 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(128, 512, 512, 3) (128, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (128, 512, 512, 3) (128, 512, 512, 1)
128
Converting: 128 :files.




(126, 512, 512, 3) (126, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (126, 512, 512, 3) (126, 512, 512, 1)
126
Converting: 126 :files.




(124, 512, 512, 3) (124, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (124, 512, 512, 3) (124, 512, 512, 1)
124
Converting: 124 :files.




(118, 512, 512, 3) (118, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (118, 512, 512, 3) (118, 512, 512, 1)
118
Converting: 118 :files.




(20, 512, 512, 3) (20, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (20, 512, 512, 3) (20, 512, 512, 1)
20
Converting: 20 :files.




(6, 512, 512, 3) (6, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (6, 512, 512, 3) (6, 512, 512, 1)
6
Converting: 6 :files.




(96, 512, 512, 3) (96, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (96, 512, 512, 3) (96, 512, 512, 1)
96
Converting: 96 :files.




(78, 512, 512, 3) (78, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (78, 512, 512, 3) (78, 512, 512, 1)
78
Converting: 78 :files.




(26, 512, 512, 3) (26, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (26, 512, 512, 3) (26, 512, 512, 1)
26
Converting: 26 :files.




(2, 512, 512, 3) (2, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (2, 512, 512, 3) (2, 512, 512, 1)
2
Converting: 2 :files.




(46, 512, 512, 3) (46, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (46, 512, 512, 3) (46, 512, 512, 1)
46
Converting: 46 :files.




(22, 512, 512, 3) (22, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (22, 512, 512, 3) (22, 512, 512, 1)
22
Converting: 22 :files.
('acc', 'ca041a')
max dimensions: 65882 77778
strides: 17 19




(38, 512, 512, 3) (38, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (38, 512, 512, 3) (38, 512, 512, 1)
38
Converting: 38 :files.




(40, 512, 512, 3) (40, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (40, 512, 512, 3) (40, 512, 512, 1)
40
Converting: 40 :files.




(10, 512, 512, 3) (10, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (10, 512, 512, 3) (10, 512, 512, 1)
10
Converting: 10 :files.




(0,) (0, 1)




(16, 512, 512, 3) (16, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (16, 512, 512, 3) (16, 512, 512, 1)
16
Converting: 16 :files.




(76, 512, 512, 3) (76, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (76, 512, 512, 3) (76, 512, 512, 1)
76
Converting: 76 :files.




(84, 512, 512, 3) (84, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (84, 512, 512, 3) (84, 512, 512, 1)
84
Converting: 84 :files.




(84, 512, 512, 3) (84, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (84, 512, 512, 3) (84, 512, 512, 1)
84
Converting: 84 :files.




(76, 512, 512, 3) (76, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (76, 512, 512, 3) (76, 512, 512, 1)
76
Converting: 76 :files.




(102, 512, 512, 3) (102, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (102, 512, 512, 3) (102, 512, 512, 1)
102
Converting: 102 :files.




(80, 512, 512, 3) (80, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (80, 512, 512, 3) (80, 512, 512, 1)
80
Converting: 80 :files.




(18, 512, 512, 3) (18, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (18, 512, 512, 3) (18, 512, 512, 1)
18
Converting: 18 :files.
('znz', 'aee7fd')
max dimensions: 40551 40592
strides: 10 10
('znz', '3f8360')
max dimensions: 33220 38104
strides: 9 10




(2, 512, 512, 3) (2, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (2, 512, 512, 3) (2, 512, 512, 1)
2
Converting: 2 :files.
('mon', '493701')
max dimensions: 22333 21783
strides: 6 6




(8, 512, 512, 3) (8, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (8, 512, 512, 3) (8, 512, 512, 1)
8
Converting: 8 :files.




(114, 512, 512, 3) (114, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (114, 512, 512, 3) (114, 512, 512, 1)
114
Converting: 114 :files.




(124, 512, 512, 3) (124, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (124, 512, 512, 3) (124, 512, 512, 1)
124
Converting: 124 :files.




(98, 512, 512, 3) (98, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (98, 512, 512, 3) (98, 512, 512, 1)
98
Converting: 98 :files.




(48, 512, 512, 3) (48, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (48, 512, 512, 3) (48, 512, 512, 1)
48
Converting: 48 :files.




(0,) (0, 1)




(78, 512, 512, 3) (78, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (78, 512, 512, 3) (78, 512, 512, 1)
78
Converting: 78 :files.




(126, 512, 512, 3) (126, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (126, 512, 512, 3) (126, 512, 512, 1)
126
Converting: 126 :files.




(104, 512, 512, 3) (104, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (104, 512, 512, 3) (104, 512, 512, 1)
104
Converting: 104 :files.




(106, 512, 512, 3) (106, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (106, 512, 512, 3) (106, 512, 512, 1)
106
Converting: 106 :files.




(54, 512, 512, 3) (54, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (54, 512, 512, 3) (54, 512, 512, 1)
54
Converting: 54 :files.




(8, 512, 512, 3) (8, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (8, 512, 512, 3) (8, 512, 512, 1)
8
Converting: 8 :files.




(28, 512, 512, 3) (28, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (28, 512, 512, 3) (28, 512, 512, 1)
28
Converting: 28 :files.




(106, 512, 512, 3) (106, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (106, 512, 512, 3) (106, 512, 512, 1)
106
Converting: 106 :files.




(68, 512, 512, 3) (68, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (68, 512, 512, 3) (68, 512, 512, 1)
68
Converting: 68 :files.




(50, 512, 512, 3) (50, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (50, 512, 512, 3) (50, 512, 512, 1)
50
Converting: 50 :files.




(12, 512, 512, 3) (12, 512, 512, 1)
final shape x_chips_verified, y_chips_verified: (12, 512, 512, 3) (12, 512, 512, 1)
12
Converting: 12 :files.


In [107]:
px = [1, 2, 3, 4, 5]
py = [6, 7, 8, 9, 10]


poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
print(poly)

poly = [p for x in poly for p in x]

poly

[(1.5, 6.5), (2.5, 7.5), (3.5, 8.5), (4.5, 9.5), (5.5, 10.5)]


[1.5, 6.5, 2.5, 7.5, 3.5, 8.5, 4.5, 9.5, 5.5, 10.5]

In [None]:
for a in range(15):
    print('DONE DONE DONE DONE DONE')

In [None]:
[('znz', 'bd5c14'), ('nia', '825a50'), ('kam', '4e7c7f'), ('znz', 'bc32f1'), ('znz', 'c7415c'), ('znz', '75cdfa'), ('mon', '493701'), ('znz', '9b8638'), ('acc', 'd41d81'), ('acc', 'ca041a'), ('znz', 'aee7fd'), ('dar', '42f235'), ('acc', 'a42435'), ('znz', '3f8360'), ('znz', '06f252'), ('znz', '076995'), ('mon', 'f15272'), ('znz', '33cae6'), ('dar', 'b15fce'), ('acc', '665946'), ('ptn', 'abe1a3'), ('ptn', 'f49f31'), ('dar', '353093'), ('znz', '425403'), ('znz', '3b20d4')]
('znz', 'bd5c14')
max dimensions: 38249 38276
strides: 8
('nia', '825a50')
max dimensions: 8927 14848
strides: 2
('kam', '4e7c7f')
max dimensions: 39270 40024
strides: 8
('znz', 'bc32f1')
max dimensions: 40180 40155
strides: 9
('znz', 'c7415c')
max dimensions: 33449 46458
strides: 7
('znz', '75cdfa')
max dimensions: 49709 49743
strides: 10
('mon', '493701')
max dimensions: 22333 21783
strides: 5
('znz', '9b8638')
max dimensions: 36100 45163
strides: 8
('acc', 'd41d81')
max dimensions: 40868 42719
strides: 9
('acc', 'ca041a')
max dimensions: 65882 77778
strides: 14
('znz', 'aee7fd')
max dimensions: 40551 40592
strides: 9
('dar', '42f235')
max dimensions: 51203 47750
strides: 11
('acc', 'a42435')
max dimensions: 57540 39162
strides: 12
('znz', '3f8360')
max dimensions: 33220 38104
strides: 7
('znz', '06f252')
max dimensions: 50692 50642
strides: 11
('znz', '076995')
max dimensions: 39020 46763
strides: 8
('mon', 'f15272')
max dimensions: 20418 17953
strides: 5
('znz', '33cae6')
max dimensions: 37113 34306
strides: 8
('dar', 'b15fce')
max dimensions: 44870 42558
strides: 9
('acc', '665946')
max dimensions: 84466 150147
strides: 17
('ptn', 'abe1a3')
max dimensions: 7840 7787
strides: 2
('ptn', 'f49f31')
max dimensions: 6605 4185
strides: 2
('dar', '353093')
max dimensions: 52969 59329
strides: 11
('znz', '425403')
max dimensions: 39850 39878
strides: 8
('znz', '3b20d4')
max dimensions: 44162 44134
strides: 9