# PREPROCESSING

The purpose of this Notebook is to split all 1km x 1km regions stored as .tif-files into smaller tiles that are suitable as network inputs. Tiles will be saved as .jpg-images in a new folder along with point-labels and shape-labels as .npy-files.

The final structure of the output folder will look like this:

Folder name: **512x512** (width x height of tiles)
- **images**: contains individual tiles as .jpg-images with name {region name} _ {tile number}.jpg 
- **points_trees / points_buildings**: point-labels as .npy-files, matched via filename
- **shapes_trees / shapes_buildings**: shape-labels as .npy-files, matched via filename
- **image_sets_trees / image_sets_buildings**: contains a .txt-file for each split (train, val, test) with the filenames of the assigned tiles
   

In [1]:
import os
import math
import fiona
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm
import rasterio as rio
from shapely.geometry import Point, Polygon

## Get all Images and Label-Files from Directory

In [2]:
# --- input / output locations -------------------------------------------------
DATA_PATH = "/home/jovyan/work/satellite_data/jannis/Denmark SDFE"
SAVE_PATH = "/home/jovyan/work/processed"
IMAGES_DIR = "normal_orto"
IMAGETYPE = ".tif"

# --- label sources --------------------------------------------------------------
DATA_TREES_POINTS = "/home/jovyan/work/DENMARK/85blocks_trees.shp" #"trees_vector/85blocks_trees.shp"
DATA_TREES_SHAPES = "/home/jovyan/work/DENMARK/85blocks_trees_shapes.shp"
DATA_BUILDINGS = "buildings_vector/bygninger_2017_uden_kolonihavn.shp"

# Collect the bare file names of every raster below the image directory.
# Only the file name is kept, so later cells join it directly onto PATH.
PATH = os.path.join(DATA_PATH, IMAGES_DIR)
regions = [
    filename
    for _dirpath, _dirnames, filenames in os.walk(PATH)
    for filename in filenames
    if filename.endswith(IMAGETYPE)
]

# Label files that the loading cell will read (currently the buildings only).
labels = [os.path.join(DATA_PATH, DATA_BUILDINGS)]

print(f"Found {len(regions)} Regions and {len(labels)} Label-Files")

Found 85 Regions and 1 Label-Files


## Set Tile Size and Overlap

To ensure equal tile sizes, the overlap is computed dynamically from the number of vertical and horizontal tiles. When executing this cell, make sure to select a number of tiles that ensures sufficient overlap between the images.

In [3]:
# Tiling configuration: number of tiles per axis and the tile size in pixels.
tiles_h = 40
tiles_v = 40
width = 256
height = 256

# Only the raster dimensions of one example region are needed here; the context
# manager closes the dataset again as soon as they have been read.
# NOTE(review): this assumes every region has the same pixel size as regions[0] — confirm.
with rio.open(os.path.join(PATH, regions[0])) as example_src:
    ncols, nrows = example_src.meta['width'], example_src.meta['height']

# Overlap (in px) between neighbouring tiles such that tiles_h / tiles_v equally sized
# tiles span the region exactly. A negative value means the tiles leave gaps.
h_overlap = ((tiles_h * width) - ncols) / (tiles_h - 1)
v_overlap = ((tiles_v * height) - nrows) / (tiles_v - 1)

print(f"Generating {tiles_h * tiles_v} tiles per region with: \n - tile size: {width} x {height} px \n - region size: {ncols} x {nrows} px \n - vertical overlap: {v_overlap} px \n - horizontal overlap: {h_overlap} px")

Generating 1600 tiles per region with: 
 - tile size: 256 x 256 px 
 - region size: 8000 x 8000 px 
 - vertical overlap: 57.43589743589744 px 
 - horizontal overlap: 57.43589743589744 px


## Load Shapes and Points

Fiona loads shapefile data and groups points and polygons into two separate dataframes. Dataframes are used due to their built-in efficient selection and function-application methods.

In [4]:
# Read every label file and sort its features into point records and polygon records.
# Geometry types other than "Point" and "Polygon" are skipped.
point_labels = []
shape_labels = []
for file in labels:
    with fiona.open(file) as shapefile:
        for feature in tqdm(shapefile):
            geometry = feature['geometry']
            # a feature may have no geometry at all; it carries no usable label
            if geometry is None:
                continue
            if geometry['type'] == "Point":
                # keep x/y only, dropping any further coordinate components
                point = geometry['coordinates'][:2]
                x = point[0]
                y = point[1]
                point_labels.append([Point(point), x, y])
            elif geometry['type'] == "Polygon":
                # only the first coordinate ring is used; further rings are ignored
                shape = geometry['coordinates'][0]
                poly = Polygon(shape)
                shape_labels.append([poly])

# Dataframes give convenient filtering and apply() in the tiling cells below.
point_labels = pd.DataFrame(data=point_labels, columns=["Point", "X", "Y"])
shape_labels = pd.DataFrame(data=shape_labels, columns=["Shape"])
print(f"Found {len(point_labels)} Point Labels and {len(shape_labels)} Shape Labels")

  0%|          | 0/118842 [00:00<?, ?it/s]

Found 0 Point Labels and 118842 Shape Labels


## Collection Generator FOR TREES

The outer loop iterates over the different regions, the inner loop constructs the tiles. Tiles are saved as a dictionary to a list. Checkpoints are saved every 10 regions so that the list can be cleared - this checkpoint interval can be tuned according to the available memory.

Both the shapes and the points dataframes must be populated for this cell to work correctly.

In [None]:
# Build the tile collection for the TREE labels. Every entry of `collection` is a dict
# holding the tile image plus its point- and shape-labels in tile pixel coordinates.
collection = []
counter = 0
checkpoint = 10  # write `collection` to disk every `checkpoint` regions, then clear it

for region in tqdm(regions):
    name_clean = region.replace(".tif","")

    # the context manager releases the raster handle once the region is fully tiled
    with rio.open(os.path.join(PATH, region)) as src:
        # region as window and shapely polygon
        ncols, nrows = src.meta['width'], src.meta['height']
        bounds = list(src.bounds)
        big_window = rio.windows.Window(col_off = 0, row_off = 0, width = ncols, height = nrows)
        big_poly = Polygon([(bounds[0], bounds[1]), (bounds[2], bounds[1]), (bounds[2], bounds[3]), (bounds[0], bounds[3])])

        # keep only the labels that lie in this region
        region_points = point_labels[point_labels['Point'].apply(lambda p: p.within(big_poly))].copy()
        region_shapes = shape_labels[shape_labels['Shape'].apply(lambda s: s.intersects(big_poly))].copy()

        # cut shapes to region bounds
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda s: s.intersection(big_poly))
        # clipping may split a polygon into several parts: list them via `.geoms`
        # (a MultiPolygon itself is not iterable in Shapely 2) and give each part its own row
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda s: s if s.geom_type != 'MultiPolygon' else list(s.geoms))
        region_shapes = region_shapes.explode('Shape')
        # anything that is not a polygon after clipping is dropped
        region_shapes['Type'] = region_shapes['Shape'].apply(lambda s: s.geom_type)
        region_shapes = region_shapes[region_shapes['Type'] == 'Polygon']
        region_shapes['ShapeX'] = region_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[0])
        region_shapes['ShapeY'] = region_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[1])

        # translate to pixels
        # NOTE: the factor 8 and the extent 1000 are hard-coded; they match the
        # 1km x 1km regions of 8000 x 8000 px used in this notebook. Y is flipped
        # because pixel rows are counted downwards.
        region_points['X'] = region_points['X'].apply(lambda x: (x - bounds[0])*8)
        region_points['Y'] = region_points['Y'].apply(lambda y: (1000 - (y - bounds[1]))*8)
        region_points['Point'] = list(zip(region_points.X, region_points.Y))
        region_shapes['ShapeX'] = region_shapes['ShapeX'].apply(lambda lx: [(x - bounds[0])*8 for x in lx])
        region_shapes['ShapeY'] = region_shapes['ShapeY'].apply(lambda ly: [(1000 - (y - bounds[1]))*8 for y in ly])
        region_shapes['Shape'] = list(zip(region_shapes.ShapeX, region_shapes.ShapeY))
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda t: list(zip(t[0], t[1])))

        # traverse tiles row by row, column by column
        for row in tqdm(range(tiles_v)):
            row_off = int(row * (height - v_overlap))
            # columns are the horizontal tiles, hence tiles_h (not tiles_v)
            for col in range(tiles_h):
                col_off = int(col * (width - h_overlap))
                # define tile bounds (the read window is clipped to the region extent)
                tile_window = rio.windows.Window(col_off = col_off, row_off = row_off, width = width, height = height).intersection(big_window)
                tile_poly = Polygon([(col_off, row_off), (col_off+width, row_off), (col_off+width, row_off+height), (col_off, row_off+height)])
                # read image: first three bands, rearranged from (band, row, col) to (row, col, band)
                src_image = src.read(window = tile_window)[:3]
                image = np.stack((src_image[0], src_image[1], src_image[2]), axis = 2)
                # get points and shapes in tile
                tile_points = region_points[region_points['Point'].apply(lambda p: Point(p).within(tile_poly))].copy()
                tile_shapes = region_shapes[region_shapes['Shape'].apply(lambda s: Polygon(s).intersects(tile_poly))].copy()
                # cut to tile bounds (same multi-part handling as for the region)
                tile_shapes['Shape'] = tile_shapes['Shape'].apply(lambda s: Polygon(s).intersection(tile_poly))
                tile_shapes['Shape'] = tile_shapes['Shape'].apply(lambda s: s if s.geom_type != 'MultiPolygon' else list(s.geoms))
                tile_shapes = tile_shapes.explode('Shape')
                tile_shapes['Type'] = tile_shapes['Shape'].apply(lambda s: s.geom_type)
                tile_shapes = tile_shapes[tile_shapes['Type'] == 'Polygon']
                tile_shapes['ShapeX'] = tile_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[0])
                tile_shapes['ShapeY'] = tile_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[1])
                # translate to new dimensions (tile-local integer pixel coordinates)
                tile_points['X'] = tile_points['X'].apply(lambda x: round(x - col_off))
                tile_points['Y'] = tile_points['Y'].apply(lambda y: round(y - row_off))
                tile_shapes['ShapeX'] = tile_shapes['ShapeX'].apply(lambda lx: [round(x - col_off) for x in lx])
                tile_shapes['ShapeY'] = tile_shapes['ShapeY'].apply(lambda ly: [round(y - row_off) for y in ly])
                # put shape coordinates together
                tile_shapes['pShape'] = list(zip(tile_shapes.ShapeX, tile_shapes.ShapeY))
                tile_shapes['pShape'] = tile_shapes['pShape'].apply(lambda t: list(zip(t[0], t[1])))
                # to Numpy
                np_points = tile_points[['X', 'Y']].to_numpy()
                np_shapes = tile_shapes['pShape'].to_numpy()
                # add to collection; tiles are numbered row-major starting at 1
                collection.append({"file": name_clean, "tile": str(col + (row * tiles_h) + 1), "image": image, "points": np_points, "npoints": len(np_points), "shapes": np_shapes, "nshapes": len(np_shapes)})

    # the first checkpoint is written after region index `checkpoint`, i.e. it also
    # contains region 0; afterwards every file covers `checkpoint` regions
    if counter != 0 and counter % checkpoint == 0 :
        print(f"Saving checkpoint at {counter}")
        np.save(os.path.join(SAVE_PATH, "checkpoint_"+str(counter)), collection)
        collection = []

    counter += 1

# remaining regions; the file name carries the total number of processed regions
np.save(os.path.join(SAVE_PATH, "checkpoint_"+str(counter)), collection)
print(f"Generated Collection...")

## Collection Generator FOR BUILDINGS

This cell differs from the previous one in that point-labels are not loaded from the dataframe above but inferred as reference points from the shapes. Output is the same.

In [5]:
# Build the tile collection for the BUILDING labels. Shapes come from `shape_labels`;
# the point label of each shape is derived from the clipped shape itself.
collection = []
counter = 0
checkpoint = 10  # write `collection` to disk every `checkpoint` regions, then clear it

for region in tqdm(regions):
    name_clean = region.replace(".tif","")

    # the context manager releases the raster handle once the region is fully tiled
    with rio.open(os.path.join(PATH, region)) as src:
        # region as window and shapely polygon
        ncols, nrows = src.meta['width'], src.meta['height']
        bounds = list(src.bounds)
        big_window = rio.windows.Window(col_off = 0, row_off = 0, width = ncols, height = nrows)
        big_poly = Polygon([(bounds[0], bounds[1]), (bounds[2], bounds[1]), (bounds[2], bounds[3]), (bounds[0], bounds[3])])

        # keep only the shapes that touch this region
        region_shapes = shape_labels[shape_labels['Shape'].apply(lambda s: s.intersects(big_poly))].copy()

        # cut shapes to region bounds
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda s: s.intersection(big_poly))
        # clipping may split a polygon into several parts: list them via `.geoms`
        # (a MultiPolygon itself is not iterable in Shapely 2) and give each part its own row
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda s: s if s.geom_type != 'MultiPolygon' else list(s.geoms))
        region_shapes = region_shapes.explode('Shape')
        # anything that is not a polygon after clipping is dropped
        region_shapes['Type'] = region_shapes['Shape'].apply(lambda s: s.geom_type)
        region_shapes = region_shapes[region_shapes['Type'] == 'Polygon']
        region_shapes['ShapeX'] = region_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[0])
        region_shapes['ShapeY'] = region_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[1])

        # translate to pixels
        # NOTE: the factor 8 and the extent 1000 are hard-coded; they match the
        # 1km x 1km regions of 8000 x 8000 px used in this notebook. Y is flipped
        # because pixel rows are counted downwards.
        region_shapes['ShapeX'] = region_shapes['ShapeX'].apply(lambda lx: [(x - bounds[0])*8 for x in lx])
        region_shapes['ShapeY'] = region_shapes['ShapeY'].apply(lambda ly: [(1000 - (y - bounds[1]))*8 for y in ly])
        region_shapes['Shape'] = list(zip(region_shapes.ShapeX, region_shapes.ShapeY))
        region_shapes['Shape'] = region_shapes['Shape'].apply(lambda t: list(zip(t[0], t[1])))

        # traverse tiles row by row, column by column
        for row in tqdm(range(tiles_v)):
            row_off = int(row * (height - v_overlap))
            # columns are the horizontal tiles, hence tiles_h (not tiles_v)
            for col in range(tiles_h):
                col_off = int(col * (width - h_overlap))
                # define tile bounds (the read window is clipped to the region extent)
                tile_window = rio.windows.Window(col_off = col_off, row_off = row_off, width = width, height = height).intersection(big_window)
                tile_poly = Polygon([(col_off, row_off), (col_off+width, row_off), (col_off+width, row_off+height), (col_off, row_off+height)])
                # read image: first three bands, rearranged from (band, row, col) to (row, col, band)
                src_image = src.read(window = tile_window)[:3]
                image = np.stack((src_image[0], src_image[1], src_image[2]), axis = 2)
                # get shapes in tile
                tile_shapes = region_shapes[region_shapes['Shape'].apply(lambda s: Polygon(s).intersects(tile_poly))].copy()
                # cut to tile bounds (same multi-part handling as for the region)
                tile_shapes['Shape'] = tile_shapes['Shape'].apply(lambda s: Polygon(s).intersection(tile_poly))
                tile_shapes['Shape'] = tile_shapes['Shape'].apply(lambda s: s if s.geom_type != 'MultiPolygon' else list(s.geoms))
                tile_shapes = tile_shapes.explode('Shape')
                tile_shapes['Type'] = tile_shapes['Shape'].apply(lambda s: s.geom_type)
                tile_shapes = tile_shapes[tile_shapes['Type'] == 'Polygon']
                tile_shapes['ShapeX'] = tile_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[0])
                tile_shapes['ShapeY'] = tile_shapes['Shape'].apply(lambda s: s.exterior.coords.xy[1])
                # translate to new dimensions (tile-local integer pixel coordinates)
                tile_shapes['ShapeX'] = tile_shapes['ShapeX'].apply(lambda lx: [round(x - col_off) for x in lx])
                tile_shapes['ShapeY'] = tile_shapes['ShapeY'].apply(lambda ly: [round(y - row_off) for y in ly])
                # put shape coordinates together
                tile_shapes['pShape'] = list(zip(tile_shapes.ShapeX, tile_shapes.ShapeY))
                tile_shapes['pShape'] = tile_shapes['pShape'].apply(lambda t: list(zip(t[0], t[1])))
                # one point label per shape, taken from the shape's representative point
                tile_shapes['Point'] = tile_shapes['pShape'].apply(lambda t: Polygon(t).representative_point())
                tile_shapes['PointX'] = tile_shapes['Point'].apply(lambda p: round(list(p.coords)[0][0]))
                tile_shapes['PointY'] = tile_shapes['Point'].apply(lambda p: round(list(p.coords)[0][1]))
                # to Numpy
                np_points = tile_shapes[['PointX', 'PointY']].to_numpy()
                np_shapes = tile_shapes['pShape'].to_numpy()
                # add to collection; tiles are numbered row-major starting at 1
                collection.append({"file": name_clean, "tile": str(col + (row * tiles_h) + 1), "image": image, "points": np_points, "npoints": len(np_points), "shapes": np_shapes, "nshapes": len(np_shapes)})

    # the first checkpoint is written after region index `checkpoint`, i.e. it also
    # contains region 0; afterwards every file covers `checkpoint` regions
    if counter != 0 and counter % checkpoint == 0 :
        print(f"Saving checkpoint at {counter}")
        np.save(os.path.join(SAVE_PATH, "checkpoint_"+str(counter)), collection)
        collection = []

    counter += 1

# remaining regions; the file name carries the total number of processed regions
np.save(os.path.join(SAVE_PATH, "checkpoint_"+str(counter)), collection)
print(f"Generated Collection...")

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 10


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 20


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 30


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 40


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 50


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 60


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 70


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Saving checkpoint at 80


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Generated Collection...


## Write To Data Directory

First cell creates the necessary directories, remaining cells need to be executed for each checkpoint file.

Remember to choose the right write mode for the text files.

In [6]:
# Output layout below SAVE_PATH: one folder named after the tile size, containing
# the images, the point labels, the shape labels and the image-set lists.
NEW_PATH = os.path.join(SAVE_PATH, f"{width}x{height}")
IMAGE_PATH = os.path.join(NEW_PATH, "images")
LABEL_PATH = os.path.join(NEW_PATH, "points_buildings")
SHAPE_PATH = os.path.join(NEW_PATH, "shapes_buildings")
SETS_PATH = os.path.join(NEW_PATH, "image_sets_buildings")

# exist_ok=True keeps this cell re-runnable: the following cells are executed once
# per checkpoint file and must find the same folders every time.
for directory in (NEW_PATH, IMAGE_PATH, LABEL_PATH, SHAPE_PATH, SETS_PATH):
    os.makedirs(directory, exist_ok=True)

In [47]:
# Checkpoint to unpack in the following cells; change `file` and re-run for each one.
file = "checkpoint_85.npy"
checkpoint_path = os.path.join(SAVE_PATH, file)

# The checkpoint is an object array of per-tile dicts, hence allow_pickle=True.
# Only load checkpoints written by this notebook: unpickling can execute code.
raw_tiles = np.load(checkpoint_path, allow_pickle=True)

# One row per tile, one column per dict key.
collection = pd.DataFrame(raw_tiles)[0].apply(pd.Series)

Images...

In [48]:
TARGET_TYPE = ".jpg"

# Write one image file per tile, named {region}_{tile}{TARGET_TYPE}.
for _, tile in tqdm(collection.iterrows()):
    target = os.path.join(IMAGE_PATH, f"{tile['file']}_{tile['tile']}{TARGET_TYPE}")
    Image.fromarray(tile['image'], 'RGB').save(target)

0it [00:00, ?it/s]

Point-Labels...

In [49]:
# Only tiles that contain at least one point get a label file.
items_with_label = collection[collection["npoints"] > 0]

for _, tile in tqdm(items_with_label.iterrows()):
    target = os.path.join(LABEL_PATH, f"{tile['file']}_{tile['tile']}_points.npy")
    np.save(target, tile['points'])

0it [00:00, ?it/s]

Polygon-Shapes...

In [50]:
# Only tiles that contain at least one polygon get a shape file.
items_with_shape = collection[collection["nshapes"] > 0]

for _, tile in tqdm(items_with_shape.iterrows()):
    target = os.path.join(SHAPE_PATH, f"{tile['file']}_{tile['tile']}_shapes.npy")
    np.save(target, tile['shapes'])

0it [00:00, ?it/s]

Create text-files containing the names of all tiles, those containing points and those containing shapes

In [51]:
# "a" appends to the lists, so this cell can be run once per checkpoint file;
# use "w" to start the lists from scratch.
mode = "a" #"w" for write

# The with-block closes (and thereby flushes) all three files even if a row fails.
with open(os.path.join(SETS_PATH, "all.txt"), mode) as all_file, \
     open(os.path.join(SETS_PATH, "points.txt"), mode) as points_file, \
     open(os.path.join(SETS_PATH, "shapes.txt"), mode) as shapes_file:
    for index, item in collection.iterrows():
        # one tile name per line, without file extension
        name = item['file'] + "_" + item['tile'] + "\n"
        all_file.write(name)
        if item.nshapes > 0:
            shapes_file.write(name)
        if item.npoints > 0:
            points_file.write(name)

# IMAGE NORMALIZATION

These cells may be used to load a large number of images in order to determine the means and standard deviations for image normalization during the training phase.

In [None]:
import torch
import numpy as np
from torch.utils.data import DataLoader
import datasets
from helpers import io

In [None]:
# NOTE: this reassigns DATA_PATH, which the preprocessing cells above also use.
DATA_PATH = "/home/jovyan/work/DENMARK/256x256"
# NOTE(review): the preprocessing section writes its lists to "image_sets_buildings";
# confirm that an "image_sets" folder exists below DATA_PATH.
images_path = os.path.join(DATA_PATH, "image_sets", "all.txt")
images = [name.replace("\n","") for name in io.readText(images_path)]

dataset = datasets.getDataset(name = "denmark_points", 
                              path = DATA_PATH,
                              images = images,
                              n_classes = 2,
                              transform = None)

# A single batch containing the whole dataset: the statistics are computed over all images at once.
sampler = torch.utils.data.RandomSampler(dataset)
loader = DataLoader(dataset, sampler = sampler, batch_size = len(dataset), drop_last = True, num_workers = 1)

dataiter = iter(loader)
# built-in next() works for every iterator; the iterator has no .next() method on current PyTorch
batch = next(dataiter)

# mean / std over axes (0, 2, 3) — presumably per channel for (batch, channel, height, width) input; verify
print(np.mean(batch['images'].numpy(), axis = (0, 2, 3)), "\n", np.std(batch['images'].numpy(), axis = (0, 2, 3)))
#print(np.mean(batch['images'].numpy()), "\n", np.std(batch['images'].numpy())) sanity check without axis
#print(np.mean(batch['images'].numpy()), "\n", np.std(batch['images'].numpy())) sanity check without axis

# CONVOLUTIONAL ORIENTED BOUNDARIES

Generates COB-Images for Dataset

In [None]:
import os
import torch
import numpy as np
from tqdm.notebook import tqdm
from skimage.io import imread
from torchvision.utils import save_image

from models.cobnet import COBNet
from helpers.cob.dataset import COBtransform

## Enter Settings and Search Files

In [None]:
# NOTE: PATH and SAVE_PATH are reassigned here; the preprocessing cells above use the
# same names for other folders, so re-run their configuration before going back.
PATH = "/home/jovyan/work/processed/1024x1024"
IMAGES_PATH = os.path.join(PATH, "images")
SAVE_PATH = os.path.join(PATH, "cob")
# Create the output folder only if it is missing so the cell can be re-run.
# mkdir (not makedirs) still raises when PATH itself does not exist.
if not os.path.isdir(SAVE_PATH):
    os.mkdir(SAVE_PATH)

TYPE = ".jpg"
IMAGE_WIDTH = 1024
STATE_DICT = "/home/jovyan/work/runs/X_COBNET/cp_or.pth.tar"

In [None]:
# Bare file names of every image below IMAGES_PATH that has the configured extension.
images = [
    filename
    for _dirpath, _dirnames, filenames in os.walk(IMAGES_PATH)
    for filename in filenames
    if filename.endswith(TYPE)
]

n_images = len(images)
print(f"Found {n_images} images")

## Normalize and Run Model

In [None]:
# Normalization constants passed to the COB transform.
means = [0.492, 0.475, 0.430]
stds = [0.176, 0.173, 0.176]
transform = COBtransform(means, stds, IMAGE_WIDTH)

model = COBNet()
model.load_state_dict(torch.load(STATE_DICT))
# inference only: switch to eval mode once, before the loop
model.eval()

for image_name in tqdm(images):
    image = imread(os.path.join(IMAGES_PATH, image_name))
    # the transform is called with a batch axis prepended; [0] takes the single result back out
    image = transform(images = image[np.newaxis, ...])[0]
    # move the three channels from the last axis to the first
    image = np.stack((image[:,:,0], image[:,:,1], image[:,:,2]), axis = 0)
    img_tensor = torch.tensor(image[np.newaxis, ...]).float()

    with torch.no_grad():
        cob = model(img_tensor)

    # sigmoid of the 'y_fine' output is saved under the same file name as the input image
    data = cob['y_fine'].sigmoid()
    path = os.path.join(SAVE_PATH, image_name)
    save_image(data, path)

print("Generated all images")

# EXPLORATION REGION

In [None]:
# NOTE(review): `cob_collection` is not defined in any cell shown in this notebook;
# it is presumably a list of model outputs aligned with `images` — confirm before running.
# Create the output folder only when it does not exist yet.
if not os.path.isdir(SAVE_PATH):
    os.mkdir(SAVE_PATH)

# The with-block closes the list file even if saving an image fails.
with open(os.path.join(PATH, "image_sets", "cob.txt"), "w") as cob_file:
    for i in range(len(cob_collection)):
        data = cob_collection[i]['y_fine'].sigmoid()
        path = os.path.join(SAVE_PATH, images[i])
        # list the image that belongs to this output, one name per line without extension
        # (same format as all.txt / points.txt / shapes.txt — TODO confirm expected format)
        cob_file.write(os.path.splitext(images[i])[0] + "\n")
        save_image(data, path)

In [None]:
# Share of tiles in the loaded checkpoint that carry at least one point label.
labelled_tiles = collection[collection["npoints"] > 0]
total = len(collection)
have = len(labelled_tiles)
print(f"From a total of {total} tiles, {have} have a label assigned ({have/total*100} %)")