In [4]:
import numpy as np
import os
from PIL import Image

import json
import geopandas

import warnings
warnings.simplefilter("ignore", UserWarning)

# Preparation

In [5]:
# Number of tiles along the x and y axes of the split grid.
max_x_tile = 217
max_y_tile = 162

# Per-axis tile labels, zero-padded to four digits ("0000", "0001", ...).
x_tiles = [f"{index:04d}" for index in range(max_x_tile)]
y_tiles = [f"{index:04d}" for index in range(max_y_tile)]
# Flat labels for every tile of the grid, always five digits ("XXXXX").
all_tiles = [f"{index:05d}" for index in range(max_x_tile * max_y_tile)]

#print(os.getcwd())

# Rename the image files

In [6]:
# Flatten the nested tile folders into a single directory: every tile found at
# '<base_src_path>/<x>/<y>/<x>_<y>.tif' is re-saved as
# '<base_target_path>/<flat index>.tif'.
print("Start renaming the images")

base_src_path = "split_images"
base_target_path = "Images"

# Make sure the target folder exists; otherwise every save would fail and be
# indistinguishable from a missing source tile.
os.makedirs(base_target_path, exist_ok=True)

for i in range(max_x_tile):
    for j in range(max_y_tile):

        # Source tile, e.g. 'split_images/0000/0000/0000_0000.tif'
        file_name_tif = x_tiles[i] + "_" + y_tiles[j] + ".tif"
        src_path = os.path.join(base_src_path, x_tiles[i], y_tiles[j], file_name_tif)

        # Flat target name, e.g. 'Images/00000.tif'. The (i, j) pair is
        # flattened with j running fastest.
        target_file_name = all_tiles[max_y_tile*i + j] + ".tif"
        target_path = os.path.join(base_target_path, target_file_name)

        try:
            # The context manager closes the file handle of every tile instead
            # of leaving thousands of them open until garbage collection.
            with Image.open(src_path) as image:
                image.save(target_path)
        except FileNotFoundError:
            # Best effort: a tile that was never produced is only reported.
            print("No image " + file_name_tif)
        except OSError as err:
            # Unreadable/corrupt image or a write failure: report the real
            # cause instead of disguising it as a missing tile.
            print("Could not convert " + file_name_tif + ": " + str(err))

print("Done")

Start renaming the images
Done


# Creating train, val and test splits

In [8]:
# Randomly distribute all tiles from 'Images' over the train / val / test
# folders of the dataset.
print("Creating train, val and test")


base_src_path = "Images"
base_target_path_train = "dataset/images/train"
base_target_path_val = "dataset/images/val"
base_target_path_test = "dataset/images/test"

train_percentage = 0.70
val_percentage = 0.15
# test receives whatever remains (about 0.15)

# Fixed seed so that re-running the notebook reproduces the same split.
split_seed = 42

pic_amount = max_x_tile*max_y_tile
train_amount = int(pic_amount*train_percentage)
val_amount = int(pic_amount*val_percentage)
test_amount = pic_amount - (train_amount + val_amount)
print(pic_amount, train_amount, val_amount, test_amount, train_amount + val_amount + test_amount)

# Shuffle the tile numbers with a seeded generator
tiles_int = np.random.default_rng(split_seed).permutation(pic_amount)
# Format every number as the five-digit string used for the file names
tiles_str = ["{:05d}".format(number) for number in tiles_int]


def _copy_tiles(tile_names, src_dir, target_dir):
    """Re-save the image '<name>.tif' of every given tile into another folder.

    Parameters
    ----------
    tile_names : sequence of str
        Tile names without the '.tif' extension.
    src_dir : str
        Folder that contains the source images.
    target_dir : str
        Folder that receives the images; created when it does not exist.

    Raises
    ------
    FileNotFoundError
        If a source image is missing (propagated from ``Image.open``).
    """
    os.makedirs(target_dir, exist_ok=True)
    for tile_name in tile_names:
        file_name = tile_name + ".tif"
        # Close every source file as soon as its copy has been written.
        with Image.open(os.path.join(src_dir, file_name)) as image:
            image.save(os.path.join(target_dir, file_name))


# Consecutive slices of the shuffled list: train first, val starts where
# train stopped, test starts where val stopped.
val_end = train_amount + val_amount
_copy_tiles(tiles_str[:train_amount], base_src_path, base_target_path_train)
_copy_tiles(tiles_str[train_amount:val_end], base_src_path, base_target_path_val)
_copy_tiles(tiles_str[val_end:], base_src_path, base_target_path_test)

print("Done")

Creating train, val and test
35154 24607 5273 5274 35154
Done


# Converting shp to GeoJSON

In [10]:
# Convert the shapefile of every tile into a GeoJSON file with the same
# tile name.
base_src_path = "split_features"
base_target_path = "GeoJSON"

print("Start converting shp's into geojson's")

# Make sure the output folder exists so that a missing folder is not
# misreported as a missing shapefile.
os.makedirs(base_target_path, exist_ok=True)

for tile in all_tiles:

    # Source shapefile, e.g. 'split_features/00001/00001.shp'
    folder_name_shp = tile + ".shp"
    src_path = os.path.join(base_src_path, tile, folder_name_shp)

    # Target location, e.g. 'GeoJSON/00001.geojson'
    folder_name_geo = tile + ".geojson"
    target_path = os.path.join(base_target_path, folder_name_geo)

    # Only a shapefile that is really absent is reported as "No ...".
    if not os.path.isfile(src_path):
        print("No " + src_path)
        continue

    try:
        shp_file = geopandas.read_file(src_path)
        # Name the driver explicitly instead of relying on the library default.
        shp_file.to_file(target_path, driver="GeoJSON")
    except Exception as err:
        # Best effort: keep going, but show the actual read/write error so it
        # cannot be mistaken for a missing input file.
        print("Failed to convert " + src_path + ": " + str(err))

print("Done")

Start converting shp's into geojson's
No split_features/00000/00000.shp
No split_features/00001/00001.shp
No split_features/00002/00002.shp
No split_features/00003/00003.shp
No split_features/00004/00004.shp
No split_features/00005/00005.shp
No split_features/00006/00006.shp
No split_features/00007/00007.shp
No split_features/00008/00008.shp
No split_features/00009/00009.shp
No split_features/00010/00010.shp
No split_features/00011/00011.shp
No split_features/00012/00012.shp
No split_features/00013/00013.shp
No split_features/00014/00014.shp
No split_features/00015/00015.shp
No split_features/00016/00016.shp
No split_features/00017/00017.shp
No split_features/00018/00018.shp
No split_features/00019/00019.shp
No split_features/00020/00020.shp
No split_features/00021/00021.shp
No split_features/00022/00022.shp
No split_features/00023/00023.shp
No split_features/00024/00024.shp
No split_features/00025/00025.shp
No split_features/00026/00026.shp
No split_features/00027/00027.shp
No split_f