In [32]:
import json
import os
import geojson
from PIL import Image
import shutil

# Preparation

In [2]:
# Tile grid dimensions; their product is the number of tile ids generated below.
max_x_tile = 217
max_y_tile = 162

# Tile ids are zero-padded to five digits ("00000", "00001", ...),
# matching the "<id>.geojson" / "<id>.txt" / "<id>.tif" file names used later.
all_tiles = [str(tile_index).zfill(5) for tile_index in range(max_x_tile * max_y_tile)]

# Reference coordinates that normalize_parameters subtracts from / compares with
# the feature coordinates. The "+ 35" is a manual offset kept from the original.
global_min_x = 314084.5 #- 20
global_min_y = 5585151.5 + 35

# Edge length of one (square) tile, used for the modulo and the normalisation.
image_width = image_height = 224

#print(os.getcwd())
#print(os.listdir('40_40/GeoJSON'))

# Functions

In [3]:
def find_site_types(sitetype: str) -> int:
    '''
    Translate a site type name (the feature's int_sitety property) into its class id.
    0: Bombentrichter
    1: Grabhügel
    2: Meilerpodium
    Input: str
    Output: int
    Raises: Exception if the name is none of the three known site types
    '''
    known_classes = (
        ('Bombentrichter', 0),
        ('Grabhügel', 1),
        ('Meilerpodium', 2),
    )
    for label, class_id in known_classes:
        if sitetype == label:
            return class_id

    raise Exception("Did not find Bombentrichter, Grabhügel, or Meilerpodium")

In [4]:
def gen_x_y_points(feature: list):
    '''
    Split the first coordinate ring of a feature into separate x and y lists.

    Note: although annotated as list, feature is indexed with string keys
    (feature['geometry']['coordinates'][0]), i.e. it is used as a mapping.
    Input: feature whose ['geometry']['coordinates'][0] is a sequence of points
    Output: list of x values, list of y values, and the ring of points itself
    '''
    ring = feature['geometry']['coordinates'][0]

    xs, ys = [], []
    for point in ring:
        xs.append(point[0])
        ys.append(point[1])

    return xs, ys, ring

In [5]:
def calc_width(x: list) -> float:
    '''
    Horizontal extent of a feature as a fraction of the tile width.
    The extent is max(x) - min(x); it is divided by the module-level image_width.
    Input: list of x values
    Output: float
    '''
    rightmost, leftmost = max(x), min(x)
    return (rightmost - leftmost) / image_width

In [6]:
def calc_height(y: list) -> float:
    '''
    Vertical extent of a feature as a fraction of the tile height.
    The extent is max(y) - min(y); it is divided by the module-level image_height.
    Input: list of y values
    Output: float
    '''
    topmost, bottommost = max(y), min(y)
    return (topmost - bottommost) / image_height

In [7]:
def find_parameters(feature: dict) -> list:
    '''
    Takes a single feature, extracts the necessary information, and returns a list of parameters.
    The parameters are in the specified order:
    [class, x_center, y_center, width, height]

    The centre is the midpoint of the feature's bounding box, so that it
    describes the same box as the width and height returned by calc_width
    and calc_height (both are max - min extents). Averaging the ring vertices
    instead would shift the box: vertices are rarely spread evenly, and a
    closed GeoJSON ring repeats its first position at the end.

    Input: feature (mapping with 'geometry' and 'properties' entries)
    Output: list
    Raises: Exception for an unknown site type (from find_site_types);
            ValueError if the feature's ring contains no points
    '''
    x, y, _ = gen_x_y_points(feature)

    # Bounding-box midpoint in map coordinates
    x_cen = (min(x) + max(x)) / 2
    y_cen = (min(y) + max(y)) / 2

    norm_x, norm_y = normalize_parameters(x_cen, y_cen)

    return [
        find_site_types(feature['properties']['int_sitety']),
        norm_x,
        norm_y,
        calc_width(x),
        calc_height(y),
    ]

In [8]:
def normalize_parameters(x: float, y: float):
    '''
    Convert a map coordinate into a position inside its tile, as fractions of the tile size.
    Origin (0,0) is the upper left corner, so y is measured downwards from global_min_y.
    Relies on the module-level global_min_x, global_min_y, image_width and image_height.
    Input: float, float
    Output: (float, float) -> normalized x and normalized y
    '''
    # 1 in the grid is equal to 1 pixel, so the offsets below are already pixel distances
    offset_x = x - global_min_x
    offset_y = global_min_y - y

    # Position inside the tile: wrap the offsets at the tile size
    in_tile_x = offset_x % image_width
    in_tile_y = offset_y % image_height

    return in_tile_x / image_width, in_tile_y / image_height
    
    
#normalize_parameters(319390.35907340754, 5578030.177083888) #192

# Get Parameters

In [31]:
print("Start extracting the parameters from the feature files")

base_src_path = "GeoJSON"
base_target_path = "Labels"

no_labels = 0
contains_labels = 0

# The label files are written below; make sure their folder exists.
os.makedirs(base_target_path, exist_ok=True)

for tile_id in all_tiles:

    file_name_geo = tile_id + ".geojson"
    src_path = os.path.join(base_src_path, file_name_geo)

    # Define target location e.g. 'Labels/00001.txt'
    file_name_txt = tile_id + ".txt"
    target_path = os.path.join(base_target_path, file_name_txt)

    # Start every tile with no features. Without this reset a tile whose
    # GeoJSON file is missing would silently reuse the features of the last
    # file that was loaded (even one from an earlier run of this cell).
    features = []

    # Load the GeoJSON file if it exists; the handle is closed right away.
    try:
        with open(src_path) as f:
            json_file = geojson.load(f)
    except FileNotFoundError:
        print(file_name_geo + " does not exist")
    except (OSError, ValueError) as err:
        # Unreadable or malformed file: report it instead of hiding it.
        print(file_name_geo + " could not be read: " + str(err))
    else:
        print(file_name_geo + " File found")
        if isinstance(json_file, dict):
            features = json_file.get("features") or []

    # Add for each feature the parameters that YOLOv5 needs.
    # A feature that cannot be converted is reported and skipped, so one
    # bad feature does not discard the remaining features of the tile.
    all_my_parameters = []
    for feature in features:
        try:
            all_my_parameters.append(find_parameters(feature))
        except Exception as err:
            print("Skipping a feature in " + file_name_geo + ": " + str(err))

    if all_my_parameters:
        contains_labels += 1
    else:
        no_labels += 1

    # Test to see what features are in one image
    for parameters in all_my_parameters:
        print(parameters)

    # Save each label in one line (an empty file is written for tiles without labels)
    with open(target_path, 'w') as target_file:
        target_file.write('\n'.join(' '.join(map(str, parameters)) for parameters in all_my_parameters))

print("Done")
print("Tiles without labels: ", no_labels, ", Tiles with labels: ", contains_labels)

Start extracting the parameters from the feature files
00000.geojson does not exist
00001.geojson does not exist
00002.geojson does not exist
00003.geojson does not exist
00004.geojson does not exist
00005.geojson does not exist
00006.geojson does not exist
00007.geojson does not exist
00008.geojson does not exist
00009.geojson does not exist
00010.geojson does not exist
00011.geojson does not exist
00012.geojson does not exist
00013.geojson does not exist
00014.geojson does not exist
00015.geojson does not exist
00016.geojson does not exist
00017.geojson does not exist
00018.geojson does not exist
00019.geojson does not exist
00020.geojson does not exist
00021.geojson does not exist
00022.geojson does not exist
00023.geojson does not exist
00024.geojson does not exist
00025.geojson does not exist
00026.geojson does not exist
00027.geojson does not exist
00028.geojson does not exist
00029.geojson does not exist
00030.geojson does not exist
00031.geojson does not exist
00032.geojson doe

# Create two image folders "Labeled_images" and "No_label_images"

In [42]:
print("Start dividing images into the Labeled_images and No_label_images folder")

label_base_src_path = "GeoJSON"
label_base_target_path = "Labels"

image_base_src_path = "Images"
image_labeled_base_target_path = "Labeled_images"
image_no_label_base_target_path = "No_label_images"

contains_labels = 0
no_labels = 0
file_does_not_exist = 0

# Both target folders are written to below; make sure they exist.
os.makedirs(image_labeled_base_target_path, exist_ok=True)
os.makedirs(image_no_label_base_target_path, exist_ok=True)

for i, tile_id in enumerate(all_tiles):

    file_name_geo = tile_id + ".geojson"
    src_path = os.path.join(label_base_src_path, file_name_geo)

    # Define src image e.g. 'Images/00000.tif'
    file_name_tif = tile_id + ".tif"
    image_src_path = os.path.join(image_base_src_path, file_name_tif)
    image_labeled_target_path = os.path.join(image_labeled_base_target_path, file_name_tif)
    image_no_label_target_path = os.path.join(image_no_label_base_target_path, file_name_tif)

    # Start every tile with no features. Without this reset a tile whose
    # GeoJSON file is missing would reuse the features of the last loaded
    # file and be sorted into the wrong folder.
    features = []

    # Load the GeoJSON file if it exists; the handle is closed right away.
    try:
        with open(src_path) as f:
            json_file = geojson.load(f)
    except FileNotFoundError:
        file_does_not_exist += 1
    except (OSError, ValueError) as err:
        # Unreadable or malformed file: report it instead of hiding it.
        print(file_name_geo + " could not be read: " + str(err))
    else:
        if isinstance(json_file, dict):
            features = json_file.get("features") or []

    # Add for each feature the parameters that YOLOv5 needs.
    # A feature that cannot be converted is reported and skipped.
    all_my_parameters = []
    for feature in features:
        try:
            all_my_parameters.append(find_parameters(feature))
        except Exception as err:
            print("Skipping a feature in " + file_name_geo + ": " + str(err))

    # Decide the folder first, then save once. Each tile is counted exactly
    # once, and an error while saving is no longer mistaken for "no labels".
    if all_my_parameters:
        print(i, " has label")
        contains_labels += 1
        image_target_path = image_labeled_target_path
    else:
        no_labels += 1
        image_target_path = image_no_label_target_path

    # A missing or unreadable image raises here, as it did before.
    # The context manager releases the image file after saving.
    with Image.open(image_src_path) as image:
        image.save(image_target_path)


print("Done")
print("Images with no labels: ", no_labels,", Images with labels: ", contains_labels)
print("Tiles without a GeoJSON file: ", file_does_not_exist)

Start dividing images into the Labeled_images and No_label_images folder
315  has label
316  has label
320  has label
321  has label
478  has label
482  has label
483  has label
641  has label
643  has label
644  has label
645  has label
806  has label
968  has label
969  has label
970  has label
1129  has label
1130  has label
1131  has label
1132  has label
1292  has label
1293  has label
1295  has label
1296  has label
1297  has label
1298  has label
1299  has label
1300  has label
1301  has label
1302  has label
1453  has label
1454  has label
1455  has label
1456  has label
1615  has label
1616  has label
1617  has label
2228  has label
2231  has label
2389  has label
2391  has label
2550  has label
2551  has label
2552  has label
2553  has label
2712  has label
2714  has label
2869  has label
2874  has label
2875  has label
2876  has label
2878  has label
2879  has label
3031  has label
3032  has label
3037  has label
3038  has label
3039  has label
3040  has label
3042  has labe