The following preprocessing steps will be done:
1. Convert GeoTiffs to PNG images
2. Read in the Label Information
3. For Pre-images: create target mask with 0 for background and 1 for building
4. For Post-images: create target mask with 0 for background, 1 for no damage, 2 for minor damage, 3 for major damage, 4 for destroyed, 5 for un-classified



In [2]:
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from shapely.wkt import loads
import json
import os
import pandas as pd
import rasterio as rio
from rasterio.features import rasterize
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely import Polygon
import numpy as np


from Utils.preprocessing import extract_features, load_label_data, process_label_metadata, process_features, make_label_dictionary, geotiff_converter, create_disaster_targets

In [None]:
# HPC Terrabyte
# adapt the user to your needs
USER = "di97ren"
# Name of the dataset directory below DATA_PATH. This name was previously
# used without being defined, which raised a NameError when the cell ran.
# NOTE(review): "xview2" matches the path printed further below; adapt it if
# another dataset directory (e.g. a subset) should be processed.
folder_name = "xview2"
# keep the following unchanged
ROOT = Path("/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022")
USER_PATH = ROOT / f"users/{USER}"
DATA_PATH = ROOT / "data"

DATASET_ROOT = DATA_PATH / folder_name

DATA_FOLDER = DATASET_ROOT / "tier1" # this has to be changed in respect to the folder (tier1, tier3, hold, test)

# Input GeoTIFF images
IMAGE_FOLDER = DATA_FOLDER / "images/"

# JSON label files
LABEL_FOLDER = DATA_FOLDER / "labels/"

# Output directory for the rasterized target masks
TARGET_FOLDER = DATA_FOLDER / "targets/"

# Output directory for the PNG versions of the images
PNG_FOLDER = DATA_FOLDER / "png_images/"
# Path Configuration to the xview2 Subset

def path_configuration(user_name: str,
    folder_name: str, # possible values: [tier1, tier3, hold, test]
    dataset_name: str = "xview2"
    ):
    """Build all directory paths used by the preprocessing steps.

    Args:
        user_name: HPC user name; selects the directory below ``users/``.
        folder_name: Sub-folder of the dataset to work on
            (tier1, tier3, hold or test).
        dataset_name: Name of the dataset directory below ``data/``.

    Returns:
        A dict mapping the names ``ROOT``, ``USER_PATH``, ``DATA_PATH``,
        ``DATASET_ROOT``, ``DATA_FOLDER``, ``IMAGE_FOLDER``, ``LABEL_FOLDER``,
        ``TARGET_FOLDER`` and ``PNG_FOLDER`` to their ``Path`` objects.
    """
    # keep the following unchanged
    root = Path("/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022")
    # Use the argument instead of the global USER so the function does not
    # depend on notebook state.
    user_path = root / f"users/{user_name}"
    data_path = root / "data"

    dataset_root = data_path / dataset_name

    # The tier is taken from folder_name instead of being fixed to "tier1".
    data_folder = dataset_root / folder_name

    # Return the paths; as plain locals they would be lost when the
    # function ends.
    return {
        "ROOT": root,
        "USER_PATH": user_path,
        "DATA_PATH": data_path,
        "DATASET_ROOT": dataset_root,
        "DATA_FOLDER": data_folder,
        "IMAGE_FOLDER": data_folder / "images/",
        "LABEL_FOLDER": data_folder / "labels/",
        "TARGET_FOLDER": data_folder / "targets/",
        "PNG_FOLDER": data_folder / "png_images/",
    }








In [16]:
# Show the resolved image directory to verify the path configuration
print(IMAGE_FOLDER)

/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022/data/xview2/tier1/images


In [19]:

# Names of the label files. Hidden entries (names starting with ".") and
# files without a ".json" extension are skipped.
# NOTE(review): the UnicodeDecodeError recorded for this cell was presumably
# caused by such a non-label file in the directory - confirm on the cluster.
labels = sorted(
    name for name in os.listdir(LABEL_FOLDER)
    if name.lower().endswith(".json") and not name.startswith(".")
)

# Full path of every label file
label_paths = [os.path.join(LABEL_FOLDER, name) for name in labels]

# One parsed object per label file, in the order of label_paths
label_data = []

for label in label_paths:
    # JSON is read as UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open(label, "r", encoding="utf-8") as file:
        label_data.append(pd.read_json(file))



UnicodeDecodeError: 'utf-8' codec can't decode byte 0xef in position 20: invalid continuation byte

In [4]:
def extract_features(features):
    """Collect the WKT strings, feature types and damage classes of features.

    Each feature must provide a 'wkt' entry and a 'properties' mapping. A
    missing 'feature_type' falls back to 'unknown' and a missing 'subtype'
    falls back to 'no-damage'.

    Returns a dict with the equally long lists 'geometries', 'class_name'
    and 'damage_class'.
    """
    wkt_strings = []
    feature_types = []
    damage_classes = []

    for entry in features:
        wkt_strings.append(entry['wkt'])
        properties = entry['properties']
        feature_types.append(properties.get('feature_type', 'unknown'))
        damage_classes.append(properties.get('subtype', 'no-damage'))

    return {
        'geometries': wkt_strings,
        'class_name': feature_types,
        'damage_class': damage_classes,
    }

def load_label_data(label_paths):
    """Load the label files at the given paths.

    Args:
        label_paths: Iterable of paths to JSON label files.

    Returns:
        A list with the result of ``pd.read_json`` for every path, in the
        order of ``label_paths``.
    """
    label_data = []
    for label in label_paths:
        # Decode as UTF-8 explicitly; the default of open() depends on the
        # platform locale and may fail on non-ASCII content.
        with open(label, "r", encoding="utf-8") as file:
            label_data.append(pd.read_json(file))
    return label_data

def process_label_metadata(label):
    """Return the image name, disaster and disaster type of a label.

    The values are read from ``label['metadata']``. The last four characters
    of 'img_name' are cut off, which removes an extension such as '.png'.
    """
    meta = label['metadata']
    summary = {'img_name': meta['img_name'][:-4]}  # name without extension
    for field in ('disaster', 'disaster_type'):
        summary[field] = meta[field]
    return summary

def process_features(label, damage_codes):
    """Build the feature table of one label.

    The features stored under ``label['features']['xy']`` become the rows of
    a DataFrame. The label metadata is added as constant columns and every
    damage class is translated to its integer code in 'damage_code'. Classes
    missing from ``damage_codes`` get the code 999.
    """
    xy_features = label['features']['xy']
    table = pd.DataFrame(extract_features(xy_features))

    # Every metadata entry becomes a column holding the same value in all rows
    for column, value in process_label_metadata(label).items():
        table[column] = value

    # Look up the damage codes and store them as integers
    table['damage_code'] = (
        table['damage_class']
        .apply(lambda damage_class: damage_codes.get(damage_class, 999))
        .astype(int)
    )

    return table

def make_label_dictionary(input_directory, damage_codes):
    """Create a dictionary of label tables keyed by image name.

    Args:
        input_directory: Directory containing the JSON label files.
        damage_codes: Mapping from damage class name to integer code.

    Returns:
        A dict mapping the image name (``metadata['img_name']`` without its
        last four characters) to the DataFrame built by ``process_features``.
    """
    # Only real label files are loaded. Hidden entries (names starting with
    # ".") and files without a ".json" extension cannot be parsed as labels
    # and are skipped. Sorting makes the processing order deterministic.
    label_paths = [
        os.path.join(input_directory, file_name)
        for file_name in sorted(os.listdir(input_directory))
        if file_name.lower().endswith('.json') and not file_name.startswith('.')
    ]
    label_data = load_label_data(label_paths)

    label_dictionary = {}

    for label in label_data:
        img_name = label['metadata']['img_name'][:-4]  # Remove file extension
        label_dictionary[img_name] = process_features(label, damage_codes)

    return label_dictionary

The damage map below links each damage class name to the numerical value that is written into the post-disaster target masks.

In [5]:
# Damage class names in ascending code order:
# 1 = no-damage, 2 = minor-damage, 3 = major-damage, 4 = destroyed,
# 5 = un-classified
damage_codes = {
    damage_class: code
    for code, damage_class in enumerate(
        ('no-damage', 'minor-damage', 'major-damage', 'destroyed', 'un-classified'),
        start=1,
    )
}


In [6]:
# Build one label table per image from the files in LABEL_FOLDER
label_dictionary = make_label_dictionary(LABEL_FOLDER, damage_codes)

In [7]:
# Inspect the label table of a single pre-disaster image
label_dictionary['hurricane-matthew_00000044_pre_disaster']

Unnamed: 0,geometries,class_name,damage_class,img_name,disaster,disaster_type,damage_code
0,"POLYGON ((77.59030516034854 0, 85.668946993448...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
1,"POLYGON ((193.5972809756205 0, 199.71625600776...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
2,"POLYGON ((248.9587337209918 0, 253.10010278041...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
3,"POLYGON ((265.7093697033929 0, 278.09344922398...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
4,"POLYGON ((320.1406641054013 64.68679692648156,...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
...,...,...,...,...,...,...,...
268,"POLYGON ((163.5865766441175 312.7090464655843,...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
269,"POLYGON ((927.0221567239959 2.697160824916165,...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
270,"POLYGON ((29.93195910739135 309.3868047489998,...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1
271,"POLYGON ((192.6590626088126 346.1349797782409,...",building,no-damage,hurricane-matthew_00000044_pre_disaster,hurricane-matthew,wind,1


In [8]:
def geotiff_converter(image_directoy: Path, output_directory: Path):
    '''
    Convert the GeoTIFF images of a directory to PNG images.

    Bands 1, 2 and 3 of every ``.tif``/``.tiff`` file are stacked into an
    image. Images that are not already ``uint8`` are min-max stretched to the
    range 0-255. Each result is saved under the same file name with the
    extension ``.png``.

    Args:
        image_directoy: Directory containing the GeoTIFF images
            (``str`` or ``Path``).
        output_directory: Directory the PNG images are written to; it is
            created when it does not exist (``str`` or ``Path``).
    '''
    # Accept plain strings as well; the "/" joins below need Path objects.
    image_directoy = Path(image_directoy)
    output_directory = Path(output_directory)

    # Check if the directory exists
    if not os.path.exists(output_directory):
        # Create the directory if it doesn't exist
        os.makedirs(output_directory)
        print(f"Directory '{output_directory}' created.")
    else:
        print(f"Directory '{output_directory}' already exists.")

    for file_name in sorted(os.listdir(image_directoy)):
        # Skip hidden entries and anything that is not a GeoTIFF, so stray
        # files in the directory are not handed to rasterio.
        if file_name.startswith("."):
            continue
        if Path(file_name).suffix.lower() not in (".tif", ".tiff"):
            continue

        # stem drops the extension whatever its length (".tif" or ".tiff")
        png_name = Path(file_name).stem + ".png"

        with rio.open(image_directoy / file_name) as src:
            # Stack the first three bands into a (rows, cols, 3) array
            img = np.stack([src.read(band) for band in (1, 2, 3)], axis=-1)

        if img.dtype != np.uint8:
            img = _stretch_to_uint8(img)

        Image.fromarray(img).save(output_directory / png_name)


def _stretch_to_uint8(img):
    """Rescale *img* linearly so its minimum maps to 0 and its maximum to 255."""
    low = img.min()
    high = img.max()
    if high == low:
        # A constant image has no value range; avoid dividing by zero.
        return np.zeros(img.shape, dtype=np.uint8)
    return ((img - low) / (high - low) * 255).astype(np.uint8)


In [9]:
# Convert the images in IMAGE_FOLDER to PNG files stored in PNG_FOLDER
geotiff_converter(IMAGE_FOLDER, PNG_FOLDER)

Directory '/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022/data/xview2-subset/png_images' already exists.


In [10]:
def create_disaster_targets (png_image_directory: Path,
                             label_dictionary: dict,
                             target_output_directory: Path):
    """Rasterize the label polygons of every PNG image into a target mask.

    For images whose name contains "pre_disaster" every polygon is burned
    with the value 1 (0 = background, 1 = building). For all other images
    every polygon is burned with its 'damage_code'. The mask has the size of
    the image and is saved under the image's file name in
    ``target_output_directory``.

    Args:
        png_image_directory: Directory containing the PNG images
            (``str`` or ``Path``).
        label_dictionary: Mapping from image name without extension to a
            table with the columns 'geometries' (WKT strings) and
            'damage_code'.
        target_output_directory: Directory the masks are written to; it is
            created when it does not exist (``str`` or ``Path``).

    Raises:
        KeyError: If a PNG image has no entry in ``label_dictionary``.
    """
    # Accept plain strings as well; the "/" joins below need Path objects.
    png_image_directory = Path(png_image_directory)
    target_output_directory = Path(target_output_directory)

    # Check if the directory exists
    if not os.path.exists(target_output_directory):
        # Create the directory if it doesn't exist
        os.makedirs(target_output_directory)
        print(f"Directory '{target_output_directory}' created.")
    else:
        print(f"Directory '{target_output_directory}' already exists.")

    for image_name in sorted(os.listdir(png_image_directory)):
        # Skip hidden entries and anything that is not a PNG image
        if image_name.startswith(".") or not image_name.lower().endswith(".png"):
            continue

        label = label_dictionary[image_name[:-4]]
        geometries = [wkt.loads(wkt_string) for wkt_string in label['geometries']]

        if "pre_disaster" in image_name:
            # Pre-disaster mask: every polygon is a building (value 1)
            values = [1] * len(geometries)
        else:
            # Post-disaster mask: every polygon carries its damage code.
            # NOTE(review): codes above 255 (e.g. a 999 fallback) wrap around
            # in the uint8 conversion - confirm such labels do not occur.
            values = [int(code) for code in label['damage_code']]

        # Only the size is needed; the context manager closes the file again
        with Image.open(png_image_directory / image_name) as image:
            width, height = image.size

        mask = _rasterize_mask(list(zip(geometries, values)), height, width)
        Image.fromarray(mask).save(target_output_directory / image_name)


def _rasterize_mask(shapes, height, width):
    """Burn (geometry, value) pairs into a uint8 mask of shape (height, width)."""
    if not shapes:
        # rasterize() raises a ValueError when it gets no geometries; an
        # image without polygons is background only.
        return np.zeros((height, width), dtype=np.uint8)
    mask = rio.features.rasterize(shapes, out_shape=(height, width))
    return mask.astype(np.uint8)



In [11]:
# Create the target masks for the images in PNG_FOLDER and store them in TARGET_FOLDER
create_disaster_targets(PNG_FOLDER, label_dictionary, TARGET_FOLDER)

Directory '/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022/data/xview2-subset/targets' already exists.
