In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from shapely.wkt import loads
import json
import os
import pandas as pd
import rasterio as rio
from rasterio.features import rasterize
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely import Polygon
import numpy as np


from utils.preprocessing import extract_features, load_label_data, process_label_metadata, process_features, make_label_dictionary, geotiff_converter, create_disaster_targets

In [3]:
# HPC Terrabyte
# Adapt the user name to your needs.
USER = "di97ren"
# Keep the following unchanged.
ROOT = Path("/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022")
USER_PATH = ROOT / f"users/{USER}"
DATA_PATH = ROOT / "data"

DATASET_ROOT = DATA_PATH / "xview2"

# Split to preprocess; change this to one of: tier1, tier3, hold, test.
# The cells below read these folder constants, so they must be defined for the
# notebook to run from a fresh kernel.
DATA_FOLDER = DATASET_ROOT / "test"

IMAGE_FOLDER = DATA_FOLDER / "images/"

LABEL_FOLDER = DATA_FOLDER / "labels/"

TARGET_FOLDER = DATA_FOLDER / "targets/"

PNG_FOLDER = DATA_FOLDER / "png_images/"

# Show what is available under the dataset root.
for folder in os.listdir(DATASET_ROOT):
    print(folder)

xview2_geotiff.tgz
xView2
xView2_Experiments
tier3
hold
test
README.md
tier1


In [6]:
# File names of all label files of the selected split.
labels = os.listdir(LABEL_FOLDER)

# Full path (as a string) of each label file.
label_paths = [os.fspath(LABEL_FOLDER / file_name) for file_name in labels]

# Parse every label JSON with pandas, keeping the listing order.
label_data = []
for label_path in label_paths:
    with open(label_path, "r") as handle:
        label_data.append(pd.read_json(handle))

In [7]:
# Integer code of each building damage class; the classes are listed in
# ascending code order and numbering starts at 1.
damage_codes = {
    class_name: code
    for code, class_name in enumerate(
        ('no-damage', 'minor-damage', 'major-damage', 'destroyed', 'un-classified'),
        start=1,
    )
}

In [8]:

def extract_features(features):
    """
    Split a list of label features into three parallel lists.

    Returns a dict with the keys 'geometries' (each feature's 'wkt' entry),
    'class_name' (property 'feature_type', 'unknown' when absent) and
    'damage_class' (property 'subtype', 'no-damage' when absent).
    """
    geometries = []
    class_names = []
    damage_classes = []

    for entry in features:
        geometries.append(entry['wkt'])
        properties = entry['properties']
        class_names.append(properties.get('feature_type', 'unknown'))
        damage_classes.append(properties.get('subtype', 'no-damage'))

    return {
        'geometries': geometries,
        'class_name': class_names,
        'damage_class': damage_classes,
    }

def load_label_data(label_paths):
    """Parse each label JSON file with pandas and return the results in input order."""

    def _read_label(path):
        # Open the file ourselves so the handle is closed right after parsing.
        with open(path, "r") as handle:
            return pd.read_json(handle)

    return [_read_label(path) for path in label_paths]

def process_label_metadata(label):
    """
    Pull the image name, disaster and disaster type out of a label's metadata.

    The last four characters of 'img_name' (the file extension) are dropped.
    """
    info = label['metadata']
    image_stem = info['img_name'][:-4]  # strip the 4-character extension suffix
    return dict(
        img_name=image_stem,
        disaster=info['disaster'],
        disaster_type=info['disaster_type'],
    )

def process_features(label, damage_codes):
    """
    Build a DataFrame with one row per feature of a label.

    Columns: the lists produced by extract_features, the label's metadata
    (same value on every row) and 'damage_code', the integer looked up in
    ``damage_codes`` for each 'damage_class' (999 when the class is unknown).
    """
    per_feature = extract_features(label['features']['xy'])
    frame = pd.DataFrame(per_feature)

    # Repeat the image-level metadata on every feature row.
    for column, value in process_label_metadata(label).items():
        frame[column] = value

    # Translate class names to codes; names missing from the mapping become 999.
    frame['damage_code'] = frame['damage_class'].apply(
        lambda class_name: damage_codes.get(class_name, 999)
    )
    frame['damage_code'] = frame['damage_code'].astype(int)

    return frame

def make_label_dictionary(input_directory, damage_codes):
    """
    Map each image name to the feature DataFrame built from its label file.

    Every entry of ``input_directory`` is loaded as a label; the key is the
    label's metadata 'img_name' without its last four characters.
    """
    paths = []
    for file_name in os.listdir(input_directory):
        paths.append(os.path.join(input_directory, file_name))

    return {
        loaded['metadata']['img_name'][:-4]: process_features(loaded, damage_codes)
        for loaded in load_label_data(paths)
    }


def geotiff_converter(image_directoy: Path, output_directory: Path):
    """
    Convert the raster images of a directory into 8-bit RGB PNG files.

    Bands 1, 2 and 3 of every regular file in ``image_directoy`` are read with
    rasterio, stacked along a new last axis and saved as ``<file stem>.png``
    in ``output_directory``. Arrays that are not already ``uint8`` are min-max
    scaled to 0-255 first; a constant-valued array becomes all zeros instead
    of dividing by zero.

    Parameters
    ----------
    image_directoy : Path or str
        Directory containing the input GeoTIFF images.
    output_directory : Path or str
        Directory that receives the PNG files; created when it does not exist.

    Returns
    -------
    None
    """
    # Accept plain strings as well as Path objects.
    source_dir = Path(image_directoy)
    png_dir = Path(output_directory)

    entries = os.listdir(source_dir)  # names of everything in the input directory

    # Create the output directory on first use.
    if not os.path.exists(png_dir):
        os.makedirs(png_dir)
        print(f"Directory '{output_directory}' created.")
    else:
        print(f"Directory '{output_directory}' already exists.")

    for entry in entries:
        source_path = source_dir / entry
        if not source_path.is_file():
            # Sub-directories (e.g. Jupyter's .ipynb_checkpoints) are not images.
            continue

        # Use the stem so extensions of any length (.tif, .tiff) are handled.
        png_name = Path(entry).stem + ".png"

        with rio.open(source_path) as src:
            red, green, blue = src.read(1), src.read(2), src.read(3)

        # Stack the three bands into one image array with the channel axis last.
        img = np.stack([red, green, blue], axis=-1)

        # Rescale non-uint8 data to the 0-255 range expected for an 8-bit PNG.
        if img.dtype != np.uint8:
            lowest, highest = img.min(), img.max()
            if highest == lowest:
                # Constant image: the min-max formula would divide by zero.
                img = np.zeros(img.shape, dtype=np.uint8)
            else:
                img = ((img - lowest) / (highest - lowest) * 255).astype(np.uint8)

        Image.fromarray(img).save(png_dir / png_name)

def _rasterize_label_mask(shapes, height, width):
    """Burn (geometry, value) pairs into a (height, width) uint8 mask."""
    if not shapes:
        # An image without any labelled polygon gets an all-background mask.
        # Depending on the rasterio version, rasterize() may raise when it is
        # given no shapes, so this case is handled here.
        return np.zeros((height, width), dtype=np.uint8)
    mask = rio.features.rasterize(shapes, out_shape=(height, width))
    # NOTE: values above 255 (e.g. a 999 fallback code) wrap around in uint8.
    return mask.astype(np.uint8)


def create_disaster_targets(png_image_directory: Path,
                            label_dictionary: dict,
                            target_output_directory: Path):
    """
    Write one target mask image per PNG image.

    For every file in ``png_image_directory`` the label DataFrame stored under
    the file name without its last four characters is looked up in
    ``label_dictionary``, its WKT polygons are rasterized to the image size and
    the mask is saved under the same file name in ``target_output_directory``.

    * File names containing "pre_disaster": every polygon is burned with 1.
    * All other files: every polygon is burned with its 'damage_code'.

    Pixels not covered by any polygon are 0.

    Parameters
    ----------
    png_image_directory : Path or str
        Directory containing the PNG images.
    label_dictionary : dict
        Maps image names (without extension) to DataFrames with the columns
        'geometries' (WKT strings) and 'damage_code'.
    target_output_directory : Path or str
        Directory that receives the masks; created when it does not exist.

    Raises
    ------
    KeyError
        If a file in ``png_image_directory`` has no entry in ``label_dictionary``.
    """
    # Accept plain strings as well as Path objects.
    png_dir = Path(png_image_directory)
    target_dir = Path(target_output_directory)

    # Create the target directory on first use.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
        print(f"Directory '{target_output_directory}' created.")
    else:
        print(f"Directory '{target_output_directory}' already exists.")

    for image_name in os.listdir(png_dir):
        label_df = label_dictionary[image_name[:-4]]
        geometries = [wkt.loads(wkt_string) for wkt_string in label_df['geometries']]

        if "pre_disaster" in image_name:
            # Pre-disaster target: building footprint mask (every polygon = 1).
            shapes = [(geom, 1) for geom in geometries]
        else:
            # Post-disaster target: each polygon carries its damage code.
            shapes = list(zip(geometries, label_df['damage_code']))

        # Only the image size is needed; close the file handle right away.
        with Image.open(png_dir / image_name) as image:
            width, height = image.size

        mask = _rasterize_label_mask(shapes, height, width)
        Image.fromarray(mask).save(target_dir / image_name)



In [11]:
# Build {image name: per-feature DataFrame} from every label file in LABEL_FOLDER.
# Requires LABEL_FOLDER and damage_codes to be defined by earlier cells.
label_dictionary = make_label_dictionary(LABEL_FOLDER, damage_codes)

In [9]:
# Convert the images in IMAGE_FOLDER to PNG files written to PNG_FOLDER.
geotiff_converter(IMAGE_FOLDER, PNG_FOLDER)

Directory '/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022/data/xview2/test/png_images' already exists.


In [12]:
# Rasterize the label polygons into one mask per PNG in PNG_FOLDER and save
# the masks to TARGET_FOLDER.
create_disaster_targets(PNG_FOLDER, label_dictionary, TARGET_FOLDER)

Directory '/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022/data/xview2/test/targets' already exists.


In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from shapely.wkt import loads
import json
import os
import pandas as pd
import rasterio as rio
from rasterio.features import rasterize
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely import Polygon
import numpy as np


from utils.helper_functions import get_data_folders
from utils.preprocessing import extract_features, load_label_data, process_label_metadata, process_features, make_label_dictionary, geotiff_converter, create_disaster_targets


# Project storage root and the data directory below it.
ROOT = Path("/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022")
DATA_PATH = ROOT.joinpath("data")


##########################################################
# Define your username here:
USER = "di97ren"
# Personal working directory of that user.
USER_PATH = ROOT.joinpath("users", USER)


def get_data_folder_1(folder_name: str,
    main_dataset: bool):  # possible names: ["test", "tier1", "tier3", "hold"]
    """
    Return the directory layout of one xview2 split.

    Parameters
    ----------
    folder_name : str
        Name of the split directory below the dataset root, e.g. "test",
        "tier1", "tier3" or "hold".
    main_dataset : bool
        Selects between the main dataset and the xview2 subset.
        NOTE(review): the original code used the same "xview2" root for both
        settings, so the flag currently has no effect; point the subset case
        at its own root once that location is known.

    Returns
    -------
    tuple of Path
        ``(DATASET_ROOT, DATA_FOLDER, IMAGE_FOLDER, LABEL_FOLDER,
        TARGET_FOLDER, PNG_FOLDER)``.
    """
    root = Path("/dss/dsstbyfs02/pn49ci/pn49ci-dss-0022")
    data_path = root / "data"

    # Both the main dataset and the subset resolve to the same root (see NOTE).
    dataset_root = data_path / "xview2"

    # Use the requested split instead of a hard-coded "test" folder.
    data_folder = dataset_root / folder_name
    image_folder = data_folder / "images/"
    label_folder = data_folder / "labels/"
    target_folder = data_folder / "targets/"
    png_folder = data_folder / "png_images/"

    return dataset_root, data_folder, image_folder, label_folder, target_folder, png_folder


# Dataset splits to preprocess, in this order.
folder_to_prepocess = ["test", "tier1", "tier3", "hold"]

# define building damage codes
damage_codes = {
    'no-damage' : 1,
    'minor-damage' : 2,
    'major-damage' : 3,
    'destroyed' : 4,
    'un-classified' : 5
}

for folder in folder_to_prepocess:

    # Resolve the directories belonging to this split.
    DATASET_ROOT, DATA_FOLDER, IMAGE_FOLDER, LABEL_FOLDER, TARGET_FOLDER, PNG_FOLDER = get_data_folder_1(folder, main_dataset = True)

    # make_label_dictionary reads every label file itself, so the labels are
    # not loaded separately beforehand (that result was never used and doubled
    # the file I/O).
    label_dictionary = make_label_dictionary(LABEL_FOLDER, damage_codes)

    # Convert the GeoTIFFs to PNG, then rasterize the labels into target masks.
    geotiff_converter(IMAGE_FOLDER, PNG_FOLDER)

    create_disaster_targets(PNG_FOLDER, label_dictionary, TARGET_FOLDER)


