In [78]:
import json
from pathlib import Path
import shutil

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
import rioxarray as rxr
from shapely.geometry import Polygon, box
from tqdm import tqdm

from utils.helpers import get_rgb_channels

# Working directories for the hand-label processing pipeline.
# Everything lives below `root_dir`.
root_dir = Path('/datadrive/hand_labels_processing')

json_dir = root_dir / 'json_export'                              # exported annotation JSON files
img_input_dir = root_dir / 'import_candidates' / 'input_128px'   # source of candidate images
img_dir = root_dir / 'images_import'                             # images referenced by the annotations
recut_dir = root_dir / 'sharp_images'                            # re-cut crops are written here
shp_dir = root_dir / 'shp_labels'                                # shapefile outputs

In [45]:

def print_structure(data, indent=0):
    """Print the key hierarchy of a nested, JSON-like dict.

    Each key is printed on its own line, indented by two spaces per nesting
    level. Dict values are descended into recursively. For a list value only
    its first element is inspected, and only if that element is a dict.

    Args:
        data: Mapping whose keys should be printed.
        indent: Current nesting depth (number of two-space indents).
    """
    for key, value in data.items():
        print('  ' * indent + str(key))
        if isinstance(value, dict):
            print_structure(value, indent + 1)
        # Test for emptiness *before* indexing: an empty list used to raise
        # IndexError because value[0] was evaluated ahead of the length check.
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            print_structure(value[0], indent + 1)

# for file in json_dir.iterdir():
#     with open(file) as f:
#         data = json.load(f)

#     print_structure(data[0])
#     break


In [53]:
def get_image_names(json_data):
    """Collect the image file name of every task in an annotation export.

    The ``file_upload`` value of each task is cut at its first ``'-'`` and the
    part after it is kept (presumably the part before it is an upload-id
    prefix -- TODO confirm). A value without any ``'-'`` is kept unchanged.

    Args:
        json_data: Iterable of task dicts, each with a ``"file_upload"`` key.

    Returns:
        List of image names, in task order.
    """
    # With maxsplit=1 there are at most two pieces, so [-1] is the remainder
    # after the first dash, or the whole string when there is no dash.
    return [task["file_upload"].split('-', 1)[-1] for task in json_data]


def move_files(input_images, img_input_dir, img_dir):
    """Copy each image and its ``.TIF`` counterpart into ``img_dir``.

    Despite the name, files are copied (``shutil.copy2``), not moved. For
    every name in ``input_images`` two files are looked up:

    * ``img_input_dir / "png" / <name>``
    * ``img_input_dir / "images" / <name with suffix .TIF>``

    Each file that exists is copied to the same sub-directory below
    ``img_dir`` (created on demand). Missing files are reported on stdout
    and skipped.

    Args:
        input_images: Iterable of image file names.
        img_input_dir: Directory containing the ``png`` and ``images`` folders.
        img_dir: Destination root directory.
    """
    for img_name in input_images:
        tif_name = Path(img_name).with_suffix('.TIF')
        for sub_dir, name in [("png", img_name), ("images", tif_name)]:
            src_file = img_input_dir / sub_dir / name
            if src_file.exists():
                dst_file = img_dir / sub_dir / name
                dst_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(src_file, dst_file)
            else:
                print(f'File {src_file} not found.')


def extract_name_from_task(task):
    """Return the image name stored in a task's ``file_upload`` field.

    Everything up to and including the first ``'-'`` is dropped. If the value
    contains no ``'-'`` it is returned unchanged.
    """
    head, dash, remainder = task["file_upload"].partition('-')
    # `dash` is empty when no '-' was found; then `head` is the whole string.
    return remainder if dash else head


def get_gdf_from_result(result, img, crs):
    """Convert one polygon annotation result into a single-row GeoDataFrame.

    The polygon vertices in ``result["value"]["points"]`` are treated as
    percentages of the annotated image size. They are scaled to pixel
    coordinates and mapped to world coordinates with the raster's affine
    transform.

    Args:
        result: Annotation result dict with ``"original_width"``, optionally
            ``"original_height"``, and a ``"value"`` dict holding ``"points"``
            and ``"polygonlabels"``.
        img: Raster opened with rioxarray; its ``rio`` transform, bounds and
            CRS and its ``name`` are used.
        crs: CRS the returned geometry is reprojected to.

    Returns:
        GeoDataFrame with the columns ``image_name``, ``image_bounds``,
        ``geometry`` and ``label``; ``geometry`` is the active geometry column.
    """
    width = int(result["original_width"])
    # Scale x by the width and y by the height so non-square tiles are handled
    # too. Results without a height fall back to the width, which reproduces
    # the previous square-tile behaviour exactly.
    height = int(result.get("original_height", width))
    percent_to_pixel = np.array([width, height]) / 100

    pixel_to_world = img.rio.transform()
    points = [
        pixel_to_world * (percent_to_pixel * np.array(point))
        for point in result["value"]["points"]
    ]

    polygon = Polygon(points)
    # Only the first label of the result is kept.
    label = result["value"]["polygonlabels"][0]
    gdf = gpd.GeoDataFrame(
        {
            'image_name': [img.name],
            'image_bounds': [box(*img.rio.bounds())],
            'geometry': [polygon],
            'label': [label],
        },
        geometry='geometry',
        crs=img.rio.crs,
    )
    # NOTE(review): image_bounds is built from img's bounds as-is; presumably
    # the caller has already brought img into `crs` -- confirm.
    gdf = gdf.to_crs(crs)
    return gdf


  
def convert_task_to_gdf(task, crs, img_dir):
    """Build a GeoDataFrame holding every labelled polygon of one task.

    The task's raster is read from ``img_dir / 'images'`` (same name as the
    task image, suffix ``.TIF``) and reprojected to ``crs`` when its CRS
    differs. Every non-empty result of every annotation is converted with
    ``get_gdf_from_result``.

    Args:
        task: Task dict with ``"file_upload"`` and ``"annotations"`` keys.
        crs: Target CRS for the raster and the resulting geometries.
        img_dir: Directory that contains the ``images`` sub-directory.

    Returns:
        The concatenated GeoDataFrame, or ``None`` (after printing a message)
        when the task has no results.
    """
    img_name = extract_name_from_task(task)

    img_path = (img_dir / 'images' / img_name).with_suffix('.TIF')
    img = rxr.open_rasterio(img_path, default_name=img_path.name)
    if img.rio.crs != crs:
        img = img.rio.reproject(crs)
        print(f"Reprojecting {img_name}")

    gdf_list = []
    for annotation in task["annotations"]:
        for result in annotation["result"]:
            if result:  # skip empty result entries
                gdf_list.append(get_gdf_from_result(result, img, crs))

    if not gdf_list:
        print(f"No labels could be found for task {img_name}.")
        return None
    return pd.concat(gdf_list, ignore_index=True)
    


In [54]:
# img_name = '23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r3_c2.png'
# r4c2_task = [task for task in data if extract_name_from_task(task) == img_name][0]
# tif_img_dir = Path('/datadrive/hand_labels/images_import/images')
# img_path = (tif_img_dir / img_name).with_suffix('.TIF')
# img = rxr.open_rasterio(img_path, default_name=img_path.name)
# if img.rio.crs != crs:
#     img = img.rio.reproject(crs)
#     print(img)
# print(img.rio.bounds())

In [79]:
# Target CRS for all labels and image footprints. It is defined before the
# loop because it is also needed after it (previously it only existed once the
# loop body had run at least once).
crs = "EPSG:32616"

# Only JSON exports are processed, in sorted order, so stray files or
# directories in json_dir cannot break json.load and runs are reproducible.
json_files = sorted(p for p in json_dir.iterdir() if p.suffix.lower() == '.json')

file_gdfs = []
for file in tqdm(json_files):
    print(f"Processing {file.name}")
    with open(file) as f:
        data = json.load(f)

    input_images = get_image_names(data)
    print("Fetching images")
    # move_files(input_images, img_input_dir=img_input_dir, img_dir=img_dir)

    print("Converting to geoDataframe")
    # convert_task_to_gdf returns None for tasks without labels; drop those so
    # a file consisting solely of unlabeled tasks does not make pd.concat raise.
    gdf_list = [convert_task_to_gdf(task, crs, img_dir) for task in data]
    gdf_list = [task_gdf for task_gdf in gdf_list if task_gdf is not None]
    if gdf_list:
        file_gdfs.append(pd.concat(gdf_list, ignore_index=True))
    else:
        print(f"No labels found in {file.name}.")
gdf = pd.concat(file_gdfs, ignore_index=True)
# gdf.drop(columns=["image_bounds"]).to_file(shp_dir / "labeled_buildings.shp") # labels geoDF
# One row per image: group by image name and footprint; the remaining columns
# hold per-image counts.
images_gdf = gdf.set_geometry('image_bounds').groupby(['image_name','image_bounds']).count().reset_index().set_geometry('image_bounds').set_crs(crs)




0it [00:00, ?it/s]

Processing dora-at-2024-01-15-16-42-2d24b852.json
Fetching images
Converting to geoDataframe
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c0.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c1.png
No labels could be found for task 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c1.png.
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c2.png
No labels could be found for task 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c2.png.
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r0_c3.png


1it [00:03,  3.73s/it]

Processing frederike-at-2024-02-02-15-56-8f0455d4.json
Fetching images
Converting to geoDataframe


2it [00:10,  5.30s/it]

Processing mert-at-2024-01-31-23-06-6e6eec71.json
Fetching images
Converting to geoDataframe
No labels could be found for task 23JUL28162837-M3DS_R3C1-016043586010_01_P001_r4_c2_r2_c1.png.
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r1_c0.png
No labels could be found for task 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r1_c0.png.
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r1_c1.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r1_c2.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r1_c3.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r2_c2.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r2_c3.png


3it [00:20,  7.66s/it]

Processing michael-at-2024-01-29-15-31-50b48d2e.json
Fetching images
Converting to geoDataframe
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r2_c0.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r2_c1.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r2_c2.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r2_c3.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c1.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c2.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c3.png


4it [00:30,  8.71s/it]

Processing nathan-at-2024-02-06-17-54-a0e43c3c.json
Fetching images
Converting to geoDataframe
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r3_c1.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r3_c2.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r3_c3.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c0.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c1.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c2.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c3.png
Reprojecting 23JUN18160657-M3DS_R4C2-016144386010_01_P001_r5_c1_r3_c0.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r3_c1.png
Reprojecting 23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r3_c3.png


5it [00:46,  9.28s/it]


In [77]:
# Re-cut every labelled image footprint from the corresponding merged
# satellite scene and record the name of the new crop in images_gdf.

# Scene name = first four '_'-separated parts of the image name.
sat_images = images_gdf.image_name.apply(lambda x: '_'.join(x.split('_')[:4])).unique()
sat_img_dir = Path('/datadrive/merged_PS/images')
images_gdf['new_image_name'] = None

# The output directory must exist before rasters can be written into it.
recut_dir.mkdir(parents=True, exist_ok=True)

for file_name in sat_images:
    file_path = (sat_img_dir / file_name.replace('M3DS', 'PS3DS')).with_suffix('.TIF')
    if file_path.exists():
        print (f"Found {file_path}")

        # rioxarray is imported as `rxr` in this notebook; the bare name
        # `rioxarray` is undefined on a fresh kernel.
        raster = rxr.open_rasterio(file_path)
        if raster.rio.crs != crs:
            raster = raster.rio.reproject(crs)
        matching_rows = images_gdf[images_gdf.image_name.str.startswith(file_name)]
        for index, row in matching_rows.iterrows():
            # Get the bounds for cropping
            bounds = row.image_bounds.bounds
            minx, miny, maxx, maxy = bounds
            print("Bounds:", bounds)

            # Crop the image using the bounds
            cropped_image = raster.rio.clip_box(minx=minx, miny=miny, maxx=maxx, maxy=maxy)

            # Define the new image name and path
            new_image_name = f"{row.image_name}_RECUT.TIF"
            new_image_path = recut_dir / new_image_name

            # Save the cropped image
            cropped_image.rio.to_raster(new_image_path)

            # Update the new_image_name column in the GeoDataFrame
            images_gdf.at[index, 'new_image_name'] = new_image_name

    else:
        print(f"{file_path} not found")


Found /datadrive/merged_PS/images/23JUL28162830-PS3DS_R3C3-016043585010_01_P001.TIF
Found /datadrive/merged_PS/images/23JUL28162837-PS3DS_R3C1-016043586010_01_P001.TIF
Found /datadrive/merged_PS/images/23JUL28162837-PS3DS_R4C1-016043586010_01_P001.TIF
Found /datadrive/merged_PS/images/23JUN18160657-PS3DS_R4C2-016144386010_01_P001.TIF
Found /datadrive/merged_PS/images/23JUN18160713-PS3DS_R2C1-016144385010_01_P001.TIF


In [56]:
# Collect the footprint of every tile whose file name starts with the prefix
# of a labelled image.

# Prefix = image name up to (not including) its last '-', with 'M3DS'
# replaced by 'PS3DS'.
sat_tiles = images_gdf.image_name.apply(
    lambda x: '-'.join(x.split('-')[:-1]).replace('M3DS', 'PS3DS')
).unique()
img_tiles_dir = Path('/datadrive/merged_PS/tiles/images')
regex_match_tiles = [
    tile_path
    for prefix in sat_tiles
    for tile_path in img_tiles_dir.glob(f'{prefix}*')
]

boxes_gdfs = []
for img_path in tqdm(regex_match_tiles):
    img = rxr.open_rasterio(img_path, default_name=img_path.name)
    # Bring the tile into the CRS of the image footprints before taking bounds.
    if img.rio.crs != images_gdf.crs:
        img = img.rio.reproject(images_gdf.crs)
    tile_record = {'geometry': box(*img.rio.bounds()), 'name': img.name}
    boxes_gdfs.append(tile_record)

tiles_gdf = gpd.GeoDataFrame(boxes_gdfs)
tiles_gdf.crs = images_gdf.crs
# joined_gdf = gpd.sjoin(tiles_gdf, images_gdf, how='inner', op='intersects')
# joined_gdf = gpd.sjoin(images_gdf, tiles_gdf, how='left', op='contains')
# joined_gdf[joined_gdf.name.isna()]
# joined_gdf.shape

  0%|          | 0/4640 [00:00<?, ?it/s]

100%|██████████| 4640/4640 [02:37<00:00, 29.54it/s] 


In [58]:
# Match every labelled image footprint to the tile with the nearest centroid.
# Both frames get a 'centroid' column that becomes their active geometry, so
# the nearest join measures centroid-to-centroid distance.
images_gdf2 = (
    images_gdf
    .assign(centroid=images_gdf.geometry.centroid)
    .set_geometry('centroid')
)
tiles_gdf = (
    tiles_gdf
    .assign(centroid=tiles_gdf.geometry.centroid)
    .set_geometry('centroid')
)
joined_gdf = gpd.sjoin_nearest(
    images_gdf2,
    tiles_gdf,
    how='left',
    distance_col='distance',
)



In [65]:
# Rows whose nearest tile centroid is more than one CRS unit away.
joined_gdf.loc[joined_gdf['distance'] > 1]

Unnamed: 0,image_name,image_bounds,geometry_left,label,centroid,index_right,geometry_right,name,distance
53,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c0.TIF,"POLYGON ((158215.992 1105703.590, 158215.992 1105866.176, 158053.406 1105866.176, 158053.406 1105703.590, 158215.992 1105703.590))",109,109,POINT (158134.699 1105784.883),4539,"POLYGON ((158229.938 1105781.092, 158229.938 1105946.530, 158064.500 1105946.530, 158064.500 1105781.092, 158229.938 1105781.092))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r27_c28.TIF,79.914281
54,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c1.TIF,"POLYGON ((158378.578 1105703.590, 158378.578 1105866.176, 158215.992 1105866.176, 158215.992 1105703.590, 158378.578 1105703.590))",68,68,POINT (158297.285 1105784.883),4571,"POLYGON ((158392.491 1105778.136, 158392.491 1105943.574, 158227.053 1105943.574, 158227.053 1105778.136, 158392.491 1105778.136))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r27_c29.TIF,76.991043
55,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c2.TIF,"POLYGON ((158541.164 1105703.590, 158541.164 1105866.176, 158378.578 1105866.176, 158378.578 1105703.590, 158541.164 1105703.590))",56,56,POINT (158459.871 1105784.883),4612,"POLYGON ((158555.044 1105775.180, 158555.044 1105940.618, 158389.607 1105940.618, 158389.607 1105775.180, 158555.044 1105775.180))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r27_c30.TIF,74.070348
56,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r0_c3.TIF,"POLYGON ((158703.750 1105703.590, 158703.750 1105866.176, 158541.164 1105866.176, 158541.164 1105703.590, 158703.750 1105703.590))",84,84,POINT (158622.457 1105784.883),4579,"POLYGON ((158717.597 1105772.225, 158717.597 1105937.662, 158552.159 1105937.662, 158552.159 1105772.225, 158717.597 1105772.225))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r27_c31.TIF,71.152509
57,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c1.TIF,"POLYGON ((158378.578 1105541.005, 158378.578 1105703.590, 158215.992 1105703.590, 158215.992 1105541.005, 158378.578 1105541.005))",63,63,POINT (158297.285 1105622.297),4572,"POLYGON ((158389.536 1105615.582, 158389.536 1105781.020, 158224.098 1105781.020, 158224.098 1105615.582, 158389.536 1105615.582))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r28_c29.TIF,76.59941
58,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c2.TIF,"POLYGON ((158541.164 1105541.005, 158541.164 1105703.590, 158378.578 1105703.590, 158378.578 1105541.005, 158541.164 1105541.005))",5,5,POINT (158459.871 1105622.297),4613,"POLYGON ((158552.089 1105612.627, 158552.089 1105778.065, 158386.651 1105778.065, 158386.651 1105612.627, 158552.089 1105612.627))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r28_c30.TIF,73.663633
59,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r1_c3.TIF,"POLYGON ((158703.750 1105541.005, 158703.750 1105703.590, 158541.164 1105703.590, 158541.164 1105541.005, 158703.750 1105541.005))",100,100,POINT (158622.457 1105622.297),4580,"POLYGON ((158714.641 1105609.672, 158714.641 1105775.109, 158549.204 1105775.109, 158549.204 1105609.672, 158714.641 1105609.672))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r28_c31.TIF,70.729463
60,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r2_c2.TIF,"POLYGON ((158541.164 1105378.419, 158541.164 1105541.005, 158378.578 1105541.005, 158378.578 1105378.419, 158541.164 1105378.419))",185,185,POINT (158459.871 1105459.712),4614,"POLYGON ((158549.134 1105450.074, 158549.134 1105615.512, 158383.696 1105615.512, 158383.696 1105450.074, 158549.134 1105450.074))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r29_c30.TIF,73.37384
61,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r2_c3.TIF,"POLYGON ((158703.750 1105378.419, 158703.750 1105541.005, 158541.164 1105541.005, 158541.164 1105378.419, 158703.750 1105378.419))",40,40,POINT (158622.457 1105459.712),4581,"POLYGON ((158711.686 1105447.119, 158711.686 1105612.557, 158546.249 1105612.557, 158546.249 1105447.119, 158711.686 1105447.119))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r29_c31.TIF,70.428044
62,23JUN18160713-M3DS_R2C1-016144385010_01_P001_r7_c7_r3_c1.TIF,"POLYGON ((158378.578 1105215.833, 158378.578 1105378.419, 158215.992 1105378.419, 158215.992 1105215.833, 158378.578 1105215.833))",165,165,POINT (158297.285 1105297.126),4574,"POLYGON ((158383.626 1105290.476, 158383.626 1105455.914, 158218.188 1105455.914, 158218.188 1105290.476, 158383.626 1105290.476))",23JUN18160713-PS3DS_R2C1-016144385010_01_P001_r30_c29.TIF,76.155006


In [64]:
# Export the image footprints that have a tile centroid within one CRS unit.
# 'centroid' and 'geometry_right' are dropped before writing, leaving
# 'image_bounds' as the exported geometry.
matched_tiles = (
    joined_gdf[joined_gdf['distance'] < 1]
    .set_geometry('image_bounds')
    .drop(columns=['centroid', 'geometry_right'])
)
# Derive the EPSG code in the file name from the data instead of hard-coding
# it: the previous name said 32617 while the pipeline CRS is EPSG:32616.
matched_tiles.to_file(shp_dir / f"tiles_epsg_{images_gdf.crs.to_epsg()}.shp")

  joined_gdf[joined_gdf['distance'] < 1].set_geometry('image_bounds').drop(columns=['centroid', 'geometry_right']).to_file(shp_dir / "tiles_epsg_32617.shp")


In [None]:
# Assemble the model input: copy the matched tiles and write their labels.
model_dir = Path("/datadrive/hand_labels_model")

# Create each output directory on its own. Previously the sub-directories were
# only created when model_dir itself was missing, so an existing model_dir
# without them made the copy below fail.
for sub_dir in ("images", "labels"):
    (model_dir / sub_dir).mkdir(parents=True, exist_ok=True)

# NOTE(review): the shapefile export above accepts distance < 1, while this
# cell requires an exact 0.0 -- confirm that exact float equality is intended.
exact_matches = joined_gdf[joined_gdf['distance'] == 0]

for file_name in exact_matches.name.unique():
    source_path = img_tiles_dir / file_name
    dest_path = model_dir / "images" / file_name
    shutil.copy2(source_path, dest_path)

# Keep only the labels of images that have an exactly matching tile.
gdf.drop(columns=["image_bounds"])[gdf.image_name.isin(exact_matches.image_name)].to_file(model_dir / "labels/hand_labels.shp")


Bad pipe message: %s [b'\x91\x98\t\xd2\x11U\x91\x1cR\x11\xda\xa2\xef\x00\xbeLR\xc8 \xeamm\xc0\xe7\x8cWw\xba\t]\xee\xbe\xf9\x8b\x8a\xeaGQ\xdaP\xa8]+S\xb2\xe0\x8b\xd5@\xb0\xec\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01\x00+\x00\x03\x02\x03\x04\x00-\x00\x02\x01']
Bad pipe message: %s [b'']
Bad pipe message: %s [b'\xfbXa\xd5\x0c\x95 ;\xea;\x9e.Z\xa1\xf6D\xd6\xb3\x00\x00>\xc0\x14\xc0\n\x009\x008\x007\x006\xc0\x0f\xc0\x05\x005\xc0\x13\xc0\t\x003\x002\x001\x000\xc0\x0e\xc0\x04\x00/\x00\x9a\x00\x99\x00\x98\x00\x97\x00\x96\x00\x07\xc0\x11\xc0\x07\xc0\x0c\xc0\x02\x00\x05\x00\x04\x00\xff\x02']
Bad pipe message: %s [b'']
Bad pipe message: %s [b'.w\xaey\xb9k\xae|\xa21.;-\x00\xd9bk0