In [6]:
import os
import logging
import geopandas as gpd
from osgeo import gdal
from shapely.geometry import box, Point, MultiPoint
from tqdm import tqdm

## _parameters_

In [5]:
tif_path = '../data/raw/2023-02-23_Bakonyszucs_actual.tif'
points_shp_path = '../data/raw/Fa_pontok.shp'
poly_shp_path = '../data/raw/Lombkorona.shp'


dataset_folder = 'SEG_SPLIT_TEST_new'
tile_size=(1024, 1024)


# special split

In [33]:
# OLD split sol.
def split_SEG(tif_path, points_shp_path, poly_shp_path, output_folder, tile_size=(200, 200)):
    """
    Split .shp & .tif files using points from a point shapefile to center each split tile (200x200).
    The function cuts both TIF and polygon shapefile over the same areas centered around the points.
    """
    tifs_path_folder = os.path.join(output_folder, 'tifs')
    os.makedirs(tifs_path_folder, exist_ok=True)
    points_gdf = gpd.read_file(points_shp_path)
    tif_dataset = gdal.Open(tif_path)
    tif_transform = tif_dataset.GetGeoTransform()
    pixel_width = tif_transform[1]
    pixel_height = abs(tif_transform[5])

    out_dict = {}
    
    if poly_shp_path:
        shps_path_folder = os.path.join(output_folder, 'shps')
        os.makedirs(shps_path_folder, exist_ok=True)
        logging.info(f'⚙️ TIF and SHP splitting started.')
    else:
        logging.info(f'⚙️ TIF splitting started.')
        
    for idx, point in tqdm(points_gdf.iterrows(), total=points_gdf.shape[0], desc="Processing trees"):
        if isinstance(point.geometry, Point):
            process_seg_tile(idx, None, point.geometry.x, point.geometry.y, tif_dataset, tif_transform, poly_shp_path, tifs_path_folder, shps_path_folder, tile_size, pixel_width, pixel_height)
            out_dict[idx] = point.geometry.x, point.geometry.y
        elif isinstance(point.geometry, MultiPoint):
            for sub_idx, sub_point in enumerate(point.geometry.geoms):
                out_dict[idx] = sub_point.x, sub_point.y
                process_seg_tile(idx, sub_idx, sub_point.x, sub_point.y, tif_dataset, tif_transform, poly_shp_path, tifs_path_folder, shps_path_folder, tile_size, pixel_width, pixel_height)
        else:
            logging.warning(f'Unsupported geometry type {type(point.geometry)} at index {idx}')
            continue

    if poly_shp_path:
        logging.info(f'✅ TIF and SHP splitting ended.')
    else:
        logging.info(f'✅ TIF splitting ended.')

    return out_dict

def process_seg_tile(idx, sub_idx, center_x, center_y, tif_dataset, tif_transform, poly_shp_path, tifs_path_folder, shps_path_folder, tile_size, pixel_width, pixel_height):
    """
    Helper function to process a single tile around a point.
    """

    half_width = (tile_size[0] // 2) * pixel_width
    half_height = (tile_size[1] // 2) * pixel_height

    minx, miny = center_x - half_width, center_y - half_height
    maxx, maxy = center_x + half_width, center_y + half_height

    tile_bbox = box(minx, miny, maxx, maxy)

    offset_x = int((minx - tif_transform[0]) / pixel_width)
    offset_y = int((tif_transform[3] - maxy) / pixel_height)

    
    
    if poly_shp_path:
        tile_gdf = gpd.read_file(poly_shp_path, mask=tile_bbox)
        if not tile_gdf.empty:
            if sub_idx is not None and sub_idx != 0:
                tile_shp_path = os.path.join(shps_path_folder, f"tile_{idx}_{sub_idx}.shp")
            else:
                tile_shp_path = os.path.join(shps_path_folder, f"tile_{idx}.shp")
            tile_gdf.to_file(tile_shp_path)
            logging.info(f'Saved tile shapefile for point {idx} (sub-point {sub_idx}) at {tile_shp_path}')

            if sub_idx is not None and sub_idx != 0:
                tif_output_path = os.path.join(tifs_path_folder, f"tile_{idx}_{sub_idx}.tif")
            else:
                tif_output_path = os.path.join(tifs_path_folder, f"tile_{idx}.tif")
            
            gdal.Translate(
                tif_output_path,
                tif_dataset,
                srcWin=[offset_x, offset_y, tile_size[0], tile_size[1]]
            )
            logging.info(f'Saved tile TIF for point {idx} (sub-point {sub_idx})')
        #else:
            #logging.info(f'TIF tile is not saved')
    else:
        if sub_idx is not None and sub_idx != 0:
            tif_output_path = os.path.join(tifs_path_folder, f"tile_{idx}_{sub_idx}.tif")
        else:
            tif_output_path = os.path.join(tifs_path_folder, f"tile_{idx}.tif")
        
        gdal.Translate(
            tif_output_path,
            tif_dataset,
            srcWin=[offset_x, offset_y, tile_size[0], tile_size[1]]
        )
        logging.info(f'Saved tile TIF for point {idx} (sub-point {sub_idx})')
    


In [7]:
# NEW split sol.

import os
import logging
from shapely.geometry import Point, MultiPoint, box
import geopandas as gpd
from osgeo import gdal
from tqdm import tqdm

from utils.imageprocessing import split_SEG
from utils.datasetvalidation import set_valid_CRS

In [7]:
points = split_SEG(
    tif_path = tif_path,
    points_shp_path = points_shp_path,
    poly_shp_path = poly_shp_path,
    output_folder = os.path.join(dataset_folder,'all','original'),
    tile_size=tile_size
)

Processing trees:   0%|          | 0/2760 [00:00<?, ?it/s]

In [11]:
#CHECK tiles count

assert len([filename for filename in os.listdir('SEG_SPLIT_TEST/tifs') if filename.endswith('tif')]) == len([filename for filename in os.listdir('SEG_SPLIT_TEST/shps') if filename.endswith('shp')])

In [9]:
#DELETE dir.

os.system('sudo rm -r SEG_SPLIT_TEST') == 0

True

# set valid crs

In [8]:
set_valid_CRS(
    os.path.join(dataset_folder,'all','original','shps'),
    desired_crs_epsg=23700
)

  0%|          | 0/10370 [00:00<?, ?it/s]

([], 0.12376856803894043)

# create dataset function

## convert tifs to pngs

In [11]:
from utils.imageprocessing import convert_TIFtoPNG

working_folder_path = os.path.join(dataset_folder,'all','original')

tile_size = (1024,1024)

elapsed_time = convert_TIFtoPNG(
    os.path.join(working_folder_path,'tifs'),
    os.path.join(working_folder_path,'images'), 
    tile_size=tile_size,
    grayscale=False
)

converting TIFs to PNGs:   0%|          | 0/2074 [00:00<?, ?it/s]

KeyboardInterrupt: 

## convert shps to pngs

In [3]:
import geopandas as gpd
import rasterio
from rasterio.warp import transform_bounds
from PIL import Image, ImageDraw
import os

from utils.imageprocessing import convert_SHPtoPNG_SEG

In [15]:
#test
working_folder_path = 'SEG_SPLIT_TEST'
tile_num = 1900

convert_SHPtoPNG(
    tif_path = os.path.join(working_folder_path,'tifs',f'tile_{tile_num}.tif'),
    shp_path = os.path.join(working_folder_path,'shps',f'tile_{tile_num}.shp'),
    png_path = os.path.join(working_folder_path, f'tile_mask_{tile_num}.png'),
    tile_size=(1024, 1024),
    bg_color='black',
    fg_color='white'
)

NameError: name 'convert_SHPtoPNG' is not defined

## create dataset

In [1]:
import time
import cv2
import numpy as np

from utils.imageprocessing import createPNG_Dataset_SEG

In [8]:
if createPNG_Dataset_SEG(
    os.path.join(dataset_folder,'all','original'),
    os.path.join(dataset_folder,'all','formatted'),
    tile_size,
    grayscale=False
) == False:
    raise Exception('ERROR: check the logs')

Processing tif files:   0%|          | 0/2074 [00:00<?, ?it/s]

Processing shp files:   0%|          | 0/2074 [00:00<?, ?it/s]

# merge shps

In [14]:
import time
import os
import time
from tqdm import tqdm
import rasterio
import geopandas as gpd
from shapely.geometry import Polygon
from PIL import Image
from skimage import measure

from utils.imageprocessing import create_SHP_SEG


In [33]:
create_SHP_SEG(
    os.path.join(dataset_folder,'all','original','tifs'),
    os.path.join(dataset_folder,'all','formatted','masks'),
    os.path.join(dataset_folder,'result.shp'),
    23700   
)

Converting MASKs to SHP: 100%|██████████| 2074/2074 [01:18<00:00, 26.53it/s]


(4729, 90.15366291999817)