In [1]:
import logging
import os
import shutil

import geopandas as gpd
import pandas as pd
from osgeo import gdal
from shapely.geometry import box, Point, MultiPoint
from tqdm import tqdm

def split_by_points(tif_path, points_shp_path, poly_shp_path, output_folder, tile_size=(200, 200)):
    """
    Cut a raster, and optionally a polygon shapefile, into tiles centred on points.

    For every point read from ``points_shp_path`` one tile of ``tile_size``
    pixels is cut out of ``tif_path`` and written to ``<output_folder>/tifs``.
    When ``poly_shp_path`` is given, the features selected by the same tile
    bounding box are written to ``<output_folder>/shps``.

    Parameters
    ----------
    tif_path : str
        Raster opened with ``gdal.Open``.
    points_shp_path : str
        Point layer; rows holding ``Point`` or ``MultiPoint`` geometries are
        processed, any other geometry type is logged and skipped.
    poly_shp_path : str or None
        Polygon layer to cut alongside the raster; a falsy value disables the
        shapefile output entirely.
    output_folder : str
        Folder that receives the ``tifs`` (and ``shps``) sub-folders.
    tile_size : tuple of int
        Tile width and height in pixels.

    Returns
    -------
    dict
        Tile centre ``(x, y)`` per processed point. A ``Point`` row, and the
        first sub-point of a ``MultiPoint`` row, is stored under the row index
        (the tile named ``tile_<idx>``); further sub-points are stored under
        ``(idx, sub_idx)`` so that they do not overwrite each other.

    Raises
    ------
    RuntimeError
        If GDAL returns no dataset for ``tif_path``.
    """
    tifs_path_folder = os.path.join(output_folder, 'tifs')
    os.makedirs(tifs_path_folder, exist_ok=True)

    # Always bind the shapefile folder: it is passed to process_seg_tile even
    # when no polygon layer is requested.
    shps_path_folder = None
    if poly_shp_path:
        shps_path_folder = os.path.join(output_folder, 'shps')
        os.makedirs(shps_path_folder, exist_ok=True)

    points_gdf = gpd.read_file(points_shp_path)
    tif_dataset = gdal.Open(tif_path)
    if tif_dataset is None:
        # Without GDAL exceptions enabled a failed open yields None.
        raise RuntimeError(f'GDAL could not open raster {tif_path!r}')
    tif_transform = tif_dataset.GetGeoTransform()
    pixel_width = tif_transform[1]
    pixel_height = abs(tif_transform[5])

    scope = 'TIF and SHP' if poly_shp_path else 'TIF'
    logging.info(f'⚙️ {scope} splitting started.')

    out_dict = {}
    for idx, point in tqdm(points_gdf.iterrows(), total=points_gdf.shape[0], desc="Processing trees"):
        geometry = point.geometry
        if isinstance(geometry, Point):
            sub_points = [(None, geometry)]
        elif isinstance(geometry, MultiPoint):
            sub_points = list(enumerate(geometry.geoms))
        else:
            logging.warning(f'Unsupported geometry type {type(geometry)} at index {idx}')
            continue

        for sub_idx, sub_point in sub_points:
            process_seg_tile(idx, sub_idx, sub_point.x, sub_point.y, tif_dataset, tif_transform, poly_shp_path, tifs_path_folder, shps_path_folder, tile_size, pixel_width, pixel_height)
            # Mirror the tile naming: sub-point None/0 is "tile_<idx>", the
            # others are "tile_<idx>_<sub_idx>".
            key = (idx, sub_idx) if sub_idx else idx
            out_dict[key] = sub_point.x, sub_point.y

    logging.info(f'✅ {scope} splitting ended.')

    return out_dict

def process_seg_tile(idx, sub_idx, center_x, center_y, tif_dataset, tif_transform, poly_shp_path, tifs_path_folder, shps_path_folder, tile_size, pixel_width, pixel_height):
    """
    Cut a single tile centred on ``(center_x, center_y)``.

    The raster window is written to ``tifs_path_folder``. When
    ``poly_shp_path`` is truthy, the features read through the tile's bounding
    box are written to ``shps_path_folder``, unless that selection is empty.

    Parameters
    ----------
    idx : hashable
        Identifier of the point; used in the file names ``tile_<idx>``.
    sub_idx : int or None
        Position inside a multi-point geometry. ``None`` and ``0`` produce
        ``tile_<idx>``; other values produce ``tile_<idx>_<sub_idx>``.
    center_x, center_y : float
        Tile centre, in the same coordinates as ``tif_transform``.
    tif_dataset : gdal.Dataset
        Open source raster handed to ``gdal.Translate``.
    tif_transform : sequence of float
        Geotransform of ``tif_dataset``; elements 0 and 3 are used as the
        x / y origin when converting to pixel offsets.
    poly_shp_path : str or None
        Polygon layer to cut, or a falsy value to skip the shapefile output.
    tifs_path_folder, shps_path_folder : str
        Output folders; ``shps_path_folder`` is only used with a polygon layer.
    tile_size : tuple of int
        Tile width and height in pixels.
    pixel_width, pixel_height : float
        Positive pixel size in map units.

    Returns
    -------
    None
    """
    # Half of the tile extent, converted from pixels to map units.
    half_width = (tile_size[0] // 2) * pixel_width
    half_height = (tile_size[1] // 2) * pixel_height

    minx, miny = center_x - half_width, center_y - half_height
    maxx, maxy = center_x + half_width, center_y + half_height

    # Pixel offsets of the tile's upper-left corner relative to the origin.
    offset_x = int((minx - tif_transform[0]) / pixel_width)
    offset_y = int((tif_transform[3] - maxy) / pixel_height)

    # One stem shared by the raster and the shapefile of this tile.
    tile_name = f"tile_{idx}_{sub_idx}" if sub_idx else f"tile_{idx}"

    tif_output_path = os.path.join(tifs_path_folder, f"{tile_name}.tif")
    tile_dataset = gdal.Translate(
        tif_output_path,
        tif_dataset,
        srcWin=[offset_x, offset_y, tile_size[0], tile_size[1]]
    )
    if tile_dataset is None:
        # Without GDAL exceptions enabled a failed translate yields None, so
        # success must not be reported blindly.
        logging.warning(f'Could not save tile TIF for point {idx} (sub-point {sub_idx})')
    else:
        # Drop the reference right away so the output dataset is closed.
        tile_dataset = None
        logging.info(f'Saved tile TIF for point {idx} (sub-point {sub_idx})')

    if poly_shp_path:
        tile_bbox = box(minx, miny, maxx, maxy)
        tile_gdf = gpd.read_file(poly_shp_path, mask=tile_bbox)
        if not tile_gdf.empty:
            tile_shp_path = os.path.join(shps_path_folder, f"{tile_name}.shp")
            tile_gdf.to_file(tile_shp_path)
            logging.info(f'Saved tile shapefile for point {idx} (sub-point {sub_idx}) at {tile_shp_path}')


In [3]:
# Cut 200x200-pixel raster and polygon tiles around every point of Fa_pontok.shp.
# The returned {index: (x, y)} centre dictionary is displayed as the cell output.
split_by_points(
    tif_path="../data/raw/2023-02-23_Bakonyszucs_actual.tif",
    points_shp_path="../data/raw/Fa_pontok.shp",
    poly_shp_path="../data/raw/Lombkorona.shp",
    output_folder="SEG_SPLIT_TEST",
    tile_size=(200, 200),
)

Processing trees: 100%|█████████████████████| 2760/2760 [00:54<00:00, 50.28it/s]


{0: (549060.6906511548, 221137.2421987691),
 1: (549060.852490539, 221197.38171393395),
 2: (549061.6154476359, 221147.4611998851),
 3: (549063.3910568797, 221142.772481726),
 4: (549065.7156710717, 221139.32216288868),
 5: (549065.7839677746, 221213.0547022972),
 6: (549066.1538863671, 221203.87147324003),
 7: (549066.6162846071, 221143.4313992193),
 8: (549067.4552761846, 221151.34318106272),
 9: (549068.9444597486, 221126.13076904888),
 10: (549069.2010907722, 221224.49443476822),
 11: (549069.3305622797, 221263.53934219966),
 12: (549069.6681129952, 221204.6852941434),
 13: (549069.8655374057, 221135.40609184114),
 14: (549069.9825437987, 221202.52127037765),
 15: (549070.6114054054, 221143.89842144225),
 16: (549070.8703484201, 221147.56061550736),
 17: (549070.9293059587, 221129.42093250883),
 18: (549071.1970036533, 221181.17955999347),
 19: (549071.6286815351, 221205.2216761024),
 20: (549071.832136761, 221257.42643745968),
 21: (549073.2555690556, 221114.38497314925),
 22: (54

In [22]:
# Number of raster tiles and of shapefile tiles written by the split.
print(sum(1 for entry in os.listdir('SEG_SPLIT_TEST/tifs') if entry.endswith('tif')))
print(sum(1 for entry in os.listdir('SEG_SPLIT_TEST/shps') if entry.endswith('shp')))

1647
1647


In [2]:
# Remove the test output folder from Python instead of shelling out to
# `sudo rm -r`; the cell evaluates to True when the folder is gone afterwards
# (including when it never existed, so the cell can be re-run safely).
shutil.rmtree('SEG_SPLIT_TEST', ignore_errors=True)
not os.path.exists('SEG_SPLIT_TEST')

True

In [26]:
# Number of raster tiles whose name carries a sub-point suffix (tile_<idx>_<sub_idx>),
# i.e. whose stem contains at least two underscores.
print(sum(1 for entry in os.listdir('SEG_SPLIT_TEST/tifs') if entry.split('.')[0].count('_') >= 2))

0


In [35]:
# Leftover debugging probe: `sub_point` is only assigned inside the loop of
# split_by_points, so evaluating it at notebook scope raises NameError.
sub_point.x

NameError: name 'sub_point' is not defined

In [None]:
import geopandas as gpd
import rasterio
from shapely.affinity import translate
import os

def _lookup_tile_center(shapefile_id, id_coordinates_dict):
    """Return the centre stored for a shapefile stem, or None when there is none.

    The stem itself is tried first. For a stem shaped like ``tile_<idx>`` or
    ``tile_<idx>_<sub_idx>`` the part after ``tile_`` is then tried as a
    string, as the integer ``idx``, or as the ``(idx, sub_idx)`` tuple, because
    split_by_points keys its result by row index rather than by file name.
    """
    candidates = [shapefile_id]
    prefix, _, suffix = shapefile_id.partition('_')
    if prefix == 'tile' and suffix:
        candidates.append(suffix)
        try:
            numbers = tuple(int(part) for part in suffix.split('_'))
        except ValueError:
            numbers = ()
        if len(numbers) == 1:
            candidates.append(numbers[0])
        elif len(numbers) == 2:
            candidates.append(numbers)

    for key in candidates:
        if key in id_coordinates_dict:
            return id_coordinates_dict[key]
    return None


def merge_shapefiles_to_tif_position(shapefile_folder, tif_file, id_coordinates_dict, output_shapefile):
    """
    Shift every tile shapefile onto its centre point and merge them into one file.

    Each ``.shp`` in ``shapefile_folder`` whose ID has an entry in
    ``id_coordinates_dict`` is reprojected to the raster CRS if needed and
    translated so that the mean of its geometry centroids lands on the stored
    centre. The translated layers are concatenated and written to
    ``output_shapefile``.

    Parameters
    ----------
    shapefile_folder : str
        Folder scanned for ``.shp`` files (processed in sorted name order).
    tif_file : str
        Raster whose CRS is used for the merged layer.
    id_coordinates_dict : dict
        Centre ``(x, y)`` per tile. Keys may be the file stem, or the index
        encoded in a ``tile_<idx>`` / ``tile_<idx>_<sub_idx>`` stem.
    output_shapefile : str
        Path of the merged ESRI Shapefile.

    Returns
    -------
    geopandas.GeoDataFrame
        The merged, translated features in the raster CRS.

    Raises
    ------
    ValueError
        If no non-empty shapefile matches an entry of ``id_coordinates_dict``.
    """
    # Only the CRS of the raster is needed.
    with rasterio.open(tif_file) as tif:
        tif_crs = tif.crs

    translated_gdfs = []

    for shapefile_name in sorted(os.listdir(shapefile_folder)):
        if not shapefile_name.endswith('.shp'):
            continue

        shapefile_id = os.path.splitext(shapefile_name)[0]
        center = _lookup_tile_center(shapefile_id, id_coordinates_dict)
        if center is None:
            continue
        center_x, center_y = center

        gdf = gpd.read_file(os.path.join(shapefile_folder, shapefile_name))
        if gdf.empty:
            # No centroid to align; the mean below would be NaN.
            continue

        # Bring the layer into the raster CRS before measuring offsets.
        if gdf.crs != tif_crs:
            gdf = gdf.to_crs(tif_crs)

        # Offset that moves the mean centroid onto the stored tile centre.
        centroids = gdf.geometry.centroid
        dx = center_x - centroids.x.mean()
        dy = center_y - centroids.y.mean()

        gdf.geometry = gdf.geometry.translate(xoff=dx, yoff=dy)
        translated_gdfs.append(gdf)

    if not translated_gdfs:
        raise ValueError(
            f'Nothing to merge: no non-empty shapefile in {shapefile_folder!r} '
            'matched an entry of id_coordinates_dict'
        )

    merged_gdf = gpd.GeoDataFrame(pd.concat(translated_gdfs, ignore_index=True), crs=tif_crs)
    merged_gdf.to_file(output_shapefile, driver="ESRI Shapefile")

    return merged_gdf
