In [1]:
import rasterio
from rasterio.features import geometry_mask
import geopandas as gpd
from shapely.geometry import box, Polygon
import shapely.ops as ops
import numpy as np
import cv2
import glob
import os 
from tqdm import tqdm

In [2]:
def _iter_polygons(geom):
    """Yield every non-empty Polygon contained in ``geom``.

    Multi-part geometries and geometry collections are walked recursively.
    ``None``, empty geometries and non-polygonal parts (points, lines) are
    skipped instead of raising.
    """
    if geom is None or geom.is_empty:
        return
    if isinstance(geom, Polygon):
        yield geom
    elif hasattr(geom, "geoms"):
        for part in geom.geoms:
            yield from _iter_polygons(part)


def create_mask(input_geojson_path, tif_path, output_mask_path, width=1024, height=1024):
    """Rasterize the polygons of a GeoJSON file into a binary mask for a TIF tile.

    The labels are reprojected into the raster's CRS and clipped to the
    raster's bounding box, so the mask covers exactly the footprint of the
    image and lines up with its pixel grid.

    Parameters
    ----------
    input_geojson_path : str
        Path to the vector label file (read with ``geopandas.read_file``).
    tif_path : str
        Path to the georeferenced raster whose bounds and CRS define the mask.
    output_mask_path : str
        Destination image path; the format is chosen by OpenCV from the extension.
    width, height : int
        Size of the output mask in pixels (default 1024 x 1024).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width)``: 255 inside a polygon,
        0 elsewhere. A tile with no intersecting polygons yields an all-zero mask.

    Raises
    ------
    OSError
        If OpenCV reports that the mask could not be written.
    """
    gdf = gpd.read_file(input_geojson_path)

    # Only the georeferencing is needed from the raster; no pixel data is read.
    with rasterio.open(tif_path) as src:
        raster_crs = src.crs
        left, bottom, right, top = src.bounds

    # Work in the raster's CRS. Rasterizing in the label CRS over the bounding
    # box of a reprojected extent would not match the image grid whenever the
    # two CRSs differ.
    extent = gpd.GeoDataFrame(geometry=[box(left, bottom, right, top)], crs=raster_crs)
    gdf = gdf.to_crs(raster_crs)

    # Keep only the parts of the labels that fall inside the image footprint.
    cropped_gdf = gpd.overlay(gdf, extent, how='intersection')

    # Flatten everything into plain polygons.
    polygons = [poly for geom in cropped_gdf.geometry for poly in _iter_polygons(geom)]

    if polygons:
        # Affine transform mapping the image footprint onto a (height, width) grid.
        transform = rasterio.transform.from_bounds(
            left, bottom, right, top, width=width, height=height
        )
        # invert=True marks pixels *inside* the shapes as True.
        inside = geometry_mask(
            [ops.unary_union(polygons)],
            out_shape=(height, width),
            transform=transform,
            invert=True,
        )
        mask = inside.astype(np.uint8) * np.uint8(255)
    else:
        # Nothing to burn in: produce an all-background mask instead of
        # handing rasterio an empty geometry.
        mask = np.zeros((height, width), dtype=np.uint8)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(output_mask_path, mask):
        raise OSError(f"Could not write mask to {output_mask_path!r}")

    return mask

In [3]:
# Input imagery, vector labels and output location for the Turkey dataset.
tiffpath = './data/Turkey/images/'
tiff_files = sorted(glob.glob(os.path.join(tiffpath, "*.tif")))
jsonpath = './data/Turkey/labels/'
json_files = sorted(glob.glob(os.path.join(jsonpath, "*.geojson")))
output_path = './data/Turkey/mask/'

# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(output_path, exist_ok=True)

# Images and labels are paired by position in the sorted listings.
# NOTE(review): this assumes a 1:1 correspondence between the two folders — confirm.
if len(tiff_files) != len(json_files):
    print(f"Warning: {len(tiff_files)} images but {len(json_files)} label files")

failed = []
# enumerate() keeps the image/label index in step even when an item fails;
# a counter bumped only on success would mis-pair every later image.
for i, img in enumerate(tqdm(tiff_files)):
    output_mask_path = os.path.join(output_path, os.path.splitext(os.path.basename(img))[0] + '_mask1.png')
    try:
        # create_mask writes the PNG itself, so no second write is needed here.
        mask = create_mask(json_files[i], img, output_mask_path)
    except Exception as exc:
        # Record the failure and its cause, then carry on with the next tile.
        failed.append(img)
        tqdm.write(f"Failed to create mask for {img}: {exc!r}")

if failed:
    print(f"{len(failed)} of {len(tiff_files)} images failed; see `failed`")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 55/55 [00:03<00:00, 15.30it/s]
