# Tiling Satellite Images

## 1. Install and Import Required Packages

In [1]:
%%capture
# Install/upgrade the third-party packages imported below; %%capture hides pip's output.
!pip install -U rasterio geopandas fastai

In [2]:
# Import packages
import rasterio
from rasterio.features import rasterize
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union # previously used: cascaded_union
from shapely import wkt
from shapely.geometry import mapping, Point, Polygon, box

from PIL import Image
import os
import shutil
from fastai.vision.all import *
from tqdm import tqdm
import cv2

## 2. Set Paths for Images and Masks

If running on Google Colab, run the next cell to mount Google Drive; skip it otherwise.

In [3]:
# Colab only: mount Google Drive at /content/drive, where the data paths below point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [44]:
from sys import path_hooks  # not referenced in the visible cells; kept so the namespace is unchanged

# Root folder of the tiling workspace on the mounted Google Drive.
path = Path("/content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles")

# Inputs: the source images and the per-image shapefiles used to create the masks.
images_dir = path / "images"
shapefiles_dir = path / "shapefiles"

# CSV with the building geometries, located two levels above the workspace root.
buildings_path = path.parents[1] / "open_building_dataset" / "Managua_NIC_rect.csv"

# Generated masks and tiles are written under the workspace root as well.
output_dir = path

In [45]:
# Sanity check: show the resolved input and output locations.
print(path, images_dir, shapefiles_dir, buildings_path, output_dir)

/content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/shapefiles /content/drive/MyDrive/UNITAC/data/open_building_dataset/Managua_NIC_rect.csv /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles


In [41]:
# Folder with the raw .TIF tiles of one acquisition that should be copied.
source_dir = '/content/drive/MyDrive/UNITAC/data/satellite_imagery/Managua/1040010087B10F00/016043245010_01_003/016043245010_01_003/016043245010_01/016043245010_01_P001_MUL'
# Copy destination: the images folder used by the rest of the notebook.
target_dir = images_dir

def copy_images_without_r1(source_dir, target_dir):
    """Copy selected .tif images from ``source_dir`` into ``target_dir``.

    A file is copied when its name ends in ``.tif`` (case-insensitive) and
    contains neither ``'R1C'`` nor ``'C1-'``; e.g. ``..._R1C2-...`` and
    ``..._R3C1-...`` are skipped while ``..._R2C2-...`` is copied.  Every
    directory entry is printed as it is examined and each copy is reported.

    Args:
        source_dir: Directory to read from (str or path-like).
        target_dir: Directory to copy into; created (with parents) if missing.

    Returns:
        None.  If ``source_dir`` is not an existing directory, a message is
        printed and nothing is created or copied.
    """
    # isdir (rather than exists) also rejects a regular file, which would
    # otherwise make os.listdir raise NotADirectoryError further down.
    if not os.path.isdir(source_dir):
        print(f"The source directory {source_dir} does not exist.")
        return

    # exist_ok avoids the race of a separate exists() check followed by makedirs().
    os.makedirs(target_dir, exist_ok=True)

    for filename in os.listdir(source_dir):
        print(filename)
        is_tif = filename.lower().endswith('.tif')
        is_excluded = 'R1C' in filename or 'C1-' in filename
        if is_tif and not is_excluded:
            shutil.copy(os.path.join(source_dir, filename), os.path.join(target_dir, filename))
            print(f"Copied {filename} to {target_dir}")

# Copy the selected .TIF files from source_dir into the notebook's images folder.
copy_images_without_r1(source_dir, target_dir)


23JUN25161448-M3DS-016043245010_01_P001.XML
23JUN25161448-M3DS-016043245010_01_P001.IMD
23JUN25161448-M3DS_R1C1-016043245010_01_P001.TIF
23JUN25161448-M3DS-016043245010_01_P001_README.TXT
23JUN25161448-M3DS-016043245010_01_P001-BROWSE.JPG
23JUN25161448-M3DS-016043245010_01_P001.TIL
23JUN25161448-M3DS_R1C2-016043245010_01_P001.TIF
23JUN25161448-M3DS_R1C3-016043245010_01_P001.TIF
23JUN25161448-M3DS_R2C1-016043245010_01_P001.TIF
23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF
Copied 23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF to /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images
23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF
Copied 23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF to /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images
23JUN25161448-M3DS_R3C1-016043245010_01_P001.TIF
23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF
Copied 23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF to /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images

In [60]:
# Take the first entry listed in the images folder as a sample image and derive the
# shapefile path that goes with it. tile_img reads this notebook-level image_path.
# NOTE(review): replace('TIF', 'shp') swaps every 'TIF' substring in the file name,
# not only the extension, and leaves lower-case '.tif' names unchanged.
image_path = images_dir.ls()[0]
shp_path = shapefiles_dir / image_path.name.replace('TIF', 'shp')

In [61]:
def get_all_buildings(buildings_path):
  """Load the buildings CSV into a GeoDataFrame.

  The CSV must contain a ``geometry`` column holding WKT strings; these are
  parsed into shapely geometries and the result is tagged with CRS EPSG:4326.

  buildings_path = path to the CSV file.
  Returns a geopandas.GeoDataFrame built from the CSV's rows.
  """
  footprints = pd.read_csv(buildings_path)
  parsed_geometries = footprints['geometry'].apply(wkt.loads)
  footprints['geometry'] = parsed_geometries
  return gpd.GeoDataFrame(footprints, geometry='geometry', crs="EPSG:4326")

def create_shapefile_for_image(image_path, buildings):
  """Write a shapefile with the buildings that intersect one image's bounds.

  The buildings are reprojected to the image's CRS, filtered to those that
  intersect the image's bounding box, and saved in the notebook-level
  ``shapefiles_dir`` under the image's file name with 'TIF' replaced by 'shp'.

  image_path = pathlib.Path of the raster image.
  buildings = GeoDataFrame of building geometries (any CRS that can be reprojected).
  Returns the path of the written shapefile.
  """
  # makedirs(exist_ok=True) also creates missing parents and tolerates an existing folder.
  os.makedirs(shapefiles_dir, exist_ok=True)

  # Only the georeferencing is needed; the context manager guarantees the
  # dataset is closed again (it was previously left open).
  with rasterio.open(image_path) as sat_image:
    image_crs = sat_image.crs
    bounding_box = box(*sat_image.bounds)

  buildings = buildings.to_crs(image_crs)
  cropped_buildings = buildings[buildings.intersects(bounding_box)]

  # Naming rule kept identical to the lookups elsewhere in this notebook.
  shp_path = shapefiles_dir / image_path.name.replace('TIF', 'shp')
  cropped_buildings.to_file(shp_path)

  print(f"Cropped shapefile saved to {shp_path}")
  return shp_path

def extract_rgb_image(image_path):
  """Read the red, green and blue bands of a multispectral image.

  Band positions are looked up in BAND_STRUCTURE (1-based band index =
  list position + 1), so the file's bands are assumed to follow that order
  -- TODO confirm against the imagery's metadata.

  image_path = path to the raster file.
  Returns a numpy array with the bands stacked along the last axis in
  red, green, blue order.
  """
  BAND_STRUCTURE = ["coastal", "blue", "green", "yellow", "red", "red-edge", "near-IR1", "near-IR2"]
  rgb_band_indexes = [BAND_STRUCTURE.index(name) + 1 for name in ("red", "green", "blue")]

  # Read only the three bands that are returned (instead of all eight) and let
  # the context manager close the dataset even if a read fails.
  with rasterio.open(image_path) as sat_image:
    channels = [sat_image.read(band_idx) for band_idx in rgb_band_indexes]

  return np.stack(channels, axis=-1)


## 3. Define Functions

In [24]:
def tile_img_msk(images_dir, output_dir, tile_size: int, masks_dir=None):
  """Tiles both images and masks and store them as .png-files

  For every .tif/.TIF file in ``images_dir`` the RGB image (via
  extract_rgb_image) and the mask with the same file name in ``masks_dir``
  are cut into non-overlapping ``tile_size`` x ``tile_size`` tiles.  Tiles are
  saved as ``<stem>_<i>_<j>.png`` in ``output_dir/image_tiles`` and
  ``output_dir/mask_tiles`` (i = tile row, j = tile column).  Rows/columns
  that do not fill a complete tile at the bottom/right edge are dropped.

  images_dir = pathlib.Path of the folder with the source images.
  output_dir = pathlib.Path under which the two tile folders are created.
  tile_size = edge length of a tile in pixels; must be positive.
  masks_dir = folder holding the untiled masks; defaults to the
              notebook-level ``path / "untiled_masks"``.
  Raises ValueError for a non-positive tile_size or when a mask's height/width
  differs from its image's.
  """
  if tile_size <= 0:
    raise ValueError(f"tile_size must be a positive integer, got {tile_size}")
  if masks_dir is None:
    # Same location that used to be hard-coded through the notebook-level `path`.
    masks_dir = path / "untiled_masks"

  image_tiles_dir = output_dir / 'image_tiles'
  mask_tiles_dir = output_dir / 'mask_tiles'
  os.makedirs(image_tiles_dir, exist_ok=True)
  os.makedirs(mask_tiles_dir, exist_ok=True)

  # Accept both '.TIF' and '.tif', like save_masks_from_buildings does, and
  # materialise the listing so tqdm can show a total and the order is stable.
  image_paths = sorted(p for p in images_dir.iterdir() if p.suffix.lower() == '.tif')

  for image_path in tqdm(image_paths):
    print(f"Tiling image {image_path}...")
    rgb_image = extract_rgb_image(image_path)

    # Load the corresponding mask as a numpy array
    msk_fn = masks_dir / image_path.name
    msk = np.array(PILMask.create(msk_fn))
    if msk.shape[:2] != rgb_image.shape[:2]:
      # Slicing both with the same indices would silently misalign the tiles.
      raise ValueError(
        f"Mask {msk_fn} has shape {msk.shape[:2]} but image {image_path} has shape {rgb_image.shape[:2]}")

    n_rows, n_cols = rgb_image.shape[:2]
    tile_stem = image_path.stem

    # Cut image and mask into tiles and store them as .png-files
    for i in range(n_rows // tile_size):
      rows = slice(i * tile_size, (i + 1) * tile_size)
      for j in range(n_cols // tile_size):
        cols = slice(j * tile_size, (j + 1) * tile_size)
        tile_name = f'{tile_stem}_{i}_{j}.png'
        Image.fromarray(rgb_image[rows, cols]).save(image_tiles_dir / tile_name)
        Image.fromarray(msk[rows, cols]).save(mask_tiles_dir / tile_name)
    print(f"Tiled {image_path.name}.")

In [None]:
#@title Tile img function
def tile_img(rgb_image, output_path, tile_size: int, single = None, image_name = None):
  """Tile image into smaller tiles and save them

  Tiles are non-overlapping ``tile_size`` x ``tile_size`` crops saved as
  ``<output_path>/<name>/<name>_<i>_<j>.png`` (i = tile row, j = tile column);
  incomplete tiles at the bottom/right edge are dropped.

  rgb_image = with single=True: the image as a (rows, cols, channels) array.
              with single=False: an iterable of image file paths; each one is
              loaded with extract_rgb_image and tiled into its own subfolder.
  output_path = folder under which the per-image tile folders are created.
  tile_size = edge length of a tile in pixels.
  single = True for one in-memory image, False for several image files.
           With the default None nothing is tiled (a note is printed).
  image_name = name used for the subfolder and tile files when single=True;
               defaults to the notebook-level ``image_path`` name without its
               4-character extension.
  """
  def _save_tiles(image_array, name):
    # Write all complete tiles of one image into <output_path>/<name>/.
    tile_dir = os.path.join(output_path, name)
    os.makedirs(tile_dir, exist_ok=True)
    n_rows, n_cols = image_array.shape[:2]
    for i in range(n_rows // tile_size):
      for j in range(n_cols // tile_size):
        img_tile = image_array[i*tile_size:(i+1)*tile_size, j*tile_size:(j+1)*tile_size]
        Image.fromarray(img_tile).save(f'{tile_dir}/{name}_{i}_{j}.png')

  if single is None:
    # The default never tiled anything; say so instead of returning silently.
    print("tile_img: pass single=True or single=False; nothing was tiled.")
    return

  if single:
    # Fall back to the notebook-level image_path only when no name is given.
    name = image_name if image_name is not None else image_path.name[:-4]
    _save_tiles(rgb_image, name)
  else:
    # This branch used to loop over the notebook-level image_path (a single
    # Path, hence not iterable), re-tiled the same array for every entry and
    # saved into folders it never created.
    for fn in tqdm(list(rgb_image)):
      fn = Path(fn)
      _save_tiles(extract_rgb_image(fn), fn.stem)

In [72]:
def generate_mask(raster_path, shape_path, output_path = None, file_name = None):
    """Function that generates a binary mask from a vector file (shp or geojson)
    raster_path = path to the .tif;
    shape_path = path to the shapefile or GeoJson.
    output_path = Path to save the binary mask.
    file_name = Name of the file.

    The polygons are converted from map coordinates to pixel coordinates with
    the raster's inverse transform (exterior rings only), rasterized at the
    raster's height/width and eroded once with a 3x3 kernel.

    Returns the uint8 mask (values 0/1) when output_path or file_name is None.
    Otherwise the mask is written with values 0/255 to output_path/file_name
    using the raster's metadata with ``count`` set to 1, and None is returned.
    Raises TypeError for geometries that are neither Polygon nor MultiPolygon.
    """

    # Only the metadata is needed (no pixel data is read). Everything used
    # later is copied out here so the closed dataset is never touched again.
    with rasterio.open(raster_path, "r") as src:
        raster_meta = src.meta.copy()
    raster_crs = raster_meta['crs']
    im_size = (raster_meta['height'], raster_meta['width'])
    # Inverse affine transform: map coordinates -> pixel (col, row); computed once.
    to_pixel = ~raster_meta['transform']

    # Load corresponding shapefile containing marked pixels
    train_df = gpd.read_file(shape_path)

    # Verify CRS and print warning message before converting if the two don't match
    if train_df.crs != raster_crs:
      print(f'Raster CRS: {raster_crs}, Vector CRS: {train_df.crs}.\n Convert vector and raster to the same CRS.')
      # update CRS to match the raster
      train_df = train_df.to_crs(raster_crs)

    def poly_from_utm(polygon, inverse_transform):
        # Re-express the polygon's exterior ring in pixel coordinates.
        poly = unary_union(polygon) # previously used: cascaded_union(polygon)
        pixel_pts = [inverse_transform * tuple(pt) for pt in np.array(poly.exterior.coords)]
        return Polygon(pixel_pts)

    # Generate the mask
    poly_shp = []
    for geometry in train_df['geometry']:
        if geometry.geom_type == 'MultiPolygon':
            parts = geometry.geoms # iterate over polygons within a MultiPolygon
        elif geometry.geom_type == 'Polygon':
            parts = [geometry]
        else:
            raise TypeError("Invalid geometry type")
        poly_shp.extend(poly_from_utm(part, to_pixel) for part in parts)

    if len(poly_shp) > 0:
        mask = rasterize(shapes=poly_shp, out_shape=im_size)
    else:
        mask = np.zeros(im_size, dtype=np.uint8)

    # 3x3 erosion, one iteration: trims the outline of every rasterized shape
    # (presumably to keep neighbouring buildings apart -- confirm intent).
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.erode(mask, kernel, iterations=1)
    mask = mask.astype('uint8')

    # Save or return mask
    if output_path is not None and file_name is not None:
        bin_mask_meta = raster_meta.copy()
        bin_mask_meta.update({'count': 1})
        # Build the full target path instead of os.chdir(output_path), which
        # changed the working directory of the whole process as a side effect.
        target = os.path.join(output_path, file_name)
        with rasterio.open(target, 'w', **bin_mask_meta) as dst:
            dst.write(mask * 255, 1) # Change 255 to 1 if classes need to be 0 and 1
    else:
        return mask


def save_masks(images_dir, mask, maskdir):
    """Generate and save a mask for one image file or for every image given.

    images_dir = path of a single .tif/.TIF file, path of a folder containing
                 such files, or an iterable of image paths.
    mask = path to the shapefile/GeoJSON passed on to generate_mask.
    maskdir = folder the masks are written to (created if missing); each mask
              is saved under the image's own file name.

    Previously a single file was only processed when ``maskdir`` did not exist
    yet, and the multi-image loop only ran when it already did.
    """
    os.makedirs(maskdir, exist_ok=True)

    def _save_one(image):
        # Entries that are not .tif/.TIF files are skipped.
        image = Path(image)
        if image.name.endswith(('.TIF', '.tif')):
            generate_mask(image, mask, maskdir, image.name)

    if isinstance(images_dir, (str, os.PathLike)):
        source = Path(images_dir)
        if not source.is_dir():
            # Single image: no progress bar needed.
            _save_one(source)
            return
        image_paths = sorted(source.iterdir())
    else:
        image_paths = list(images_dir)

    for image in tqdm(image_paths):
        _save_one(image)

def save_masks_from_buildings(images_dir, buildings_path, masks_dir):
    """Create the missing per-image shapefiles and untiled masks.

    For every .tif/.TIF image a building shapefile is looked up in the
    notebook-level ``shapefiles_dir`` (and created from the buildings CSV when
    missing); then a mask with the image's file name is generated in
    ``masks_dir`` unless it already exists.  Progress is reported via print.

    images_dir = pathlib.Path of a folder with images, or of a single image.
    buildings_path = path to the buildings CSV read by get_all_buildings.
    masks_dir = folder the masks are written to (created if missing).
    """
    buildings = get_all_buildings(buildings_path)
    print("Buildings are loaded.")

    os.makedirs(masks_dir, exist_ok=True)
    masks_dir = Path(masks_dir)

    # Handle either file or directory. A single file must be wrapped in a list
    # (list(path) raises TypeError); sorting gives tqdm a total and a stable order.
    images_dir = Path(images_dir)
    image_paths = sorted(images_dir.iterdir()) if images_dir.is_dir() else [images_dir]

    for img_path in tqdm(image_paths):
      if not img_path.name.endswith(('.TIF', '.tif')):
        continue

      shapefile_path = shapefiles_dir / img_path.name.replace('TIF', 'shp')
      if not shapefile_path.exists():
        print(f"Creating shapefile for {img_path.name}..")
        shapefile_path = create_shapefile_for_image(img_path, buildings)
      else:
        print(f"Skipping shapefile creation for {img_path.name}.")

      # Check the folder the mask is actually written to, not the
      # notebook-level output_dir, so both always agree.
      mask_path = masks_dir / img_path.name
      if not mask_path.exists():
        print(f"Creating untiled mask for {img_path.name}..")
        generate_mask(img_path, shapefile_path, masks_dir, img_path.name)
      else:
        print(f"Skipping untiled mask creation for {img_path.name}.")



Here we first generate the untiled building masks and then tile the images and masks for training.

In [73]:
# Create (or reuse) the per-image shapefiles and the untiled masks under output_dir/untiled_masks.
save_masks_from_buildings(images_dir, buildings_path, output_dir / 'untiled_masks')

Buildings are loaded.


0it [00:00, ?it/s]

Skipping shapefile creation for 23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF.
Skipping shapefile creation for 23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF.
Skipping shapefile creation for 23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF.
Skipping shapefile creation for 23JUN25161448-M3DS_R3C3-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R3C3-016043245010_01_P001.TIF.
Skipping shapefile creation for 23MAR10162619-M3DS_R2C1-016043246010_01_P001.TIF.
Skipping untiled mask creation for 23MAR10162619-M3DS_R2C1-016043246010_01_P001.TIF.
Skipping shapefile creation for 23MAR10162619-M3DS_R2C2-016043246010_01_P001.TIF.
Skipping untiled mask creation for 23MAR10162619-M3DS_R2C2-016043246010_01_P001.TIF

8it [00:28,  3.51s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R3C2-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R3C2-016043246010_01_P001.TIF..


9it [01:12,  9.65s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R3C3-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R3C3-016043246010_01_P001.TIF..


10it [01:27, 10.73s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R4C1-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R4C1-016043246010_01_P001.TIF..


11it [01:35, 10.17s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R4C2-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R4C2-016043246010_01_P001.TIF..


12it [01:48, 10.62s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R4C3-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R4C3-016043246010_01_P001.TIF..


13it [01:55,  9.94s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R5C3-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R5C3-016043246010_01_P001.TIF..


14it [01:57,  7.77s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R5C2-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R5C2-016043246010_01_P001.TIF..


15it [01:58,  6.08s/it]

Skipping shapefile creation for 23MAR10162619-M3DS_R5C1-016043246010_01_P001.TIF.
Creating untiled mask for 23MAR10162619-M3DS_R5C1-016043246010_01_P001.TIF..


19it [01:59,  6.27s/it]


Skipping shapefile creation for 23JUN25161448-M3DS_R4C2-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R4C2-016043245010_01_P001.TIF.
Skipping shapefile creation for 23JUN25161448-M3DS_R4C3-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R4C3-016043245010_01_P001.TIF.
Skipping shapefile creation for 23JUN25161448-M3DS_R5C2-016043245010_01_P001.TIF.
Skipping untiled mask creation for 23JUN25161448-M3DS_R5C2-016043245010_01_P001.TIF.


In [74]:
# Tile images and masks into non-overlapping tile_size x tile_size tiles
# (neighbouring tiles share no pixels).
tile_size = 512
tile_img_msk(images_dir, output_dir, tile_size)

0it [00:00, ?it/s]

Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF...


1it [00:05,  5.18s/it]

Tiled 23JUN25161448-M3DS_R2C3-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF...


2it [00:15,  8.29s/it]

Tiled 23JUN25161448-M3DS_R2C2-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF...


3it [00:27,  9.70s/it]

Tiled 23JUN25161448-M3DS_R3C2-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R3C3-016043245010_01_P001.TIF...


4it [00:31,  7.75s/it]

Tiled 23JUN25161448-M3DS_R3C3-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R2C1-016043246010_01_P001.TIF...


5it [00:41,  8.42s/it]

Tiled 23MAR10162619-M3DS_R2C1-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R2C2-016043246010_01_P001.TIF...


6it [00:48,  7.98s/it]

Tiled 23MAR10162619-M3DS_R2C2-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R2C3-016043246010_01_P001.TIF...


7it [00:52,  6.69s/it]

Tiled 23MAR10162619-M3DS_R2C3-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R3C1-016043246010_01_P001.TIF...


8it [00:59,  6.90s/it]

Tiled 23MAR10162619-M3DS_R3C1-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R3C2-016043246010_01_P001.TIF...


9it [01:07,  7.00s/it]

Tiled 23MAR10162619-M3DS_R3C2-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R3C3-016043246010_01_P001.TIF...


10it [01:12,  6.37s/it]

Tiled 23MAR10162619-M3DS_R3C3-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R4C1-016043246010_01_P001.TIF...


11it [01:19,  6.83s/it]

Tiled 23MAR10162619-M3DS_R4C1-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R4C2-016043246010_01_P001.TIF...


12it [01:27,  7.05s/it]

Tiled 23MAR10162619-M3DS_R4C2-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R4C3-016043246010_01_P001.TIF...


13it [01:32,  6.55s/it]

Tiled 23MAR10162619-M3DS_R4C3-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R5C3-016043246010_01_P001.TIF...


14it [01:33,  4.77s/it]

Tiled 23MAR10162619-M3DS_R5C3-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R5C2-016043246010_01_P001.TIF...


15it [01:35,  3.80s/it]

Tiled 23MAR10162619-M3DS_R5C2-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23MAR10162619-M3DS_R5C1-016043246010_01_P001.TIF...


16it [01:36,  3.08s/it]

Tiled 23MAR10162619-M3DS_R5C1-016043246010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R4C2-016043245010_01_P001.TIF...


17it [01:46,  5.19s/it]

Tiled 23JUN25161448-M3DS_R4C2-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R4C3-016043245010_01_P001.TIF...


19it [01:51,  5.85s/it]

Tiled 23JUN25161448-M3DS_R4C3-016043245010_01_P001.TIF.
Tiling image /content/drive/MyDrive/UNITAC/data/satellite_imagery/tiles/images/23JUN25161448-M3DS_R5C2-016043245010_01_P001.TIF...
Tiled 23JUN25161448-M3DS_R5C2-016043245010_01_P001.TIF.



