In [1]:
import os
from osgeo import gdal
from typing import Tuple

In [2]:
# Tile directories are configured through the environment; a missing
# variable raises KeyError here so the notebook fails early.
cep_tiles_directory, water_tiles_directory = (
    os.environ['CEP_TILES_DIRECTORY'],
    os.environ['WATER_TILES_DIRECTORY'],
)


# Matching Tiles from JRC by extent

In [3]:
class TiffMatcher:
    """Pair water-tile TIFFs with CEP-tile TIFFs based on their spatial extent.

    The matcher scans two directories, computes the extent of every TIFF in
    each via GDAL, and builds a mapping ``water file -> CEP file``. Results of
    :meth:`match_tiffs` are cached on ``self.matches`` so they can be written
    out with :meth:`write_matches`.
    """

    # File-name suffixes (case-sensitive) treated as TIFF rasters when scanning.
    TIFF_SUFFIXES = (".tif", ".tiff")

    def __init__(self, water_tiff_path: str, cep_tiff_path: str):
        """Store the two directories to scan.

        Args:
            water_tiff_path: Directory containing the water TIFF tiles.
            cep_tiff_path: Directory containing the CEP TIFF tiles.
        """
        self.water_tiff = water_tiff_path
        self.cep_tiff = cep_tiff_path
        # Populated by match_tiffs(); None until then.
        self.matches = None

    def write_matches(self, output_file):
        """Write the cached matches to ``output_file`` as ``water,cep`` lines.

        Args:
            output_file: Path of the text file to create or overwrite.

        Raises:
            ValueError: If no matches have been computed yet. The check runs
                before the file is opened so an existing file is not truncated.
        """
        if self.matches is None:
            raise ValueError("No matches to write; call match_tiffs() first.")

        with open(output_file, "w") as file:
            for water_file, cep_file in self.matches.items():
                file.write(f"{water_file},{cep_file}\n")

    def get_extent(self, tiff_file, rounding: bool = False) -> Tuple[float, float, float, float]:
        """Return ``(xmin, ymin, xmax, ymax)`` for a raster file.

        The upper-left corner is taken from the geotransform origin and the
        opposite corner by applying the geotransform to pixel
        ``(width, height)``.
        NOTE(review): for rasters with rotation/shear terms these two corners
        are not necessarily the true bounding box - confirm inputs are north-up.

        Args:
            tiff_file: Path to a raster readable by GDAL.
            rounding: When True, round every coordinate to the nearest 10 so
                that extents from slightly different grids compare equal.

        Raises:
            RuntimeError: If GDAL cannot open ``tiff_file``.
        """
        dataset = gdal.Open(tiff_file)
        if dataset is None:
            # Without gdal.UseExceptions(), Open() signals failure by returning None.
            raise RuntimeError(f"GDAL could not open raster file: {tiff_file}")

        try:
            width = dataset.RasterXSize
            height = dataset.RasterYSize
            geotransform = dataset.GetGeoTransform()
        finally:
            # Dropping the reference is how the GDAL Python bindings close a dataset.
            dataset = None

        xmin = geotransform[0]
        ymin = geotransform[3] + width * geotransform[4] + height * geotransform[5]
        xmax = geotransform[0] + width * geotransform[1] + height * geotransform[2]
        ymax = geotransform[3]

        if not rounding:
            return (xmin, ymin, xmax, ymax)
        # Negative ndigits rounds to the nearest 10.
        return (round(xmin, -1), round(ymin, -1), round(xmax, -1), round(ymax, -1))

    def get_all_tiff_extents(self, directory, rounding: bool = False) -> dict[Tuple[float, float, float, float], str]:
        """Map the extent of every TIFF in ``directory`` to its file name.

        File names are processed in sorted order, so when several files share
        the same extent the alphabetically last one is kept deterministically.

        Args:
            directory: Directory to scan (not recursive).
            rounding: Forwarded to :meth:`get_extent`.
        """
        extents = {}
        for file in sorted(os.listdir(directory)):
            if file.endswith(self.TIFF_SUFFIXES):
                extents[self.get_extent(os.path.join(directory, file), rounding)] = file
        return extents

    def _make_pair(self, water_file, cep_file, append_file_path) -> Tuple[str, str]:
        """Return the (water, cep) entry, optionally prefixed with the directories."""
        if append_file_path:
            return (
                os.path.join(self.water_tiff, water_file),
                os.path.join(self.cep_tiff, cep_file),
            )
        return (water_file, cep_file)

    def match_tiffs_by_extent(self, water_extents, cep_extents, append_file_path=False) -> dict[str, str]:
        """Match water tiles to CEP tiles whose extent is exactly equal.

        Args:
            water_extents: Mapping ``extent -> water file name``.
            cep_extents: Mapping ``extent -> CEP file name``.
            append_file_path: When True, keys and values are joined with the
                water and CEP directories respectively.

        Returns:
            Mapping ``water file -> CEP file``; water tiles without an equal
            CEP extent are omitted.
        """
        matches = {}
        for water_extent, water_file in water_extents.items():
            # Extents are dict keys, so a membership test replaces the inner scan.
            if water_extent not in cep_extents:
                continue
            water_key, cep_value = self._make_pair(
                water_file, cep_extents[water_extent], append_file_path
            )
            matches[water_key] = cep_value
        return matches

    def match_tiffs_by_nearest_extent(self, water_extents, cep_extents, append_file_path=False) -> dict[str, str]:
        """Match every water tile to the CEP tile with the closest extent.

        Closeness is the Euclidean distance between the first two extent
        coordinates, i.e. the ``(xmin, ymin)`` corners. On ties the CEP extent
        encountered first wins.

        Args:
            water_extents: Mapping ``extent -> water file name``.
            cep_extents: Mapping ``extent -> CEP file name``.
            append_file_path: When True, keys and values are joined with the
                water and CEP directories respectively.

        Returns:
            Mapping ``water file -> CEP file``; empty when ``cep_extents`` is
            empty because there is nothing to match against.
        """
        matches = {}
        if not cep_extents:
            return matches

        for water_extent, water_file in water_extents.items():

            def corner_distance(cep_extent, origin=water_extent):
                # sqrt((x1 - x2)^2 + (y1 - y2)^2) between the (xmin, ymin) corners
                return ((cep_extent[0] - origin[0]) ** 2 + (cep_extent[1] - origin[1]) ** 2) ** 0.5

            # min() keeps the first minimum, like a strict "<" comparison would.
            nearest_cep_extent = min(cep_extents, key=corner_distance)
            water_key, cep_value = self._make_pair(
                water_file, cep_extents[nearest_cep_extent], append_file_path
            )
            matches[water_key] = cep_value
        return matches

    def match_tiffs(self, rounding: bool = False, append_file_path: bool = False) -> dict[str, str]:
        """Scan both directories and match tiles by exact extent.

        The result is stored on ``self.matches`` and also returned.

        Args:
            rounding: Round extents to the nearest 10 before comparing.
            append_file_path: Store joined paths instead of bare file names.
        """
        water_extents = self.get_all_tiff_extents(self.water_tiff, rounding)
        cep_extents = self.get_all_tiff_extents(self.cep_tiff, rounding)

        self.matches = self.match_tiffs_by_extent(water_extents, cep_extents, append_file_path)
        return self.matches


In [4]:
# Build the matcher, match tiles on extents rounded to the nearest 10,
# and persist the resulting full-path pairs.
tiffmatcher = TiffMatcher(water_tiles_directory, cep_tiles_directory)
tiffmatcher.match_tiffs(rounding=True, append_file_path=True)
tiffmatcher.write_matches("matches.txt")

# Sanity check: show the first matched pair (prints nothing if there are none)
first_match = next(iter(tiffmatcher.matches.items()), None)
if first_match is not None:
    key, value = first_match
    print(key, value)