We are going to clip rasters into tiles that together form a big mosaic.
- Decide whether the input is multiple rasters or a single one.
- If multiple, set the folder where all the layers are.
- The clipped tiles are written into a newly created folder named after each raster (with a `_mosaic` suffix).

In [1]:
#Import the libraries
import os
import sys
from osgeo import gdal, gdalconst
import numpy as np
import glob

# Switch the GDAL bindings to exception mode so that failed GDAL calls raise
# Python exceptions; the tiling cell catches RuntimeError from gdal.Translate.
gdal.UseExceptions()

In [None]:
def get_raster_file_list(path):
    """
    Get a list of the raster files inside the folder.

    Only the folder itself is scanned (no recursion into sub-folders). A file
    counts as a raster when its name ends in ".tif" or ".tiff", compared
    case-insensitively so that names such as "IMAGE.TIF" are found as well.
    Directories are skipped even if their name carries a raster extension.

    Parameters:
    - path (str): path of the folder with the resources.

    Returns:
    - File_list (list). Full paths (folder joined with the file name) of the
      rasters, sorted by file name so the order is reproducible.

    Raises:
    - OSError: propagated from os.listdir when the folder cannot be read.
    """
    raster_extensions = (".tif", ".tiff")
    File_list = []
    # os.listdir returns each name once, so no duplicate check is needed.
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if name.lower().endswith(raster_extensions) and os.path.isfile(full_path):
            File_list.append(full_path)
    return File_list

def create_folder_if_not_exists(folder_path):
    """
    Make sure a folder is present on disk and report what was done.

    Parameters:
    folder_path (str): Location of the folder. Missing parent folders are
        created along with it.
    """
    # Nothing to create when the path is already there; just say so.
    if os.path.exists(folder_path):
        print(f"Folder already exists at: {folder_path}")
        return

    os.makedirs(folder_path)
    print(f"Folder created at: {folder_path}")

In [2]:
"""Set folder main location"""
# Folder that is scanned for the rasters to clip.
input_path = r"C:\Users\admin\Downloads\fire_forest_loss_2001-23_annual\test"

# Everything produced by the notebook goes into an "output" sub-folder of it.
output_path = os.path.join(input_path, "output")

In [3]:
"""Get a list of the files inside the folder you want to clipin case of MULTIPLE files, if single, figure it out""" 
# Create the output folder first: os.makedirs also creates missing parents, so
# input_path is guaranteed to exist before it is listed on the next line.
create_folder_if_not_exists(output_path)
# Paths of the .tif/.tiff entries found directly inside input_path.
File_list = get_raster_file_list(input_path)

['C:\\Users\\admin\\Downloads\\fire_forest_loss_2001-23_annual\\test\\global_glad_fire_forest_loss_2001-23_annual.tif']


In [4]:
"""Do the clipping of the file"""
"""We open the file in case of SINGLE"""
# Clip every raster in File_list into square tiles.
# Each raster gets its own "<raster name>_mosaic" folder inside output_path;
# output_path itself is left untouched so every raster lands next to the
# others instead of being nested inside the previous raster's folder.

# Tile edge length in pixels. It could be 256, 512 or 1024 depending on the
# scope; the value used here produces far fewer, much larger tiles.
tilesize = 27470

# Paths of all tiles requested from gdal.Translate, across all rasters.
file_snap_list = []

for file in File_list:
    # To process one specific raster only, override `file` here, e.g.
    # file = "level_1_landsat_oli_captured_prior_to_2018_4326_1.tif"
    datafile = gdal.Open(file)

    # Get the band from the input dataset, remove the color table and set the
    # color interpretation to gray.
    # NOTE(review): the dataset is opened with gdal.Open's default access mode;
    # confirm these band edits behave as intended for the input format.
    band = datafile.GetRasterBand(1)
    band.SetRasterColorTable(None)
    band.SetColorInterpretation(gdalconst.GCI_GrayIndex)

    # Folder that will contain the tiles of this raster. splitext strips the
    # real extension, so "x.tiff" becomes "x" (str.replace left "xf").
    dataname = os.path.splitext(os.path.basename(file))[0]
    mosaic_dir = os.path.join(output_path, dataname + "_mosaic")
    os.makedirs(mosaic_dir, exist_ok=True)

    # Raster size in pixels.
    width = datafile.RasterXSize   # columns
    height = datafile.RasterYSize  # rows

    # We might need a calculator here on how many tiles we predict to produce.

    # Walk the raster in tilesize steps: i is the column offset, j the row offset.
    for i in range(0, width, tilesize):
        for j in range(0, height, tilesize):
            # Shrink the window on the right/bottom edges so it never reaches
            # past the raster and pads the tile with nodata.
            w = min(tilesize, width - i)
            h = min(tilesize, height - j)

            output_file = os.path.join(mosaic_dir, f"{dataname}_{i}_{j}.tif")
            file_snap_list.append(output_file)
            try:
                # Clip the window [i, j, w, h] out of the source raster.
                ds = gdal.Translate(
                    output_file,
                    datafile,
                    format='GTiff',
                    noData=0,
                    # outputType=gdal.GDT_Int16,
                    creationOptions=['COMPRESS=DEFLATE', 'TILED=YES', 'COPY_SRC_OVERVIEWS=YES'],
                    srcWin=[i, j, w, h],
                )
            except RuntimeError as err:
                print("The script got an error")
                # Keep the GDAL message; without it the failure cannot be diagnosed.
                print(f"Failed tile: {output_file}")
                print(f"Reason: {err}")
                sys.exit(1)

            # Drop the reference right away so the tile is flushed and closed.
            ds = None

    # Release the source raster before moving on to the next one.
    band = None
    datafile = None

    

In [4]:
"""Get all the folders in case of multiple"""
# Get a list of all items (files and folders) directly inside output_path.
all_items = os.listdir(output_path)

# Keep only the folders. The directory test must use the same base folder
# (output_path) that the items were listed from; the name `path` is only
# assigned in a later cell.
folder_paths = [
    os.path.join(output_path, item)
    for item in all_items
    if os.path.isdir(os.path.join(output_path, item))
]
print(folder_paths)

['C:\\Users\\admin\\Downloads\\treecover_2000\\output']


In [5]:
"""In case for a single folder"""
# Point directly at one mosaic folder instead of collecting several.
path = r"C:\Users\admin\Downloads\fire_forest_loss_2001-23_annual\test\global_glad_fire_forest_loss_2001-23_annual_mosaic"

# The clean-up step iterates over a list of folders, so wrap the single one.
folder_paths = [path]

In [6]:
"""with this block, we delete tiles that include just nodata"""
# Delete every tile whose pixels are all equal to the nodata value.
for folder in folder_paths:
    # Inspect only the raster tiles. Any other file in the folder (for example
    # a sidecar file) is skipped instead of being handed to gdal.Open.
    # glob.escape keeps special characters in the folder name literal.
    tiles = [
        candidate
        for candidate in glob.glob(os.path.join(glob.escape(folder), "*"))
        if candidate.lower().endswith((".tif", ".tiff"))
    ]
    for img in tiles:
        ds_filter = gdal.Open(img, gdal.GA_ReadOnly)

        # The nodata value is taken from the first band.
        band = ds_filter.GetRasterBand(1)
        nodata_value = band.GetNoDataValue()

        if nodata_value is None:
            # No nodata value declared: the tile cannot be "only nodata".
            check = False
        else:
            # Read the data as an array and test every pixel.
            ds_array = ds_filter.ReadAsArray()
            check = bool(np.all(ds_array == nodata_value))
            ds_array = None  # free the pixel buffer before the next tile

        # Release every GDAL handle before touching the file on disk; a file
        # that is still open cannot be deleted on Windows.
        band = None
        ds_filter = None

        if check:
            os.remove(img)