This code merges the different tiles (zip files) of Sentinel-2 (10 m) data, considering one date folder at a time. The zip files are fetched from a local input folder and unzipped into a local temporary folder. The code then processes bands 2, 3, 4 and 8. The mosaicked files are stored in the output folder, inside a sub-folder named after the date. These output TIFF files are named satellite_Merged_date[yyyymmdd]_band.TIF (e.g. Sentinel2_Merged_20250330_B08.TIF). After mosaicking, the code clips all the band TIFFs to the three study provinces (supplied as a shapefile) and saves the outputs as TIFF files (one file per band) in the same date sub-folder. The merged and clipped output TIFFs are named satellite_Merged_Clipped_date[yyyymmdd]_band.TIF (e.g. Sentinel2_Merged_Clipped_20250330_B08.TIF). All the calculations are done locally. Temporary files are created locally during the calculations, but they (including the extracted contents of the zip files) are deleted at the end of the code execution. This is done so that the code can be executed conveniently even on an ordinary computer with limited storage capacity.

In [None]:
import os
import sys
import subprocess

# Show which Python environment this notebook is running in: the name is
# the last path component of the interpreter's installation prefix.
env_dir_name = os.path.basename(sys.prefix)
print("Environment name:", env_dir_name)

In [None]:
# Install necessary third-party packages, if needed.
#
# Only third-party packages belong in this list. Standard-library modules
# (zipfile, glob, datetime, re, shutil, ...) ship with Python; passing their
# names to "pip install" would at best fail and at worst install an unrelated
# package that happens to share the name.
import importlib

required_packages = ["numpy", "rasterio", "geopandas"]

# pip name -> import name, for packages whose two names differ
import_names = {"scikit-learn": "sklearn"}

for package in required_packages:
    try:
        importlib.import_module(import_names.get(package, package))
        print(f"{package} is already installed.")
    except ImportError:
        print(f"{package} not found. Installing...")
        # Use the running interpreter's pip so the package lands in this environment
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

print("All packages have been installed!")

In [None]:
# Import necessary packages
import tempfile
import os
import tempfile
import zipfile
import glob
import numpy as np
from rasterio.merge import merge
import rasterio
from rasterio.mask import mask
import geopandas as gpd
from datetime import datetime
import re
from rasterio.enums import Resampling
from rasterio.warp import reproject
import shutil

In [None]:
# User defined input and output paths
# Root folder holding one sub-folder of Sentinel-2 zip files per date
source_folder = r"C:\Users\U8019357\OneDrive - UniSQ\00_Projects\2025.06.06 A4I Crop Monitoring Vietnam\04_Data\A4I Geospatial Tech - Global Shared Folder - Put Data Here\Satellite Data\Sentinel-2"

# Date string (i.e., sub-folder name); it is joined onto both the source
# folder and the output folder to select the date to process
date_str = "20250330"

# Output folder (a sub-folder named after the date is created inside it)
output_folder = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\1 - Image Processing\Processed_Sentinel2_VI_TIFFs"

# Path to the AOI shapefile (e.g., provincial borders); the file itself is
# read later, just before clipping
aoi_path = r"C:\Users\U8019357\UniSQ\A4I Geospatial Tech - UniSQ Internal - UniSQ Internal\1 - Image Processing\Raw Data\GIS Maps and Shapefiles\Three Provinces (Old)\StudyAreaA4I.shp"  

# List of band names to process; each is matched against file names of the
# form *_<band>_10m.jp2
bands_10m = ['B02', 'B03', 'B04', 'B08']

In [None]:
# Folder containing the zip files of the selected date
input_folder_path = os.path.join(source_folder, date_str)
print(input_folder_path)

# Per-date output folder; created if it does not exist yet
output_folder_date = os.path.join(output_folder, date_str)
os.makedirs(output_folder_date, exist_ok=True)
print(output_folder_date)

# Temporary folder to extract zip files
# (kept as an object so that temp_folder.cleanup() can remove it at the end)
temp_folder = tempfile.TemporaryDirectory()
print(temp_folder.name)

In [None]:
# Unzip every zip file of the selected date into its own sub-folder of the
# temporary folder. Sub-folders are named S2_T<tile id>_<date>, where both
# parts are parsed from the zip file name.
zip_files = glob.glob(os.path.join(input_folder_path, '*.zip'))
print(f"{len(zip_files)} ZIP files were found.")

# "T" + tile ID (two digits, three capital letters) followed by an 8-digit date
tile_pattern = re.compile(r'T(\d{2}[A-Z]{3})_(\d{8})')

# Names of archives that could not be matched or extracted
problem_zips = []

for zip_file in zip_files:
    zip_name = os.path.basename(zip_file)
    print(f"Processing {zip_name} ...")

    match = tile_pattern.search(zip_name)
    if not match:
        print(f"Filename format not matched for: {zip_name}")
        problem_zips.append(zip_name)
        continue

    # Keep the parsed date in its own variable: assigning it to `date_str`
    # would silently overwrite the user-defined date that is later used to
    # name the output files.
    tile_id, tile_date = match.groups()
    folder_name = f"S2_T{tile_id}_{tile_date}"

    # Extract into another temp folder
    extract_path = os.path.join(temp_folder.name, folder_name)

    if os.path.exists(extract_path):
        print(f"Already unzipped: {folder_name}")
        continue

    print(f"Unzipping {zip_name} to {extract_path} ...")
    try:
        with zipfile.ZipFile(zip_file, 'r') as zf:
            zf.extractall(extract_path)
    except Exception as e:
        # Best effort: report the failure and carry on with the other archives
        print(f"Error unzipping {zip_name}: {e}")
        problem_zips.append(zip_name)
        # Remove the partial extraction so a rerun does not treat it as
        # "Already unzipped" and the mosaic does not pick up incomplete data
        shutil.rmtree(extract_path, ignore_errors=True)

if problem_zips:
    print(f"\n Finished unzipping, but {len(problem_zips)} file(s) were skipped or failed:")
    for name in problem_zips:
        print(f" - {name}")
else:
    print("\n All files have been successfully unzipped.")

In [None]:
# Revert the zip files to cloud-only by running the Windows "attrib" command
# (+U -P) on each archive. check=True raises if the command fails for a file.
for archive in zip_files:
    archive_name = os.path.basename(archive)
    print(f"Cleaning {archive_name} ...")
    attrib_command = ["attrib", "+U", "-P", archive]
    subprocess.run(attrib_command, check=True)

In [None]:
# Performing the mosaic
#
# For every band, all tiles found in the temporary folder are merged into one
# GeoTIFF. Pixels covered by more than one tile are replaced by the mean of
# the non-zero values of all tiles covering them; everywhere else the value
# comes from rasterio's merge with method='first'.
mosaicked_files = []

for band in bands_10m:
    print(f"=== Processing Band: {band} ===")

    # Collect this band's JP2 files from every extracted archive:
    # <temp>/<archive folder>/<name>.SAFE/GRANULE/<granule>/IMG_DATA/R10m
    band_files = []
    for main_folder in os.listdir(temp_folder.name):
        main_path = os.path.join(temp_folder.name, main_folder)
        if not os.path.isdir(main_path):
            # e.g. the temporary overlap TIFFs written further below
            continue

        safe_folders = [f for f in os.listdir(main_path) if f.endswith('.SAFE')]
        for safe_folder in safe_folders:
            granule_path = os.path.join(main_path, safe_folder, 'GRANULE')
            if not os.path.exists(granule_path):
                continue
            for granule in os.listdir(granule_path):
                r10m_path = os.path.join(granule_path, granule, 'IMG_DATA', 'R10m')
                if os.path.exists(r10m_path):
                    pattern = f'*_{band}_10m.jp2'
                    band_files.extend(glob.glob(os.path.join(r10m_path, pattern)))

    if not band_files:
        print(f"No files found for band {band}. Skipping...\n")
        continue

    print(f"Files found for band {band} ({len(band_files)} files):")
    for f in band_files:
        print(f" - {os.path.basename(f)}")

    src_files = []
    try:
        # Open inside the try block so that every dataset opened so far is
        # closed again even if a later step raises.
        for f in band_files:
            src_files.append(rasterio.open(f))

        # Step 1: Create the mosaic. It defines the output extent, shape and
        # transform, and supplies the values outside the overlapping areas.
        mosaic, mosaic_transform = merge(src_files, method='first')
        base_shape = mosaic.shape[1:]  # (height, width)
        base_crs = src_files[0].crs

        # Step 2: Per-pixel sum and count of the non-zero values of all tiles
        overlap_sum = np.zeros(base_shape, dtype='float32')
        overlap_count = np.zeros(base_shape, dtype='uint16')

        for src in src_files:
            # Place the tile on the mosaic grid; pixels it does not cover stay 0
            temp_array = np.zeros(base_shape, dtype='float32')
            reproject(
                source=rasterio.band(src, 1),
                destination=temp_array,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=mosaic_transform,
                dst_crs=base_crs,
                resampling=Resampling.nearest
            )
            temp_mask = temp_array > 0
            overlap_sum += temp_array * temp_mask
            overlap_count += temp_mask.astype('uint16')

        # A pixel is "overlapping" when more than one tile has a value there
        overlap_mask = overlap_count > 1
        overlap_avg = np.zeros_like(overlap_sum, dtype='float32')
        overlap_avg[overlap_mask] = overlap_sum[overlap_mask] / overlap_count[overlap_mask]

        # Save the overlap average to a temporary file (optional)
        temp_overlap_tif = os.path.join(temp_folder.name, f'temp_overlap_{date_str}_{band}.TIF')
        out_meta = src_files[0].profile.copy()
        out_meta.update({
            "height": base_shape[0],
            "width": base_shape[1],
            "transform": mosaic_transform,
            "driver": "GTiff",
            "count": 1,
            "dtype": 'float32'
        })
        with rasterio.open(temp_overlap_tif, 'w', **out_meta) as dst:
            dst.write(overlap_avg, 1)

        # Step 3: Patch the overlapping areas of the mosaic with the average.
        # The assignment casts the float32 averages to the mosaic's dtype.
        final_output = mosaic[0]  # single band
        final_output[overlap_mask] = overlap_avg[overlap_mask]

        # Step 4: Write final output
        final_out_path = os.path.join(output_folder_date, f'Sentinel2_Merged_{date_str}_{band}.TIF')
        out_meta.update({
            "dtype": final_output.dtype,
            "transform": mosaic_transform
        })
        with rasterio.open(final_out_path, 'w', **out_meta) as dst:
            dst.write(final_output, 1)
    finally:
        for src in src_files:
            src.close()

    mosaicked_files.append(final_out_path)
    print(f"✅ Mosaic completed for band {band}. Output saved to: {final_out_path}\n")

if mosaicked_files:
    print("✅ All files have been successfully mosaicked and saved.")
else:
    print("No band files were found, so no mosaic was created.")

In [None]:
# Load AOI geometry and reproject it to the CRS of the mosaics, because the
# clipping geometry must be in the same CRS as the raster it is applied to.
aoi = gpd.read_file(aoi_path)

# Default CRS, used when no mosaic is available to read the CRS from
aoi_crs = "EPSG:32648"
if mosaicked_files:
    with rasterio.open(mosaicked_files[0]) as ref:
        if ref.crs is not None:
            aoi_crs = ref.crs.to_wkt()

aoi = aoi.to_crs(aoi_crs)

# Dissolve all AOI features into a single geometry (wrapped in a list)
aoi_geom = [aoi.geometry.union_all()]

In [None]:
# Clip every mosaicked band to the AOI and save one clipped GeoTIFF per band
# next to the mosaics, in the per-date output folder.
for tif_path in mosaicked_files:
    with rasterio.open(tif_path) as src:
        # crop=True shrinks the output to the extent of the AOI geometry
        out_image, out_transform = mask(src, aoi_geom, crop=True)
        out_meta = src.meta.copy()

    # The clipped raster has a new size and origin
    out_meta.update({
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform
    })

    # Mosaic names end in "_<band>.TIF"; recover the band name from there
    band = os.path.basename(tif_path).split('_')[-1].replace('.TIF', '')
    clipped_path = os.path.join(output_folder_date, f"Sentinel2_Merged_Clipped_{date_str}_{band}.TIF")

    with rasterio.open(clipped_path, "w", **out_meta) as dest:
        dest.write(out_image)

    print(f"Clipped {band} saved to {os.path.basename(clipped_path)}")

if mosaicked_files:
    # Report the folder the files were actually written to
    print(f"\n All files have been successfully clipped and saved into: {output_folder_date}")
else:
    print("\n No mosaicked files were available, so nothing was clipped.")

In [None]:
# Clean the temporary folder
# (removes the temporary directory created earlier, including everything
# extracted or written into it)
temp_folder.cleanup()