### Import libraries

In [1]:
#import required packages
import os
import rasterio
from rasterio.merge import merge
from rasterio.enums import Resampling
from rasterio.io import MemoryFile
from rasterio.mask import mask
from rasterio.plot import show
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np

### Load EMIT scenes

In [2]:
# Root folder holding the clipped EMIT scenes (searched recursively)
root_folder = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\EMIT_Scenes\clipped'


def count_tiff_files(folder):
    """Count the TIFF files under ``folder``, including all subdirectories.

    The extension check is case-insensitive, so ``.tif``, ``.TIF`` and
    ``.Tif`` are all counted.

    Parameters
    ----------
    folder : str
        Directory that is walked recursively with ``os.walk``.

    Returns
    -------
    int
        Number of files whose name ends in ``.tif`` (any letter case).
        A folder that does not exist yields 0, because ``os.walk`` ignores
        directory-listing errors by default.
    """
    return sum(
        filename.lower().endswith('.tif')
        for _dirpath, _dirnames, filenames in os.walk(folder)
        for filename in filenames
    )


# Count the TIFF files available under the root folder
tif_count = count_tiff_files(root_folder)

# Print the total count of TIFF files found
print("Total TIFF files found:", tif_count)


Total TIFF files found: 3


### Mask no data values

In [4]:
# Folder with the clipped EMIT scenes (input) and folder that receives the masked scenes (output)
input_folder = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\EMIT_Scenes\clipped'
output_folder = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\mask'

# Number of bands dropped from the start AND from the end of every scene
BANDS_TO_TRIM = 4

# Pixel value treated as no-data in addition to 0.
# Numerically this is the float32 value nearest to -0.01.
NODATA_SENTINEL = -0.0099999997764826

# Get a list of all TIFF files in the input folder (extension matched case-insensitively)
tiff_files = [
    os.path.join(input_folder, file)
    for file in os.listdir(input_folder)
    if file.lower().endswith('.tif')
]
print(tiff_files)

# Make sure the output folder exists before any scene is written
os.makedirs(output_folder, exist_ok=True)

# Loop through each TIFF file
for tiff_file in tiff_files:
    with rasterio.open(tiff_file) as src:
        # Bands that remain after trimming both ends
        kept_band_count = src.count - 2 * BANDS_TO_TRIM
        if kept_band_count <= 0:
            raise ValueError(
                f"{tiff_file} has {src.count} band(s); at least "
                f"{2 * BANDS_TO_TRIM + 1} are needed to drop the first and last {BANDS_TO_TRIM}."
            )

        # Copy the source metadata and adapt it: float32 output, fewer bands, NaN as no-data
        meta = src.meta.copy()
        meta.update({"dtype": 'float32', "count": kept_band_count, "nodata": np.nan})

        # Array that stores the processed data for the kept bands
        processed_data = np.empty((kept_band_count, src.height, src.width), dtype='float32')

        # rasterio band indexes are 1-based: keep bands 5 .. (src.count - 4)
        first_band = BANDS_TO_TRIM + 1
        last_band = src.count - BANDS_TO_TRIM
        for i, band in enumerate(range(first_band, last_band + 1)):
            # Read each band
            data = src.read(band)

            # Flag pixels equal to 0 or to the no-data sentinel.
            # Named `nodata_mask` so the imported `rasterio.mask.mask` function is not overwritten.
            nodata_mask = (data == 0) | (data == NODATA_SENTINEL)

            # Replace flagged pixels with NaN and store the band
            processed_data[i] = np.where(nodata_mask, np.nan, data)

        # Prepare output file path (same file name, written to the output folder)
        output_file = os.path.join(output_folder, os.path.basename(tiff_file))

        # Write the processed data to a new TIFF file
        with rasterio.open(output_file, 'w', **meta) as dst:
            dst.write(processed_data)

print("Processing complete. All TIFF files have been updated with the first 4 and last 4 bands removed and zeros and -9999 masked as NaN.")


['C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\EMIT_Scenes\\clipped\\clipped_EMIT_L2A_001_20240308T070913_2406805_011.tif', 'C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\EMIT_Scenes\\clipped\\clipped_EMIT_L2A_20240229T101956_2406007_017.tif', 'C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\EMIT_Scenes\\clipped\\clipped_EMIT_L2A_TIF_001_20240229T101944_2406007_016.tif']
Processing complete. All TIFF files have been updated with the first 4 and last 4 bands removed and zeros and -9999 masked as NaN.


### Mosaic the scenes

In [5]:
# List all GeoTIFF files in the directory and its subdirectories
tif_files = []
for root, dirs, files in os.walk(output_folder):
    for file in files:
        if file.lower().endswith('.tif'):
            tif_files.append(os.path.join(root, file))

# os.walk gives no ordering guarantee; sort so "the third file" is the same file on every run
tif_files.sort()

print(f"Found {len(tif_files)} GeoTIFF files.")
print(tif_files)

# Prioritize the third file
if len(tif_files) >= 3:
    # Move the third file to the front
    third_file = tif_files.pop(2)  # Remove the third file from its position
    tif_files.insert(0, third_file)  # Insert it at the beginning of the list

# Output file path for the mosaic
output_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\mosaic_2\EMIT_mosaic2.tif'

# Open all GeoTIFF files with error handling; the finally clause closes every dataset that was opened
src_files_to_mosaic = []
try:
    for tif in tif_files:
        try:
            src_files_to_mosaic.append(rasterio.open(tif))
        except rasterio.errors.RasterioIOError as e:
            print(f"Error opening {tif}: {e}")

    print(f"Successfully opened {len(src_files_to_mosaic)} GeoTIFF files for mosaicking.")

    # Check if there are any files to mosaic
    if src_files_to_mosaic:
        # Merge the GeoTIFF files; the list order puts the (formerly) third file first
        mosaic, out_trans = merge(src_files_to_mosaic)
        print("Mosaic created successfully with priority given to the third file.")

        # Start from the first source's metadata so its CRS and no-data value are kept,
        # then override everything that changed through merging
        out_meta = src_files_to_mosaic[0].meta.copy()
        out_meta.update({
            "driver": 'GTiff',
            "height": mosaic.shape[1],
            "width": mosaic.shape[2],
            "count": mosaic.shape[0],
            "dtype": mosaic.dtype,
            "transform": out_trans,
        })

        # Make sure the destination folder exists, then write the mosaic to a new GeoTIFF file
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with rasterio.open(output_path, "w", **out_meta) as dest:
            dest.write(mosaic)
        print("Mosaic written to file successfully.")
    else:
        print("No valid TIFF files found to mosaic.")
finally:
    # Release the file handles of every source dataset, even if merging or writing failed
    for src in src_files_to_mosaic:
        src.close()

print('Progress done')


Found 3 GeoTIFF files.
['C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\mask\\clipped_EMIT_L2A_001_20240308T070913_2406805_011.tif', 'C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\mask\\clipped_EMIT_L2A_20240229T101956_2406007_017.tif', 'C:\\Users\\User\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\mask\\clipped_EMIT_L2A_TIF_001_20240229T101944_2406007_016.tif']
Successfully opened 3 GeoTIFF files for mosaicking.
Mosaic created successfully with priority given to the third file.
Mosaic written to file successfully.
Progress done
