In [1]:
#import required functions
import os
import rasterio
from rasterio.merge import merge
from rasterio.io import MemoryFile
from rasterio.mask import mask
from rasterio.plot import show
import numpy as np

### EMIT SCENCES

In [2]:
# Define the path to the root folder
root_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\03092024_EMIT\clipped'


def count_tif_files(folder):
    """Count the ``.tif`` files under ``folder``, including all subdirectories.

    The extension check is case-insensitive (``.tif``, ``.TIF``, ...) so the
    count agrees with the later cells, which also accept upper-case extensions.

    Parameters
    ----------
    folder : str
        Directory to search recursively.

    Returns
    -------
    int
        Number of files whose name ends in ``.tif`` (any letter case). A folder
        that does not exist yields 0, because ``os.walk`` ignores listing
        errors by default.
    """
    return sum(
        filename.lower().endswith('.tif')
        for _dirpath, _dirnames, filenames in os.walk(folder)
        for filename in filenames
    )


# Count the TIFF files found anywhere below the root folder
tif_count = count_tif_files(root_folder)

# Print the total count of TIFF files found
print("Total TIFF files found:", tif_count)


Total TIFF files found: 3


### Mask out zeros to nan values

In [3]:
# Define the output folder and make sure it exists before anything is written to it
output_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\03092024_EMIT\masked'
os.makedirs(output_folder, exist_ok=True)

# Get a list of all TIFF files directly inside the input folder (not recursive).
# The extension is matched case-insensitively, like the mosaic step does.
tiff_files = [os.path.join(root_folder, file) for file in os.listdir(root_folder) if file.lower().endswith('.tif')]
print(tiff_files)

# Loop through each TIFF file and write a float32 copy in which every pixel
# equal to 0 is replaced by NaN.
for tiff_file in tiff_files:
    with rasterio.open(tiff_file) as src:
        # Start from the source metadata; the output is float32 with NaN as no-data
        meta = src.meta
        meta.update({"dtype": 'float32', "nodata": np.nan})

        # Array holding the processed data for all bands
        processed_data = np.empty((src.count, src.height, src.width), dtype='float32')

        # Process one band at a time
        for i in range(src.count):
            # +1 because bands are 1-based in rasterio
            data = src.read(i + 1)

            # Deliberately NOT called `mask`: that name is the function imported
            # from rasterio.mask in the import cell and must not be overwritten.
            zero_mask = (data == 0)

            # Zeros become NaN, all other values are kept (cast to float32 on store)
            processed_data[i] = np.where(zero_mask, np.nan, data)

        # Output keeps the input file name, placed in the output folder
        output_file = os.path.join(output_folder, os.path.basename(tiff_file))

        # Write the processed data to a new TIFF file
        with rasterio.open(output_file, 'w', **meta) as dst:
            dst.write(processed_data)

print("Processing complete. All TIFF files have been updated with zeros masked as NaN.")


['C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\clipped\\clipped_EMIT_L2A_001_20240308T070913_2406805_011.tif', 'C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\clipped\\clipped_EMIT_L2A_20240229T101956_2406007_017.tif', 'C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\clipped\\clipped_EMIT_L2A_TIF_001_20240229T101944_2406007_016.tif']
Processing complete. All TIFF files have been updated with zeros masked as NaN.


### EMIT MOSAIC

In [5]:
# Output file path for the mosaic
output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\03092024_EMIT\mosaic\EMIT20240903_mosaic.tif'

# List all GeoTIFF files in the directory and its subdirectories
tif_files = []
for root, dirs, files in os.walk(output_folder):
    for file in files:
        if file.lower().endswith('.tif'):
            tif_files.append(os.path.join(root, file))

# With method='first' the order of the inputs decides which scene wins where
# scenes overlap, so fix the order instead of relying on directory listing order.
tif_files.sort()

print(f"Found {len(tif_files)} GeoTIFF files.")
print(tif_files)

# Open all GeoTIFF files with error handling. The datasets must stay open while
# merge() reads from them; the `finally` below closes every one of them again,
# even when merging or writing fails.
src_files_to_mosaic = []
try:
    for tif in tif_files:
        try:
            src_files_to_mosaic.append(rasterio.open(tif))
        except rasterio.errors.RasterioIOError as e:
            print(f"Error opening {tif}: {e}")

    print(f"Successfully opened {len(src_files_to_mosaic)} GeoTIFF files for mosaicking.")

    # Check if there are any files to mosaic
    if src_files_to_mosaic:
        # Merge the GeoTIFF files, prioritizing the first available pixels
        mosaic, out_trans = merge(src_files_to_mosaic, method='first')
        print("Mosaic created successfully with priority given to the first available pixels.")

        # Build the output metadata from the first source so that its CRS and
        # no-data value are carried over, then override what merging changed.
        out_meta = src_files_to_mosaic[0].meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": mosaic.shape[1],
            "width": mosaic.shape[2],
            "count": mosaic.shape[0],
            "dtype": mosaic.dtype.name,
            "transform": out_trans,
            "crs": src_files_to_mosaic[0].crs
        })

        # Write the mosaic to a new GeoTIFF file
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with rasterio.open(output_path, "w", **out_meta) as dest:
            dest.write(mosaic)
        print("Mosaic written to file successfully.")
    else:
        print("No valid TIFF files found to mosaic.")
finally:
    # Release the file handles (on Windows open handles keep the files locked)
    for src in src_files_to_mosaic:
        src.close()

print('Progress done')



Found 3 GeoTIFF files.
['C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\masked\\clipped_EMIT_L2A_001_20240308T070913_2406805_011.tif', 'C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\masked\\clipped_EMIT_L2A_20240229T101956_2406007_017.tif', 'C:\\Users\\SkosanaT\\OneDrive - Stellenbosch University\\MAPWAPS\\DataChapter1\\uMzi_ROI\\data\\EMIT\\03092024_EMIT\\masked\\clipped_EMIT_L2A_TIF_001_20240229T101944_2406007_016.tif']
Successfully opened 3 GeoTIFF files for mosaicking.
Mosaic created successfully with priority given to the first available pixels.
Mosaic written to file successfully.
Progress done


### Filter out bands that cause noise

In [3]:
import os
import rasterio
import numpy as np

tif_file_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\03092024_EMIT\mosaic\EMIT20240903_mosaic.tif'
# NOTE(review): this reuses the name `output_folder` from the masking step; re-running
# the mosaic cell after this one would read from the 'filtered' folder instead.
output_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\EMIT\03092024_EMIT\filtered'
os.makedirs(output_folder, exist_ok=True)

with rasterio.open(tif_file_path) as src:
    total_bands = src.count

    # Zero-based indices of the bands to keep; everything outside these three
    # ranges is dropped. They are converted to rasterio's 1-based band numbers
    # when reading (index 4 -> band 5, index 281 -> band 282).
    bands_to_keep = list(range(4, 128)) + list(range(142, 188)) + list(range(213, 282))

    # Fail early with a clear message if the file has fewer bands than expected
    if max(bands_to_keep) >= total_bands:
        raise ValueError(
            f"bands_to_keep needs at least {max(bands_to_keep) + 1} bands, "
            f"but {tif_file_path} has only {total_bands}."
        )

    # Update metadata for the new band count; rasterio's key for the no-data
    # value is the lower-case 'nodata'.
    meta = src.meta
    meta.update({"dtype": 'float32', "count": len(bands_to_keep), "nodata": np.nan})

    # Array holding the processed data for the selected bands
    processed_data = np.empty((len(bands_to_keep), src.height, src.width), dtype='float32')

    for i, band in enumerate(bands_to_keep):
        # Read each band (bands are 1-based in rasterio)
        data = src.read(band + 1)

        # Pixels that are NaN or equal to the source no-data value become NaN.
        # The no-data comparison is skipped when the source declares none.
        invalid = np.isnan(data)
        if src.nodata is not None:
            invalid |= (data == src.nodata)

        # Store the processed band data
        processed_data[i] = np.where(invalid, np.nan, data)

    # Prepare output file path
    output_file = os.path.join(output_folder, '20240904_2filtered.tif')

    # Write the processed data to a new TIFF file
    with rasterio.open(output_file, 'w', **meta) as dst:
        dst.write(processed_data)

print("Processing complete. The TIFF file has been updated with the specified bands removed and masked values set to NaN.")



Processing complete. The TIFF file has been updated with the specified bands removed and masked values set to NaN.
