## Import all necessary libraries

### Sentinel-2 Image Preprocessing for Image Matching Tasks

This notebook demonstrates how to preprocess Sentinel-2 .SAFE data for image matching tasks. Sentinel-2 data is stored in the .SAFE format, which contains the individual spectral bands as .jp2 files. We preprocess these images by resizing them to a consistent size, normalizing their values, and saving them in a format that is usable for downstream tasks.



In [7]:
!pip install rasterio
!pip install scikit-image


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-image
  Downloading scikit_image-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting lazy-loader>=0.4
  Downloading lazy_loader-0.4-py3-none-any.whl (12 kB)
Collecting imageio!=2.35.0,>=2.33
  Downloading imageio-2.36.1-py3-none-any.whl (315 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.4/315.4 KB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tifffile>=2022.8.12
  Downloading tifffile-2024.12.12-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.5/227.5 KB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packa

In [8]:
import os
import cv2
import glob
import logging
import numpy as np
import rasterio # library for reading and writing raster data
from rasterio.enums import Resampling # enum for resampling methods
from skimage.transform import resize # library for resizing images


#### Define Input Paths and Parameters
- Path to the raw Sentinel-2 .SAFE data
- Output directory for the processed raster images
- Target dimensions for resizing

In [10]:
# Placeholder values: replace them with real locations before running the notebook.
# Directory that is scanned for Sentinel-2 product folders.
input_data_path = 'path to raw Sentinel-2 .SAFE data'
# Directory that receives the converted .jpeg files (created if it does not exist).
output_data_path = 'Output directory'
# Shape every band is resized to; passed unchanged to skimage's `resize`
# (presumably interpreted as (rows, cols) — confirm if a non-square size is used).
target_size = (224, 224)

Helper functions that ensure the output directory exists and list the sub-directories of the input directory, which are later searched for .SAFE folders.

In [11]:
def ensure_output_directory(path):
    """Create ``path`` (including missing parents) unless it already exists.

    A warning is logged right before the directory is created; an existing
    path is left untouched and nothing is logged.
    """
    if os.path.exists(path):
        return
    logging.warning(f'Output directory {path} does not exist. Creating it...')
    os.makedirs(path, exist_ok=True)

def find_safe_folders(input_path):
    """Return the full paths of all directories located directly in ``input_path``.

    Plain files are ignored. Entries are not filtered by name, and they are
    returned in the order in which ``os.listdir`` yields them.
    """
    subdirectories = []
    for entry_name in os.listdir(input_path):
        candidate = os.path.join(input_path, entry_name)
        if os.path.isdir(candidate):
            subdirectories.append(candidate)
    return subdirectories

#### Process .jp2 Files
Core function to process .jp2 files and save them as .jpeg images

In [12]:
def process_jp2_files(path_to_image_data, output_folder, target_size):
    """Convert every .jp2 file found directly in a folder into a resized .jpeg.

    Args:
        path_to_image_data: Folder searched (non-recursively) for ``*.jp2`` files.
        output_folder: Folder that receives one ``.jpeg`` per input file; it is
            expected to exist already.
        target_size: Output shape forwarded to ``preprocess_jp2_image``.

    Returns:
        None. A problem with a single file is logged and the remaining files
        are still processed.
    """
    # NOTE(review): only files directly inside the folder are matched. Some
    # Sentinel-2 products presumably nest their bands in resolution
    # sub-folders — confirm whether a recursive search is wanted.
    # Sorted so that the processing order is the same on every run.
    jp2_files = sorted(glob.glob(os.path.join(path_to_image_data, '*.jp2')))
    if not jp2_files:
        logging.warning(f'No .jp2 files found in {path_to_image_data}. Skipping...')
        return

    for jp2_file in jp2_files:
        try:
            processed_image = preprocess_jp2_image(jp2_file, target_size)
            if processed_image is None:
                # None signals an unreadable/empty image; handing it to
                # cv2.imwrite would only raise a confusing OpenCV error.
                logging.warning(f'Skipping {jp2_file}: image could not be preprocessed.')
                continue

            # Swap only the real extension; str.replace would also rewrite any
            # other '.jp2' occurrence inside the file name.
            stem, _ = os.path.splitext(os.path.basename(jp2_file))
            image_path = os.path.join(output_folder, stem + '.jpeg')

            # cv2.imwrite reports failure through its return value instead of
            # raising, so the result has to be checked explicitly.
            if not cv2.imwrite(image_path, processed_image):
                logging.error(f'Failed to write {image_path}')
        except Exception as e:
            # Best-effort batch: log the traceback and continue with the next file.
            logging.exception(f'Error processing {jp2_file}: {str(e)}')


#### Preprocess Individual .jp2 Image

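- Reads the first band of the file (bilinear resampling is requested on read)
- Resizes it to the target size with anti-aliasing
- Normalizes the values by the image maximum and scales them to 0–255
- Converts the result to 8-bit (`uint8`) so that it can be written as .jpeg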

In [13]:
def preprocess_jp2_image(jp2_file, target_size):
    """Read band 1 of a .jp2 file and return it as a resized 8-bit image.

    Args:
        jp2_file: Path of the raster file to open with rasterio.
        target_size: Output shape handed to ``skimage.transform.resize``.

    Returns:
        A ``numpy.uint8`` array with values in 0..255, obtained by dividing the
        resized band by its maximum (skipped when the maximum is 0), or
        ``None`` when the band contains no pixels.
    """
    # Only the read needs the dataset; leaving the ``with`` block early closes
    # the file before the memory-heavy resize starts.
    with rasterio.open(jp2_file) as src:
        # NOTE(review): no ``out_shape`` is given, so the band is presumably
        # read at native resolution and ``resampling`` has no visible effect
        # here — confirm against the rasterio documentation.
        image = src.read(1, resampling=Resampling.bilinear)

    if image is None or image.size == 0:
        # logging.error rather than logging.exception: no exception is being
        # handled here, so there is no traceback to attach.
        logging.error(f'Image cannot be read: {jp2_file}')
        return None

    # Resize first, then normalize by the maximum of the resized image.
    image = image.astype(np.float32)
    resized_image = resize(image, target_size, anti_aliasing=True)
    max_value = np.max(resized_image)
    normalized_image = resized_image / max_value if max_value != 0 else resized_image

    # Clip before the cast so that out-of-range values (e.g. negative input)
    # cannot wrap around when converted to uint8.
    scaled_image = np.clip(normalized_image * 255, 0, 255)
    return scaled_image.astype(np.uint8)


#### Preprocessing Workflow

Process all .SAFE folders, find .jp2 images, and preprocess them.

In [14]:
def preprocess_sentinel_data(input_data_path, output_data_path, target_size):
    """Convert the .jp2 bands of all Sentinel-2 products below a directory.

    Every sub-directory of ``input_data_path`` is inspected. A sub-directory
    whose name ends in ``.SAFE`` is treated as a product itself; any other
    sub-directory is searched for ``*.SAFE`` folders. For each product, every
    entry of its ``GRANULE`` folder that has an ``IMG_DATA`` folder is
    processed, and the results are written to
    ``<output_data_path>/<granule name>``.

    Args:
        input_data_path: Directory containing the products (or folders of products).
        output_data_path: Root directory for the converted images; created if missing.
        target_size: Output shape forwarded to ``process_jp2_files``.

    Returns:
        None. Missing folders are logged and skipped.
    """
    # Validate the input before creating anything, so a wrong path neither
    # crashes in os.listdir nor leaves an empty output directory behind.
    if not os.path.isdir(input_data_path):
        logging.error(f'Input directory {input_data_path} does not exist')
        return

    ensure_output_directory(output_data_path)

    folders = find_safe_folders(input_data_path)
    if not folders:
        logging.error(f'No folders found in {input_data_path}')
        return

    for folder in folders:
        if os.path.basename(folder).endswith('.SAFE'):
            # The product sits directly in the input directory.
            safe_folders = [folder]
        else:
            # Sorted so that the processing order is the same on every run.
            safe_folders = sorted(glob.glob(os.path.join(folder, '*.SAFE')))
        if not safe_folders:
            logging.warning(f'No .SAFE folders found in {folder}. Skipping...')
            continue

        for safe_folder in safe_folders:
            granule_path = os.path.join(safe_folder, 'GRANULE')
            if not os.path.exists(granule_path):
                logging.warning(f'No GRANULE folder found in {safe_folder}. Skipping...')
                continue

            granule_folders = sorted(glob.glob(os.path.join(granule_path, '*')))
            for granule_folder in granule_folders:
                path_to_image_data = os.path.join(granule_folder, 'IMG_DATA')
                if not os.path.exists(path_to_image_data):
                    logging.warning(f'No IMG_DATA folder found in {granule_folder}. Skipping...')
                    continue

                # One output folder per granule, named after the granule folder.
                output_folder = os.path.join(output_data_path, os.path.basename(granule_folder))
                ensure_output_directory(output_folder)
                process_jp2_files(path_to_image_data, output_folder, target_size)


In [None]:
# Run the full workflow with the paths and size configured earlier in the notebook.
preprocess_sentinel_data(
    input_data_path=input_data_path,
    output_data_path=output_data_path,
    target_size=target_size,
)