In [30]:
import os
import rasterio
from glob import glob
import glob
import re
from pathlib import Path
from scipy.ndimage import zoom


In [31]:
# Sort key: first integer embedded in a file's name (directories are ignored)
def numeric_sort_key(filepath):
    """
    Return the first integer that appears in the file name of ``filepath``.

    Only the final path component is searched. Digits in parent directories
    (for example a numeric transaction-ID folder) would otherwise match first
    and give every file in that folder the same key.

    Args:
        filepath (str | os.PathLike): Path to a file, or a bare file name.

    Returns:
        int: The first run of digits in the file name, or 0 if it has none.
    """
    # Strip the directory part so only the file name is inspected
    filename = os.path.basename(filepath)
    match = re.search(r'\d+', filename)
    # Files without any digits share key 0 and keep their relative order
    return int(match.group()) if match else 0
    
def preprocess_dataset(transaction_ID, image_dir, output_dir, scale_factor=2):
    """
    Upscale every image of one transaction with cubic spline interpolation
    and save the results using rasterio.

    Files matching ``<image_dir>/<transaction_ID>/*tif`` are read band by band,
    resampled with ``scipy.ndimage.zoom(order=3)`` and written under the same
    file name to ``<output_dir>/<transaction_ID>/``. The output transform is
    rescaled so that pixel size shrinks in proportion to the upscaling.

    Args:
        transaction_ID (str): Name of the sub-directory (inside both
            ``image_dir`` and ``output_dir``) that holds this batch of images.
        image_dir (str): Directory containing input images.
        output_dir (str): Base directory to store the preprocessed data.
        scale_factor (int | float): Factor by which to upscale the images.
            Must be positive.

    Raises:
        ValueError: If ``scale_factor`` is not positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    # Get all the image paths for this transaction and sort them numerically
    image_paths = sorted(glob.glob(os.path.join(image_dir, transaction_ID, "*tif")), key=numeric_sort_key)

    # Create the output directory for the upscaled images
    images_output_dir = Path(os.path.join(output_dir, transaction_ID))
    images_output_dir.mkdir(parents=True, exist_ok=True)

    # Process images
    print(f"Processing {len(image_paths)} images...")
    for img_path in image_paths:
        with rasterio.open(img_path) as src:
            # Read and upscale each band (rasterio band indexes are 1-based)
            upscaled_data = [
                zoom(src.read(band), scale_factor, order=3)  # Cubic interpolation
                for band in range(1, src.count + 1)
            ]

            # Take the output size from the resampled arrays: zoom() rounds the
            # scaled shape, so truncating height * scale_factor can disagree
            # with it for fractional scale factors.
            if upscaled_data:
                new_height, new_width = upscaled_data[0].shape
            else:
                new_height = int(src.height * scale_factor)
                new_width = int(src.width * scale_factor)

            # Write to a new file
            meta = src.meta.copy()
            meta.update({
                "height": new_height,
                "width": new_width,
                # Scale by the true pixel ratio so the raster keeps its extent;
                # for integer factors this equals 1 / scale_factor.
                "transform": src.transform * rasterio.Affine.scale(
                    src.width / new_width, src.height / new_height
                ),
            })
            output_path = images_output_dir / os.path.basename(img_path)
            with rasterio.open(output_path, "w", **meta) as dst:
                for i, band_data in enumerate(upscaled_data, start=1):
                    dst.write(band_data, i)

    print(f"Preprocessing complete. Data saved to: {output_dir}")


In [34]:
# Upscale the images of transaction 200010 by a factor of 2.
# Inputs come from ~/satellite-ml-solarp-detection/acquisition/200010 and
# results go to ~/satellite-ml-solarp-detection/image_enhancement/200010.
preprocess_dataset(
    transaction_ID="200010",
    image_dir=os.path.join(
        os.path.expanduser("~"), "satellite-ml-solarp-detection", "acquisition"
    ),
    output_dir=os.path.join(
        os.path.expanduser("~"), "satellite-ml-solarp-detection", "image_enhancement"
    ),
    scale_factor=2,
)

Processing 16 images...
Preprocessing complete. Data saved to: /home/sagemaker-user/satellite-ml-solarp-detection/image_enhancement
