In [57]:
import s3fs
from datetime import datetime, timedelta
import pystac
from pystac.extensions.eo import EOExtension
from pystac.extensions.sat import SatExtension
import os
from minio import Minio
import tifffile
from pathlib import Path
import zarr
import numpy as np
import shutil
import re
from dateutil import parser
from skimage.transform import resize
# import gdal

In [2]:
def upload_to_minio(client, bucket_name, local_path, minio_path):
    """Upload a single file or a whole directory tree to a MinIO bucket.

    Args:
        client: Object exposing ``fput_object(bucket_name, object_name, file_path)``
            (the notebook passes a ``Minio`` client).
        bucket_name: Name of the destination bucket.
        local_path: File or directory on the local filesystem (str or path-like).
        minio_path: Object key when ``local_path`` is a file, or key prefix when it
            is a directory; files keep their layout relative to ``local_path``
            underneath that prefix.

    Raises:
        FileNotFoundError: If ``local_path`` is neither an existing file nor an
            existing directory (previously this was a silent no-op).
    """
    import posixpath  # local import: object keys must use "/" on every OS

    if os.path.isfile(local_path):
        client.fput_object(bucket_name, minio_path, local_path)
        return

    if not os.path.isdir(local_path):
        raise FileNotFoundError(f"Nothing to upload, path does not exist: {local_path}")

    for root, _, files in os.walk(local_path):
        for file in files:
            local_file_path = os.path.join(root, file)
            relative = os.path.relpath(local_file_path, local_path)
            # os.path.relpath uses the platform separator; normalise it so the
            # key hierarchy is identical on Windows and POSIX.
            object_key = posixpath.join(minio_path, relative.replace(os.sep, "/"))
            client.fput_object(bucket_name, object_key, local_file_path)

In [3]:
# MinIO connection used by every upload in this notebook.
# Endpoint and credentials are read from the environment when present; the
# fallbacks are the values that used to be hard-coded in this cell, so a local
# setup keeps working unchanged. Export MINIO_ENDPOINT / MINIO_ACCESS_KEY /
# MINIO_SECRET_KEY instead of editing real credentials into the notebook.
minio_client = Minio(
    os.environ.get("MINIO_ENDPOINT", "localhost:9000"),
    access_key=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),
    secret_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),
    secure=False,  # connection is made without TLS
)

In [6]:
# Destination bucket for all uploads, and the local root folder that is
# scanned for scene sub-folders.
bucket_name = "fusion-lake"
source_path = Path(".//new_data")

In [56]:
# Band descriptions attached to the Zarr asset as "eo:bands" (STAC EO extension).
# Entries are ordered B1..B11; wavelengths follow the EO extension convention
# (micrometres). Every band carries a "common_name": B8 and B9 use the EO
# extension's "pan" and "cirrus" so clients can select bands uniformly.
landsat8_bands = [
    {"name": "B1", "common_name": "coastal", "description": "Coastal aerosol", "center_wavelength": 0.44, "full_width_half_max": 0.02},
    {"name": "B2", "common_name": "blue", "description": "Blue", "center_wavelength": 0.48, "full_width_half_max": 0.06},
    {"name": "B3", "common_name": "green", "description": "Green", "center_wavelength": 0.56, "full_width_half_max": 0.06},
    {"name": "B4", "common_name": "red", "description": "Red", "center_wavelength": 0.65, "full_width_half_max": 0.04},
    {"name": "B5", "common_name": "nir", "description": "Near Infrared (NIR)", "center_wavelength": 0.86, "full_width_half_max": 0.03},
    {"name": "B6", "common_name": "swir16", "description": "Short-wave Infrared 1 (SWIR 1)", "center_wavelength": 1.6, "full_width_half_max": 0.08},
    {"name": "B7", "common_name": "swir22", "description": "Short-wave Infrared 2 (SWIR 2)", "center_wavelength": 2.2, "full_width_half_max": 0.2},
    {"name": "B8", "common_name": "pan", "description": "Panchromatic", "center_wavelength": 0.59, "full_width_half_max": 0.18},
    {"name": "B9", "common_name": "cirrus", "description": "Cirrus", "center_wavelength": 1.37, "full_width_half_max": 0.02},
    {"name": "B10", "common_name": "lwir11", "description": "Thermal Infrared (TIRS) 1", "center_wavelength": 10.9, "full_width_half_max": 0.8},
    {"name": "B11", "common_name": "lwir12", "description": "Thermal Infrared (TIRS) 2", "center_wavelength": 12.0, "full_width_half_max": 1.0},
]

In [5]:
# Create the destination bucket on first use so later uploads have somewhere
# to land; nothing happens when the bucket is already there.
if not minio_client.bucket_exists(bucket_name):
    print(f"Creating bucket: {bucket_name}")
    minio_client.make_bucket(bucket_name)

In [29]:
def reproject_zarr(image,variable_name,local_zarr_store_path,bucket_name):
    """Write ``image`` to a local Zarr store, upload the store to MinIO, then delete it.

    Despite the name, no reprojection is performed: the array is stored exactly
    as it is passed in.

    Args:
        image: Array exposing ``ndim`` and ``dtype`` (the notebook passes a
            stacked NumPy array).
        variable_name: Name of the array inside the Zarr group; also used for
            the upload prefix ``raw/{variable_name}.zarr``.
        local_zarr_store_path: Location of the temporary local store (str or
            Path). The group is opened with ``mode='w'``, and the directory is
            removed before returning.
        bucket_name: Destination MinIO bucket.

    Relies on the module-level ``minio_client`` and ``upload_to_minio``.
    The local store is removed even when writing or uploading raises, so a
    failed run does not leave a half-written store next to the source data.
    """
    local_zarr_store_path_str = str(local_zarr_store_path)

    # Dimension names and chunk shapes for the ranks this notebook produces;
    # every other rank gets generic names (1-D therefore yields ['dim_0']).
    if image.ndim == 2:
        dimension_names = ['y', 'x']
        zarr_array_chunks = (512, 512)
    elif image.ndim == 3:
        dimension_names = ['band','y', 'x']
        zarr_array_chunks = (1,512, 512)
    else:
        dimension_names = [f'dim_{i}' for i in range(image.ndim)]
        # NOTE(review): "auto" is passed through unchanged from the original
        # code; confirm the installed zarr version accepts it in create_dataset.
        zarr_array_chunks = "auto"

    try:
        root_group = zarr.open_group(local_zarr_store_path_str, mode='w')

        z_array = root_group.create_dataset(
            name=variable_name,
            data=image,
            chunks=zarr_array_chunks,
            dtype=image.dtype,
            overwrite=True
        )

        # Dimension names are recorded under the attribute xarray looks for.
        z_array.attrs['_ARRAY_DIMENSIONS'] = dimension_names
        zarr.consolidate_metadata(local_zarr_store_path_str)

        minio_zarr_path_prefix = f"raw/{variable_name}.zarr"
        print(f"Uploading Zarr store to MinIO at prefix: s3://{bucket_name}/{minio_zarr_path_prefix}")
        upload_to_minio(minio_client, bucket_name, local_zarr_store_path, minio_zarr_path_prefix)
    finally:
        # Guarded so a failure before the directory exists does not mask the
        # original exception with a FileNotFoundError from rmtree.
        if os.path.isdir(local_zarr_store_path_str):
            shutil.rmtree(local_zarr_store_path_str)

In [34]:
def parse_mtl(mtl_path):
    """Read a ``KEY = value`` style MTL text file into a flat dict of strings.

    Every line matching ``KEY = value`` contributes one entry; surrounding
    double quotes on the value are dropped. Lines that do not match are
    skipped, and when a key occurs more than once the last occurrence wins.

    Args:
        mtl_path: Path of the text file to read.

    Returns:
        dict mapping each key to its (unconverted) string value.
    """
    assignment = re.compile(r'(\w+)\s=\s"?(.*?)"?$')
    with open(mtl_path, 'r') as handle:
        hits = (assignment.search(raw_line.strip()) for raw_line in handle)
        # The comprehension is consumed while the file is still open.
        return {hit.group(1): hit.group(2) for hit in hits if hit}

In [58]:
def make_stac_item(metadata,zarr_path):
    """Build a STAC item for one scene from parsed MTL metadata.

    Args:
        metadata: Mapping of MTL keys to string values (as returned by
            ``parse_mtl``). Keys read here: ``CORNER_{UL,UR,LR,LL}_{LON,LAT}_PRODUCT``,
            ``DATE_ACQUIRED``, ``SCENE_CENTER_TIME``, ``LANDSAT_PRODUCT_ID``,
            ``SPACECRAFT_ID``, ``SENSOR_ID`` and ``CLOUD_COVER``.
        zarr_path: href stored on the ``data_zarr`` asset.

    Returns:
        A ``pystac.Item`` with the EO and Sat extensions enabled and a single
        Zarr asset carrying the module-level ``landsat8_bands`` as ``eo:bands``.

    Raises:
        KeyError: If a required metadata key is missing.
        ValueError: If a corner coordinate or the cloud cover is not numeric.
    """
    corner_tags = ("UL", "UR", "LR", "LL")

    # Parse every corner once, as (lon, lat).
    corners = {
        tag: (
            float(metadata[f"CORNER_{tag}_LON_PRODUCT"]),
            float(metadata[f"CORNER_{tag}_LAT_PRODUCT"]),
        )
        for tag in corner_tags
    }

    # The footprint is generally not axis-aligned in lon/lat, so the envelope
    # must take the extremes over all four corners; using only UL and LR can
    # leave the UR/LL corners outside the bbox.
    lons = [corners[tag][0] for tag in corner_tags]
    lats = [corners[tag][1] for tag in corner_tags]
    bbox = [min(lons), min(lats), max(lons), max(lats)]

    # Geometry as a closed polygon ring (UL, UR, LR, LL, back to UL).
    # NOTE(review): this ring order is kept from the original; RFC 7946
    # recommends counter-clockwise exterior rings — confirm if consumers care.
    geometry = {
        "type": "Polygon",
        "coordinates": [[list(corners[tag]) for tag in corner_tags + ("UL",)]]
    }

    # Acquisition time: date and scene-centre time joined into one ISO string.
    dt_str = metadata["DATE_ACQUIRED"] + "T" + metadata["SCENE_CENTER_TIME"]
    dt = parser.isoparse(dt_str)

    cloud_cover = float(metadata["CLOUD_COVER"])

    # Create item
    item = pystac.Item(
        id=metadata["LANDSAT_PRODUCT_ID"],
        bbox=bbox,
        geometry=geometry,
        datetime=dt,
        properties={
            "platform": metadata["SPACECRAFT_ID"].lower().replace("_", "-"),
            "instruments": [i.lower() for i in metadata["SENSOR_ID"].split("_")],
            "eo:cloud_cover": cloud_cover,
            # NOTE(review): the three sat:* values below are kept as written;
            # off-nadir and orbit state are constants, not read from metadata.
            "sat:cloud_cover": cloud_cover,
            "sat:off_nadir": 0.0,
            "sat:orbit_state": "descending",
            "gsd": 30  # Approximate
        }
    )

    # Enable extensions
    EOExtension.add_to(item)
    SatExtension.add_to(item)

    # Add Zarr asset
    item.add_asset(
        "data_zarr",
        pystac.Asset(
            href=zarr_path,
            media_type="application/vnd+zarr",
            roles=["data"],
            title="Landsat 8 Zarr Dataset",
            extra_fields={
                "xarray:open_kwargs": {"consolidated": True},
                "eo:bands": landsat8_bands
            }
        )
    )

    return item

In [59]:
def generate_stac(img_folder,mtl_filename,variable_name):
    """Create the STAC item for one scene and upload it to MinIO.

    The MTL file ``img_folder/mtl_filename`` is parsed, turned into a STAC item
    whose Zarr asset points at ``raw/{variable_name}.zarr``, written to
    ``{item.id}.json`` in the current working directory, uploaded as
    ``stac/{item.id}.json`` and finally removed locally.

    Args:
        img_folder: Folder that contains the MTL file.
        mtl_filename: File name of the MTL text file inside ``img_folder``.
        variable_name: Name used for the Zarr store this item refers to.

    Uses the module-level ``minio_client`` and ``bucket_name``. The temporary
    JSON file is deleted even when saving or uploading fails.
    """
    minio_zarr_path = f"raw/{variable_name}.zarr"

    full_path = os.path.join(img_folder, mtl_filename)
    metadata = parse_mtl(full_path)

    item = make_stac_item(metadata, minio_zarr_path)

    local_json_path = f"{item.id}.json"
    minio_stac_path = f"stac/{item.id}.json"

    try:
        item.save_object(dest_href=local_json_path)

        print(f"Uploading STAC to MinIO: {minio_stac_path}")
        upload_to_minio(minio_client, bucket_name, local_json_path, minio_stac_path)
        print("Upload complete.")
    finally:
        # Runs on success and on failure so no stray JSON is left in the CWD.
        if os.path.exists(local_json_path):
            os.remove(local_json_path)

In [62]:
# Convert every scene folder (img_*) below source_path into one stacked Zarr
# array in MinIO plus a STAC item describing it.
PANCHROMATIC_BAND = 8
# "<scene id>_B<n>" — group 1 is the scene id, group 2 the band number.
band_stem_pattern = re.compile(r'^(.*)_B(\d{1,2})$')

for img_folder in source_path.iterdir():
    if not (img_folder.is_dir() and img_folder.name.startswith("img_")):
        continue

    tif_files = list(img_folder.glob("*.tif")) + list(img_folder.glob("*.TIF"))

    # Keep only band rasters B1..B11. The scene id is taken from a band file
    # name, never from an arbitrary TIFF, so auxiliary rasters in the same
    # folder cannot produce a wrong id.
    band_files = {}
    scene_ids = {}
    for tif_file in tif_files:
        match = band_stem_pattern.match(tif_file.stem)
        if not match:
            continue
        band_num = int(match.group(2))
        if 1 <= band_num <= 11:
            band_files[band_num] = tif_file
            scene_ids[band_num] = match.group(1)

    if not band_files:
        if tif_files:
            print(f"Skipping {img_folder}: no *_B<n> band files found")
        continue

    band_numbers = sorted(band_files)
    img_id = scene_ids[band_numbers[0]]

    # Check for the metadata file before the expensive read/resize/upload, so
    # a missing MTL does not leave a Zarr store in MinIO without a STAC item.
    mtl_filename = f"{img_id}_MTL.txt"
    if not (img_folder / mtl_filename).exists():
        print(f"Skipping {img_folder}: missing metadata file {mtl_filename}")
        continue

    bands = [tifffile.imread(band_files[band_num]) for band_num in band_numbers]

    # The panchromatic band (B8) is delivered on a different pixel grid than
    # the other bands, so it is resampled to their shape before stacking.
    # It is located by band number, not by list position, so a missing band
    # cannot cause the wrong array to be resized.
    ref_shape = next(
        (band.shape for band_num, band in zip(band_numbers, bands) if band_num != PANCHROMATIC_BAND),
        bands[0].shape,
    )
    if PANCHROMATIC_BAND in band_files:
        pan_index = band_numbers.index(PANCHROMATIC_BAND)
        pan_band = bands[pan_index]
        if pan_band.shape != ref_shape:
            bands[pan_index] = resize(
                pan_band, ref_shape, order=1, preserve_range=True, anti_aliasing=True
            ).astype(pan_band.dtype)

    stacked_image = np.stack(bands, axis=0)

    # NOTE: no reprojection is applied; the pixel grid is stored as read.
    variable_name = img_id
    local_zarr_store_path = img_folder / f"{img_id}.zarr"

    reproject_zarr(stacked_image,variable_name,local_zarr_store_path,bucket_name)

    generate_stac(img_folder,mtl_filename,variable_name)
        

Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_142049_20250518_20250518_02_RT.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_142049_20250518_20250518_02_RT.json
Upload complete.
Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC09_L1TP_141047_20250519_20250519_02_T1.zarr
Uploading STAC to MinIO: stac/LC09_L1TP_141047_20250519_20250519_02_T1.json
Upload complete.


### Note:

Resizing the panchromatic band (Band 8) significantly increases processing time. Consider excluding it from the stacked image to improve efficiency.