In [48]:
import s3fs
from datetime import datetime, timedelta
import pystac
from pystac.extensions.eo import EOExtension
from pystac.extensions.sat import SatExtension
import os
from minio import Minio
import tifffile
from pathlib import Path
import zarr
import numpy as np
import shutil
import re
from dateutil import parser
# import gdal

In [49]:
def upload_to_minio(client, bucket_name, local_path, minio_path):
    """Upload a single file, or every file under a directory, to a MinIO bucket.

    Args:
        client: Object exposing ``fput_object(bucket, object_name, file_path)``.
        bucket_name: Destination bucket.
        local_path: Local file or directory to upload.
        minio_path: Object name for a single file, or the key prefix under
            which a directory's files are stored (relative layout preserved).

    Raises:
        FileNotFoundError: If ``local_path`` is neither a file nor a directory,
            so a missing upload is not mistaken for a successful one.
    """
    if os.path.isfile(local_path):
        client.fput_object(bucket_name, minio_path, local_path)
        return

    if not os.path.isdir(local_path):
        raise FileNotFoundError(f"Nothing to upload, path does not exist: {local_path}")

    # Object keys always use "/" regardless of the host OS, so build them by
    # hand rather than with os.path.join (which would emit "\\" on Windows).
    prefix = os.fspath(minio_path)
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    for root, _, files in os.walk(local_path):
        for file in files:
            local_file_path = os.path.join(root, file)
            relative_key = os.path.relpath(local_file_path, local_path).replace(os.sep, "/")
            client.fput_object(bucket_name, prefix + relative_key, local_file_path)

In [50]:
# Connection settings are read from the environment so real credentials never
# have to be committed with the notebook. When a variable is unset, the value
# previously hard-coded here is used, so a local setup keeps working unchanged.
minio_client = Minio(
    os.environ.get("MINIO_ENDPOINT", "localhost:9000"),
    access_key=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),
    secret_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),
    # Plain HTTP unless MINIO_SECURE is set to a truthy value.
    secure=os.environ.get("MINIO_SECURE", "false").strip().lower() in ("1", "true", "yes"),
)

In [51]:
# MinIO bucket that receives both the Zarr stores and the STAC items.
bucket_name = "fusion-lake"
# Directory whose top-level .tif/.TIF files are converted by the driver loop.
source_path = Path("../Data_Lake/Test/38-Cloud_test/Natural_False_Color/")
# Directory holding the per-scene "<scene>_MTL.txt" metadata files.
metadata_path = Path("./Test/38-Cloud_95-Cloud_Test_Metadata_Files/38-Cloud_95-Cloud_Test_Metadata_Files")

In [52]:
# Create the target bucket on first use; an existing bucket is left untouched.
bucket_present = minio_client.bucket_exists(bucket_name)
if not bucket_present:
    print(f"Creating bucket: {bucket_name}")
    minio_client.make_bucket(bucket_name)

In [53]:
def reproject_zarr(image,variable_name,local_zarr_store_path,bucket_name):
    """Write ``image`` to a local Zarr store, upload it to MinIO, and delete the local copy.

    Despite the name, no reprojection happens here: the array is stored as-is
    in a single dataset named ``variable_name`` and uploaded under the key
    prefix ``raw/<variable_name>.zarr`` via the module-level ``minio_client``.

    Args:
        image: Array to store; only ``ndim`` and ``dtype`` are inspected here.
        variable_name: Dataset name inside the group and stem of the remote prefix.
        local_zarr_store_path: Where the temporary local store is created.
            Opened with ``mode='w'``, so existing content there is replaced.
        bucket_name: Destination MinIO bucket.

    The local store is removed even when writing or uploading fails, so a
    failed run does not leave partial ``.zarr`` directories behind.
    """
    local_zarr_store_path_str = str(local_zarr_store_path)

    # Choose dimension names and chunk layout from the array rank.
    if image.ndim == 2:
        dimension_names = ['y', 'x']
        zarr_array_chunks = (512, 512)
    elif image.ndim == 3:
        dimension_names = ['y', 'x','band']
        zarr_array_chunks = (512, 512, 1)
    else:
        # 1-D and higher-rank inputs get generic names (dim_0, dim_1, ...).
        # NOTE(review): whether the string "auto" is accepted for ``chunks``
        # depends on the installed zarr version -- confirm before relying on
        # this branch.
        dimension_names = [f'dim_{i}' for i in range(image.ndim)]
        zarr_array_chunks = "auto"

    root_group = zarr.open_group(local_zarr_store_path_str, mode='w')
    try:
        z_array = root_group.create_dataset(
            name=variable_name,
            data=image,
            chunks=zarr_array_chunks,
            dtype=image.dtype,
            overwrite=True
        )

        # xarray reads dimension names for a Zarr array from this attribute.
        z_array.attrs['_ARRAY_DIMENSIONS'] = dimension_names
        zarr.consolidate_metadata(local_zarr_store_path_str)

        minio_zarr_path_prefix = f"raw/{variable_name}.zarr"
        print(f"Uploading Zarr store to MinIO at prefix: s3://{bucket_name}/{minio_zarr_path_prefix}")
        upload_to_minio(minio_client, bucket_name, local_zarr_store_path, minio_zarr_path_prefix)
    finally:
        # Always drop the temporary local store, including on failure.
        shutil.rmtree(local_zarr_store_path_str)

In [54]:
def parse_mtl(mtl_path):
    """Read a ``NAME = value`` metadata text file into a flat dict of strings.

    Every line is stripped and searched for a ``NAME = value`` pair; an
    optional leading and trailing double quote around the value are dropped.
    Lines without such a pair are skipped, and a name that occurs more than
    once keeps the value from its last occurrence. No type conversion is done.
    """
    key_value = re.compile(r'(\w+)\s=\s"?(.*?)"?$')
    pairs = {}
    with open(mtl_path, 'r') as handle:
        for raw_line in handle:
            found = key_value.search(raw_line.strip())
            if found is None:
                continue
            pairs[found.group(1)] = found.group(2)
    return pairs

In [55]:
def make_stac_item(metadata,zarr_path):
    """Build a STAC item for one scene from its parsed MTL metadata.

    Args:
        metadata: Mapping of MTL keys to string values. Must contain the four
            ``CORNER_{UL,UR,LR,LL}_{LON,LAT}_PRODUCT`` pairs plus
            ``DATE_ACQUIRED``, ``SCENE_CENTER_TIME``, ``LANDSAT_PRODUCT_ID``,
            ``SPACECRAFT_ID``, ``SENSOR_ID`` and ``CLOUD_COVER``.
        zarr_path: Href recorded on the ``data_zarr`` asset.

    Returns:
        A ``pystac.Item`` with the EO and Sat extensions enabled and a single
        Zarr data asset.

    Raises:
        KeyError: If a required metadata key is missing.
        ValueError: If a corner coordinate or the cloud cover is not numeric.
    """
    def _corner(name):
        # (lon, lat) of one product corner, e.g. name="UL".
        return (
            float(metadata[f"CORNER_{name}_LON_PRODUCT"]),
            float(metadata[f"CORNER_{name}_LAT_PRODUCT"]),
        )

    corners = {name: _corner(name) for name in ("UL", "UR", "LR", "LL")}
    lons = [lon for lon, _ in corners.values()]
    lats = [lat for _, lat in corners.values()]

    # The footprint is generally not axis-aligned in lon/lat, so the bounding
    # box has to span all four corners; using only UL and LR can leave the UR
    # and LL corners outside the box.
    west, east = min(lons), max(lons)
    if east - west > 180:
        # Corners on both sides of the antimeridian: the western edge is the
        # smallest positive longitude, the eastern edge the largest negative.
        west = min(lon for lon in lons if lon >= 0)
        east = max(lon for lon in lons if lon < 0)
    bbox = [west, min(lats), east, max(lats)]

    # Geometry as a closed polygon ring (UL, UR, LR, LL, back to UL)
    geometry = {
        "type": "Polygon",
        "coordinates": [[list(corners[name]) for name in ("UL", "UR", "LR", "LL", "UL")]]
    }

    # Acquisition time: date and scene-centre time joined into one ISO string
    dt_str = metadata["DATE_ACQUIRED"] + "T" + metadata["SCENE_CENTER_TIME"]
    dt = parser.isoparse(dt_str)

    cloud_cover = float(metadata["CLOUD_COVER"])

    # Create item
    item = pystac.Item(
        id=metadata["LANDSAT_PRODUCT_ID"],
        bbox=bbox,
        geometry=geometry,
        datetime=dt,
        properties={
            "platform": metadata["SPACECRAFT_ID"].lower().replace("_", "-"),
            "instruments": [i.lower() for i in metadata["SENSOR_ID"].split("_")],
            "eo:cloud_cover": cloud_cover,
            # NOTE(review): "sat:cloud_cover" and "sat:off_nadir" are kept for
            # compatibility with existing consumers; confirm these keys against
            # the sat extension schema. Off-nadir and orbit state are fixed
            # values here, not read from the metadata.
            "sat:cloud_cover": cloud_cover,
            "sat:off_nadir": 0.0,
            "sat:orbit_state": "descending",
            "gsd": 30  # Approximate
        }
    )

    # Enable extensions
    EOExtension.add_to(item)
    SatExtension.add_to(item)

    # Add Zarr asset; the open kwargs tell xarray to use consolidated metadata
    item.add_asset(
        "data_zarr",
        pystac.Asset(
            href=zarr_path,
            media_type="application/vnd+zarr",
            roles=["data"],
            title="Landsat 8 Zarr Dataset",
            extra_fields={
                "xarray:open_kwargs": {"consolidated": True}
            }
        )
    )

    return item

In [56]:
def generate_stac(mtl_filename,variable_name):
    """Create the STAC item for one scene and upload it to MinIO.

    The MTL file ``mtl_filename`` is read from the module-level
    ``metadata_path`` directory, turned into a STAC item whose Zarr asset
    points at ``raw/<variable_name>.zarr``, written to a temporary
    ``<item id>.json`` in the current working directory, and uploaded to the
    module-level ``bucket_name`` as ``stac/<item id>.json``.

    Args:
        mtl_filename: File name of the scene's MTL metadata file.
        variable_name: Scene name used to build the Zarr asset href.

    The temporary JSON file is removed even when saving or uploading fails.
    """
    minio_zarr_path = f"raw/{variable_name}.zarr"

    full_path = os.path.join(metadata_path, mtl_filename)
    metadata = parse_mtl(full_path)

    item = make_stac_item(metadata, minio_zarr_path)

    local_json_path = f"{item.id}.json"
    minio_stac_path = f"stac/{item.id}.json"
    try:
        item.save_object(dest_href=local_json_path)

        print(f"Uploading STAC to MinIO: {minio_stac_path}")
        upload_to_minio(minio_client, bucket_name, local_json_path, minio_stac_path)
        print("Upload complete.")
    finally:
        # Clean up the scratch file whether or not the upload succeeded.
        if os.path.exists(local_json_path):
            os.remove(local_json_path)

In [57]:
# For every .tif/.TIF file directly under source_path: store the raster as
# Zarr on MinIO, then publish a STAC item built from the scene's MTL file.
# No reprojection is applied to the pixels on the way.
for file_path in source_path.glob("*"):
    # Skip directories and anything that is not a GeoTIFF by extension.
    if not file_path.is_file():
        continue
    if file_path.suffix not in (".tif", ".TIF"):
        continue

    image = tifffile.imread(file_path)
    variable_name = file_path.stem
    # The temporary local store is created next to the source image.
    local_zarr_store_path = file_path.with_suffix('.zarr')

    reproject_zarr(image,variable_name,local_zarr_store_path,bucket_name)

    # The metadata file shares the image's base name: "<scene>_MTL.txt".
    base_name = file_path.stem
    mtl_filename = f"{base_name}_MTL.txt"

    generate_stac(mtl_filename,variable_name)

Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_050024_20160520_20170324_01_T1.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_050024_20160520_20170324_01_T1.json
Upload complete.
Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_032037_20160420_20170223_01_T1.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_032037_20160420_20170223_01_T1.json
Upload complete.
Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_064015_20160420_20170223_01_T1.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_064015_20160420_20170223_01_T1.json
Upload complete.
Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_063013_20160920_20170221_01_T1.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_063013_20160920_20170221_01_T1.json
Upload complete.
Uploading Zarr store to MinIO at prefix: s3://fusion-lake/raw/LC08_L1TP_066014_20160520_20170223_01_T1.zarr
Uploading STAC to MinIO: stac/LC08_L1TP_066014_20160520_20170223_01_T1.json
Upload compl