## FWI METRICS Data Store Organization

### Top level
```text
/<data version>
    /<netcdf or cog>
```

### File level
**Exploded to COG (single metric per file) `/v0/cog`**
```text
/fwi
    /<ensemble or model member>
        /<ensemble stat>
            /<cadence>
            /fwi-METRICS-mme50-yearly-245-items.ndjson
            /fwi-METRICS-mme50-yearly-245-collection.ndjson
                /<ssp|historical>
                    /<yyyy-mm | yyyy>
                        /<ensemble stat>_<ssp>_fwi_METRICS_<cadence>_<yyyy-mm>_<fwi metric>.tif
###
# One STAC item will be generated per year, with one asset for each of the 13 COGs/METRICS
# Sample/Src
src_url = s3://cmip6-staging/Sample/FWI/Yearly/MME/MME50_ssp245_fwi_METRICS_yearly_2100.nc
# Dst
dst_urls = s3://veda-nex-gddp-cmip6-public/v0/cog/fwi/mme/mme50/yearly/ssp245/2100/mme50_ssp245_fwi_METRICS_yearly_2100_ffmc.tif
           s3://veda-nex-gddp-cmip6-public/v0/cog/fwi/mme/mme50/yearly/ssp245/2100/mme50_ssp245_fwi_METRICS_yearly_2100_fwi_p25.tif
           s3://veda-nex-gddp-cmip6-public/v0/cog/fwi/mme/mme50/yearly/ssp245/2100/mme50_ssp245_fwi_METRICS_yearly_2100_fwi_dc.tif
###
```

## Publish all yearly netcdfs to public data store

## Iterate over matching objects and explode to single metric COGs

- There are 151 total yearly NetCDFs
  - 65 historical experiment
  - 86 projected ssp245 experiment
  
- Each NetCDF contains 13 variables--one for each FWI Metric 

In [1]:
!pip install s3fs h5netcdf --quiet

In [2]:
import cmip6_file_organization

In [3]:
import boto3
import s3fs
import rioxarray
import rasterio
import rio_cogeo.cogeo
import xarray as xr
from rasterio.io import MemoryFile

# S3 filesystem handle; used further down to open the staged NetCDF objects for reading
fs = s3fs.S3FileSystem()

# # if running locally, uncomment to set user role for session
# boto3.setup_default_session(profile_name="deltawest")

# S3 client; used further down to list the staged objects and to upload the generated COGs
client = boto3.client(service_name="s3")

In [4]:
# Source bucket: the yearly FWI NetCDFs are listed and read from here
STAGING_BUCKET = "cmip6-staging"
# Destination bucket: the generated single-metric COGs are uploaded here
DST_BUCKET = "veda-nex-gddp-cmip6-public"

In [5]:
# Experiments whose yearly NetCDFs are processed; each value is interpolated into the source key prefix
EXPERIMENTS = ["historical", "ssp245"]

# Variable names read from every yearly NetCDF; one COG is generated per entry
METRICS = [
    "FFMC",
    "FWI_P25",
    "FWI_P50",
    "FWI_P75",
    "FWI_P95",
    "DMC",
    "DC",
    "ISI",
    "BUI",
    "FWI",
    "FWI_N15",
    "FWI_N30",
    "FWI_N45",
]

In [6]:
# COG output configuration; compress and predictor are passed to da.rio.to_raster in the conversion loop.
# Deflate compression is slightly smaller than the LZW default for the COG driver,
# and predictor=2 further reduces file size.
# https://kokoalberti.com/articles/geotiff-compression-optimization-guide/
# "Predictors work especially well when there is some spatial correlation in the data, and pixels have values which are similar to their neighbours. As the name suggests, the floating point predictor can only be used on floating point data."
# NOTE(review): the quoted passage ends by describing the floating point predictor, while the value
# set here is 2 (presumably the horizontal-differencing predictor) - confirm 2 is the intended
# setting for the data type of the FWI variables.

driver = "COG"
compress = "DEFLATE"
predictor = 2

In [None]:
# Explode every staged yearly FWI NetCDF into one COG per metric and (optionally) upload it.
#
# Results are accumulated in two lists that are printed at the end of the cell:
#   error_responses - put_object responses whose HTTP status code was not 200
#   invalid_cogs    - destination keys whose generated raster failed COG validation
error_responses = []
invalid_cogs = []
verbose = True
dryrun = True  # convert to COG in-memory but do not upload
extra_dryrun = False  # do not even convert in-memory

# A single list_objects_v2 call returns only one page of keys; paginate so that
# every staged object under the prefix is processed.
paginator = client.get_paginator("list_objects_v2")

for experiment in EXPERIMENTS:
    prefix = f"Sample/FWI/Yearly/MME/MME50_{experiment}_"

    # A page without matching keys carries no "Contents" entry, hence the empty default
    objects = [
        obj
        for page in paginator.paginate(Bucket=STAGING_BUCKET, Prefix=prefix)
        for obj in page.get("Contents", [])
    ]
    if verbose:
        print(f"\n{len(objects)} objects for {prefix=}")

    # Each object is a single yearly NetCDF holding all 13 metric variables
    for obj in objects:
        src_key = obj["Key"]

        if verbose:
            print(f"{src_key=}")

        # Open the staged object and read it as an xarray dataset
        with fs.open(f"{STAGING_BUCKET}/{src_key}") as fileobj:
            with xr.open_dataset(fileobj, engine="h5netcdf") as ds:

                # Now loop through the individual variables and generate a COG for each of these
                for metric in METRICS:

                    # Generate output path
                    cog_key = cmip6_file_organization.generate_yearly_fwi_metrics_key(
                        src_key, "v0", pub_type="cog", metric=metric
                    )

                    if verbose or extra_dryrun:
                        print(f"{cog_key=}")
                    if extra_dryrun:
                        continue

                    # Read individual metric into data array (only one time in yearly NetCDFs)
                    da = ds[metric]

                    # Realign the x dimension to -180 origin: wrap longitudes into [-180, 180) and re-sort
                    da = da.assign_coords(lon=(((da.lon + 180) % 360) - 180)).sortby("lon")

                    # Reverse the DataArray's y dimension. It appears that the source NetCDF's y dimension is inverted.
                    da = da.reindex(lat=list(reversed(da.lat)))

                    # Register spatial dims and CRS on the array so rioxarray can write it as a raster
                    da.rio.set_spatial_dims("lon", "lat", inplace=True)
                    da.rio.write_crs("epsg:4326", inplace=True)

                    # Here we need a memory file to write the output raster to
                    with MemoryFile() as memfile:
                        da.rio.to_raster(memfile.name, driver=driver, compress=compress, predictor=predictor)

                        # cog_validate returns a tuple whose first element is the validity flag
                        if not rio_cogeo.cogeo.cog_validate(memfile.name)[0]:
                            invalid_cogs.append(cog_key)
                            continue

                        if dryrun:
                            print(f"Generation of valid COG {cog_key=} successful. Skipping upload.")
                            continue

                        if verbose:
                            print(f"START to upload {cog_key=} to {DST_BUCKET=}")

                        # Upload memory file and confirm success. The dryrun case has already been
                        # handled above, so reaching this point always means a real upload.
                        put_response = client.put_object(
                            Body=memfile,
                            Bucket=DST_BUCKET,
                            Key=cog_key,
                        )
                        if put_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
                            error_responses.append(put_response)
                            if verbose:
                                print(f"WARNING unable to upload {cog_key=}")

print(f"\nCOMPLETED with {len(error_responses)} errors and {len(invalid_cogs)} invalid cogs")
print(error_responses)
print(invalid_cogs)


65 objects for prefix='Sample/FWI/Yearly/MME/MME50_historical_'
src_key='Sample/FWI/Yearly/MME/MME50_historical_fwi_metrics_yearly_1950.nc'
cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_ffmc.tif'
Generation of valid COG cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_ffmc.tif' successful. Skipping upload.
cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_fwi_p25.tif'
Generation of valid COG cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_fwi_p25.tif' successful. Skipping upload.
cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_fwi_p50.tif'
Generation of valid COG cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_fwi_p50.tif' successful. Skipping upload.
cog_key='v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_histo