# [0] Setup

In [1]:
from pathlib import Path
import zipfile
import os

import xarray as xr
import rioxarray
import zarr

from redplanet.DatasetManager.hash import (
    get_available_algorithms,
    _calculate_hash_from_file,
)

### Inputs

Dataset can be downloaded here: https://astrogeology.usgs.gov/search/map/mars_mgs_mola_mex_hrsc_blended_dem_global_200m

See README for more info.

In [2]:
# Location of the source GeoTIFF. The default is the original author's local
# copy; set the REDPLANET_DEM_TIF environment variable to point at your own
# download instead of editing this cell.
fpath_dem_tif = Path(os.environ.get(
    'REDPLANET_DEM_TIF',
    '/home/lain/root/100_work/110_projects/111_mars/raw_data/Mars_HRSC_MOLA_BlendDEM_Global_200mp_v2.tif',
))

# Confirm the input is the expected file before doing any work. The computed
# hash is kept so that a mismatch reports the actual value.
hash_dem_tif = _calculate_hash_from_file(fpath_dem_tif, 'xxh3_64')
assert hash_dem_tif == 'dafb191af5826c66', (
    f'Unexpected xxh3_64 hash for {fpath_dem_tif}: {hash_dem_tif}'
)

---
---
# [1] Load/format given TIF data file to `xarray.DataArray`

In [3]:
# Open the GeoTIFF as a chunked (lazy) array and reshape it into a 2-D
# lon/lat grid named after the source product.
dat_dem_xr = (
    rioxarray.open_rasterio(fpath_dem_tif, chunks={'x': 'auto', 'y': 'auto'})
    # Keep band 1 only, then discard the leftover scalar `band` coordinate.
    .sel({'band': 1})
    .drop_vars('band')
    .rename(x='lon', y='lat')
    # Order latitude south -> north (ascending is the `sortby` default).
    .sortby('lat')
    # Re-chunk after the reorder (presumably to normalize the chunk layout).
    .chunk(lon='auto', lat='auto')
    .rename('Mars_HRSC_MOLA_BlendDEM_Global_200mp_v2')
)

# Replace (not merge) whatever attributes were read from the file.
dat_dem_xr.attrs = dict(
    units    = 'meters',
    metadata = dict(
        source_data = 'https://astrogeology.usgs.gov/search/map/mars_mgs_mola_mex_hrsc_blended_dem_global_200m',
        more_info   = 'https://github.com/Humboldt-Penguin/redplanet',
    ),
)

---
### Inspecting dataset properties

At this point, `dat_dem_xr` looks like this:

![](https://files.catbox.moe/39ymhp.png)

And the CRS is:

In [4]:
# The same WKT string is expected under both 'crs_wkt' and 'spatial_ref'.
wkt_mars_2000_sphere = 'GEOGCS["GCS_Mars_2000_Sphere",DATUM["Mars_2000_(Sphere)",SPHEROID["Mars_2000_Sphere_IAU_IAG",3396190,0]],PRIMEM["Reference_Meridian",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]'

# Spherical reference body: both semi-axes share one radius value.
radius_mars_sphere = 3396190.0

known_crs = {
    'GeoTransform'               : '-180.0 0.003374120830641 0.0 90.0 0.0 -0.003374120830641',
    'crs_wkt'                    : wkt_mars_2000_sphere,
    'geographic_crs_name'        : 'GCS_Mars_2000_Sphere',
    'grid_mapping_name'          : 'latitude_longitude',
    'horizontal_datum_name'      : 'Mars_2000_(Sphere)',
    'inverse_flattening'         : 0.0,
    'longitude_of_prime_meridian': 0.0,
    'prime_meridian_name'        : 'Reference_Meridian',
    'reference_ellipsoid_name'   : 'Mars_2000_Sphere_IAU_IAG',
    'semi_major_axis'            : radius_mars_sphere,
    'semi_minor_axis'            : radius_mars_sphere,
    'spatial_ref'                : wkt_mars_2000_sphere,
}

# Fail fast if the loaded raster's CRS metadata differs from the values above.
crs_attrs_loaded = dat_dem_xr.spatial_ref.attrs
assert crs_attrs_loaded == known_crs

---
---
# [2] Save to `zarr.ZipStore`

Note: Instead of directly saving dataset as a `zarr.ZipStore`, we:

- Save as `zarr.DirectoryStore`,
- Set all files to have a fixed/consistent timestamp,
- Zip it.

This is functionally equivalent to saving a `zarr.ZipStore`, but standardizing the modtimes ensures the final file always has a consistent hash.

In [5]:
# All outputs of this cell go under ./output, relative to the current working
# directory.
dirpath_out = Path.cwd() / 'output'
dirpath_out.mkdir(exist_ok=True)


## Save as `zarr.DirectoryStore`
# This directory is only an intermediate: it is zipped and removed again below.
fname_dem = 'Mars_HRSC_MOLA_BlendDEM_Global_200mp_v2'
dirpath_dem_zarr = dirpath_out / f'{fname_dem}.zarr'
with zarr.DirectoryStore(dirpath_dem_zarr) as dirstore:
    dat_dem_xr.to_zarr(store=dirstore)


## Normalize modtimes and save as `zarr.ZipStore` (deleting as we go)
fpath_dem_zarrzip = dirpath_out / f'{fname_dem}.zarr.zip'

# ZIP_STORED archives entries without zip-level compression.
# strict_timestamps=False is required because the epoch-0 mtime set below
# predates 1980-01-01, the earliest date the zip format can record; with the
# flag the timestamp is clamped to 1980-01-01 instead of raising.
# NOTE(review): entries are added in the order `walk` yields them, which
# presumably follows the OS directory listing order -- confirm this is stable
# across machines/filesystems if the hash must be reproducible elsewhere.
with zipfile.ZipFile(fpath_dem_zarrzip, "w", compression=zipfile.ZIP_STORED, strict_timestamps=False) as zipf:
    # Bottom-up walk (`Path.walk` needs Python 3.12+): subdirectories are
    # visited before their parent, so they are already empty when removed.
    for dirpath_parent, dirnames, fnames in dirpath_dem_zarr.walk(top_down=False):

        for fname in fnames:
            fpath = dirpath_parent / fname
            # Fix access/modification times so the archived metadata does not
            # depend on when the file was written.
            os.utime(fpath, times=(0,0))
            # Store the entry under its path relative to the zarr root.
            zipf.write(fpath, arcname = fpath.relative_to(dirpath_dem_zarr))
            # Delete immediately after archiving to limit peak disk usage.
            fpath.unlink()

        for dirname in dirnames:
            (dirpath_parent / dirname).rmdir()

# Finally remove the (now empty) zarr root directory itself.
dirpath_dem_zarr.rmdir()

---
### Final hashes

Mars_HRSC_MOLA_BlendDEM_Global_200mp_v2.zarr.zip
- xxh3_64: 591d09f97c971546
- md5: 63b8913ffedc12438bda2e9817470dee
- sha256: 7bf515611a2fd8562f4daba0fd7bedb3c57b55a8d4f4975c9ca6351de4c2b16f

In [6]:
# Disabled helper: uncomment to print the archive's file name followed by one
# "- <algorithm>: <hash>" line per available algorithm (the layout used in the
# "Final hashes" listing above).
# print(f'{fpath_dem_zarrzip.name}')
# for alg in get_available_algorithms():
#     print(f'- {alg}: {_calculate_hash_from_file(fpath_dem_zarrzip, alg)}')

In [9]:
# Regression check: the archive must hash to the recorded value. The computed
# hash is kept so that a mismatch reports the actual value without having to
# re-hash the file.
hash_dem_zarrzip = _calculate_hash_from_file(fpath_dem_zarrzip, 'xxh3_64')
assert hash_dem_zarrzip == '591d09f97c971546', (
    f'Unexpected xxh3_64 hash for {fpath_dem_zarrzip}: {hash_dem_zarrzip}'
)