Written by Zain Kamal [zain.eris.kamal@rutgers.edu](mailto:zain.eris.kamal@rutgers.edu) in March 2024.

https://github.com/Humboldt-Penguin/redplanet

---
---
# [0] Main 

In [1]:
"""
Written by Zain Kamal (zain.eris.kamal@rutgers.edu).
https://github.com/Humboldt-Penguin/redplanet

For more information, call `help(Crust)` or directly view docstring in `Crust/__init__.py`.

...

TODO:
    [ ] Type hinting
    [ ] Finish writing/fleshing out docstrings. 
    [ ] Add proper units + attributes + citations to xarray dataset. 
    [ ] Write a `plot` function? Need to rewrite the corresponding `visualize` function in `redplanet.utils` first (rename to `plot` there as well). 
    [ ] When using a coarsening factor for `load_topo`, also rechunk dask (or just remove this and leave the feature for people who want more control with get rawdata?)

"""



from redplanet import utils

from pathlib import Path
# import json

import pooch
import numpy as np
import xarray as xr
import rioxarray
import dask
import pyshtools as pysh
import pandas as pd



''' ————————————————————————————— Type Hinting ————————————————————————————— '''

from typing import Iterable, Union, Annotated, Literal, TypeVar, Any

## any iterable of floats — 1D lists, tuples, arrays
NativeArray_1D_Numeric = Iterable[float]

## numpy arrays whose element type is some subtype of `np.number`.
## The `Literal["N"]` metadata is presumably meant to mark "1D, length N"; nothing visible here enforces it.
NumpyNumberType = TypeVar("NumpyNumberType", bound=np.number)
NumpyArray_1D_Numeric = Annotated[np.typing.NDArray[NumpyNumberType], Literal["N"]]

## umbrella alias used in signatures: a 1D numeric sequence, either native or numpy
Array_1D_Numeric = Union[NativeArray_1D_Numeric, NumpyArray_1D_Numeric]







''' ——————————— Global Variables (Intentionally User-Accessible) ——————————— '''

## Root folder where pooch downloads/caches data. Every loader in this module appends its own 
## subfolder to it (e.g. `Crust/topo`, `Crust/moho`), so reassigning this relocates the whole cache.
dirpath_data_root = pooch.os_cache('redplanet')













''' ######################################################################## '''
'''                             Load/Format Data                             '''
''' ######################################################################## '''





''' ——————————————————————————— Dichotomy Coords ——————————————————————————— '''


dat_dichotomy_coords : np.ndarray = ...  # Nx2 numpy array of dichotomy coordinates, structured (lon, lat). Holds `...` until `_init_dichotomy` has run.


def _init_dichotomy() -> None:
    """
    Lazily load the dichotomy boundary coordinates into the global `dat_dichotomy_coords`.

    On the first call, the coordinate file is downloaded (or fetched from the pooch cache), its 
    longitudes are converted from `0->360` to `-180->180`, rows are sorted by longitude, and the 
    first two points are appended again with `lon + 360` as wraparound entries. Later calls return 
    immediately.

    The array is assembled in a local variable and only published to the global once every step has 
    succeeded. Otherwise, a failure part-way through (e.g. during longitude conversion) would leave 
    a half-processed array in the global, and the lazy-init check would treat it as ready.
    """

    global dat_dichotomy_coords

    ## lazy initialization
    if dat_dichotomy_coords is not ...:
        return
    else:
        pass  # needed to avoid pylance annoyances

    ## download / cache dichotomy coords
    with utils.disable_pooch_logger():
        fpath_dichotomy_coords = pooch.retrieve(
            fname      = 'dichotomy_coordinates-JAH-0-360.txt',
            url        = r'https://drive.google.com/file/d/17exPNRMKXGwa3daTEBN02llfdya6OZJY/view?usp=sharing',
            known_hash = 'sha256:42f2b9f32c9e9100ef4a9977171a54654c3bf25602555945405a93ca45ac6bb2',
            path       = dirpath_data_root / 'Crust' / 'dichotomy',
            downloader = utils.download_gdrive_file,
        )
    fpath_dichotomy_coords = Path(fpath_dichotomy_coords)

    ## load into Nx2 numpy array of dichotomy coordinates, structured (lon, lat).
    coords = np.loadtxt(fpath_dichotomy_coords)

    ## fix the lons (convert from `0->360` to `0->180 U -180->0` and sort)
    coords[:,0] = utils.plon2slon(coords[:,0])
    coords = coords[np.argsort(coords[:,0])]

    ## add wraparound coordinates for safety / convenience
    coords = np.vstack((
        coords, 
        [coords[0,0]+360, coords[0,1]], 
        [coords[1,0]+360, coords[1,1]], 
    ))

    ## publish only the fully processed array
    dat_dichotomy_coords = coords

    return














''' ——————————————————— Topography (2 options, DEM or SH) —————————————————— '''


dat_dem_xr : xr.DataArray   # global topography grid, assigned by `_load_topo_dem`. Annotation only: the name is unbound until a loader has run.


def load_topo(
        model_type : str = 'DEM', 
        **kwargs   : Any, 
) -> None:
    """
    Load a topography model, dispatching on `model_type` (case insensitive).

    PARAMETERS:
    ------------
        model_type : str
            'DEM' forwards to `_load_topo_dem`, 'SH' forwards to `_load_topo_sh`.
        **kwargs
            Passed through unchanged to the selected loader.

    RAISES:
    ------------
        ValueError
            If `model_type` is neither 'DEM' nor 'SH'.
    """

    model_key = model_type.lower()

    if model_key == 'dem':
        _load_topo_dem(**kwargs)
    elif model_key == 'sh':
        _load_topo_sh(**kwargs)
    else:
        raise ValueError(f'Given argument `{model_type = }` is not valid. Please choose either "DEM" or "SH" (case insensitive).')









def _load_topo_dem(
    coarsen_factor   : int      = 1, 
    fpath_custom_DEM : str|Path = None, 
    chunk_size       : str      = '128MB', 
    quiet_download   : bool     = False, 
) -> None:
    """
    Load a digital elevation model (DEM) GeoTIFF into the global `dat_dem_xr` as a dask-chunked 
    xarray DataArray with `lon`/`lat` coordinates in degrees.

    PARAMETERS:
    ------------
        coarsen_factor : int
            If greater than 1, the grid is block-averaged by this factor along both `lon` and `lat` 
            (leftover edge cells are trimmed). 
        fpath_custom_DEM : str | Path
            Path to a TIF to load. If None, the MOLA 463m mosaic is downloaded/cached with pooch. 
        chunk_size : str
            If this differs from '128MB', it is written to dask's global `array.chunk-size` config 
            before opening the file. See https://docs.dask.org/en/latest/array-best-practices.html#select-a-good-chunk-size
        quiet_download : bool
            Selects `utils.download_gdrive_file` when True, otherwise 
            `utils.download_gdrive_file_SHOWPROGRESS`. 

    REFERENCES:
    ------------
        Mars MGS MOLA DEM 463m v2:
            > https://astrogeology.usgs.gov/search/map/Mars/GlobalSurveyor/MOLA/Mars_MGS_MOLA_DEM_mosaic_global_463m

    """

    global dat_dem_xr


    # ————————————————————————————————————————————————————————————————————
    # import DEM (digital elevation model) to xarray

    ## choose the downloader up front (only used if no custom path is provided)
    downloader = (
        utils.download_gdrive_file 
        if quiet_download 
        else utils.download_gdrive_file_SHOWPROGRESS
    )

    ## download TIF (if no custom path provided)
    if fpath_custom_DEM is None:
        with utils.disable_pooch_logger():
            fpath_custom_DEM = pooch.retrieve(
                fname      = 'Mars_MGS_MOLA_DEM_mosaic_global_463m.tif',
                url        = r'https://drive.google.com/file/d/1ACMocVNzs7pFwxulLOp2vqQ24LjLOVuU/view?usp=sharing',
                known_hash = 'sha256:38a4eb0b4452855b8dabfac40a367b458555ab4c01b31235807ad0a53c031f4c',
                path       = dirpath_data_root / 'Crust' / 'topo',
                downloader = downloader,
            )
    fpath_custom_DEM = Path(fpath_custom_DEM)

    ## a non-default chunk size is applied through dask's global config
    if chunk_size != '128MB':
        dask.config.set({'array.chunk-size': chunk_size})

    ## load TIF     *(chunked with dask)*, keeping only band 1 and dropping leftover scalar coords
    dat_dem_xr = rioxarray.open_rasterio(fpath_custom_DEM, chunks={'x': 'auto', 'y': 'auto'})
    dat_dem_xr = dat_dem_xr.sel(band=1)
    dat_dem_xr = dat_dem_xr.drop_vars(['band', 'spatial_ref'])


    # ————————————————————————————————————————————————————————————————————
    # post-processing

    ## transform coords: x/y are divided by the (scaled) radius and converted rad -> deg
    ## (presumably x/y arrive as projected meters — TODO confirm against the TIF's CRS)
    R = 3396190.0   # mars radius per IAU 2000 definition
    standard_parallel = 0 
    scale = np.cos(np.radians(standard_parallel))
    deg_per_rad = 180 / np.pi
    lon_degrees = ((dat_dem_xr.x / (R * scale)) * deg_per_rad).data
    dat_dem_xr['x'] = lon_degrees
    lat_degrees = ((dat_dem_xr.y / R) * deg_per_rad).data
    dat_dem_xr['y'] = lat_degrees
    dat_dem_xr = dat_dem_xr.rename({'x': 'lon', 'y': 'lat'})
    dat_dem_xr = dat_dem_xr.sortby('lat')

    ## NOTE: elevation is deliberately NOT converted to topo via `(dat_dem_xr + R) * 1e-3`,
    ## since that significantly increases array size (2GB -> ~8GB).

    ## metadata
    dat_dem_xr.attrs = {'units': 'meters'}


    # ————————————————————————————————————————————————————————————————————
    # optional: downsample for speed/storage
    if coarsen_factor > 1:
        coarsened = dat_dem_xr.coarsen(
            lon      = coarsen_factor, 
            lat      = coarsen_factor, 
            boundary = 'trim', 
        )
        dat_dem_xr = coarsened.mean()


    return













def _load_topo_sh(
    grid_spacing   : float = 0.1,
    quiet_download : bool  = False,
) -> None:
    """
    Loader selected by `load_topo(model_type='SH')`.

    NOTE(review): this function is an unfinished stub. The body declares the global `dat_dem_xr` 
    and then stops at an empty "arg handling" section, so nothing is downloaded or assigned, and 
    neither `grid_spacing` nor `quiet_download` is read. Calling it returns None without changing 
    any state.

    PARAMETERS:
    ------------
        grid_spacing : float
            Currently unused. Presumably the desired grid spacing in degrees — TODO confirm once implemented.
        quiet_download : bool
            Currently unused. Presumably selects the silent downloader as in `_load_topo_dem` — TODO confirm.

    REFERENCES:
    ------------
        (Carried over from the previous docstring; it names the DEM product rather than a 
        spherical-harmonic model, so presumably a placeholder — TODO replace.)
        Mars MGS MOLA DEM 463m v2:
            > https://astrogeology.usgs.gov/search/map/Mars/GlobalSurveyor/MOLA/Mars_MGS_MOLA_DEM_mosaic_global_463m

    """
    global dat_dem_xr


    

    ''' ———————————————————————————————————————————————————————————————————— '''
    '''arg handling'''
    

















''' ——————————————————————— Mohorovičić Discontinuity —————————————————————— '''



dat_moho_xr : xr.DataArray   # global Moho grid, assigned by `load_moho`. Annotation only: the name is unbound until `load_moho` has succeeded.


def load_moho(
    RIM                  : str, 
    insight_thickness    : int, 
    rho_north            : int, 
    rho_south            : int, 
    suppress_model_error : bool = False,
) -> bool:
    """
    Load a Moho model into the global `dat_moho_xr`.

    The model's spherical-harmonic coefficient file is looked up in the registry, downloaded/cached 
    with pooch, expanded onto a 'DH2' grid with pyshtools, and converted to an xarray object. 

    NOTE: All data and the following spreadsheet is taken *directly* from the following work with no alterations:
        - Wieczorek, M. A. (2022). InSight Crustal Thickness Archive [Data set]. Zenodo. https://doi.org/10.5281/zenodo.6477509
    
    Summary of all available models (reuploaded to Google Sheets for convenience — no alterations were made):
        - https://docs.google.com/spreadsheets/d/1ZDILcSPdbXAFp60VfyC4xTZzdnAVhx_U/edit?usp=sharing&ouid=107564547097010500390&rtpof=true&sd=true

    You can also view all available models with `Crust.get_moho_registry()`.

    PARAMETERS:
    ------------
        RIM, insight_thickness, rho_north, rho_south
            Components identifying the model. They are joined into the registry key 
            `'{RIM}-{insight_thickness}-{rho_south}-{rho_north}'` — note the key puts south before 
            north, unlike this function's argument order. 
        suppress_model_error : bool
            If True, an unknown model makes the function return False instead of raising. 

    RETURNS:
    ------------
        bool
            True once the model is loaded; False if it does not exist and `suppress_model_error` is True. 

    RAISES:
    ------------
        ValueError
            If the model does not exist and `suppress_model_error` is False. 
    """

    global dat_moho_xr

    _init_moho_shcoeffs_registry()

    model_name = f'{RIM}-{insight_thickness}-{rho_south}-{rho_north}'


    # ————————————————————————————————————————————————————————————————————
    # download/fetch file containing SH coefficients for the chosen model

    try:
        model_metadata = _df_moho_shcoeffs_registry.loc[model_name].to_dict()
    except KeyError:
        if not suppress_model_error:
            raise ValueError(f'No Moho model with the inputs {model_name} exists.')
        return False

    with utils.disable_pooch_logger():
        fpath_moho_shcoeffs = Path(pooch.retrieve(
            fname      = f'{model_name}.txt', 
            url        = model_metadata['link'], 
            known_hash = model_metadata['hash'], 
            path       = dirpath_data_root / 'Crust' / 'moho' / 'SH_coeffs', 
            downloader = utils.download_gdrive_file, 
        ))


    # ————————————————————————————————————————————————————————————————————
    # load to xarray

    ## convert shcoeffs -> shgrid -> xarray
    moho_shcoeffs = pysh.SHCoeffs.from_file(
        fname = fpath_moho_shcoeffs, 
        name  = model_name, 
    )
    moho_shgrid = moho_shcoeffs.expand(
        grid   = 'DH2', 
        extend = True, 
    )
    dat_moho_xr = moho_shgrid.to_xarray()
    
    ## post-processing (values are left as-is; no m -> km conversion is applied)
    dat_moho_xr = utils.fix_pyshtools_coords(dat_moho_xr)

    ## record which model this grid came from, so `get_model_info` can report it later
    model_attrs = {
        'units'                       : 'meters',
        'moho_model_name'             : model_name,
        'moho_model_RIM'              : RIM,
        'moho_model_insight_thickness': insight_thickness,
        'moho_model_rho_north'        : rho_north,
        'moho_model_rho_south'        : rho_south,
    }
    dat_moho_xr.attrs.update(model_attrs)

    return True











_df_moho_shcoeffs_registry : pd.DataFrame = ...   # indexed by 'model_name'; columns: 'link', 'hash', 'RIM', 'insight_thickness', 'rho_south', 'rho_north'. Holds `...` until `_init_moho_shcoeffs_registry` has run.


def _init_moho_shcoeffs_registry() -> None:
    """
    Lazily load the registry of Moho models into the global `_df_moho_shcoeffs_registry`.

    On the first call, a pre-computed JSON registry is downloaded (or fetched from the pooch cache) 
    and read into a DataFrame indexed by model name. Each model name is split on '-' into the 
    columns 'RIM', 'insight_thickness', 'rho_south', 'rho_north' (the last three cast to int). 
    Later calls return immediately.

    The DataFrame is assembled in a local variable and only published to the global once every 
    step has succeeded. Otherwise, a failure part-way through (e.g. in the split or the int cast) 
    would leave a half-built table in the global, and the lazy-init check would treat it as ready.
    """

    global _df_moho_shcoeffs_registry

    ## lazy initialization
    if _df_moho_shcoeffs_registry is not ...:
        return
    else:
        pass   # needed to avoid pylance annoyances


    ## download a pre-computed registry of moho models, which provides a google drive download link and a sha256 hash for a given model name
    with utils.disable_pooch_logger():
        fpath_moho_shcoeffs_registry = pooch.retrieve(
            fname      = 'mohoSHcoeffs_rawdata_registry.json',
            url        = r'https://drive.google.com/file/d/17JJuTFKkHh651-rt2J2eFKnxiki0w4ue/view?usp=sharing',
            known_hash = 'sha256:1800ee2883dc6bcc82bd34eb2eebced5b59fbe6c593cbc4e9122271fd01c1491',
            path       = dirpath_data_root / 'Crust' / 'moho', 
            downloader = utils.download_gdrive_file,
        )
    fpath_moho_shcoeffs_registry = Path(fpath_moho_shcoeffs_registry)


    ## load to pandas dataframe (one row per model; the JSON keys become the 'model_name' column)
    registry = (
        pd.read_json(
            fpath_moho_shcoeffs_registry, 
            orient='index', 
        ).reset_index(
        ).rename(
            columns={'index': 'model_name'}, 
        )
    )

    ## split 'model_name' into components
    registry[['RIM', 'insight_thickness', 'rho_south', 'rho_north']] = (
        registry['model_name'].str.split('-', expand=True)
    )

    ## the split yields strings; the numeric components are stored as ints
    for numeric_column in ('insight_thickness', 'rho_south', 'rho_north'):
        registry[numeric_column] = registry[numeric_column].astype(int)

    registry = registry.set_index('model_name')

    ## publish only the fully built registry
    _df_moho_shcoeffs_registry = registry

    return    


















''' ######################################################################## '''
'''                                 Accessors                                '''
''' ######################################################################## '''




''' ——————————————————————————————— Dichotomy —————————————————————————————— '''

def is_above_dichotomy(lon: float | np.ndarray, lat: float | np.ndarray) -> bool | np.ndarray:
    """
    Test whether the point(s) `(lon, lat)` lie on or north of the dichotomy boundary.

    For each longitude, the two boundary points that bracket it are located and the boundary 
    latitude is linearly interpolated between them. The result is `lat >= that latitude`. 
    (Additional latitude checks and a cross product were both found to be slower.)

    PARAMETERS:
    ------------
        lon, lat : float or array
            Scalars or arrays that broadcast against each other. Longitudes are compared directly 
            against `dat_dichotomy_coords[:,0]`, so they must use the same convention as that 
            array; no range check or conversion is done here.

    RETURNS:
    ------------
        bool or array of bool
            A single boolean for scalar inputs, otherwise a boolean array of the broadcast shape.
    """

    _init_dichotomy()

    ## arrays are accepted so callers don't need `np.vectorize`; scalars pass through unchanged in effect
    lon = np.asarray(lon)
    lat = np.asarray(lat)

    boundary_lons = dat_dichotomy_coords[:,0]
    boundary_lats = dat_dichotomy_coords[:,1]

    ## index of the boundary point at or immediately left of each `lon`
    i_lon = np.searchsorted(
        boundary_lons, 
        lon, 
        side='right'
    ) - 1
    
    ## bracketing boundary points (left / right)
    llon, llat = boundary_lons[i_lon],   boundary_lats[i_lon]
    rlon, rlat = boundary_lons[i_lon+1], boundary_lats[i_lon+1]

    ## boundary latitude at `lon` by linear interpolation
    tlat = llat + (rlat-llat)*( (lon-llon)/(rlon-llon) )
    return lat >= tlat

    # v1 = (rlon-llon, rlat-llat)
    # v2 = (rlon-lon, rlat-lat)
    # xp = v1[0]*v2[1] - v1[1]*v2[0]  # cross product magnitude
    # return xp >= 0










''' ————————————————————————————————— Moho ————————————————————————————————— '''


def get_moho_registry() -> pd.DataFrame: 
    """
    Return the registry of available Moho models as a pandas DataFrame.

    The registry is initialized on first use. A copy is returned, so edits made by the caller 
    do not alter the registry held by this module.
    """
    _init_moho_shcoeffs_registry()
    registry_copy = _df_moho_shcoeffs_registry.copy()
    return registry_copy





def get_model_info() -> dict:
    """
    Summarize the currently loaded Moho model, read from the attributes of `dat_moho_xr`.

    Returns a dict with keys 'name', 'RIM', 'insight_thickness', 'rho_north', 'rho_south'. 
    Any attribute missing from `dat_moho_xr.attrs` is reported as None.
    """
    moho_attrs = dat_moho_xr.attrs

    ## (key in the returned dict, attribute name on the xarray object)
    info_to_attr = (
        ('name'              , 'moho_model_name'), 
        ('RIM'               , 'moho_model_RIM'), 
        ('insight_thickness' , 'moho_model_insight_thickness'), 
        ('rho_north'         , 'moho_model_rho_north'), 
        ('rho_south'         , 'moho_model_rho_south'), 
    )

    return {
        info_key: moho_attrs.get(attr_name) 
        for info_key, attr_name in info_to_attr
    }




---
---
# [1] Generate Zarrs

---
## [1.2] Zarr — Topo5759

In [2]:
## Download (or fetch from cache) the spherical-harmonic shape file `Mars_shape_5759.sh` into the 
## same `Crust/topo` cache folder the DEM loader uses, showing download progress. 
## `pooch.retrieve` returns the local file path.
with utils.disable_pooch_logger():
    fpath_Mars_shape_5759 = pooch.retrieve(
        fname      = 'Mars_shape_5759.sh',
        url        = r'https://drive.google.com/file/d/1jW3xkcjIONY3SKCU-NCzvYqvOVppj3zx/view?usp=drive_link',
        known_hash = 'sha256:b5f129b3e26669ca0f977a1d486fdd3de3796c555c90c5dc5a6c51a9c81e9bc0',
        path       = dirpath_data_root / 'Crust' / 'topo',
        downloader = utils.download_gdrive_file_SHOWPROGRESS,
        # processor  = pooch.Decompress(),   # left disabled: the file is used as downloaded
    )


fpath_Mars_shape_5759

'C:\\Users\\Eris\\AppData\\Local\\redplanet\\redplanet\\Cache\\Crust\\topo\\Mars_shape_5759.sh'

In [4]:
## Highest spherical-harmonic degree used for this file, and the grid spacing (degrees) that 
## corresponds to it under the relation `spacing = 180 / (2*lmax + 2)`; here 180 / 11520 = 0.015625.
lmax_maxval = 5759
grid_spacing_minval = 180. / (2 * lmax_maxval + 2)
print(f'{grid_spacing_minval = }')

grid_spacing_minval = 0.015625


In [9]:
## Two alternative ways to pick the resolution; exactly one should be active. 
## Option [1] (disabled) starts from a grid spacing and derives lmax; option [2] (active) starts 
## from lmax and derives the spacing. The two formulas are inverses of each other.

''' ———————————————————————— [1] choose grid spacing ——————————————————————— '''

# grid_spacing_desired = 0.05
# lmax_desired = round(90. / grid_spacing_desired - 1)


''' ———————————————————————————— [2] choose lmax ——————————————————————————— '''

lmax_desired = lmax_maxval
grid_spacing_desired = 180. / (2 * lmax_desired + 2)



''' ———————————————————————————————————————————————————————————————————————— '''
''' ———————————————————————————————— convert ——————————————————————————————— '''

## convert shcoeffs -> shgrid -> xarray, and fix coords
## (the grid is expanded by pyshtools first and only wrapped in dask chunks afterwards)
dat_sh_xr = (
    pysh.SHCoeffs.from_file(
        fname  = fpath_Mars_shape_5759, 
        format = 'bshc', 
        lmax   = lmax_desired, 
        name   = 'Mars_shape_5759', 
        units  = 'm', 
    ).expand(
        grid   = 'DH2', 
        extend = True, 
    ).to_xarray()
    .chunk(
        {'lon': 'auto', 'lat': 'auto'}
    )
)

## same coordinate fix that `load_moho` applies to its pyshtools output
dat_sh_xr = utils.fix_pyshtools_coords(dat_sh_xr)


dat_sh_xr

Unnamed: 0,Array,Chunk
Bytes,1.98 GiB,128.00 MiB
Shape,"(11521, 23041)","(4096, 4096)"
Dask graph,24 chunks in 8 graph layers,24 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.98 GiB 128.00 MiB Shape (11521, 23041) (4096, 4096) Dask graph 24 chunks in 8 graph layers Data type float64 numpy.ndarray",23041  11521,

Unnamed: 0,Array,Chunk
Bytes,1.98 GiB,128.00 MiB
Shape,"(11521, 23041)","(4096, 4096)"
Dask graph,24 chunks in 8 graph layers,24 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:
import zarr

## Write the gridded shape model next to the downloaded `.sh` file, as a zipped zarr store 
## (`Mars_shape_5759.zarr.zip`). `mode='w'` is passed to the store, and the `with` block closes it.
fpath_sh5759_xr = Path(fpath_Mars_shape_5759).with_suffix('.zarr.zip')

with zarr.ZipStore(fpath_sh5759_xr, mode='w') as zipstore:
    dat_sh_xr.to_zarr(store=zipstore)





In [14]:
dat_sh_xr.plot()

<matplotlib.collections.QuadMesh at 0x1eb0491f7d0>

Error in callback <function _draw_all_if_interactive at 0x000001EB041E8180> (for post_execute), with arguments args (),kwargs {}:


KeyboardInterrupt: 

In [12]:
Path(fpath_Mars_shape_5759).with_suffix('.zarr.zip')

WindowsPath('C:/Users/Eris/AppData/Local/redplanet/redplanet/Cache/Crust/topo/Mars_shape_5759.zarr.zip')

In [None]:
dat_topo_xr

In [None]:
## Report the extreme values of the grid and their spread.
## NOTE(review): `dat_topo_xr` is never assigned in the visible cells; presumably this should be 
## `dat_sh_xr` (or a variable from an earlier session) — confirm before running.
max_sh = dat_topo_xr.max().item()
min_sh = dat_topo_xr.min().item()

print(f'{max_sh = :.2f}')
print(f'{min_sh = :.2f}')
print(f'range = {max_sh - min_sh :.2f}')

In [None]:
## Coarse global preview: pick the nearest grid cell at every whole degree of lon/lat and plot it.
## NOTE(review): `dat_topo_xr` is never assigned in the visible cells — confirm the intended variable.
dat_topo_xr.sel(
    lon    = np.arange(-180,180,1), 
    lat    = np.arange(-90,90,1), 
    method = 'nearest', 
).plot(figsize=(10,5))

In [None]:
## Full-resolution plot of a small window (lon 170 to 179.9, lat -20 to -10) selected by slicing.
## NOTE(review): `dat_topo_xr` is never assigned in the visible cells — confirm the intended variable.
dat_topo_xr.sel(
    lon    = slice(170,179.9), 
    lat    = slice(-20,-10), 
).plot()

---
---
# `get`

In [None]:
## Load topography with all defaults (model_type='DEM'), then display the resulting global.
load_topo()
dat_dem_xr

In [None]:
## Same Mars radius value used inside `_load_topo_dem`. Adding it to the DEM is only displayed 
## here, not stored — the loader deliberately skips this conversion because of the array-size cost.
R = 3_396_190
dat_dem_xr + R

In [None]:
## Load one Moho model; these four values form the registry key 'Khan2022-39-2900-2900'.
load_moho(
    RIM               = 'Khan2022',
    insight_thickness = 39,
    rho_north         = 2900,
    rho_south         = 2900,
)

dat_moho_xr

In [None]:
def get(
    quantity    : str, 
    lon         : float | Array_1D_Numeric, 
    lat         : float | Array_1D_Numeric, 
    interpolate : bool = False, 
    as_xarray   : bool = False, 
) -> float | Array_1D_Numeric | xr.DataArray:

    # ————————————————————————————————————————————————————————————————————
    '''checks'''

    if (np.any(lon < -180) or np.any(360 < lon)):
        raise ValueError(f'Longitude values must be in range [-180, 360].')
    if (np.any(lat < -90) or np.any(90 < lat)):
        raise ValueError(f'Latitude values must be in range [-90, 90].')
    lon = utils.plon2slon(lon) # this only modifies values btwn 180 and 360

    ## annoying floating point precision issues with numpy arange/linspace
    lon = np.round(lon, 10)
    lat = np.round(lat, 10)



    # ————————————————————————————————————————————————————————————————————
    '''accessing'''
    if interpolate:
        method = 'linear'
    else:
        method = 'nearest'

    match quantity.lower():

        case 'topo':
            result = dat_dem_xr.interp(lon=lon, lat=lat, assume_sorted=True, method=method)

        case 'moho':
            _init_moho_shcoeffs_registry()
            result = dat_moho_xr.interp(lon=lon, lat=lat, assume_sorted=True, method=method)
        
        case 'crust' | 'crustal thickness' | 'crthick':
            topo  = dat_dem_xr .interp(lon=lon, lat=lat, assume_sorted=True, method=method)
            moho  = dat_moho_xr.interp(lon=lon, lat=lat, assume_sorted=True, method=method)
            result = (topo-moho)
        
        case 'rho' | 'density' | 'crustal density':
            vec_is_above_dichotomy = np.vectorize(is_above_dichotomy)
            result_mask = vec_is_above_dichotomy(
                np.meshgrid(lon, lat)[0], 
                np.meshgrid(lon, lat)[1],
            )
            result = np.where(
                result_mask, 
                get_model_info()['rho_north'], 
                get_model_info()['rho_south'],
            )
        
        case _:
            raise Exception('Invalid quantity. Options are ["topo", "moho", "crust"/"crustal thickness"/"crthick", "rho"/"density"/"crustal density"].')

    return result






---
---
---
# scrap

In [None]:
'''
NOTE: for "power-users" who want to work with the xarrays directly, instead of forcing them to use `get_rawdata` (inconvenient, verbose), just let them access the global variables directly (also allows them to customize the data as they want and still use my built-in functions). 
'''
# def get_rawdata(how=None):
#     """
#     `format` options: ['xarray', 'dict', 'dichotomy']

#     Note: when viewing/exploring dictionaries, it may help to call:
#         ```
#         from redplanet import utils
#         utils.print_dict(dat_something_dict)     # insert any dictionary here
#         ```
#     """
#     if how is None:
#         raise ValueError('Options are ["xarray", "dict", "dichotomy"].')

#     _initialize()
#     match how:
#         case 'xarray':
#             return dat_moho_xr
#         case 'dict':
#             return dat_crust_dict
#         case 'dichotomy':
#             return dat_dichotomy_coords
#         case _:
#             raise ValueError('Options are ["xarray", "dict", "dichotomy"].')






# def _update_dict_to_match_xrds():
#     global dat_crust_dict
#     dat_crust_dict = {
#         'lats': dat_moho_xr.lat.values,
#         'lons': dat_moho_xr.lon.values,
#         'attrs': dat_moho_xr.attrs, 
#     }
#     for data_var in list(dat_moho_xr.data_vars):
#         dat_crust_dict[data_var] = dat_moho_xr[data_var].values


