# Working with GRACE to build a mask and calculate water balance

by Anthony Arendt and Landung Setiawan

Datasets:
- GRACE: https://ssed.gsfc.nasa.gov/grace/products.html
- LIS: https://github.com/NASA-Planetary-Science/HiMAT/tree/master/Projects/LIS

This is a continuation of the LIS dataset processing workflow. [<<](../LIS/)

This code is designed to access files directly on the NASA ADAPT system.

In [1]:
# Import the necessary libraries
%matplotlib inline

import os
import pickle

from dask import compute
from distributed import Client
from himatpy.GRACE_MASCON.pygrace import (extract_grace, get_mascon_gdf, get_cmwe_trend_analysis, build_mask)
from himatpy.LIS import utils
from himatpy.tools import animation_tools
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import s3fs
import xarray as xr
import zarr

# Apply the 'ggplot' style sheet to every figure drawn in this notebook
plt.style.use('ggplot')

  return f(*args, **kwds)


## Get GRACE Data

In [2]:
# Read in mascon file
# 'GSFC.h5' is resolved against the current working directory.
grace_file = os.path.abspath('GSFC.h5')

try:
    f = extract_grace(grace_file)
except Exception as e:
    # Report the failure, then re-raise: every later cell reads from `f`,
    # so carrying on without it would only defer the error.
    print(e)
    raise

Data extracted: 
---
Group: mascon
---
<HDF5 dataset "area_deg": shape (1, 41168), type "<f8">
<HDF5 dataset "area_km2": shape (1, 41168), type "<f8">
<HDF5 dataset "basin": shape (1, 41168), type "<f8">
<HDF5 dataset "elev_flag": shape (1, 41168), type "<f8">
<HDF5 dataset "lat_center": shape (1, 41168), type "<f8">
<HDF5 dataset "lat_span": shape (1, 41168), type "<f8">
<HDF5 dataset "location": shape (1, 41168), type "<f8">
<HDF5 dataset "lon_center": shape (1, 41168), type "<f8">
<HDF5 dataset "lon_span": shape (1, 41168), type "<f8">
---
Group: size
---
<HDF5 dataset "N_arcs": shape (1, 1), type "<f8">
<HDF5 dataset "N_mascon_times": shape (1, 1), type "<f8">
<HDF5 dataset "N_mascons": shape (1, 1), type "<f8">
---
Group: solution
---
<HDF5 dataset "cmwe": shape (41168, 148), type "<f8">
---
Group: time
---
<HDF5 dataset "list_ref_days_solution": shape (1, 4128), type "<f8">
<HDF5 dataset "n_ref_days_solution": shape (1, 148), type "<f8">
<HDF5 dataset "n_ref_days_window": shape (

In [3]:
# Pull the 'mascon', 'solution' and 'time' entries out of the extracted GRACE file
mascon, soln, time = f['mascon'], f['solution'], f['time']

In [4]:
# Build the mascon GeoDataFrame and copy its index into an explicit "mascon"
# column, because the mascon numbers (taken from the index) are needed later
mascon_gdf = get_mascon_gdf(mascon).assign(mascon=lambda gdf: gdf.index)

There are 41168 Mascons in this dataset.


In [5]:
# Keep only the mascons within the HMA geographical areas:
# location code 80.0 and a basin code strictly between 5000.0 and 6100.0
# NOTE(review): the meaning of these codes is not visible here - confirm against the GRACE mascon documentation
is_location_80 = mascon_gdf['location'] == 80.0
basin_code = mascon_gdf['basin']
is_hma_basin = (basin_code > 5000.0) & (basin_code < 6100.0)
HMA = mascon_gdf[is_location_80 & is_hma_basin]

## Retrieve NASA Land Information System (LIS) Dataset

In [6]:
# Open the LIS dataset from the Zarr store kept under 'himat-data/store' on S3
fs = s3fs.S3FileSystem()
store = s3fs.S3Map(
    root='himat-data/store',
    s3=fs,
    check=False,
)
ds = xr.open_zarr(store=store)

In [7]:
# Attach this notebook to the Dask scheduler at 'dask-scheduler:8786'
client = Client(address='dask-scheduler:8786')

In [8]:
# Names of every data variable in the LIS dataset
products = list(ds.data_vars)
# Bounding box, taken from the first and last values of the long/lat coordinates
# NOTE(review): presumably both coordinates are sorted ascending - confirm
x_min, x_max = ds.long[0].values, ds.long[-1].values
y_min, y_max = ds.lat[0].values, ds.lat[-1].values
# Select the mascons inside that bounding box (copied so later edits do not touch mascon_gdf)
masked_gdf = mascon_gdf.cx[x_min:x_max, y_min:y_max].copy()
# Bounds of each selected mascon geometry
mascon_geos = [geom.bounds for geom in masked_gdf['geometry']]
# Coordinate values for the mascon and time axes of the aggregated data
mascon_coords = masked_gdf['mascon']
time_coords = ds['time'].data
# Get timeseries
def get_timeseries(mascon_geo, product):
    """Average one LIS variable over the lat/long window of a single mascon.

    Reads the module-level dataset ``ds``.

    Parameters
    ----------
    mascon_geo : sequence
        Bounds of the mascon; indices 0 and 2 give the start and stop of the
        ``long`` slice, indices 1 and 3 the start and stop of the ``lat`` slice.
    product : str
        Name of the variable in ``ds`` to aggregate.

    Returns
    -------
    The ``.data`` of the mean taken over axes 1 and 2 of the selected window.
    """
    long_window = slice(mascon_geo[0], mascon_geo[2])
    lat_window = slice(mascon_geo[1], mascon_geo[3])
    window = ds[product].sel(long=long_window, lat=lat_window)
    # NOTE(review): positional axes assume axis 0 is time and axes 1-2 are the
    # spatial dimensions - confirm against the dataset layout
    return window.mean(axis=(1, 2)).data

# One inner list per product, holding one aggregated series per mascon;
# all of them are handed to dask's compute in a single call
pending_aggs = [
    [get_timeseries(bounds, name) for bounds in mascon_geos]
    for name in products
]
agg_list = compute(*pending_aggs)
# Map each product name to its computed per-mascon series
agg_dict = dict(zip(products, agg_list))

In [20]:
# Flatten every per-mascon series of every product into one 1-D array, then
# reshape it to (product, mascon, time)
agg_flat = np.concatenate([np.concatenate(x) for x in agg_list])
agg_arr = agg_flat.reshape(len(products), len(mascon_coords), len(time_coords))

# `time` is deliberately not rebound here: it holds the GRACE 'time' entry
# read earlier, and the LIS time values are already available as `time_coords`.

# Add coordinate data
agg_data = {
    'data':  agg_arr,
    'time': np.asarray(time_coords),
    'mascons' : np.asarray(mascon_coords),
    'products': np.asarray(products)
}

# Save to pickle file
with open('agg_data.pkl', 'wb') as pkl_file:
    pickle.dump(agg_data, pkl_file)

### This workflow continues in another notebook [>>](GRACE_LIS_fullyProcessed_comparison.ipynb)