### Code to aggregate NASA LIS data to GRACE mascons

### Load the packages

In [3]:
# Jupyter notebook magic
%matplotlib inline

# Import libraries to work with the data
import os
import sys
import matplotlib 
import matplotlib.pyplot as plt
import numpy as np
import scipy
from scipy import signal
import scipy.optimize
from scipy import stats
import geopandas as gpd
import pandas as pd
import s3fs
import salem
from PyAstronomy import pyasl
import xarray as xr
from datetime import datetime, timedelta
import boto3
import botocore
import dask.array as da
#from dask import compute
from dask.diagnostics import ProgressBar
    
# Apply the 'ggplot' stylesheet to figures drawn in this notebook
matplotlib.style.use('ggplot')

from himatpy.GRACE_MASCON.pygrace import (extract_grace, get_mascon_gdf, trend_analysis, get_cmwe_trend_analysis, select_mascons, aggregate_mascons)

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and numerical warnings raised by the libraries above.
# Consider narrowing the filter to the specific category that was noisy.
warnings.filterwarnings('ignore')

  return f(*args, **kwds)
  return f(*args, **kwds)


### Open the LIS data

In [3]:
# Show the kernel's working directory: the relative GRACE file path used later
# in this notebook is resolved against it.
os.getcwd()

'/mnt/c/Users/HCD/UW_work/git/HiMAT/Projects/WBM'

In [6]:
# Directory holding the LIS NetCDF output.
# It is located relative to the working directory (Projects/WBM inside the
# repository), the same convention the GRACE file path in this notebook uses,
# instead of through one user's absolute path. Run from Projects/WBM and this
# resolves to <repo>/files/LIS/new/.
# os.path.join(..., '') keeps the trailing separator, so code that appends a
# bare file name to this string continues to work.
datadir = os.path.join(os.path.abspath('../../files/LIS/new'), '')

In [7]:
# Open the monthly LIS NetCDF file.
# os.path.join builds the path correctly whether or not `datadir` ends with a
# separator; plain string concatenation silently produced a wrong file name
# when the trailing slash was missing.
dgws = xr.open_dataset(os.path.join(datadir, 'LISMonthly.nc'))

In [8]:
# Scale the whole dataset by 0.1.
# NOTE(review): 0.1 is an unexplained magic number — presumably a unit
# conversion; confirm and give it a named constant.
# NOTE(review): the multiplication applies to every data variable in `dgws`.
# If lat/lon are stored as data variables (as the next section heading
# suggests) they are scaled too — confirm that this is harmless.
dgws_ok=dgws*0.1

### Extract the lat, lon from data variables

In [15]:
# Take the `north_south` / `east_west` members of the `lat` / `lon` variables.
# NOTE(review): these are read from the dimension names attached to lat/lon,
# not from the lat/lon values themselves — presumably they hold the latitude
# and longitude of each row/column; confirm against the file.
latok=dgws_ok.lat.north_south
lonok=dgws_ok.lon.east_west

In [21]:
# Attach the extracted east_west values to the scaled dataset as a coordinate named `lon`
LIS = dgws_ok.assign_coords(lon=lonok)

In [22]:
# Attach the extracted north_south values as a coordinate named `lat`
LIS_f=LIS.assign_coords(lat=latok)

In [23]:
# Rename `north_south` -> `lat` and `east_west` -> `lon`.
# NOTE(review): this overwrites `LIS_f` with its own renamed copy, so running
# the cell a second time without re-running the previous one presumably fails
# because the old names no longer exist — confirm.
LIS_f = LIS_f.rename({'north_south':'lat','east_west':'lon'})

### Download the GRACE data from S3 bucket

In [None]:
#import urllib.request
#urllib.request.urlretrieve ('https://himat-data.s3.us-west-2.amazonaws.com/GRACE/GSFC.glb.200301_201607_v02.4-ICE6G.h5', '../../files/GRACE/GSFC.glb.200301_201607_v02.4-ICE6G.h5')

In [None]:
#os.chdir('/mnt/c/Users/HCD/UW_work/git/HiMAT/files/GRACE')

In [13]:
# Location of the GSFC GRACE mascon HDF5 file, relative to the working directory.
# The commented-out lines are a disabled S3 download of the same file, kept for reference.
#BUCKET_NAME = 'himat-data' 
#KEY = 'GRACE/GSFC.glb.200301_201607_v02.4-ICE6G.h5' 
fileName = '../../files/GRACE/GSFC.glb.200301_201607_v02.4-ICE6G.h5'
#s3 = boto3.resource('s3')

#try:
    #s3.meta.client.download_file(BUCKET_NAME, KEY, fileName)
#except botocore.exceptions.ClientError as e:
    #if e.response['Error']['Code'] == "404":
        #print("The object does not exist.")
    #else:
        #raise

# Resolve the relative path against the current working directory and read the file.
# `f` is indexed by group name ('mascon', 'solution', 'time') in later cells.
grace_file = os.path.abspath(fileName)
f = extract_grace(grace_file)

Data extracted: 
---
Group: mascon
---
<HDF5 dataset "area_deg": shape (1, 41168), type "<f8">
<HDF5 dataset "area_km2": shape (1, 41168), type "<f8">
<HDF5 dataset "basin": shape (1, 41168), type "<f8">
<HDF5 dataset "elev_flag": shape (1, 41168), type "<f8">
<HDF5 dataset "lat_center": shape (1, 41168), type "<f8">
<HDF5 dataset "lat_span": shape (1, 41168), type "<f8">
<HDF5 dataset "location": shape (1, 41168), type "<f8">
<HDF5 dataset "lon_center": shape (1, 41168), type "<f8">
<HDF5 dataset "lon_span": shape (1, 41168), type "<f8">
---
Group: size
---
<HDF5 dataset "N_arcs": shape (1, 1), type "<f8">
<HDF5 dataset "N_mascon_times": shape (1, 1), type "<f8">
<HDF5 dataset "N_mascons": shape (1, 1), type "<f8">
---
Group: solution
---
<HDF5 dataset "cmwe": shape (41168, 148), type "<f8">
---
Group: time
---
<HDF5 dataset "list_ref_days_solution": shape (1, 4128), type "<f8">
<HDF5 dataset "n_ref_days_solution": shape (1, 148), type "<f8">
<HDF5 dataset "n_ref_days_window": shape (

In [14]:
# Pull the three groups used below out of the extracted GRACE file in one pass
mascon, soln, time = (f[group] for group in ('mascon', 'solution', 'time'))

# Build the mascon table, then copy its index into an explicit "mascon" column:
# the index carries the mascon numbers, which are needed again later on.
mascon_gdf = get_mascon_gdf(mascon)
mascon_gdf['mascon'] = mascon_gdf.index

There are 41168 Mascons in this dataset.


### Load the shapefile boundary 

In [24]:
# Directory holding the watershed/district boundary shapefiles.
# It is located relative to the working directory (Projects/WBM inside the
# repository), the same convention the GRACE file path in this notebook uses,
# instead of through one user's absolute path.
# os.path.join(..., '') keeps the trailing separator, so code that appends a
# bare file name to this string continues to work.
path = os.path.join(os.path.abspath('../../files/WBM/Watershed_boundaries'), '')

In [25]:
# Read the district polygons. os.path.join builds the path correctly whether
# or not `path` ends with a separator, unlike plain string concatenation.
shdf = salem.read_shapefile(os.path.join(path, 'dugwells_districts.shp'))
# Restrict the LIS data to the region of interest defined by those shapes (salem `roi`)
LIS_fr= LIS_f.salem.roi(shape=shdf)

#### Tidy up the variable names to comply with our function

In [26]:
# Rename `lon` to `long` before the data is handed to the mascon helpers below.
# NOTE(review): this overwrites `LIS_fr` with its own renamed copy, so running
# the cell twice presumably fails on the second run because `lon` is gone — confirm.
LIS_fr = LIS_fr.rename({'lon':'long'})

### Clip the GRACE mascons to the domain of GWA

In [27]:
# Keep only the mascons relevant to the clipped LIS domain (see the section heading).
# The exact selection rule lives in himatpy's `select_mascons` and is not visible here.
masked_gdf4 = select_mascons(LIS_fr, mascon_gdf)

### The main processing step that calculates a mean value across each mascon

In [28]:
# Aggregate the clipped LIS data onto the selected mascons, reporting progress
# through dask's ProgressBar.
# NOTE(review): the saved output of this cell is a ValueError ("all the input
# array dimensions except for the concatenation axis must match exactly"), so
# `agg_data4` is not produced by the notebook as saved. The cells that read
# `agg_data4` cannot run until this is resolved.
with ProgressBar():
    agg_data4 = aggregate_mascons(LIS_fr,masked_gdf4,scale_factor=1)

ValueError: all the input array dimensions except for the concatenation axis must match exactly

### Convert the aggregated LIS array to a pandas DataFrame for plotting

In [None]:
# Raw aggregated values returned by aggregate_mascons
A3 = agg_data4['data']

# One index entry per (product, mascon, time) combination, in that order
names = ['products', 'mascons', 'time']
index = pd.MultiIndex.from_product([agg_data4[key] for key in names], names=names)

# Flatten onto the index, then pivot the products into columns and order the
# rows by (time, mascon)
df4 = (
    pd.Series(A3.flatten(), index=index, name='A3')
    .unstack(level='products')
    .swaplevel()
    .sort_index()
)
df4.columns = agg_data4['products']
df4.index.names = ['date', 'mascon']

### Drop the NA values in the new UGWA mascons dataframe

In [None]:
# Keep only the rows in which every column has a value
dfok4 = df4.dropna(axis=0, how='any')

### Convert GRACE time series to Pandas dataframe

In [37]:
# Day offsets read from the "ref_days_middle" dataset of the GRACE time group
t = pd.DataFrame(data = {
    'time': f['time']["ref_days_middle"][0,:]
})

# Convert the day offsets to calendar dates once: the result is the same for
# every mascon, so it does not belong inside the loop.
# NOTE(review): the 2001-12-31 epoch is taken from the original code — confirm
# against the GRACE product documentation.
grace_dates = t['time'].apply(lambda x: datetime(2001,12,31) + timedelta(days=x))

# One frame per aggregated mascon holding its cmwe series, tagged with the mascon number.
# The mascon list comes from `agg_data4`; the previous `agg_data` is never
# defined in this notebook and raised NameError on a fresh kernel.
grc_lst = []
for m in agg_data4['mascons']:
    df = pd.DataFrame(data={
        'cmwe': soln['cmwe'][m][:],
        'mascon': m
    })
    df.index = grace_dates
    grc_lst.append(df)

# Stack the per-mascon frames into one long table indexed by date
gracedf = pd.concat(grc_lst)

In [None]:
# Leave the frame as the cell's final expression so Jupyter renders it as a
# truncated rich table rather than the plain-text dump produced by print()
gracedf