In [1]:
import glob
import re
import os
import regionmask
import datetime
# to fix MJD
import astropy
from astropy.time import Time

# data management
import numpy as np
import pandas as pd
import xarray as xr

# plotting
import matplotlib.pyplot as plt
# os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import rasterio
from rasterio.plot import show as rioshow
from shapely.geometry import Polygon
from geospatial_functions import get_background_map

# imported from provided py code: long functions
from calc_geoid_change_alt import readstrokescoefficients, plm


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gp


In [2]:
# loading other layers
outline           = gpd.read_file(f"Data\\lena_basin_outline_polygon.gpkg",driver="GPKG")
main_rivers       = gpd.read_file(f"Data\\lena_main_river.gpkg",driver="GPKG")   
coast             = gpd.read_file(f"Data\\north_east_russian_coastlines.gpkg",driver="GPKG")   
# fixing crs
for layer in [outline,main_rivers,coast]:
    layer.geometry = layer.geometry.to_crs("EPSG:4326")

background:

Grace can measure $\frac{\Delta S}{\Delta t}$, when looking at the water balance: $\frac{\Delta S}{\Delta t} = P - L$ where $L$ are the losses.

Losses are due to Evapotranspiration ($ET$), Discharge ($Q$) and Underground flow (ground water - $G$). 

Evaporation can be esimated but is difficult, discharge can be considered known: 

$\frac{\Delta S}{\Delta t} = P - Q - ET - GW$

# first focus on grace data
Downloaded data can be loaded in, note using stokes coefficients till 60th

In [3]:
grace_files = glob.glob('Data\\Grace\*.gfc')
love_numbers_kl = np.loadtxt('Data\\loadLoveNumbers_60.txt')[:,1]
l = 60
m = 60

Stokes coeffcients order 1& 2 need to be handeled differently

order 1: 

In [4]:
fname_stoke_coeff1 = f'Data\\Grace\\degree1_stokes_coeff.txt'
df_stokes_1 = pd.read_csv(fname_stoke_coeff1,skiprows=116,delimiter=" ", 
        names=['GRCOF2',"_1","_2","_3","l","_4","_5","m", "Clm","Slm","sd_Clm","sd_Slm","begin_date","end_date"])
### drop unwanted
df_stokes_1.drop(columns=["_1","_2","_3","_4","_5","GRCOF2","sd_Clm","sd_Slm"],inplace=True)
### Reformat dates
df_stokes_1["end_date"] = df_stokes_1.apply(lambda x: pd.Timestamp(f'{str(x.end_date)[0:4]}-{str(x.end_date)[4:6]}-{str(x.end_date)[6:8]}'),axis=1)
### dirty fix to make more fit
df_stokes_1["begin_date"] = df_stokes_1.apply(lambda x: pd.Timestamp(f'{str(x.begin_date)[0:4]}-{str(x.begin_date)[4:6]}-01'),axis=1)

In [5]:
df_1_0 = df_stokes_1[df_stokes_1['m'] == 0].set_index('begin_date')
df_1_1 = df_stokes_1[df_stokes_1['m'] == 1].set_index('begin_date')

In [6]:
# easiest way to fix the indexing in matchting the correct indexes
df_index_replace = df_1_1.index.to_numpy().copy()
for i, index in enumerate(df_1_1.index):
    if i == 0:
        pass
    if (df_1_1.index[i-1] - index) == pd.Timedelta(0):
        replace = pd.Timestamp(f'{index.year}-{index.month+1}-{index.day}')
        df_index_replace[i] = replace
df_1_1.index = df_index_replace
df_1_0.index = df_index_replace

order 2,0 & 3,0:

In [7]:
def MJD_to_ts(mjd):
    ## thanks to https://stackoverflow.com/questions/72597699/how-do-i-convert-mjdjulian-date-to-utc-using-astropy
    # Start with some time in modified julian date (MJD)
    # Convert to Julian Date
    mjd = float(mjd)
    jd = mjd + 2400000.5
    # Convert to astropy Time object
    t = astropy.time.Time(jd, format='jd')
    # Convert to datetime
    str = t.to_datetime()
    return str

In [8]:
fname_stoke_coeff2 = f'Data\\Grace\\degree2_stokes_coeff.txt'
col_names = ['MJD begin',"Year fraction begin","C20","C20 - C20_mean (1.0E-10)","sig_C20 (1.0E-10)", 
             "C30","C30 - C30_mean (1.0E-10)","sig_C30 (1.0E-10)",'MJD end',"Year fraction end"]
df_stokes_2_3 = pd.read_csv(fname_stoke_coeff2,skiprows=37,delimiter="\s+",names=col_names)
# fix date format
df_stokes_2_3["begin_date"] = df_stokes_2_3.apply(lambda x: MJD_to_ts(x['MJD begin']), axis=1)
df_stokes_2_3["end_date"] = df_stokes_2_3.apply(lambda x: MJD_to_ts(x['MJD begin']), axis=1)
df_stokes_2_3 = df_stokes_2_3[["begin_date","C20","C30","end_date"]].set_index("begin_date")

# allign indexes and replace like in C_1_1..
df_stokes_2_3 = df_stokes_2_3.iloc[:-2] # remove last two months to make same length
df_stokes_2_3.index = df_index_replace

In [9]:
# Names and dates needed, obtained from file name
grace_names = [file[-11:-4] for file in grace_files]
times = [pd.Timestamp(grace_names_i) for grace_names_i in grace_names]

In [10]:
# plt.figure(figsize=(12,5))
# plt.plot(times,marker=".",lw=0)

In [11]:
# load in all coefficient
C, S = [], []
for i, file in enumerate(grace_files):
    C_i, S_i, R, GM = readstrokescoefficients(file)
    
    # replace C_1_0,C_1_1, S_1_1, C_2_0, C_3_0
    try:
        test  = df_1_0.loc[times[i],"Clm"]
        new_time = times[i]
    except KeyError: # issue with finding correct value, this is easiest fix
        new_time = pd.Timestamp(f'{times[i].year}-{times[i].month-1}-{times[i].day}')
        print(new_time)
    
    C_i[1,0]  = df_1_0.loc[new_time,"Clm"]
    C_i[1,1]  = df_1_1.loc[new_time,"Clm"]
    S_i[1,1]  = df_1_1.loc[new_time,"Slm"]
    C_i[2,0]  = df_stokes_2_3.loc[new_time,"C20"]
    C_30  = df_stokes_2_3.loc[new_time,"C30"]
    if np.isnan(C_30): 
        pass
    else: 
        C_i[3,0] = C_30
        
    C.append(C_i), S.append(S_i)

2015-06-01 00:00:00


In [12]:
# calculate means of coefficients
C = np.array(C)
S = np.array(S)
C_mean = C.sum(axis=0)/len(C)
S_mean = S.sum(axis=0)/len(S)

In [13]:
# remove mean coefficients
dc1_store = []
ds1_store = []
for i, c in enumerate(C):
    dc1 = C[i] - C_mean
    ds1 = S[i] - S_mean
    dc1_store.append(dc1)
    ds1_store.append(ds1)

using 
\begin{equation}
\delta h_w \left(\theta,\lambda\right) = \sum_{l,m=0}^{\infty}=\bar{C_{lm}^{\delta h_w}} \bar{Y}_{lm}\left(\theta,\lambda\right)
\end{equation}


In [14]:
rho_av__rho_w = 5.5 # aprox

In [15]:
dh_c1_store = []
dh_s1_store = []

for k, dc1 in enumerate(dc1_store):
    C_dhw_i_1 = np.zeros((l+1, l+1))
    S_dhw_i_1 = np.zeros((l+1, l+1))
    for i in range(l+1):
        for j in range(i+1):
            multiplication_factor = (R  * (2 * i + 1) * rho_av__rho_w) / ( 3 * (1 + love_numbers_kl[i]))
            C_dhw_i_1[i, j] = (dc1_store[k][i, j] * multiplication_factor)
            S_dhw_i_1[i, j] = (ds1_store[k][i, j] * multiplication_factor)
    dh_c1_store.append(C_dhw_i_1)
    dh_s1_store.append(S_dhw_i_1)

Create array of lat lon for the area: 

![Figures\Lena_Basin_map.png](Figures\Lena_Basin_map.png)

In [16]:
_lambda = np.pi / 180 * np.arange(270, 330, 1) - np.pi  # 90 - 150 # deg lon
theta = np.pi - np.pi / 180 * np.arange(180 - 40, 180 - 10, 1)  # 80 - 50 deg lat

print('_lambda=',_lambda/np.pi*180)
print('\n')
print('theta=',theta/np.pi*180)

_lambda= [ 90.  91.  92.  93.  94.  95.  96.  97.  98.  99. 100. 101. 102. 103.
 104. 105. 106. 107. 108. 109. 110. 111. 112. 113. 114. 115. 116. 117.
 118. 119. 120. 121. 122. 123. 124. 125. 126. 127. 128. 129. 130. 131.
 132. 133. 134. 135. 136. 137. 138. 139. 140. 141. 142. 143. 144. 145.
 146. 147. 148. 149.]


theta= [40. 39. 38. 37. 36. 35. 34. 33. 32. 31. 30. 29. 28. 27. 26. 25. 24. 23.
 22. 21. 20. 19. 18. 17. 16. 15. 14. 13. 12. 11.]


This is how the loop looks, to speed it up it is run in parrallel in `multicore_raw.py`, thus here we only actually load in the netCDF files

In [19]:
times = [pd.Timestamp(grace_names_i) for grace_names_i in grace_names]
run = True
debug = True
if debug:
    n=2                   
    loop = dh_c1_store[:n]
    times = times[:(n)]      # debugging
    if run: fname = "Data\\anomally_waterhead_raw_test.nc"
    else: fname = "Data\\anomally_waterhead_raw_test_multicore.nc"
else:
    loop = dh_c1_store
    if run: fname = "Data\\anomally_waterhead_raw.nc" 
    else: fname = "Data\\anomally_waterhead_raw_multicore.nc"
    
if run:
    # this is a much slower method, faster is the multicore.py which uses multi proecessing 
    store_ewh_1 = []
    print(datetime.datetime.now())
    for z, dh_c1 in enumerate(loop): # debugging
        print(f'{z=}',end='\n')
        ewh_i_1 = np.zeros((len(theta), len(_lambda)))
        ewh_i_2 = np.zeros((len(theta), len(_lambda)))
        
        for i in range(len(theta)):                              # loop over all thetas
            print(f'{i=} (out of {len(theta)})',end='\r')
            P_lm = plm(theta[i], l)                              # all Legendre Functions for one theta
            for j in range(len(_lambda)):                        # loop over all lambdas
                for k in range(l+1):                             # loop over all degrees
                    for t in range(k+1):                         # loop over negative orders
                        sin_t_lambda = np.sin(t*_lambda[j])      # negative orders
                        cos_t_lambda = np.cos(t*_lambda[j])      # non-negative orders
                        # compute here equivalent water heights
                        ewh_i_1[i, j] = ewh_i_1[i, j] + (dh_s1_store[z][k, t] * P_lm[k, t] * sin_t_lambda)
                        ewh_i_1[i, j] = ewh_i_1[i, j] + (dh_c1_store[z][k, t] * P_lm[k, t] * cos_t_lambda)
        print('\r')
        store_ewh_1.append(ewh_i_1)


    ds = xr.DataArray(store_ewh_1, dims=("time","lat","lon"),coords={"lon":_lambda/np.pi*180,
                                                                 "lat": 90 - theta/np.pi*180,
                                                                 "time":times}, name="dh(m)")
    
    ds.to_netcdf(fname)
    print(datetime.datetime.now())
else:
    ds = xr.open_dataset(fname)

2023-10-27 11:46:25.223587
z=0
i=29 (out of 30)
z=1
i=29 (out of 30)
2023-10-27 11:46:47.528662


20 = 5min
160 = 50min

From here on continue in filterd - could refactor but effort