-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathlib_mjo.py
311 lines (263 loc) · 9.96 KB
/
lib_mjo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
"""
Code written by Jiwoo Lee, LLNL. Feb. 2019
Inspired by Daehyun Kim and Min-Seop Ahn's MJO metrics.
Code update history
2024-05 converted to use xcdat as base building block (Jiwoo Lee)
Reference:
Ahn, MS., Kim, D., Sperber, K.R. et al. Clim Dyn (2017) 49: 4023.
https://doi.org/10.1007/s00382-017-3558-4
"""
from typing import Union
import numpy as np
import xarray as xr
import xcdat as xc
from scipy import signal
from pcmdi_metrics.io import base, get_time_key, select_subset
from pcmdi_metrics.utils import regrid
# from pcmdi_metrics.utils import create_target_grid
def interp2commonGrid(ds, data_var, dlat, dlon=None, debug=False):
if dlon is None:
dlon = dlat
# grid = create_target_grid(target_grid_resolution=f"{dlat}x{dlon}")
nlat = int(180 / dlat)
grid = xc.create_gaussian_grid(nlat)
# If the longitude values include 0 and 360, then remove 360 to avoid having repeating grid
if 0 in grid.lon.values and 360 in grid.lon.values:
min_lon = grid.lon.values[0] # 0
# max_lon = grid.lon.values[-1] # 360
second_max_lon = grid.lon.values[-2] # 360-dlat
grid = grid.sel(lon=slice(min_lon, second_max_lon))
# Reverse latitude if needed
if grid.lat.values[0] > grid.lat.values[-1]:
grid = grid.isel(lat=slice(None, None, -1))
# Regrid
ds_regrid = regrid(ds, data_var, grid)
ds_regrid_subset = select_subset(ds_regrid, lat=(-10, 10))
if debug:
print(
"debug: ds_regrid_subset[data_var] shape:", ds_regrid_subset[data_var].shape
)
return ds_regrid_subset
def subSliceSegment(
ds: Union[xr.Dataset, xr.DataArray], year: int, mon: int, day: int, length: int
) -> Union[xr.Dataset, xr.DataArray]:
"""
Note: From given array (3D: time and spatial 2D)
Subslice to get segment with given length starting from given time.
input
- ds: xarray dataset or dataArray
- year: segment starting year (integer)
- mon: segment starting month (integer)
- day: segement starting day (integer)
- length: segment length (integer)
"""
time_key = get_time_key(ds)
n = list(ds[time_key].values).index(
ds.sel(time=f"{year:04}-{mon:02}-{day:02}")[time_key]
)
return ds.isel(
time=slice(n, n + length)
) # slie 180 time steps starting from above index
def get_daily_ano_segment(d_seg: xr.Dataset, data_var: str) -> xr.Dataset:
"""
Note: 1. Get daily time series (3D: time and spatial 2D)
2. Meridionally average (2D: time and spatial, i.e., longitude)
3. Get anomaly by removing time mean of the segment
input
- d_seg: xarray dataset
- data_var: name of variable
output
- d_seg_x_ano: xarray dataset that contains 2d output array
"""
# sub region
d_seg = select_subset(d_seg, lat=(-10, 10))
# Get meridional average (3d (t, y, x) to 2d (t, y))
d_seg_x = d_seg.spatial.average(data_var, axis=["Y"])
# Get time-average in the segment on each longitude grid
d_seg_x_ave = d_seg_x.temporal.average(data_var)
# Remove time mean for each segment
d_seg_x_ano = d_seg.copy()
d_seg_x_ano[data_var] = d_seg_x[data_var] - d_seg_x_ave[data_var]
return d_seg_x_ano
def space_time_spectrum(d_seg_x_ano: xr.Dataset, data_var: str) -> np.ndarray:
"""
input
- d: xarray dataset that contains 2d DataArray (t (time), n (space)) named as `data_var`
- data_var: name of the 2d DataArray
output
- p: 2d numpy array for power
NOTE: Below code taken from
https://github.com/CDAT/wk/blob/2b953281c7a4c5d0ac2d79fcc3523113e31613d5/WK/process.py#L188
"""
# Number of grid in longitude axis, and timestep for each segment
NTSub = d_seg_x_ano[data_var].shape[0] # NTSub
NL = d_seg_x_ano[data_var].shape[1] # NL
# Tapering
d_seg_x_ano[data_var] = taper(d_seg_x_ano[data_var])
# Power sepctrum analysis
EE = np.fft.fft2(d_seg_x_ano[data_var], axes=(1, 0)) / float(NL) / float(NTSub)
# Now the array EE(n,t) contains the (complex) space-time spectrum.
"""
Create array PEE(NL+1,NT/2+1) which contains the (real) power spectrum.
Note how the PEE array is arranged into a different order to EE.
In this code, PEE is "Power", and its multiyear average will be "power"
"""
# OK NOW THE LITTLE MAGIC WITH REORDERING !
A = np.absolute(EE[0 : NTSub // 2 + 1, 1 : NL // 2 + 1]) ** 2
B = np.absolute(EE[NTSub // 2 : NTSub, 1 : NL // 2 + 1]) ** 2
C = np.absolute(EE[NTSub // 2 : NTSub, 0 : NL // 2 + 1]) ** 2
D = np.absolute(EE[0 : NTSub // 2 + 1, 0 : NL // 2 + 1]) ** 2
# Define returning array
p = np.zeros((NTSub + 1, NL + 1), np.float)
p[NTSub // 2 :, : NL // 2] = A[:, ::-1]
p[: NTSub // 2, : NL // 2] = B[:, ::-1]
p[NTSub // 2 + 1 :, NL // 2 :] = C[::-1, :]
p[: NTSub // 2 + 1, NL // 2 :] = D[::-1, :]
return p
def taper(data):
"""
Note: taper first and last 45 days with cosine window, using scipy.signal function
input
- data: 2d array (t, n) t: time, n: space (meridionally averaged)
output:
- data: tapered data
"""
window = signal.windows.tukey(len(data))
data2 = data.copy()
for i in range(0, len(data)):
data2[i] = np.multiply(data[i][:], window[i])
return data2
def generate_axes_and_decorate(Power, NT: int, NL: int) -> xr.DataArray:
"""
Note: Generates axes for the decoration
input
- Power: 2d numpy array
- NT: integer, number of time step
- NL: integer, number of spatial grid
output
- xr.DataArray that contains Power 2d DataArray that has frequency and zonalwavenumber axes
"""
# frequency
ff = []
for t in range(0, NT + 1):
ff.append(float(t - NT / 2) / float(NT))
ff = np.array(ff)
# wave number
ss = []
for n in range(0, NL + 1):
ss.append(float(n) - float(NL / 2))
ss = np.array(ss)
# Add name attributes to x and y coordinates
x_coords = xr.IndexVariable(
"zonalwavenumber", ss, attrs={"name": "zonalwavenumber", "units": "-"}
)
y_coords = xr.IndexVariable(
"frequency", ff, attrs={"name": "frequency", "units": "cycles per day"}
)
# Create an xarray DataArray
da = xr.DataArray(
Power,
coords={"frequency": y_coords, "zonalwavenumber": x_coords},
dims=["frequency", "zonalwavenumber"],
name="power",
)
return da
def output_power_spectra(NL: int, NT: int, Power):
"""
Below code taken and modified from Daehyun Kim's Fortran code (MSD/level_2/sample/stps/stps.sea.f.sample)
"""
# The corresponding frequencies, ff, and wavenumbers, ss, are:-
PEE = Power
ff = Power.frequency
ss = Power.zonalwavenumber
OEE = np.zeros((21, 11))
for n in range(int(NL / 2), int(NL / 2) + 1 + 10):
nn = n - int(NL / 2)
for t in range(int(NT / 2) - 10, int(NT / 2 + 1 + 10)):
tt = -(int(NT / 2) + 1) + 11 + t
OEE[tt, nn] = PEE[t, n]
a = list((ff[i] for i in range(int(NT / 2) - 10, int(NT / 2) + 1 + 10)))
b = list((ss[i] for i in range(int(NL / 2), int(NL / 2) + 1 + 10)))
a = np.array(a)
b = np.array(b)
# Decoration
# Add name attributes to x and y coordinates
x_coords = xr.IndexVariable(
"zonalwavenumber", b, attrs={"name": "zonalwavenumber", "units": "-"}
)
y_coords = xr.IndexVariable(
"frequency", a, attrs={"name": "frequency", "units": "cycles per day"}
)
# Create an xarray DataArray
OEE = xr.DataArray(
OEE,
coords={"frequency": y_coords, "zonalwavenumber": x_coords},
dims=["frequency", "zonalwavenumber"],
name="power",
)
# Transpose for visualization
# OEE = np.transpose(OEE, (1, 0))
print("before transpose, OEE.shape:", OEE.shape)
transposed_OEE = OEE.transpose()
print("after transpose, transposed_OEE.shape:", transposed_OEE.shape)
return transposed_OEE
# return OEE
def write_netcdf_output(da: xr.DataArray, fname):
"""
Note: write array in a netcdf file
input
- d: xr.DataArray object
- fname: string of filename. Directory path that includes file name without .nc
output
- None
"""
ds = xr.Dataset({da.name: da})
ds.to_netcdf(fname + ".nc")
def calculate_ewr(OEE):
"""
According to DK's gs script (MSD/level_2/sample/stps/e.w.ratio.gs.sample),
E/W ratio is calculated as below:
'd amean(power,x=14,x=17,y=2,y=4)/aave(power,x=5,x=8,y=2,y=4)'
where x for frequency and y for wavenumber.
Actual ranges of frequency and wavenumber have been checked and applied.
"""
east_power_domain = OEE.sel(
zonalwavenumber=slice(1, 3), frequency=slice(0.0166667, 0.0333333)
)
west_power_domain = OEE.sel(
zonalwavenumber=slice(1, 3), frequency=slice(-0.0333333, -0.0166667)
)
eastPower = np.average(east_power_domain)
westPower = np.average(west_power_domain)
ewr = eastPower / westPower
return ewr, eastPower, westPower
def mjo_metrics_to_json(
outdir, json_filename, result_dict, model=None, run=None, cmec_flag=False
):
# Open JSON
JSON = base.Base(outdir, json_filename)
# Dict for JSON
if model is None and run is None:
result_dict_to_json = result_dict
else:
# Preserve only needed dict branch
result_dict_to_json = result_dict.copy()
models_in_dict = list(result_dict_to_json["RESULTS"].keys())
for m in models_in_dict:
if m == model:
runs_in_model_dict = list(result_dict_to_json["RESULTS"][m].keys())
for r in runs_in_model_dict:
if r != run:
del result_dict_to_json["RESULTS"][m][r]
else:
del result_dict_to_json["RESULTS"][m]
# Write selected dict to JSON
JSON.write(
result_dict_to_json,
json_structure=["model", "realization", "metric"],
sort_keys=True,
indent=4,
separators=(",", ": "),
)
if cmec_flag:
JSON.write_cmec(indent=4, separators=(",", ": "))