# generate_gebco_6min_masks.py
# (Removed GitHub page-scrape residue — UI text and line-number column —
#  which was not part of the source and broke the file's syntax.)
import numpy as np
import os
import xarray as xr
import pandas as pd
from tqdm import trange
# import time
# import haversine as hs
from clim_helpers import deg2km, get_standard_levels
# from numba import jit
import dask
import matplotlib.pyplot as plt
from xarray import open_dataset
# @jit(nopython=True)
def haversine(point1, point2):
    """Great-circle distance in km between two (lat, lng) points (degrees).

    NumPy re-implementation of
    https://github.com/mapado/haversine/blob/main/haversine/haversine.py
    (uses numpy instead of the math module).
    """
    # Mean Earth radius - https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
    mean_earth_radius_km = 6371.0088
    # Unpack and convert both coordinate pairs from decimal degrees to radians
    lat1, lng1 = np.radians(point1[0]), np.radians(point1[1])
    lat2, lng2 = np.radians(point2[0]), np.radians(point2[1])
    # Haversine formula: half-angle sines of the latitude/longitude deltas
    half_dlat = (lat2 - lat1) * 0.5
    half_dlng = (lng2 - lng1) * 0.5
    central = np.sin(half_dlat) ** 2 + \
        np.cos(lat1) * np.cos(lat2) * np.sin(half_dlng) ** 2
    # Distance in km
    return 2 * mean_earth_radius_km * np.arcsin(np.sqrt(central))
# @jit(nopython=True) # Set "nopython" mode for best performance, equivalent to @njit
def generate_gebco_mask(lon_obs, lat_obs, elevation, Lon2d, Lat2d, depth):
    """Mask of GEBCO grid points deep enough and near enough to observations.

    A grid point is kept (True) when it is at least `depth` m below sea
    level AND lies within `radius_km` of at least one observation point.

    Parameters
    ----------
    lon_obs, lat_obs : 1d arrays of observation coordinates (decimal degrees)
    elevation : 2d GEBCO elevation array (m above sea level)
    Lon2d, Lat2d : 2d meshgrid coordinate arrays matching `elevation`
    depth : standard depth level (m below sea level)

    Returns
    -------
    2d boolean array with the same shape as `Lon2d`.

    NOTE(review): relies on module-level `radius_deg` and `radius_km`.
    """
    # Bounding box of the observations, padded by the search radius so that
    # no in-range grid point is excluded up front
    lon_min, lon_max = np.min(lon_obs), np.max(lon_obs)
    lat_min, lat_max = np.min(lat_obs), np.max(lat_obs)

    # -elevation converts elevation above sea level to depth below sea level
    mask = (-elevation >= depth) & (Lon2d >= lon_min - radius_deg) & \
           (Lon2d <= lon_max + radius_deg) & (Lat2d >= lat_min - radius_deg) & \
           (Lat2d <= lat_max + radius_deg)

    # Flatten the boolean mask; encode 0 = excluded, 1 = candidate,
    # 2 = candidate within radius_km of an observation
    mask_flat = mask.flatten()
    mask_v2_flat = np.zeros(mask_flat.shape, dtype=int)
    mask_v2_flat[mask_flat] = 1

    for i in trange(len(lon_obs)):
        # BUGFIX: haversine() unpacks (lat, lng); the original passed
        # (lon, lat) for both points, which yields incorrect distances
        # because the formula is not symmetric under that swap.
        obs_loc = (lat_obs[i], lon_obs[i])
        # Distances only at candidate points; NaN elsewhere (NaN < radius_km
        # is False, so excluded points stay untouched below)
        dist_arr = np.full(Lon2d.size, np.nan)
        dist_arr[mask_flat] = np.array(
            [haversine(obs_loc, (la, lo))
             for la, lo in zip(Lat2d[mask], Lon2d[mask])])
        mask_v2_flat[dist_arr < radius_km] = 2

    # BUGFIX: reshape with the parameter's shape, not module-level `Lon`
    # (identical at runtime in this script, but wrong for any other caller)
    return (mask_v2_flat == 2).reshape(Lon2d.shape)
def generate_gebco_mask_dask(lon_obs, lat_obs, elevation, Lon2d, Lat2d, depth, year, season,
                             ncout_dir):
    """As generate_gebco_mask(), but with distances computed through
    dask.delayed for speed, and the boolean mask exported to netCDF.

    Parameters
    ----------
    lon_obs, lat_obs : 1d arrays of observation coordinates (decimal degrees)
    elevation : 2d GEBCO elevation array (m above sea level)
    Lon2d, Lat2d : 2d meshgrid coordinate arrays matching `elevation`
    depth : standard depth level (m below sea level)
    year, season : used only to name the output file
    ncout_dir : directory the netCDF mask is written into

    Returns
    -------
    Path of the netCDF file written.

    NOTE(review): relies on module-level `radius_km` and `var_name`.
    """
    # Bounding-box subsetting is deliberately disabled here; keep the full
    # depth criterion only. -elevation converts elevation to depth.
    mask = (-elevation >= depth)

    # Flatten the boolean mask; encode 0 = land/too shallow, 1 = deep
    # enough, 2 = deep enough and within radius_km of an observation
    mask_flat = mask.flatten()
    mask_v2_flat = np.zeros(mask_flat.shape, dtype=int)
    mask_v2_flat[mask_flat] = 1

    Lon2d_flat = Lon2d.flatten()
    Lat2d_flat = Lat2d.flatten()

    for i in trange(len(lon_obs)):
        # BUGFIX: haversine() unpacks (lat, lng); the original passed
        # (lon, lat) for both points, yielding incorrect distances.
        obs_loc = (lat_obs[i], lon_obs[i])
        # Dask delays the computation of the haversine distances
        dist_arr = dask.delayed(
            lambda la, lo: haversine(obs_loc, (la, lo)))(Lat2d_flat, Lon2d_flat)
        # dask.compute returns a tuple; take its first element
        Dist_arr = dask.compute(dist_arr)[0]
        # Promote deep-enough points that are within the search radius
        mask_v2_flat[(mask_v2_flat == 1) & (Dist_arr < radius_km)] = 2

    # BUGFIX: reshape using the parameter's shape, not module-level `Lon`
    mask_v3 = (mask_v2_flat == 2).reshape(Lon2d.shape)

    # Export the boolean mask to a netCDF file
    ncout = xr.Dataset(coords={'lon': Lon2d[0], 'lat': Lat2d[:, 0]},
                       data_vars={'mask': (('lat', 'lon'), mask_v3)})
    # BUGFIX: pass directory and file name as separate os.path.join
    # arguments instead of string concatenation inside a single argument
    ncout_filename = os.path.join(ncout_dir, '{}_{}m_{}_{}_mask_6min.nc'.format(
        var_name, depth, year, season))
    ncout.to_netcdf(ncout_filename)
    ncout.close()  # Close the dataset
    return ncout_filename
def generate_gebco_full_mask(elevation, Lon2d, Lat2d, var_name, depth, season,
                             ncout_dir):
    """Write a depth-only boolean mask (no observation-radius criterion).

    Parameters
    ----------
    elevation : 2d GEBCO elevation array (m above sea level)
    Lon2d, Lat2d : 2d meshgrid coordinate arrays matching `elevation`
    var_name, depth, season : used to name the output file
    ncout_dir : directory the netCDF mask is written into

    Returns
    -------
    Path of the netCDF file written.
    """
    # -elevation converts elevation above sea level to depth below sea level
    mask = (-elevation >= depth)
    ncout = xr.Dataset(coords={'lon': Lon2d[0], 'lat': Lat2d[:, 0]},
                       data_vars={'mask': (('lat', 'lon'), mask)})
    # BUGFIX: join directory and file name as separate arguments instead of
    # concatenating them inside a single os.path.join argument
    ncout_filename = os.path.join(ncout_dir, '{}_{}m_{}_mask_6min.nc'.format(
        var_name, depth, season))
    ncout.to_netcdf(ncout_filename)
    ncout.close()
    return ncout_filename
def plot_mask_coverage(mask_filename,
                       png_dir="C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\"
                               "value_vs_depth\\16_diva_analysis\\masks\\"):
    """Plot a mask file as a 0/1 field to visually check its coverage.

    Parameters
    ----------
    mask_filename : path to a netCDF mask file with `lon`, `lat`, `mask`
    png_dir : directory the png is saved into (GENERALIZED: was hard-coded;
        default preserves the original behavior)

    Returns
    -------
    Path of the png written.
    """
    mask_ds = open_dataset(mask_filename)
    lon2d, lat2d = np.meshgrid(mask_ds.lon.data, mask_ds.lat.data)
    # 1 where the mask is True, 0 elsewhere (avoid `== True` comparison)
    var_field = np.zeros(mask_ds.mask.data.shape)
    var_field[mask_ds.mask.data] = 1
    mask_ds.close()  # BUGFIX: the dataset was never closed
    plt.pcolormesh(lon2d, lat2d, var_field, shading='auto', cmap="Blues")
    # NE Pacific study area limits
    plt.xlim((-162., -102.))
    plt.ylim((25., 62.))
    # [:-3] strips the '.nc' extension before appending '.png'
    plt_filename = os.path.join(
        png_dir, "{}.png".format(os.path.basename(mask_filename)[:-3]))
    plt.savefig(plt_filename, dpi=400)
    plt.close()
    return plt_filename
# -----------------------------Choose data file----------------------------------
# Configuration: variable name, year range, seasons and standard depth levels
var_name = 'Oxy'
years = np.arange(1992,2018) # np.arange(1991, 2021) # [1995, 2005]
szns = ['JFM', 'AMJ', 'JAS', 'OND']
# standard_depths = np.arange(1500, 500, -50) # np.arange(3900, 2900, -100)
standard_depths = [0] # np.arange(0, 105, 5) # [5]
# Already made all 0m and 5m masks so skip to 10m
# standard_depths = get_standard_levels(
#     'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\lu_docs\\WOA_Standard_Depths.txt')[75:]
radius_deg = 2 # search radius around each observation, in degrees
radius_km = deg2km(radius_deg) # the same search radius converted to km
# Standard level observations directory
obs_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\value_vs_depth\\' \
          '14_sep_by_sl_and_year\\'
# Directory for netCDF boolean masks to be output into
out_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\value_vs_depth\\' \
          '16_diva_analysis\\masks\\'
# ---------------------------------------------------------------------------------
# Use GEBCO 2021 6'x6' bathymetry file to create masks by depth
# Read in elevation file
gebco_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\diva_explore\\' \
            'GEBCO_28_Oct_2021_16f8a0236741\\'
gebco_filename = os.path.join(gebco_dir + 'gebco_2021_n60_s30_w-160_e-115.nc')
gebco_data = xr.open_dataset(gebco_filename)
# Create 2d grid of lat and lon
Lon, Lat = np.meshgrid(gebco_data.lon.data, gebco_data.lat.data)
# ---------------------------------------------------------------------------------
# Iterate through all requested depth/year/season combinations and write a
# mask netCDF file for each, skipping combinations that already exist on
# disk or that have no observations
for dep in standard_depths:
    print()
    print('--------------------Depth: {}m--------------------'.format(dep))
    for yr in years:
        print(yr)
        for szn in szns:
            print(szn)
            # Skip making mask if it already exists
            if os.path.exists(out_dir + '{}_{}m_{}_{}_mask_6min.nc'.format(
                    var_name, dep, yr, szn)):
                print('Mask already exists for this file -- skipping')
                continue
            # Read in standard level data file
            obs_filename = os.path.join(obs_dir + '{}_{}m_{}_{}.csv'.format(
                var_name, dep, yr, szn))
            # Read into pandas dataframe
            sldata = pd.read_csv(obs_filename)
            if sldata.empty:
                print('Dataframe empty -- skipping')
                continue
            # Build the mask and export it to netCDF (returns the file path)
            mask_out = generate_gebco_mask_dask(np.array(sldata['Longitude']),
                                                np.array(sldata['Latitude']),
                                                gebco_data.elevation.data, Lon, Lat,
                                                dep, yr, szn, out_dir)
# ----------------------------------mask for step 19--------------------------------
# Output directory for the full-domain (step 19) masks
step19_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\' \
             'value_vs_depth\\19_divand_to_full_NEP\\masks\\'
def step19_masks():
    """Write a depth-only (full-domain) mask netCDF file for every
    configured standard depth and season, skipping masks already on disk.

    Uses module-level `standard_depths`, `szns`, `var_name`, `gebco_data`,
    `Lon`, `Lat` and `step19_dir`.
    """
    for depth_m in standard_depths:
        print()
        print('--------------------Depth: {}m--------------------'.format(depth_m))
        for season in szns:
            print(season)
            # Skip making mask if it already exists
            existing = step19_dir + '{}_{}m_{}_mask_6min.nc'.format(
                var_name, depth_m, season)
            if os.path.exists(existing):
                print('Mask already exists for this file -- skipping')
                continue
            mask_out = generate_gebco_full_mask(gebco_data.elevation.data, Lon, Lat,
                                                var_name, depth_m, season, step19_dir)
    return
# ----------------------------------------------------------------------------------
# # RAM calculations
# # https://blogs.sas.com/content/iml/2014/04/28/how-much-ram-do-i-need-to-store-that-matrix.html
# r = Lon.shape[0]
# c = Lon.shape[1]
# gb_RAM_needed = r * c * 8 / 1e9
# print(gb_RAM_needed)
# ----------------------------------------------------------------------------------
# import glob
# import os
#
# # Rename files to remove v2 which was a mistake to keep
# mask_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\value_vs_depth\\' \
# '16_diva_analysis\\masks\\'
#
# nclist = glob.glob(mask_dir + '*v2.nc')
# for f in nclist:
# f_newname = f.replace('_v2', '')
# os.rename(f, f_newname)