# BRAN2020 clim demo workflow

Date: 10 April, 2024

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

### BRAN2020 is on the order of 100TB of float data over nearly 9000 `netcdf` file assets in total.

#### required packages

In [1]:
import intake
import xarray as xr
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
#more plotting
import matplotlib.dates as mdates
#import seaborn as sns
import datetime

#### ignore warnings

In [2]:
import warnings
# Blanket filter: silences every Python warning raised in this kernel from here on,
# including pandas/xarray/numpy warnings emitted by the workflow cell below.
warnings.filterwarnings('ignore')

#### start a local Dask client

In [3]:
import dask
import distributed

# Start a local Dask cluster with a few config overrides active while the client
# (and its scheduler/workers) is created:
#   - worker-saturation 1.0: scheduler-side queuing, limits how many root tasks
#     each worker is handed at once
#   - MALLOC_TRIM_THRESHOLD_=0: environment variable set for worker processes
#     before they are spawned
#   - logging.distributed=error: only log errors from the `distributed` logger
# The logging key previously had a stray apostrophe inside the string
# ("logging.distributed'"), so that setting was never applied under its real name.
# NOTE(review): `distributed` is imported before this block; confirm the logging
# level is picked up at this point rather than only at import time.
dask_overrides = {
    "distributed.scheduler.worker-saturation": 1.0,
    "distributed.nanny.pre-spawn-environ.MALLOC_TRIM_THRESHOLD_": 0,
    "logging.distributed": "error",
}
with dask.config.set(dask_overrides):
    client = distributed.Client()

2024-04-10 10:30:02,259 - distributed.scheduler - ERROR - Task ('blocks-transpose-store-map-20273568771256ec2cfd1d14aecb8289', 0, 0, 62) marked as failed because 4 workers died while trying to run it
2024-04-10 10:30:02,312 - distributed.scheduler - ERROR - Task ('blocks-transpose-store-map-922635579132fce480413ddec30cc5b8', 0, 0, 71) marked as failed because 4 workers died while trying to run it
2024-04-10 10:30:02,313 - distributed.scheduler - ERROR - Task ('blocks-transpose-store-map-20273568771256ec2cfd1d14aecb8289', 0, 0, 89) marked as failed because 4 workers died while trying to run it
2024-04-10 10:30:02,315 - distributed.scheduler - ERROR - Task ('transpose-7d6322f274277e137b0eb624799f257e', 0, 0, 89, 0) marked as failed because 4 workers died while trying to run it
2024-04-10 10:30:02,328 - distributed.scheduler - ERROR - Task ('blocks-transpose-store-map-d839f72c954c4701f7b2240b76aa1b71', 0, 0, 322) marked as failed because 4 workers died while trying to run it
2024-04-10 10

# workflow

In [8]:
%%time
### masks for ENSO composites
ONI_DF = pd.read_csv('/g/data/xv83/users/tm4888/data/ENSO/NCAR_ONI.csv')
ONI_DF.set_index('datetime',inplace=True)
ONI_DF.index = pd.to_datetime(ONI_DF.index)
el_nino_threshold = 0.5
la_nina_threshold = -0.5
el_nino_threshold_months = ONI_DF["ONI"].ge(el_nino_threshold)
la_nina_threshold_months = ONI_DF["ONI"].le(la_nina_threshold) 
ONI_DF = pd.concat([ONI_DF, el_nino_threshold_months.rename('El Nino threshold')], axis=1)
ONI_DF = pd.concat([ONI_DF, la_nina_threshold_months.rename('La Nina threshold')], axis=1)
ONI_DF = pd.concat([ONI_DF, el_nino_threshold_months.diff().ne(0).cumsum().rename('El Nino event group ID')], axis=1)
ONI_DF = pd.concat([ONI_DF, la_nina_threshold_months.diff().ne(0).cumsum().rename('La Nina event group ID')], axis=1)
#
El_Nino_Series = ONI_DF.groupby('El Nino event group ID')['ONI'].filter(lambda x: len(x) >= 5,dropna=False).where(ONI_DF['El Nino threshold'] == True)
ONI_DF = pd.concat([ONI_DF, El_Nino_Series.rename('El Nino')], axis=1)
La_Nina_Series = ONI_DF.groupby('La Nina event group ID')['ONI'].filter(lambda x: len(x) >= 5,dropna=False).where(ONI_DF['La Nina threshold'] == True)
ONI_DF = pd.concat([ONI_DF, La_Nina_Series.rename('La Nina')], axis=1)

### run var on what variable
#var_name = 'temp'
var_name = 'mld'
#var_name = 'eta_t'

#
zarr_path = '/scratch/es60/ard/reanalysis/BRAN2020/ARD/'
path_dict = {'eta_t':'BRAN2020-daily-eta_t-chunk4time-v14032024.zarr',
                 'mld':'BRAN2020-daily-mld-chunk4time-v04042024.zarr',
                 'temp':'BRAN2020-daily-temp-chunk4time-v07022024.zarr'}
depth_dict = {'eta_t':None,'mld':None,'temp':'st_ocean'}
lon_dict = {'eta_t':'xt_ocean','mld':'xt_ocean','temp':'xt_ocean'}
lat_dict = {'eta_t':'yt_ocean','mld':'yt_ocean','temp':'yt_ocean'}
time_dim = 'Time'
results_path = '/g/data/es60/users/thomas_moore/clim_demo_results/daily/draft_delivery/'
results_file = 'BRAN2020_clim_demo_'+var_name+'.nc'
collection_path = zarr_path + path_dict[var_name]
#
ds = xr.open_zarr(collection_path,consolidated=True)
clim_ds = xr.merge([ds.groupby(time_dim+'.month').mean(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'mean_'+var_name}),
                      ds.groupby(time_dim+'.month').min(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'min_'+var_name}),
                      ds.groupby(time_dim+'.month').max(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'max_'+var_name}),
                      ds.groupby(time_dim+'.month').std(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'std_'+var_name}),
                      ds.groupby(time_dim+'.month').median(dim=time_dim).rename({var_name:'median_'+var_name})
])
quant = ds.groupby(time_dim+'.month').quantile([0.05,0.95],skipna=False,dim=time_dim)
quant_ds = xr.merge([quant.isel(quantile=0).reset_coords(drop=True).rename({var_name:'quantile_05_'+var_name}),quant.isel(quantile=1).reset_coords(drop=True).rename({var_name:'quantile_95_'+var_name})])
result_ds = xr.merge([clim_ds,quant_ds])
### ENSO composites
# filter BRAN2020 data by ENSO
ONI_DF_BRANtime = ONI_DF['1993-01':'2023-06']
ONI_DF_BRANtime['El Nino LOGICAL'] = ONI_DF_BRANtime['El Nino'].notnull()
ONI_DF_BRANtime['La Nina LOGICAL'] = ONI_DF_BRANtime['La Nina'].notnull()
# shift back from middle of month
ONI_DF_BRANtime.index += pd.Timedelta(-14, 'd')
# modify end value for upsample
ONI_DF_BRANtime.loc[pd.to_datetime('2023-07-01 00:00:00')] = 'NaN'
#upsample
ONI_DF_BRANtime = ONI_DF_BRANtime.resample('D').ffill()
#drop last dummy date
ONI_DF_BRANtime = ONI_DF_BRANtime[:-1]
#
El_Nino_mask = ONI_DF_BRANtime['El Nino LOGICAL']
El_Nino_mask = El_Nino_mask.to_xarray()
El_Nino_mask = El_Nino_mask.rename({'datetime':'Time'})
sync_Time = ds.Time
El_Nino_mask['Time'] = sync_Time
#
La_Nina_mask = ONI_DF_BRANtime['La Nina LOGICAL']
La_Nina_mask = La_Nina_mask.to_xarray()
La_Nina_mask = La_Nina_mask.rename({'datetime':'Time'})
sync_Time = ds.Time
La_Nina_mask['Time'] = sync_Time
#
ONI_DF_BRANtime['Neutral LOGICAL'] = (ONI_DF_BRANtime['El Nino LOGICAL'] == False) & (ONI_DF_BRANtime['La Nina LOGICAL'] == False)
### mask out data
El_Nino_ds = ds.where(El_Nino_mask)
La_Nina_ds = ds.where(La_Nina_mask)
##### El Nino calc
clim_El_Nino_ds = xr.merge([El_Nino_ds.groupby(time_dim+'.month').mean(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'mean_'+'el_nino_'+var_name}),
                      El_Nino_ds.groupby(time_dim+'.month').min(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'min_'+'el_nino_'+var_name}),
                      El_Nino_ds.groupby(time_dim+'.month').max(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'max_'+'el_nino_'+var_name}),
                      El_Nino_ds.groupby(time_dim+'.month').std(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'std_'+'el_nino_'+var_name}),
                      El_Nino_ds.groupby(time_dim+'.month').median(dim=time_dim).rename({var_name:'median_'+'el_nino_'+var_name})
])
quant_El_Nino = El_Nino_ds.groupby(time_dim+'.month').quantile([0.05,0.95],skipna=False,dim=time_dim)
quant_El_Nino_ds = xr.merge([quant.isel(quantile=0).reset_coords(drop=True).rename({var_name:'quantile_05_'+'el_nino_'+var_name}),quant.isel(quantile=1).reset_coords(drop=True).rename({var_name:'quantile_95_'+'el_nino_'+var_name})])
result_El_Nino_ds = xr.merge([clim_El_Nino_ds,quant_El_Nino_ds])
#### La Nina calc
clim_La_Nina_ds = xr.merge([La_Nina_ds.groupby(time_dim+'.month').mean(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'mean_'+'la_nina_'+var_name}),
                      La_Nina_ds.groupby(time_dim+'.month').min(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'min_'+'la_nina_'+var_name}),
                      La_Nina_ds.groupby(time_dim+'.month').max(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'max_'+'la_nina_'+var_name}),
                      La_Nina_ds.groupby(time_dim+'.month').std(dim=time_dim,engine='flox',method='cohorts').rename({var_name:'std_'+'la_nina_'+var_name}),
                      La_Nina_ds.groupby(time_dim+'.month').median(dim=time_dim).rename({var_name:'median_'+'la_nina_'+var_name})
])
quant_La_Nina = La_Nina_ds.groupby(time_dim+'.month').quantile([0.05,0.95],skipna=False,dim=time_dim)
quant_La_Nina_ds = xr.merge([quant.isel(quantile=0).reset_coords(drop=True).rename({var_name:'quantile_05_'+'la_nina_'+var_name}),quant.isel(quantile=1).reset_coords(drop=True).rename({var_name:'quantile_95_'+'la_nina_'+var_name})])
result_La_Nina_ds = xr.merge([clim_La_Nina_ds,quant_La_Nina_ds])
#
result_ds = xr.merge([result_ds,result_El_Nino_ds,result_La_Nina_ds])
result_ds.to_netcdf(results_path+results_file,engine='netcdf4')

  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  retu

KeyboardInterrupt: 

In [6]:
# Shell escape: create an empty marker file in the ARD logs directory.
# NOTE(review): the "100402024" stamp in the file name looks like it may be a typo
# for 10042024 (10 April 2024) — confirm before relying on the name.
!touch /scratch/es60/ard/reanalysis/BRAN2020/ARD/logs/finished_2D_mld_results_100402024.log

# $The$ $End$

In [None]:
# Shut down the Dask cluster behind `client` (created in the "start a local Dask client" cell).
client.shutdown()

# looking for negative numbers

In [10]:
# Re-open the NetCDF file written by the workflow cell (same results_path + results_file).
nc_data = xr.open_dataset(results_path+results_file)

In [11]:
# Display the dataset summary (variables, dimensions, coordinates).
nc_data

In [13]:
# Maximum of every variable over all dimensions.
# NOTE(review): the section heading is about negative values; .max() only shows them
# if a whole variable is negative — nc_data.min() may be what was intended. Confirm.
nc_data.max()

## Plot current vectors for August

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
import matplotlib.ticker as ticker
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
from matplotlib import mlab, cm, gridspec
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
%matplotlib inline 

In [None]:
%%time
# Define the u and v components of the currents
# NOTE(review): `clim_uv` is not defined in any cell visible in this notebook; this
# cell relies on kernel state from elsewhere and will fail on Restart & Run All.
time_choice = 8  # value of the `month` coordinate to select (8 = August, per the plot title)
u = clim_uv.u.sel(month=time_choice)
v = clim_uv.v.sel(month=time_choice)
# Speed as the magnitude of the (u, v) vector.
speed = np.sqrt(u**2 + v**2)

In [None]:
# Map of the selected month's speed field with the u/v vectors drawn on top.
transform = ccrs.PlateCarree()
cmap = 'Spectral_r'
cbar_label = 'current speed'
plot_data = speed

# Figure and a single map axis centred on 180E
fig = plt.figure(num=None, figsize=(8, 6), dpi=300, facecolor='w', edgecolor='k')
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree(180))
ax.set_extent([142, 160, -25, -10], crs=ccrs.PlateCarree())

# White land polygons with a thin grey coastline
land_50m = cfeature.NaturalEarthFeature('physical', 'land', '50m',
                                        edgecolor='face', facecolor='white')
ax.add_feature(land_50m)
ax.coastlines(resolution='50m', linewidth=0.5, edgecolor='grey')

# Filled speed field, drawn on the map axis created above
plot_data.plot(
    ax=ax,
    transform=transform,
    cmap=cmap,
    cbar_kwargs={'label': cbar_label, 'shrink': 0.5},
    robust=True,
)

# u/v vectors on the velocity-grid coordinates
x = clim_uv.xu_ocean
y = clim_uv.yu_ocean
ax.quiver(
    x.values, y.values, u.values, v.values,
    transform=transform,
    units='x',
    width=0.01,
    scale=0.7,
    headwidth=2,
    alpha=0.2,
)
ax.set_title('BRAN2020 1993-2022\ncurrent speed \n August Climatology')