# BRAN2020 clim demo workflow

Date: 8 April, 2024

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

### BRAN2020 is on the order of 100 TB of float data spread over nearly 9000 `netcdf` file assets in total.

#### required packages

In [1]:
import intake
import xarray as xr
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
#more plotting
import matplotlib.dates as mdates
#import seaborn as sns

#### ignore warnings

In [2]:
# Install a blanket "ignore" filter so that Python warnings raised in this
# notebook process are not displayed.
# NOTE(review): this hides every warning category, including deprecation
# notices from the libraries used below — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

#### start a local Dask client

In [3]:
import dask
import distributed

# Start a local Dask client with a few configuration overrides.
# The overrides are active only inside the `with` block, i.e. while the
# client is being created:
#   * distributed.scheduler.worker-saturation      -> 1.0
#   * distributed.nanny.pre-spawn-environ.MALLOC_TRIM_THRESHOLD_ -> 0
#     (environment variable handed to worker processes before they spawn)
#   * logging.distributed                          -> "error"
# The logging key previously carried a stray apostrophe inside the string
# ("logging.distributed'"), so it addressed a non-existent config entry and
# the intended log level was never applied.
with dask.config.set({
    "distributed.scheduler.worker-saturation": 1.0,
    "distributed.nanny.pre-spawn-environ.MALLOC_TRIM_THRESHOLD_": 0,
    "logging.distributed": "error",
}):
    client = distributed.Client()

# workflow

In [None]:
%%time
# ---- variable to process (uncomment exactly one) ----
var_name = 'temp'
#var_name = 'mld'
#var_name = 'eta_t'

# ---- input: one zarr store per variable under a common root ----
zarr_path = '/scratch/es60/ard/reanalysis/BRAN2020/ARD/'
path_dict = {
    'eta_t': 'BRAN2020-daily-eta_t-chunk4time-v14032024.zarr',
    'mld': 'BRAN2020-daily-mld-chunk4time-v04042024.zarr',
    'temp': 'BRAN2020-daily-temp-chunk4time-v07022024.zarr',
}
# Per-variable dimension names (defined here; not used further in this cell)
depth_dict = {'eta_t': None, 'mld': None, 'temp': 'st_ocean'}
lon_dict = {'eta_t': 'xt_ocean', 'mld': 'xt_ocean', 'temp': 'xt_ocean'}
lat_dict = {'eta_t': 'yt_ocean', 'mld': 'yt_ocean', 'temp': 'yt_ocean'}
time_dim = 'Time'

# ---- output location ----
results_path = '/g/data/es60/users/thomas_moore/clim_demo_results/daily/draft_delivery/'
results_file = f'BRAN2020_clim_demo_{var_name}.nc'
collection_path = f'{zarr_path}{path_dict[var_name]}'

# ---- open the collection and group by calendar month ----
ds = xr.open_zarr(collection_path, consolidated=True)
monthly = ds.groupby(f'{time_dim}.month')

# mean / min / max / std go through flox with the "cohorts" method;
# each result variable is renamed to "<statistic>_<var_name>".
flox_kwargs = {'engine': 'flox', 'method': 'cohorts'}
clim_parts = [
    getattr(monthly, stat)(dim=time_dim, **flox_kwargs).rename(
        {var_name: f'{stat}_{var_name}'}
    )
    for stat in ('mean', 'min', 'max', 'std')
]
# The median is requested without the flox keyword arguments.
clim_parts.append(
    monthly.median(dim=time_dim).rename({var_name: f'median_{var_name}'})
)
clim_ds = xr.merge(clim_parts)

# 5th and 95th percentiles (skipna=False), split into two named variables
quant = monthly.quantile([0.05, 0.95], skipna=False, dim=time_dim)
quant_ds = xr.merge([
    quant.isel(quantile=position)
    .reset_coords(drop=True)
    .rename({var_name: f'quantile_{label}_{var_name}'})
    for position, label in enumerate(('05', '95'))
])
result_ds = xr.merge([clim_ds, quant_ds])

# ---- write everything to a single NetCDF file ----
result_ds.to_netcdf(f'{results_path}{results_file}', engine='netcdf4')

  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,


In [None]:
!touch /scratch/es60/ard/reanalysis/BRAN2020/ARD/logs/finished_3D_temp_results_080402024.log

# $The$ $End$

In [None]:
# Shut down the Dask client created in the "start a local Dask client" cell.
client.shutdown()

# looking for negative numbers

In [10]:
# Re-open the NetCDF file that the workflow cell wrote to
# results_path + results_file.
nc_data = xr.open_dataset(results_path+results_file)

In [11]:
# Display the dataset summary as the cell output.
nc_data

In [13]:
# Maximum of each data variable in the results file.
# NOTE(review): the section heading talks about negative numbers, for which
# `.min()` would be the direct check — confirm which one is intended.
nc_data.max()

## Plot current vectors for August

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
import matplotlib.ticker as ticker
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
from matplotlib import mlab, cm, gridspec
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
%matplotlib inline 

In [None]:
%%time
# Select the u and v current components for one climatological month and
# derive the current speed from them.
# NOTE(review): `clim_uv` is not created anywhere in the visible cells —
# it must already exist in the session.
time_choice = 8
month_selector = {'month': time_choice}
u = clim_uv.u.sel(**month_selector)
v = clim_uv.v.sel(**month_selector)
speed = np.sqrt(u**2 + v**2)

In [None]:
# Shaded map of current speed with the u/v vectors drawn on top.
transform = ccrs.PlateCarree()
cmap = 'Spectral_r'
cbar_label = 'current speed'
plot_data = speed

# Figure and map axes (Plate Carree projection constructed with 180)
fig = plt.figure(num=None, figsize=(8, 6), dpi=300, facecolor='w', edgecolor='k')
ax = plt.subplot(projection=ccrs.PlateCarree(180))
ax.set_extent([142, 160, -25, -10], ccrs.PlateCarree())

# White land mask from the 50m Natural Earth data plus thin grey coastlines
land_feature = cfeature.NaturalEarthFeature(
    'physical', 'land', '50m', edgecolor='face', facecolor='white'
)
ax.add_feature(land_feature)
ax.coastlines('50m', linewidth=0.5, edgecolor='grey')

# Shaded field with a shrunken, labelled colour bar
colorbar_options = {'label': cbar_label, 'shrink': 0.5}
plot_data.plot(
    transform=transform,
    cmap=cmap,
    cbar_kwargs=colorbar_options,
    robust=True,
)

# Vector overlay: coordinates come from the velocity grid of `clim_uv`
x = clim_uv.xu_ocean
y = clim_uv.yu_ocean
quiver_options = {
    'units': 'x',
    'width': 0.01,
    'scale': 0.7,
    'headwidth': 2,
    'alpha': 0.2,
}
ax.quiver(x.values, y.values, u.values, v.values, transform=transform, **quiver_options)
ax.set_title('BRAN2020 1993-2022\ncurrent speed \n August Climatology')