### CESM2 - LARGE ENSEMBLE (LENS2)

 - This notebook computes the maximum, minimum, mean and root mean square (RMS) of the AMOC in the South Atlantic.
 - We also calculate the maximum AMOC along 34.5°S to compare the large ensemble with observations.

## Imports

In [1]:
import xarray as xr
import pandas as pd
import numpy as np 
import dask
import cf_xarray
import intake
import cftime
import nc_time_axis
import intake_esm
import matplotlib.pyplot as plt
import pop_tools
from dask.distributed import Client
from ncar_jobqueue import NCARCluster
import warnings, getpass, os

### Local functions

In [2]:
def rms_da(da, dims=('moc_z','lat_aux_grid'), weights=None,  weights_sum=None):
  """
  Calculates the rms (root mean square) of DataArray da, optionally weighted.

  Parameters
  ----------
  da : xarray.DataArray
    DataArray for which to compute the (weighted) rms.

  dims : tuple, str
    Dimension(s) over which to apply the reduction.
    Default is ('moc_z', 'lat_aux_grid').

  weights : xarray.DataArray, optional
    Weights to apply. It can be a masked array, and it may span only a subset
    of ``dims`` (e.g. latitude-only weights).

  weights_sum : xarray.DataArray, optional
    Total weight used to normalise the weighted sum of squares. Only computed
    if not provided; the computed value counts each weight once for every
    non-null point of ``da`` that it is applied to.

  Returns
  -------
  reduction : xarray.DataArray
    (Optionally weighted) rms of da, reduced over ``dims``.
  """

  # Unweighted case: plain root of the mean square.
  if weights is None:
    return np.sqrt((da**2).mean(dim=dims, keep_attrs=True))

  if weights_sum is None:
    # Broadcast the weights against da before summing so that (a) weights that
    # lack one of the reduced dimensions are counted once per reduced point and
    # (b) points where da is null do not contribute to the normalisation
    # (they are already skipped by the sum in the numerator).
    weights_sum = weights.where(da.notnull()).sum(dim=dims)

  out = np.sqrt((da**2 * weights).sum(dim=dims) / weights_sum)
  # copy attrs
  out.attrs = dict(da.attrs)
  return out

## Speed up the workflow with a Dask cluster

In [3]:
# Resources requested per batch job and the size the cluster is scaled to
mem_per_worker = 160 # in GB 
num_workers = 60 
# NOTE(review): cores=1 with processes=3, while the job itself asks for
# ncpus=6 -- confirm this combination is what is intended.
cluster = NCARCluster(
    cores=1,
    processes=3,
    memory=f'{mem_per_worker} GB',
    resource_spec=f'select=1:ncpus=6:mem={mem_per_worker}GB',
    walltime='6:00:00',
)
cluster.scale(num_workers)
# Attach a client so later dask computations are sent to this cluster
client = Client(cluster)
print(client)
client

<Client: 'tcp://10.12.206.60:42083' processes=0 threads=0, memory=0 B>


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.60:42083,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Read in OGCM history file & MOC template file

In [4]:
# intake-esm catalog description file for the CESM2 large ensemble on glade
catalog_json = '/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cesm2-le.json'
catalog = intake.open_esm_datastore(catalog_json)

  return pd.read_csv(catalog_path, **csv_kwargs), catalog_path


In [5]:
# Keep only the monthly ocean-component MOC entries of the catalog
cat_subset = catalog.search(
    component='ocn',
    variable=['MOC'],
    frequency='month_1',
)
# The full catalog is not needed once the subset exists
del catalog

In [6]:
# Open every entry of the subset; the result maps catalog keys to xarray datasets
open_kwargs = dict(zarr_kwargs={"consolidated": True}, storage_options={"anon": True})
dset_dict_raw = cat_subset.to_dataset_dict(**open_kwargs)
print(f"\nDataset dictionary keys:\n {dset_dict_raw.keys()}")
# The search result is no longer needed once the datasets are open
del cat_subset


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.stream.forcing_variant.variable'



Dataset dictionary keys:
 dict_keys(['ocn.ssp370.pop.h.smbb.MOC', 'ocn.historical.pop.h.smbb.MOC', 'ocn.ssp370.pop.h.cmip6.MOC', 'ocn.historical.pop.h.cmip6.MOC'])


In [7]:
# Stitch the four catalog entries into a single dataset `ds_MOC`:
# for each forcing variant the two experiments are concatenated along `time`,
# then the forcing variants are concatenated along `member_id`.
#
# Plain lists are used instead of exec() on generated source, and the names
# `pd` (the pandas import) and `str` (the builtin) are no longer overwritten.
periods = ('historical', 'ssp370')   # experiments, joined along time
forcings = ('cmip6', 'smbb')         # forcing variants, joined along member_id
var_name = 'MOC'

per_forcing = []
for forcing in forcings:
    pieces = [
        dset_dict_raw[f'ocn.{period}.pop.h.{forcing}.{var_name}']
        for period in periods
    ]
    per_forcing.append(xr.combine_nested(pieces, concat_dim=['time']))
    del pieces

ds_MOC = xr.combine_nested(per_forcing, concat_dim=['member_id'])
# Drop the references to the intermediate per-forcing datasets
del per_forcing
print('Done!')

Done!


In [None]:
# Entries 86..187 of the lat_aux_grid coordinate, used later as weights.
# NOTE(review): these are latitude coordinate values, not cell areas, and the
# index range presumably matches the latitude band selected in the next cell --
# confirm both are intended.
lat_aux = ds_MOC.coords['lat_aux_grid']
area_sa = lat_aux.isel(lat_aux_grid=slice(86, 188))

In [None]:
%%time
# Bounds of the box over which the statistics are computed
ilas = -33.542496   # southern boundary (value on lat_aux_grid)
ilan = 0            # northern boundary
mx_depth = 350000   # depth limit (same units as the moc_z coordinate)

moc_all = ds_MOC.MOC
# True inside the depth/latitude box; everything outside is dropped below
inside_box = (
    (moc_all['moc_z'] <= mx_depth)
    & (moc_all['lat_aux_grid'] >= ilas)
    & (moc_all['lat_aux_grid'] <= ilan)
)
# Crop to the box and add up the MOC components ...
moc_box_total = moc_all.where(inside_box, drop=True).sum(dim='moc_comp')
# ... then average each year and keep transport region index 1
ds_MOC_annual_mean = (
    moc_box_total
    .resample(time='1Y', closed='left')
    .mean('time')
    .isel(transport_reg=1)
)

In [None]:
# NaN-filled (time, member_id) buffers, one per statistic, filled in by the loop below
n_time = len(ds_MOC_annual_mean['time'])
n_member = len(ds_MOC_annual_mean['member_id'])
MOC_max, MOC_mean, MOC_min, MOC_rms = (
    np.full((n_time, n_member), np.nan) for _ in range(4)
)

In [None]:
%%time
# Dimensions collapsed by every statistic (one value per member and year remains)
reduce_dims = ['moc_z', 'lat_aux_grid']
for it in range(len(ds_MOC_annual_mean['time'])): # time
    print(f'Time: {it}')
    # Section for this year; selected once and reused by all four statistics
    amoc = ds_MOC_annual_mean.isel(time=it)

    # Maximum AMOC for the South Atlantic (see the limits)
    MOC_max[it,:] = amoc.max(dim=reduce_dims).load()

    # Minimum AMOC for the South Atlantic (see the limits)
    MOC_min[it,:] = amoc.min(dim=reduce_dims).load()

    # Mean AMOC for the South Atlantic (see the limits)
    MOC_mean[it,:] = amoc.weighted(area_sa).mean(dim=reduce_dims).load()

    # RMS AMOC for the South Atlantic (see the limits).
    # Computed as the root of the weighted mean square, using the same
    # weighting as the mean above. The earlier normalisation divided a sum
    # taken over depth AND latitude by the latitude-only total area_sa.sum(),
    # which inflated the RMS by about sqrt(number of moc_z levels).
    MOC_rms[it,:] = np.sqrt((amoc**2).weighted(area_sa).mean(dim=reduce_dims)).load()
    del amoc

In [None]:
# Coordinates shared by every statistic
stat_coords = {
    'time': ds_MOC_annual_mean.coords['time'],
    'member_id': ds_MOC_annual_mean.coords['member_id'],
}
# One single-variable dataset per statistic, merged into one output dataset
stat_arrays = {
    'MOC_rms': MOC_rms,
    'MOC_mean': MOC_mean,
    'MOC_max': MOC_max,
    'MOC_min': MOC_min,
}
stat_datasets = [
    xr.Dataset({stat_name: (('time','member_id'), stat_values)}, coords=stat_coords)
    for stat_name, stat_values in stat_arrays.items()
]
ds_out_MOC = xr.combine_by_coords(stat_datasets)
# Global metadata written with the file
ds_out_MOC.attrs.update({
    'description': 'Meridional Overturning Circulation (MOC) statistics for the South Atlantic (33.542496S-0)',
    'units': 'Sv',
    'author': 'Mauricio Rocha',
    'email': 'mauricio.rocha@usp.br',
})

In [None]:
# MOC statistics: one panel per statistic. Each panel shows every member
# (faint), the ensemble mean of the 1-year series and the ensemble mean of the
# 5-year resampled series.
panels = [
    # (variable, member colour, ensemble-mean colour, y-label, title)
    ('MOC_max',  'orange', 'r', 'MOC Max [Sv]',   'Area Max'),
    ('MOC_mean', 'gray',   'k', 'MOC Mean [Sv]',  'Area Mean'),
    ('MOC_min',  'c',      'b', 'MOC Min [Sv]',   'Area Min'),
    ('MOC_rms',  'y',      'g', 'MOC Error [Sv]', 'Area Error'),
]

fig, axes = plt.subplots(1, len(panels), figsize=(20, 8))
for ax, (stat_name, member_color, mean_color, ylabel, title) in zip(axes, panels):
    stat = ds_out_MOC[stat_name]
    yearly = stat.resample(time='1Y', closed='left').mean('time')
    five_yearly_mean = stat.resample(time='5Y', closed='left').mean('time').mean('member_id')

    # Individual members, nearly transparent so that only their envelope shows
    yearly.plot.line(ax=ax, x="time", color=member_color, alpha=0.01, linewidth=1, add_legend=False)
    # Ensemble means
    yearly.mean('member_id').plot.line(ax=ax, x="time", alpha=0.3, color=mean_color, linewidth=1, label='Member Mean 1Y')
    five_yearly_mean.plot.line(ax=ax, x="time", color=mean_color, linewidth=2, label='Member Mean 5Y')

    ax.set_xlabel('Time [Years]')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(color='k', linestyle='-', linewidth=0.7)
    ax.legend()

# Lay out the figure once every panel has its labels and legend
fig.tight_layout(pad=2.0)

plt.show()

### Let's save the data in netcdf format

In [None]:
# create a directory on scratch to save the output
# The user name is now actually inserted into the path: the previous
# str.format() call had no placeholder, so the path was always the same.
path = f'/glade/scratch/{getpass.getuser()}/Data/LENS2/MOC/'
# Create the directory (and any missing parents) without spawning a shell
os.makedirs(path, exist_ok=True)
ds_out_MOC.to_netcdf(path+'MOC_stats.nc')

### Save AMOC data set

In [27]:
%%time
# Add up the MOC components, then keep transport region index 1
# (presumably the Atlantic region -- confirm against the transport_reg coordinate)
moc_total = ds_MOC.MOC.sum(dim='moc_comp')
ds_AMOC = moc_total.isel(transport_reg=1)

CPU times: user 39.1 ms, sys: 2.91 ms, total: 42 ms
Wall time: 45.9 ms


In [46]:
# Re-wrap the AMOC field with only its four dimension coordinates
amoc_dims = ['member_id','time','moc_z','lat_aux_grid']
amoc_coords = {dim_name: ds_MOC.MOC.coords[dim_name] for dim_name in amoc_dims}
amoc_da = xr.DataArray(data=ds_AMOC, dims=amoc_dims, coords=amoc_coords)
ds_out_AMOC = xr.Dataset({'amoc': amoc_da})
# Global metadata written with the file
ds_out_AMOC.attrs.update({
    'description': 'Atlantic Meridional Overturning Circulation (AMOC)',
    'units': 'Sv',
    'author': 'Mauricio Rocha',
    'email': 'mauricio.rocha@usp.br',
})

In [48]:
# create a directory on scratch to save the output
# The user name is now actually inserted into the path: the previous
# str.format() call had no placeholder, so the path was always the same.
path = f'/glade/scratch/{getpass.getuser()}/Data/AMOC/LENS2/'
# Create the directory (and any missing parents) without spawning a shell
os.makedirs(path, exist_ok=True)
ds_out_AMOC.to_netcdf(path+'lens2_amoc.nc')