## CESM2 - LARGE ENSEMBLE (LENS2)
- In this notebook we examine the two factors that control the temperature transport: velocity and temperature. For the velocity, we compute the advective terms and the eddy component.

### Imports

In [1]:
# modules I am using in this example
import xarray as xr
import xgcm
from xgcm import Grid
import pop_tools
from dask.distributed import Client, wait
from ncar_jobqueue import NCARCluster
import dask
import intake
import intake_esm

### Dask

In [2]:
# Dask cluster sizing: every worker is requested with one core and one process.
mem_per_worker = 40  # memory per worker, in GB
num_workers = 40     # number of workers to scale the cluster to

cluster = NCARCluster(
    cores=1,
    processes=1,
    memory=f'{mem_per_worker} GB',
    resource_spec=f'select=1:ncpus=1:mem={mem_per_worker}GB',
    walltime='2:00:00',
)
cluster.scale(num_workers)

# Attach a client to the cluster. The plain-text summary is printed and the
# client is also left as the last expression so the notebook renders it.
client = Client(cluster)
print(client)
client

<Client: 'tcp://10.12.206.54:36633' processes=0 threads=0, memory=0 B>


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.54:36633,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mauricio/mrocha/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Load data

In [3]:
# Open the intake-esm datastore described by the catalog JSON file.
catalog = intake.open_esm_datastore(
    '/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cesm2-le.json'
)

# Restrict the catalog to monthly ocean output for the variables used below.
cat_subset = catalog.search(
    component='ocn',
    frequency='month_1',
    variable=['UET', 'UVEL', 'VVEL', 'VNT', 'TEMP', 'N_HEAT'],
)

# Turn the selected catalog entries into a dictionary of xarray datasets,
# then list the keys so the naming scheme is visible in the output.
dset_dict_raw = cat_subset.to_dataset_dict(
    storage_options={'anon': True},
    zarr_kwargs={'consolidated': True},
)
print(f'\nDataset dictionary keys:\n {dset_dict_raw.keys()}')

  return pd.read_csv(catalog_path, **csv_kwargs), catalog_path



--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.stream.forcing_variant.variable'



Dataset dictionary keys:
 dict_keys(['ocn.ssp370.pop.h.smbb.VVEL', 'ocn.historical.pop.h.smbb.N_HEAT', 'ocn.historical.pop.h.smbb.VNT', 'ocn.ssp370.pop.h.cmip6.VNT', 'ocn.historical.pop.h.cmip6.UET', 'ocn.historical.pop.h.cmip6.TEMP', 'ocn.historical.pop.h.cmip6.N_HEAT', 'ocn.ssp370.pop.h.smbb.UVEL', 'ocn.ssp370.pop.h.cmip6.TEMP', 'ocn.ssp370.pop.h.cmip6.UET', 'ocn.historical.pop.h.smbb.TEMP', 'ocn.ssp370.pop.h.smbb.VNT', 'ocn.historical.pop.h.cmip6.VVEL', 'ocn.ssp370.pop.h.cmip6.VVEL', 'ocn.historical.pop.h.smbb.UVEL', 'ocn.historical.pop.h.smbb.VVEL', 'ocn.historical.pop.h.cmip6.VNT', 'ocn.ssp370.pop.h.smbb.UET', 'ocn.ssp370.pop.h.cmip6.UVEL', 'ocn.ssp370.pop.h.smbb.N_HEAT', 'ocn.ssp370.pop.h.smbb.TEMP', 'ocn.ssp370.pop.h.cmip6.N_HEAT', 'ocn.historical.pop.h.smbb.UET', 'ocn.historical.pop.h.cmip6.UVEL'])


### Concatenation of variables

In [None]:
ff = ('cmip6', 'smbb')  # forcing variants
fb = ['UET', 'UVEL', 'VVEL', 'VNT', 'TEMP', 'N_HEAT']
# Variables:
#   UET    - Flux of Heat in grid-x direction
#   VNT    - Flux of Heat in grid-y direction
#   UVEL   - Velocity in grid-x direction
#   VVEL   - Velocity in grid-y direction
#   TEMP   - Potential Temperature
#   N_HEAT - loaded as well, but not described in the original notes
#            (presumably northward heat transport; TODO confirm)

dsi = {}
for var in fb:
    # Step 1: for each forcing variant, join the historical and ssp370 runs
    # end to end along the time dimension.
    per_forcing = []
    for forcing in ff:
        historical = dset_dict_raw[f'ocn.historical.pop.h.{forcing}.{var}']
        projection = dset_dict_raw[f'ocn.ssp370.pop.h.{forcing}.{var}']
        per_forcing.append(
            xr.combine_nested([historical, projection], concat_dim=['time'])
        )

    # Step 2: stack the forcing variants (in the order given by ff, i.e.
    # cmip6 first, then smbb) along the member_id dimension.
    dsi[var] = xr.combine_nested(per_forcing, concat_dim=['member_id'])
    del per_forcing

### For the South Atlantic

In [None]:
%%time
# Cut every variable down to the South Atlantic box and re-centre it.
dask.config.set({"array.slicing.split_large_chunks": True})

# Initial (i) / final (f) bounds of the two longitude pieces. They are applied
# through isel(), i.e. as positions along the nlon dimension.
ilon1, flon1, ilon2, flon2 = 307, 320, 0, 54
ilan = 0    # northernmost latitude (compared against TLAT)
ilas = -34  # southernmost latitude (compared against TLAT)

fb = ['UET', 'UVEL', 'VVEL', 'VNT', 'TEMP']

for var in fb:
    field = dsi[var]

    # Keep only the rows whose TLAT lies inside [ilas, ilan]; the masked
    # selection is computed once and then sliced twice.
    inside_lat_band = (field.TLAT >= ilas) & (field.TLAT <= ilan)
    lat_band = field.where(inside_lat_band, drop=True)

    # Two nlon pieces, glued side by side along nlon (the outer list level
    # corresponds to nlat and holds a single row of pieces).
    high_index_piece = lat_band.isel(nlon=slice(ilon1, flon1))
    low_index_piece = lat_band.isel(nlon=slice(ilon2, flon2))
    regional = xr.combine_nested(
        [[high_index_piece, low_index_piece]],
        concat_dim=['nlat', 'nlon'],
    )

    # Wrap the nlon coordinate values into [-180, 180) and sort by them.
    # NOTE(review): this treats the values of nlon as longitudes in degrees;
    # if nlon holds grid indices this is not a longitude conversion - confirm.
    regional.coords['nlon'] = (regional.coords['nlon'] + 180) % 360 - 180
    dsi[var] = regional.sortby(regional.nlon)

del ilan, ilas, ilon1, flon1, ilon2, flon2

### Helper functions

In [None]:
def to_index(ds):
    """Return a copy of ``ds`` with the staggered-grid dimension names collapsed.

    For each of the dimensions ``nlon_t``, ``nlat_t``, ``nlon_u`` and
    ``nlat_u`` that is present, the variable of the same name is dropped and
    the dimension is renamed by stripping its two-character suffix
    (``nlon_t`` -> ``nlon``, ``nlat_u`` -> ``nlat``, ...). Every other
    dimension is left as it is. The input object is not modified.

    Parameters
    ----------
    ds : xarray-like object
        Must provide ``copy()``, ``dims``, ``drop_vars()`` and ``rename()``
        (presumably an ``xarray.Dataset`` or ``xarray.DataArray``).

    Returns
    -------
    Same kind of object as ``ds``, with the renamed dimensions.

    Notes
    -----
    If both the ``_t`` and ``_u`` flavour of the same axis are present, both
    map to the same target name; how ``rename`` reacts to that is up to the
    underlying library.
    """
    staggered_dims = ("nlon_t", "nlat_t", "nlon_u", "nlat_u")
    out = ds.copy()
    # Iterate over a snapshot of the names, since ``out`` is rebound in the loop.
    for dim in tuple(ds.dims):
        if dim in staggered_dims:
            # drop_vars() replaces the deprecated drop() for variables;
            # errors="ignore" lets a dimension that has no coordinate variable
            # still be renamed instead of raising.
            out = out.drop_vars(dim, errors="ignore").rename({dim: dim[:-2]})
    return out
def to_coord(ds, lon, lat):
    """Rename the ``nlon``/``nlat`` dimensions of ``ds`` to suffixed names.

    ``nlon`` becomes ``"nlon_" + lon`` and ``nlat`` becomes ``"nlat_" + lat``
    (for example ``to_coord(ds, "u", "t")`` yields ``nlon_u`` / ``nlat_t``).

    Parameters
    ----------
    ds : object exposing ``rename(mapping)``
    lon, lat : str
        Suffixes appended to the longitude and latitude dimension names.

    Returns
    -------
    Whatever ``ds.rename`` returns for that mapping.
    """
    lon_name = "nlon_" + lon
    lat_name = "nlat_" + lat
    return ds.rename({"nlon": lon_name, "nlat": lat_name})

### Get the POP_grid

In [None]:
# Print the built-in documentation of pop_tools.to_xgcm_grid_dataset as an
# interactive reference before using it in the next cell.
help(pop_tools.to_xgcm_grid_dataset)

In [None]:
# Get the xgcm grid and the renamed dataset from pop_tools, then derive the cell volume.
# For the low-resolution LENS output, DZT should simply equal dz because there are no
# partial bottom cells - TODO confirm (open question to Gustavo).
# NOTE(review): only the UET variable is passed in here, while later cells read
# ds_ren['UET'], ds_ren['VNT'], ds_ren.UVEL, ds_ren.VVEL, ds_ren.TEMP, ds_ren.DZU,
# ds_ren.DYU and ds_ren.DXU - confirm that ds_ren actually carries all of them.

grid, ds_ren = pop_tools.to_xgcm_grid_dataset(dsi['UET']['UET']) # TODO: confirm that a single variable is enough to build grid and ds_ren
ds_ren['cell_volume'] = ds_ren.dz * ds_ren.DXT * ds_ren.DYT # cell volume as dz * DXT * DYT (units still to be checked)

### Advection

In [None]:
%%time
# Total advection as saved by the model. The difference between this total
# and the mean-flow advection is the eddy component.
# Only the horizontal components are computed here.
print('Getting total advection term.')

# x-direction: weight UET by the cell volume, relabel its dimensions to
# (nlon_u, nlat_t), difference along X and divide by the cell volume again.
uet_weighted = to_coord(ds_ren.cell_volume * ds_ren['UET']['UET'], 'u', 't')
uet_divergence = grid.diff(uet_weighted, axis="X", boundary="extend")
uadv = -(uet_divergence / ds_ren.cell_volume)

# y-direction: same recipe for VNT, relabelled to (nlon_t, nlat_u) and
# differenced along Y.
vnt_weighted = to_coord(ds_ren.cell_volume * ds_ren['VNT']['VNT'], 't', 'u')
vnt_divergence = grid.diff(vnt_weighted, axis="Y", boundary="extend")
vadv = -(vnt_divergence / ds_ren.cell_volume)

In [None]:
# Total horizontal advection: sum of the x-direction (uadv) and y-direction (vadv) terms.
h_adv = uadv + vadv # possibly only the VNT-based term is needed; both are kept here for completeness

In [None]:
# Horizontal advection by the mean flow. It is needed because the eddy term
# is the difference between the total (model-saved) advection and this term.
# The unused `st = time.time()` timer was removed from this cell: `time` is
# never imported in this notebook, so that line raised a NameError, and the
# total-advection cell already has the same timer commented out.
# NOTE(review): DZU is read from ds_ren here while the cell volume is built
# from dz - confirm that ds_ren provides DZU.
print('Getting horizontal advection from mean flow.')
# u term: UVEL * DZU * DYU interpolated along Y, multiplied by TEMP
# interpolated along X, then differenced along X and divided by the cell volume.
U_interp = grid.interp((ds_ren.UVEL * ds_ren.DZU * ds_ren.DYU),
                      axis="Y",
                      boundary="extend")
uT = U_interp * grid.interp(ds_ren.TEMP,
                            axis="X",
                            boundary="extend")
H_ADV_mean = -(grid.diff(uT, axis="X", boundary="extend")
               / ds_ren.cell_volume)
# v term: VVEL * DZU * DXU interpolated along X, multiplied by TEMP
# interpolated along Y.
V_interp = grid.interp((ds_ren.VVEL * ds_ren.DZU * ds_ren.DXU),
                       axis="X", boundary="extend")
vT = V_interp * grid.interp(ds_ren.TEMP,
                            axis="Y", boundary="extend")
# Total mean-flow term: subtract the Y-difference of vT over the cell volume
# from the u part. Possibly only the v term is needed; either way the
# grid.diff(...) / cell_volume operation is required.
H_ADV_mean = H_ADV_mean - (grid.diff(vT, axis="Y", boundary="extend")
                           / ds_ren.cell_volume)

In [None]:
# Eddy part of the horizontal temperature advection: total advection saved by
# the model (h_adv) minus the advection by the mean flow (H_ADV_mean).
T_h_ADV_eddy = (h_adv - H_ADV_mean)