<code> 
<strong><font size="+2">Pipeline to run Ocetrac on all CESM-LE ensemble members</font></strong>
In this Jupyter notebook, we share a pipeline that runs Ocetrac on every CESM-LE ensemble member and saves the results for the last 40 years of each simulation. We keep only this window because it is the period that overlaps with the satellite record.
</code>

<code> Import packages </code>

In [1]:
##### LOADING IN PACKAGES #--------------------------------------------------------------
import s3fs; import xarray as xr; import numpy as np
import pandas as pd; 
import dask.array as da
import ocetrac

import matplotlib.pyplot as plt; import cartopy.crs as ccrs

import warnings; import expectexception
warnings.filterwarnings('ignore')

import netCDF4 as nc; import datetime as dt
import scipy

import intake; import pprint
# Allow multiple lines per cell to be displayed without print (default is just last line)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Enable more explicit control of DataFrame display (e.g., to omit annoying line numbers)
from IPython.display import HTML

<code> Loading CESM LE dataset </code>

In [None]:
# Open the intake-esm catalog description file (JSON, absolute path on the local
# filesystem). `coll_orig` is the catalog object that the following cells query
# with `.search(...)`.
# NOTE(review): the path is hard-coded; the notebook only runs where this file exists.
cat_url_orig = '/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cesm2-le.json'
coll_orig = intake.open_esm_datastore(cat_url_orig)

In [None]:
# Query the catalog for monthly ('month_1') SST from the 'atm' component of the
# 'historical' experiment, then collect the distinct ensemble member ids found in
# the matching catalog rows. `member_id_list` drives the per-member loop below.
subset = coll_orig.search(component='atm',variable='SST',frequency='month_1',experiment='historical')
member_id_list = subset.df.member_id.unique()
print(member_id_list)

In [None]:
##### Load a single ensemble member at a time (kept commented out; useful for debugging)
# subset = coll_orig.search(component='atm',variable='SST',frequency='month_1',experiment='historical',member_id= 'r1i1011p1f2')
# dsets = subset.to_dataset_dict(zarr_kwargs={"consolidated": True}, storage_options={"anon": True})
# dsets
# ds = dsets['atm.historical.cam.h0.smbb.SST']

In [None]:
# Per-member pipeline: load SST -> remove mean/trend/seasonal cycle -> flag values above the
# monthly 90th percentile -> track the flagged features with Ocetrac.
# Two NetCDF files are written per member: '<member_id>_detrended.nc' and '<member_id>_rad3_blobs.nc'.

# Number of trailing monthly time steps kept for every member.
# NOTE(review): 40 years is 480 months; 481 keeps one extra time step. Left unchanged so the
# output files keep their shape -- confirm that the extra step is intentional.
N_MONTHS = 481
OUT_DIR = '/glade/work/cassiacai/'
# Dataset keys returned by `to_dataset_dict`. Per the original notes the first 50 members are
# stored under the 'cmip6' key and the remaining ones under the 'smbb' key, so the key is
# resolved per member instead of being switched by hand between two runs.
DATASET_KEYS = ('atm.historical.cam.h0.cmip6.SST', 'atm.historical.cam.h0.smbb.SST')
# Labels of the six regression coefficients (mean, trend, 4 harmonics).
COEFF = np.arange(1,7,1)

for i, member_id in enumerate(member_id_list):
    member_id = str(member_id)
    subset = coll_orig.search(component='atm',variable='SST',frequency='month_1',experiment='historical',member_id=member_id)
    dsets = subset.to_dataset_dict(zarr_kwargs={"consolidated": True}, storage_options={"anon": True})
    key = next((k for k in DATASET_KEYS if k in dsets), None)
    if key is None:
        raise KeyError('No SST dataset found for member ' + member_id + '; available keys: ' + str(list(dsets)))
    ds = dsets[key]

    # Slice to the analysis window BEFORE loading: only the last N_MONTHS steps are used below,
    # and slicing explicitly avoids relying on xarray's implicit index alignment to drop the rest.
    SST = ds.SST.isel(member_id=0).isel(time=slice(-N_MONTHS, None)).load()

    ###### DETRENDING
    # ------------------------
    # Would be removing ENSO and teleconnections here
    # Discuss ways to do this
    # ------------------------
    # Decimal year of each time step (mid-month), used as the regression abscissa.
    dyr = (SST.time.dt.year + (SST.time.dt.month-0.5)/12).values
    # Our 6 coefficient model is composed of the mean, trend, annual sine and cosine harmonics,
    # & semi-annual sine and cosine harmonics. Shape: (6, time).
    model = np.array([
        np.ones(len(dyr)),
        dyr-np.mean(dyr),
        np.sin(2*np.pi*dyr),
        np.cos(2*np.pi*dyr),
        np.sin(4*np.pi*dyr),
        np.cos(4*np.pi*dyr),
    ])
    # Take the pseudo-inverse of model to 'solve' the least-squares problem
    pmodel = np.linalg.pinv(model)
    model_da = xr.DataArray(model.T, dims=['time','coeff'], coords={'time':SST.time.values, 'coeff':COEFF})
    pmodel_da = xr.DataArray(pmodel.T, dims=['coeff','time'], coords={'coeff':COEFF, 'time':SST.time.values})
    # Resulting coefficients of the model at every grid point: (coeff, lat, lon)
    sst_mod = xr.DataArray(pmodel_da.dot(SST), dims=['coeff','lat','lon'], coords={'coeff':COEFF, 'lat':SST.lat.values, 'lon':SST.lon.values})
    # Anomalies: subtract the full fitted model (mean + trend + seasonal harmonics)
    ssta_notrend = SST-model_da.dot(sst_mod)
    detrended = ssta_notrend
    detrended.to_netcdf(OUT_DIR+member_id+'_detrended.nc')

    ###### THRESHOLD and FEATURES
    # Only relevant if the data is dask-backed: the quantile needs 'time' in a single chunk.
    if detrended.chunks:
        detrended = detrended.chunk({'time': -1})
    # 90th percentile of the anomalies for each calendar month, per grid point
    threshold = detrended.groupby('time.month').quantile(0.9,dim=('time'))
    # Keep anomalies at or above their monthly threshold; everything else becomes NaN
    features_ssta = detrended.where(detrended.groupby('time.month')>=threshold, other=np.nan)
    features_ssta = features_ssta.load()

    ##### MASKING
    full_mask_land = features_ssta
    # Exact zeros are treated as missing as well
    full_masked = full_mask_land.where(full_mask_land != 0)
    # Boolean field handed to the tracker: True where a feature value survived
    binary_out_afterlandmask = np.isfinite(full_masked)

    # Valid-data mask over the same window, built from the already-loaded SST slice
    # (avoids re-reading the dataset and positional indexing of the member axis).
    newmask = np.isfinite(SST)

    # radius and min_size_quartile set the minimum feature radius and the minimum size quartile
    Tracker = ocetrac.Tracker(binary_out_afterlandmask, newmask, radius=3, min_size_quartile=0.75, timedim = 'time', xdim = 'lon', ydim='lat', positive=True)
    blobs = Tracker.track()
    # A bare expression inside a loop body is never displayed, so print explicitly.
    print(member_id, blobs.attrs)
    blobs.to_netcdf(OUT_DIR+member_id+'_rad3_blobs.nc')

# Diagnostics for the LAST processed member only. Nothing inside the loop uses them, so they are
# computed once here instead of on every iteration; the names stay available to later cells.
if len(member_id_list) > 0:
    # Mean, trend, and seasonal-cycle components of the fitted model
    mean = model_da[:,0].dot(sst_mod[0,:,:])
    trend = model_da[:,1].dot(sst_mod[1,:,:])
    seas = model_da[:,2:].dot(sst_mod[2:,:,:])
    # NOTE(review): calls a private Ocetrac method; the result is not used in this cell.
    mo = Tracker._morphological_operations()