# S2HC_mask_ETBFcrop

**Date:** <br>
1 June 2022 <br>
**Background:** <br>
Issue -  <br>
**Author(s):**<br>
Thomas Moore<br>

### OOD documentation
https://opus.nci.org.au/display/DAE/Setting+up+a+Dask+Cluster+on+OOD

In [1]:
# Notebook author metadata: name, affiliation, contact email and ORCID iD.
Author1 = dict(
    name="Thomas Moore",
    affiliation="CSIRO",
    email="thomas.moore@csiro.au",
    orcid="0000-0003-3930-1946",
)

In [2]:
import xarray as xr
import numpy as np
import xrft
import xesmf as xe
import scipy
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import os
import re
import cartopy.crs as ccrs
import proplot as pplt
from rechunker import rechunk
%config Completer.use_jedi = False

## import helper

In [3]:
import importlib.util
# Build an import spec for helper_tools.py from an explicit file path, under the module name "helper".
# NOTE(review): absolute, user-specific path — this cell only runs where that file exists.
spec = importlib.util.spec_from_file_location("helper", "/g/data/v14/tm4888/code/helper-py/helper_tools.py")
# Create an empty module object from the spec ...
helper = importlib.util.module_from_spec(spec)
# ... then execute the file so its top-level definitions become attributes of `helper`.
spec.loader.exec_module(helper)

## OOD cluster

In [4]:
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
# Job template: each SLURM job asks for 2 cores run as 1 worker process, 47GB of memory and a 3 hour walltime.
cluster = SLURMCluster(cores=2,processes=1,memory="47GB",walltime='03:00:00')
# Connect this notebook session to the cluster's scheduler.
client = Client(cluster)
# Request 24 cores in total; at 2 cores per job that is 12 workers.
cluster.scale(cores=24)

  from distributed.utils import tmpfile


In [7]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 12
Total threads: 24,Total memory: 525.24 GiB

0,1
Comm: tcp://10.0.128.166:34583,Workers: 12
Dashboard: /proxy/8787/status,Total threads: 24
Started: Just now,Total memory: 525.24 GiB

0,1
Comm: tcp://10.0.128.20:46305,Total threads: 2
Dashboard: /proxy/41415/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.20:39445,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-kgactz5w,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-kgactz5w

0,1
Comm: tcp://10.0.128.19:38789,Total threads: 2
Dashboard: /proxy/35109/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.19:35143,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1iht1ovl,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1iht1ovl

0,1
Comm: tcp://10.0.128.18:44053,Total threads: 2
Dashboard: /proxy/42355/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.18:34647,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2o3m601r,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2o3m601r

0,1
Comm: tcp://10.0.128.15:34459,Total threads: 2
Dashboard: /proxy/37697/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.15:44979,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-u4mmll8w,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-u4mmll8w

0,1
Comm: tcp://10.0.128.22:44047,Total threads: 2
Dashboard: /proxy/44789/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.22:42759,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-8m_0cofn,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-8m_0cofn

0,1
Comm: tcp://10.0.128.23:37265,Total threads: 2
Dashboard: /proxy/33131/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.23:46065,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ecq3pkx2,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ecq3pkx2

0,1
Comm: tcp://10.0.128.16:43531,Total threads: 2
Dashboard: /proxy/33533/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.16:42977,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1vaeiq6x,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1vaeiq6x

0,1
Comm: tcp://10.0.128.12:33775,Total threads: 2
Dashboard: /proxy/41781/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.12:37503,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tn3ppbut,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tn3ppbut

0,1
Comm: tcp://10.0.128.14:44657,Total threads: 2
Dashboard: /proxy/46619/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.14:45233,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-4e59ciba,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-4e59ciba

0,1
Comm: tcp://10.0.128.13:36291,Total threads: 2
Dashboard: /proxy/39755/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.13:41211,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2oocvajb,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2oocvajb

0,1
Comm: tcp://10.0.128.17:40613,Total threads: 2
Dashboard: /proxy/46239/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.17:35063,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-_1aee88p,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-_1aee88p

0,1
Comm: tcp://10.0.128.21:41243,Total threads: 2
Dashboard: /proxy/41781/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.21:38351,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-9jw3fodg,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-9jw3fodg


# Load S2HC netcdf files for 2018-02-01 start date

In [80]:
path = '/g/data/xv83/users/tm4888/data/ACCESS-S2/hindcast/'

In [81]:
# Open the four netcdf files for the 20180201 start date: T/S, 2D fields, U and V.
# No `chunks=` argument is passed, so the variables are not dask arrays at this point.
# NOTE(review): `emean` in the file names presumably means ensemble mean — confirm.
S2HC_TS = xr.open_dataset(path+'S2HC_T_S_emean_20180201.nc')
S2HC_2D = xr.open_dataset(path+'S2HC_2D_emean_20180201.nc')
S2HC_U = xr.open_dataset(path+'S2HC_U_emean_20180201.nc')
S2HC_V = xr.open_dataset(path+'S2HC_V_emean_20180201.nc')

In [82]:
# Remove the bounds/area variables from the 2D dataset and, in the same
# expression, squeeze out any length-1 dimensions.
S2HC_2D = (
    S2HC_2D
    .drop_vars(['deptht_bounds','areat','lont_bounds','latt_bounds'])
    .squeeze()
)

# rename salt and temp in 2D

In [83]:
# Rename the 2D `salt`/`temp` variables to `sss`/`sst`.
# NOTE(review): presumably this avoids a name clash with same-named variables in S2HC_TS when the two are merged — confirm.
S2HC_2D = S2HC_2D.rename_vars({'salt':'sss','temp':'sst'})

In [85]:
# Combine the 2D variables and the T/S variables into a single dataset.
S2HC_TSall = xr.merge([S2HC_2D,S2HC_TS])

In [86]:
S2HC_TSall

# rename coordinates

In [87]:
# Give all three datasets the same names: nav_lat -> lat, nav_lon -> lon, and each
# dataset's own depth name (deptht / depthu / depthv) -> depth.
S2HC_TSall = S2HC_TSall.rename({'nav_lat':'lat','nav_lon':'lon','deptht':'depth'})
S2HC_U = S2HC_U.rename({'nav_lat':'lat','nav_lon':'lon','depthu':'depth'})
S2HC_V = S2HC_V.rename({'nav_lat':'lat','nav_lon':'lon','depthv':'depth'})

# Insert NaNs as a land mask for all variables.
## The BoM ACCESS-S2 mask file isn't available, so land is identified as values that are exactly zero. The goal is to replace those exact zeros with NaN.

In [88]:
%%time
# Keep values that are not exactly zero; every exact zero becomes NaN (stand-in for a land mask).
# NOTE(review): any genuine ocean value that is exactly 0 is masked as well.
# NOTE(review): if lat/lon are data variables rather than coordinates, their zeros
# (equator / prime meridian) would presumably be set to NaN too — confirm they are coordinates.
Tgrid_masked = S2HC_TSall.where(S2HC_TSall != 0)
Ugrid_masked = S2HC_U.where(S2HC_U != 0)
Vgrid_masked = S2HC_V.where(S2HC_V != 0)

CPU times: user 7.9 s, sys: 27.2 s, total: 35.1 s
Wall time: 48.8 s


# shift lon values

In [89]:
def extend_S2_lon_across180(real_lon):
    '''
    Wrap longitude values into the 0-360 range.

    Taking the longitude modulo 360 turns negative values into values above
    180, so longitude keeps increasing across the 180/-180 date line. This is
    only needed so that a single `.where` range test can slice the
    multidimensional coordinates over the Pacific.

    Notes: the ACCESS-S2 grid is described as running left to right from
        72.75 to 73 through 180/-180.
    Parameters: real_lon - longitude value(s) in degrees; any object that
        supports the `%` operator.
    Returns: the result of `real_lon % 360`.
    Author: Thomas Moore
    Date created: 21/02/2022
    '''
    full_circle_deg = 360
    wrapped_lon = real_lon % full_circle_deg
    return wrapped_lon

In [90]:
# Overwrite each dataset's `lon` with its value modulo 360.
# Re-running this cell is harmless: applying `% 360` twice gives the same values.
Tgrid_masked['lon'] = extend_S2_lon_across180(Tgrid_masked['lon'])
Ugrid_masked['lon'] = extend_S2_lon_across180(Ugrid_masked['lon'])
Vgrid_masked['lon'] = extend_S2_lon_across180(Vgrid_masked['lon'])

# Crop dataset to ETBF bounds
## Use the `where` method to deal with multidimensional coordinates
#### See SHOYER's comments here > https://stackoverflow.com/questions/41818927/how-to-subset-data-using-multidimensional-coordinates-using-python-xarray

## The approach is to pad out beyond the needed coordinates to eliminate edge effects after regridding - we'll "crop out the cropped data" after regridding
### The required coordinates are the "Region crop": Lon 100 to 240 and Lat -60 to 5 (cropped to help with memory issues on laptops).
#### Here we use `where` with Lon 90 to 270 and Lat -70 to 20

In [None]:
def _crop_to_box(ds, lon_bounds=(90, 270), lat_bounds=(-70, 20)):
    """Keep only the points of `ds` that lie strictly inside a lon/lat box.

    The test is built from `ds.lon` and `ds.lat` and applied with
    `where(..., drop=True)`, which is the approach used here for
    multidimensional coordinates.

    Parameters
    ----------
    ds : object exposing `.lon`, `.lat` and `.where(cond, drop=True)`
        Dataset to crop; `lon` is expected to already be shifted to 0-360.
    lon_bounds : tuple of (min, max)
        Exclusive longitude limits. Default (90, 270) is the padded crop.
    lat_bounds : tuple of (min, max)
        Exclusive latitude limits. Default (-70, 20) is the padded crop.

    Returns
    -------
    The result of `ds.where(inside_box, drop=True)`.
    """
    lon_min, lon_max = lon_bounds
    lat_min, lat_max = lat_bounds
    inside_box = ((lon_min < ds.lon) & (ds.lon < lon_max)
                  & (lat_min < ds.lat) & (ds.lat < lat_max))
    return ds.where(inside_box, drop=True)

# Same padded box for all three grids; the padding is cropped off again after regridding.
Tgrid_masked_crop = _crop_to_box(Tgrid_masked)
Ugrid_masked_crop = _crop_to_box(Ugrid_masked)
Vgrid_masked_crop = _crop_to_box(Vgrid_masked)

# How big are these datasets in GB?

In [None]:
Tgrid_masked_crop.nbytes/1e9

In [53]:
Ugrid_masked_crop.nbytes/1e9

0.533874572

In [54]:
Vgrid_masked_crop.nbytes/1e9

0.534246092

# Rechunk U & V for all depths and space in a chunk
## ToDo: make this robust for changing regions - automatically set chunk sizes.  Currently set manually based on size of arrays after crop.

In [16]:
# -1 means "one chunk spanning the whole dimension", so x, y and depth each stay in a
# single chunk whatever size the crop produced; only time is split, one step per chunk.
Ugrid_masked_crop = Ugrid_masked_crop.chunk({'x':-1,'y':-1,'time':1,'depth':-1})

In [17]:
# -1 means "one chunk spanning the whole dimension", so x, y and depth each stay in a
# single chunk whatever size the crop produced; only time is split, one step per chunk.
Vgrid_masked_crop = Vgrid_masked_crop.chunk({'x':-1,'y':-1,'time':1,'depth':-1})

# export into intermediate zarr collections

In [18]:
%%time
# Write the cropped, masked T-grid dataset to a zarr store with consolidated metadata.
# NOTE(review): no `mode` is passed, so a re-run presumably fails if the store already exists — confirm.
# NOTE(review): the store name says `RA` while the inputs are `S2HC_*` hindcast files — confirm the name is intended.
Tgrid_masked_crop.to_zarr(store = '/g/data/v14/tm4888/data/ACCESS-S2/accessS2.RA.ocean.masked.AUSWCPregion.nativeTgrid.zarr',consolidated=True)

CPU times: user 11.5 s, sys: 1.34 s, total: 12.8 s
Wall time: 36.4 s


<xarray.backends.zarr.ZarrStore at 0x7f315d6ec510>

In [19]:
%%time
# Write the cropped, masked U-grid dataset to a zarr store with consolidated metadata.
# NOTE(review): no `mode` is passed, so a re-run presumably fails if the store already exists — confirm.
# NOTE(review): the store name says `RA` while the inputs are `S2HC_*` hindcast files — confirm the name is intended.
Ugrid_masked_crop.to_zarr(store = '/g/data/v14/tm4888/data/ACCESS-S2/accessS2.RA.ocean.masked.AUSWCPregion.nativeUgrid.zarr',consolidated=True)

CPU times: user 1min 5s, sys: 6.8 s, total: 1min 12s
Wall time: 2min 31s


<xarray.backends.zarr.ZarrStore at 0x7f314e735190>

In [20]:
%%time
# Write the cropped, masked V-grid dataset to a zarr store with consolidated metadata.
# NOTE(review): no `mode` is passed, so a re-run presumably fails if the store already exists — confirm.
# NOTE(review): the store name says `RA` while the inputs are `S2HC_*` hindcast files — confirm the name is intended.
Vgrid_masked_crop.to_zarr(store = '/g/data/v14/tm4888/data/ACCESS-S2/accessS2.RA.ocean.masked.AUSWCPregion.nativeVgrid.zarr',consolidated=True)

CPU times: user 58.6 s, sys: 5.4 s, total: 1min 4s
Wall time: 1min 57s


<xarray.backends.zarr.ZarrStore at 0x7f31407f5040>

# $ The\ End$

# Break glass in case of emergency
# $\Downarrow$

In [None]:
client.restart()

tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <zmq.eventloop.ioloop.ZMQIOLoop object at 0x7fcb04415430>>, <Task finished name='Task-304880' coro=<Scheduler.restart() done, defined at /g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/scheduler.py:5885> exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>)
Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/core.py", line 819, in send_recv_from_rpc
    result = awa

In [21]:
client.shutdown()

distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError


In [93]:
client.restart()

tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <zmq.eventloop.ioloop.ZMQIOLoop object at 0x7fe51c7dee20>>, <Task finished name='Task-346239' coro=<Scheduler.restart() done, defined at /g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/scheduler.py:5885> exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>)
Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/core.py", line 819, in send_recv_from_rpc
    result = awa

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 11
Total threads: 22,Total memory: 481.47 GiB

0,1
Comm: tcp://10.0.128.149:42147,Workers: 11
Dashboard: /proxy/8787/status,Total threads: 22
Started: 1 hour ago,Total memory: 481.47 GiB

0,1
Comm: tcp://10.0.128.9:35059,Total threads: 2
Dashboard: /proxy/40227/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.9:44897,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-n0dt3n65,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-n0dt3n65

0,1
Comm: tcp://10.0.128.7:35901,Total threads: 2
Dashboard: /proxy/41961/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.7:45049,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-snvq9vg2,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-snvq9vg2

0,1
Comm: tcp://10.0.128.12:33729,Total threads: 2
Dashboard: /proxy/46367/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.12:33669,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2poqf7z0,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2poqf7z0

0,1
Comm: tcp://10.0.128.14:45411,Total threads: 2
Dashboard: /proxy/33759/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.14:41843,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-idxkm2ir,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-idxkm2ir

0,1
Comm: tcp://10.0.128.16:37973,Total threads: 2
Dashboard: /proxy/40937/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.16:42981,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-qng70e1p,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-qng70e1p

0,1
Comm: tcp://10.0.128.11:34215,Total threads: 2
Dashboard: /proxy/45663/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.11:44087,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ffrnesdi,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ffrnesdi

0,1
Comm: tcp://10.0.128.13:40365,Total threads: 2
Dashboard: /proxy/40843/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.13:39797,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-nj2xdumv,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-nj2xdumv

0,1
Comm: tcp://10.0.128.15:45861,Total threads: 2
Dashboard: /proxy/41171/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.15:36019,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-d6v2g7u1,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-d6v2g7u1

0,1
Comm: tcp://10.0.128.10:45881,Total threads: 2
Dashboard: /proxy/34089/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.10:45227,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-947_7mvn,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-947_7mvn

0,1
Comm: tcp://10.0.128.6:35023,Total threads: 2
Dashboard: /proxy/34301/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.6:36677,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tzidkl_u,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tzidkl_u

0,1
Comm: tcp://10.0.128.8:45433,Total threads: 2
Dashboard: /proxy/33907/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.8:34905,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1nsn55ir,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1nsn55ir
