# S2HC_mask_ETBFcrop

**Date:** <br>
1 June 2022 <br>
**Background:** <br>
Issue -  <br>
**Author(s):**<br>
Thomas Moore<br>

### OOD documentation
https://opus.nci.org.au/display/DAE/Setting+up+a+Dask+Cluster+on+OOD

In [1]:
# Author metadata for this notebook.
Author1 = {
    "name": "Thomas Moore",
    "affiliation": "CSIRO",
    "email": "thomas.moore@csiro.au",
    "orcid": "0000-0003-3930-1946",
}

In [2]:
import xarray as xr
import numpy as np
import xrft
import xesmf as xe
import scipy
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import os
import re
import cartopy.crs as ccrs
import proplot as pplt
from rechunker import rechunk
%config Completer.use_jedi = False

## import helper

In [3]:
import importlib.util
# Load helper_tools.py directly from its file location and expose it as the
# module object `helper` (registered under the module name "helper").
helper_tools_file = "/g/data/v14/tm4888/code/helper-py/helper_tools.py"
spec = importlib.util.spec_from_file_location("helper", helper_tools_file)
helper = importlib.util.module_from_spec(spec)
# Execute the module body so that its definitions become attributes of `helper`.
spec.loader.exec_module(helper)

## OOD cluster

In [4]:
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
# Resources requested for each SLURM job that backs the Dask cluster.
slurm_job_kwargs = dict(cores=2, processes=1, memory="47GB", walltime='03:00:00')
cluster = SLURMCluster(**slurm_job_kwargs)
client = Client(cluster)
# Request 24 cores in total; workers only appear once the SLURM jobs start.
cluster.scale(cores=24)

  from distributed.utils import tmpfile


In [5]:
# Show the client summary (dashboard link, worker count, memory).
# NOTE(review): the captured output below reports 0 workers, presumably
# because the SLURM jobs had not started yet - confirm workers are up before computing.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.128.148:36479,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Load S2HC netcdf files for 2018-02-01 start date

In [47]:
# Directory with the ACCESS-S2 hindcast NetCDF files (inputs are read from it and
# outputs written to it). It is used as a plain string prefix (path+'file.nc'),
# so the trailing '/' is required.
path = '/g/data/xv83/users/tm4888/data/ACCESS-S2/hindcast/'

In [48]:
# Dask chunking applied when opening the files: blocks of 50 along `y`.
chunks_dict = {
    'y': 50,
}

In [49]:
# Lazily open the four ensemble-mean files for the 2018-02-01 start date,
# all with the same chunking.
open_kwargs = dict(chunks=chunks_dict)
S2HC_TS = xr.open_mfdataset(path+'S2HC_T_S_emean_20180201.nc', **open_kwargs)
S2HC_2D = xr.open_mfdataset(path+'S2HC_2D_emean_20180201.nc', **open_kwargs)
S2HC_U = xr.open_mfdataset(path+'S2HC_U_emean_20180201.nc', **open_kwargs)
S2HC_V = xr.open_mfdataset(path+'S2HC_V_emean_20180201.nc', **open_kwargs)

In [50]:
# Remove the bounds/area variables from the 2D dataset, then squeeze out any
# length-1 dimensions.
unused_2d_vars = ['deptht_bounds','areat','lont_bounds','latt_bounds']
S2HC_2D = S2HC_2D.drop_vars(unused_2d_vars).squeeze()

# rename salt and temp in 2D

In [51]:
# Give the 2D salt/temp fields distinct names (sss/sst).
# NOTE(review): presumably this avoids a clash with same-named variables in
# the T/S dataset when the two are merged - confirm.
surface_renames = {'salt':'sss','temp':'sst'}
S2HC_2D = S2HC_2D.rename_vars(surface_renames)

In [52]:
# Combine the 2D fields and the T/S dataset into a single dataset.
S2HC_TSall = xr.merge([S2HC_2D,S2HC_TS])

In [53]:
# Display the merged dataset summary (the arrays are lazy, chunked dask arrays).
S2HC_TSall

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,129 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.62 MiB 281.64 kiB Shape (1021, 1442) (50, 1442) Count 129 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021,

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,129 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,129 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.62 MiB 281.64 kiB Shape (1021, 1442) (50, 1442) Count 129 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021,

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,129 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,43 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.85 MiB 844.92 kiB Shape (3, 1021, 1442) (3, 50, 1442) Count 43 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021  3,

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,43 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,43 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.85 MiB 844.92 kiB Shape (3, 1021, 1442) (3, 50, 1442) Count 43 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021  3,

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,43 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.85 MiB 844.92 kiB Shape (3, 1021, 1442) (3, 50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021  3,

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.85 MiB 844.92 kiB Shape (3, 1021, 1442) (3, 50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021  3,

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 16.85 MiB 844.92 kiB Shape (3, 1021, 1442) (3, 50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021  3,

Unnamed: 0,Array,Chunk
Bytes,16.85 MiB,844.92 kiB
Shape,"(3, 1021, 1442)","(3, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.62 MiB 281.64 kiB Shape (1021, 1442) (50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",1442  1021,

Unnamed: 0,Array,Chunk
Bytes,5.62 MiB,281.64 kiB
Shape,"(1021, 1442)","(50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 600 B 600 B Shape (75, 2) (75, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  75,

Unnamed: 0,Array,Chunk
Bytes,600 B,600 B
Shape,"(75, 2)","(75, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.47 MiB,1.10 MiB
Shape,"(1021, 1442, 4)","(50, 1442, 4)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 22.47 MiB 1.10 MiB Shape (1021, 1442, 4) (50, 1442, 4) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",4  1442  1021,

Unnamed: 0,Array,Chunk
Bytes,22.47 MiB,1.10 MiB
Shape,"(1021, 1442, 4)","(50, 1442, 4)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.47 MiB,1.10 MiB
Shape,"(1021, 1442, 4)","(50, 1442, 4)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 22.47 MiB 1.10 MiB Shape (1021, 1442, 4) (50, 1442, 4) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",4  1442  1021,

Unnamed: 0,Array,Chunk
Bytes,22.47 MiB,1.10 MiB
Shape,"(1021, 1442, 4)","(50, 1442, 4)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.23 GiB,61.88 MiB
Shape,"(3, 75, 1021, 1442)","(3, 75, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.23 GiB 61.88 MiB Shape (3, 75, 1021, 1442) (3, 75, 50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",3  1  1442  1021  75,

Unnamed: 0,Array,Chunk
Bytes,1.23 GiB,61.88 MiB
Shape,"(3, 75, 1021, 1442)","(3, 75, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.23 GiB,61.88 MiB
Shape,"(3, 75, 1021, 1442)","(3, 75, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.23 GiB 61.88 MiB Shape (3, 75, 1021, 1442) (3, 75, 50, 1442) Count 22 Tasks 21 Chunks Type float32 numpy.ndarray",3  1  1442  1021  75,

Unnamed: 0,Array,Chunk
Bytes,1.23 GiB,61.88 MiB
Shape,"(3, 75, 1021, 1442)","(3, 75, 50, 1442)"
Count,22 Tasks,21 Chunks
Type,float32,numpy.ndarray


# rename coordinates

In [54]:
# Use common names (lat / lon / depth) on all three grids; only the name of the
# depth dimension differs between the T, U and V files.
horizontal_renames = {'nav_lat':'lat','nav_lon':'lon'}
S2HC_TSall = S2HC_TSall.rename({**horizontal_renames, 'deptht':'depth'})
S2HC_U = S2HC_U.rename({**horizontal_renames, 'depthu':'depth'})
S2HC_V = S2HC_V.rename({**horizontal_renames, 'depthv':'depth'})

# Insert land-masked NaNs for all variables
## The BoM ACCESS-S2 mask file isn't available, so values that are exactly zero are treated as land. The goal is to replace every exact zero with NaN.

In [55]:
%%time
# Keep every value that is not exactly zero; exact zeros become NaN (used here
# as a stand-in for a land mask). This stays lazy - nothing is computed yet.
Tgrid_masked, Ugrid_masked, Vgrid_masked = (
    dataset.where(dataset != 0) for dataset in (S2HC_TSall, S2HC_U, S2HC_V)
)

CPU times: user 140 ms, sys: 9.18 ms, total: 150 ms
Wall time: 150 ms


# shift lon values

In [56]:
def extend_S2_lon_across180(real_lon):
    '''
    extend_S2_lon_across180
    Wrap longitudes onto the 0-360 degree convention using modulo 360.

    Notes: Negative (western-hemisphere) longitudes are shifted up by 360, e.g.
        -170 becomes 190, so values keep increasing across the 180/-180 date
        line.  This makes it possible to select a box spanning the Pacific with
        a simple .where comparison on the multidimensional coordinates.
        (Original author's note: ACCESS-S2 runs from left to right 72.75 to 73
        through 180/-180.)
    Returns: real_lon % 360 (whatever type the % operator yields for the input)
    Author: Thomas Moore
    Date created: 21/02/2022
    Use: ds['lon'] = extend_S2_lon_across180(ds['lon'])
    Limitations: a longitude of exactly 360 maps to 0.
    '''
    wrapped_lon = real_lon % 360
    return wrapped_lon

In [57]:
# Replace `lon` on each grid with its 0-360 equivalent (updates the datasets in place).
for masked_grid in (Tgrid_masked, Ugrid_masked, Vgrid_masked):
    masked_grid['lon'] = extend_S2_lon_across180(masked_grid['lon'])

# Crop dataset to ETBF bounds
## Use the `where` method to deal with multidimensional coordinates
#### See SHOYER's comments here > https://stackoverflow.com/questions/41818927/how-to-subset-data-using-multidimensional-coordinates-using-python-xarray

## approach will be to pad out beyond the needed coordinates to eliminate edge effects after regridding - we'll "crop out the cropped data" after regridding
### The required coordinates are the "region crop": Lon 100 to 240 & Lat -60 to 5 (cropping to this region helps with memory limits on laptops).
#### Here the padded `where` bounds are: Lon 90 to 270 & Lat -70 to 20

In [58]:
def _crop_to_lonlat_box(ds, lon_bounds=(90, 270), lat_bounds=(-70, 20)):
    '''
    Crop a dataset to a longitude/latitude box using its `lon`/`lat` coordinates.

    ds: object exposing `.lon`, `.lat` and `.where(cond, drop=True)`.
    lon_bounds: (min, max) longitude, both exclusive; default 90-270.
    lat_bounds: (min, max) latitude, both exclusive; default -70-20.
    Returns: `ds` with points outside the box set to NaN and the labels that
        fall entirely outside the box dropped (that is what `drop=True` does).
    Notes: the defaults are deliberately wider than the final region so that
        edge effects from later regridding can be cropped away afterwards.
        `where` is used rather than `.sel` because the coordinates are
        multidimensional.
    '''
    lon_min, lon_max = lon_bounds
    lat_min, lat_max = lat_bounds
    inside_box = ((lon_min < ds.lon) & (ds.lon < lon_max)
                  & (lat_min < ds.lat) & (ds.lat < lat_max))
    return ds.where(inside_box, drop=True)

# Same padded box for all three grids (previously three copy-pasted expressions).
Tgrid_masked_crop = _crop_to_lonlat_box(Tgrid_masked)
Ugrid_masked_crop = _crop_to_lonlat_box(Ugrid_masked)
Vgrid_masked_crop = _crop_to_lonlat_box(Vgrid_masked)

# How big are these datasets in GB?

In [59]:
# Size of the cropped T-grid dataset in decimal gigabytes (bytes / 1e9).
Tgrid_masked_crop.nbytes/1e9

0.862380436

In [60]:
# Size of the cropped U-grid dataset in decimal gigabytes (bytes / 1e9).
Ugrid_masked_crop.nbytes/1e9

0.533874572

In [61]:
# Size of the cropped V-grid dataset in decimal gigabytes (bytes / 1e9).
Vgrid_masked_crop.nbytes/1e9

0.534246092

# export into intermediate NetCDF collections

In [64]:
%%time
# Write the masked, cropped T-grid dataset to an intermediate NetCDF file
# (this is the step that triggers the actual computation).
tgrid_outfile = path+'accessS2.HC.ocean.masked.AUSWCPregion.nativeTgrid.nc'
Tgrid_masked_crop.to_netcdf(tgrid_outfile)

CPU times: user 8.07 s, sys: 858 ms, total: 8.93 s
Wall time: 36.3 s


In [65]:
%%time
# Write the masked, cropped U-grid dataset to an intermediate NetCDF file.
ugrid_outfile = path+'accessS2.HC.ocean.masked.AUSWCPregion.nativeUgrid.nc'
Ugrid_masked_crop.to_netcdf(ugrid_outfile)

CPU times: user 3.44 s, sys: 334 ms, total: 3.77 s
Wall time: 16.9 s


In [66]:
%%time
# Write the masked, cropped V-grid dataset to an intermediate NetCDF file.
vgrid_outfile = path+'accessS2.HC.ocean.masked.AUSWCPregion.nativeVgrid.nc'
Vgrid_masked_crop.to_netcdf(vgrid_outfile)

CPU times: user 3.49 s, sys: 361 ms, total: 3.85 s
Wall time: 18.7 s


# $ The\ End$

# Break glass in case of emergency
# $\Downarrow$

In [None]:
# Emergency only: restart the workers of the cluster this client is connected to.
# NOTE(review): the captured output below is a CommClosedError, so this
# presumably fails when the scheduler connection is already closed - confirm.
client.restart()

tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <zmq.eventloop.ioloop.ZMQIOLoop object at 0x7fcb04415430>>, <Task finished name='Task-304880' coro=<Scheduler.restart() done, defined at /g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/scheduler.py:5885> exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>)
Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/core.py", line 819, in send_recv_from_rpc
    result = awa

In [67]:
# Shut down the scheduler and workers once the exports are finished.
client.shutdown()

In [93]:
# Emergency only: restart the workers of the cluster this client is connected to.
# NOTE(review): duplicates the earlier restart cell, and the captured output
# below is a CommClosedError - consider keeping only one.
client.restart()

tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <zmq.eventloop.ioloop.ZMQIOLoop object at 0x7fe51c7dee20>>, <Task finished name='Task-346239' coro=<Scheduler.restart() done, defined at /g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/scheduler.py:5885> exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>)
Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/g/data/v14/tm4888/miniconda3/envs/pangeo_hpc/lib/python3.9/site-packages/distributed/core.py", line 819, in send_recv_from_rpc
    result = awa

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 11
Total threads: 22,Total memory: 481.47 GiB

0,1
Comm: tcp://10.0.128.149:42147,Workers: 11
Dashboard: /proxy/8787/status,Total threads: 22
Started: 1 hour ago,Total memory: 481.47 GiB

0,1
Comm: tcp://10.0.128.9:35059,Total threads: 2
Dashboard: /proxy/40227/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.9:44897,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-n0dt3n65,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-n0dt3n65

0,1
Comm: tcp://10.0.128.7:35901,Total threads: 2
Dashboard: /proxy/41961/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.7:45049,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-snvq9vg2,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-snvq9vg2

0,1
Comm: tcp://10.0.128.12:33729,Total threads: 2
Dashboard: /proxy/46367/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.12:33669,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2poqf7z0,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-2poqf7z0

0,1
Comm: tcp://10.0.128.14:45411,Total threads: 2
Dashboard: /proxy/33759/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.14:41843,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-idxkm2ir,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-idxkm2ir

0,1
Comm: tcp://10.0.128.16:37973,Total threads: 2
Dashboard: /proxy/40937/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.16:42981,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-qng70e1p,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-qng70e1p

0,1
Comm: tcp://10.0.128.11:34215,Total threads: 2
Dashboard: /proxy/45663/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.11:44087,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ffrnesdi,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-ffrnesdi

0,1
Comm: tcp://10.0.128.13:40365,Total threads: 2
Dashboard: /proxy/40843/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.13:39797,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-nj2xdumv,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-nj2xdumv

0,1
Comm: tcp://10.0.128.15:45861,Total threads: 2
Dashboard: /proxy/41171/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.15:36019,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-d6v2g7u1,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-d6v2g7u1

0,1
Comm: tcp://10.0.128.10:45881,Total threads: 2
Dashboard: /proxy/34089/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.10:45227,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-947_7mvn,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-947_7mvn

0,1
Comm: tcp://10.0.128.6:35023,Total threads: 2
Dashboard: /proxy/34301/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.6:36677,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tzidkl_u,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-tzidkl_u

0,1
Comm: tcp://10.0.128.8:45433,Total threads: 2
Dashboard: /proxy/33907/status,Memory: 43.77 GiB
Nanny: tcp://10.0.128.8:34905,
Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1nsn55ir,Local directory: /local/v14/tm4888/tmp/dask-worker-space/worker-1nsn55ir
