In [1]:
import xarray
import climtas

xarray.__version__, climtas.__version__

('0.16.1', '0.2.5+11.g217faec')

In [2]:
ls /g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_*.nc

[0m[01;32m/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190101_20190131.nc[0m*
[01;32m/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190201_20190228.nc[0m*
[01;32m/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190301_20190331.nc[0m*
[01;32m/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190401_20190430.nc[0m*
[01;32m/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190501_20190531.nc[0m*
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190601_20190630.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190701_20190731.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190801_20190831.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190901_20190930.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20191001_20191031.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20191101_20191130.nc
/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20191201_20191231.nc

### Function to benchmark

We'll start with a simple function that just returns the mean of the data.

In [3]:
def func(da):
    """Return the mean of ``da``.

    This is the function being benchmarked: it calls ``da.mean()`` with no
    arguments and hands back whatever that call returns.
    """
    mean_value = da.mean()
    return mean_value

## Running Once

Just running it once gives a basic idea of performance. I've used the chunking in the NetCDF file as a starting point.

In [4]:
# Profile a single run of `func` on the January 2019 file with one fixed chunk layout.
climtas.profile.profile_once(
    '/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190101_*.nc',
    variable='t2m',
    function=func,
    chunks={'time': 93, 'latitude': 91, 'longitude': 180},
)

{'time_total': 7.717563376761973,
 'time_open': 0.020742579828947783,
 'time_function': 0.003424877766519785,
 'time_optimize': 0.01196580519899726,
 'time_load': 7.677215476054698,
 'chunks': {'time': 93, 'latitude': 91, 'longitude': 180},
 'nchunks_in': 512,
 'nchunks_out': 1,
 'chunksize_in': '6.09 MB',
 'chunksize_out': '4 B',
 'tasks_in': 513,
 'tasks_out': 1098,
 'tasks_optimized': 1098}

### Benchmarking

Now let's benchmark with a few different chunk shapes. The function is run three times with each of the chunk options, and the minimum time taken is returned.

In [5]:
# Benchmark `func` on the January 2019 file for three chunk layouts. The i-th
# entries of the three lists together make up the i-th layout that is tested.
climtas.profile.benchmark(
    '/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_20190101_*.nc',
    variable='t2m',
    function=func,
    chunks={
        'time': [93, 93, 93],
        'latitude': [91, 91 * 2, 91],
        'longitude': [180, 180 * 2, 180 // 2],
    },
)

Unnamed: 0,time_total,time_open,time_function,time_optimize,time_load,chunks,nchunks_in,nchunks_out,chunksize_in,chunksize_out,tasks_in,tasks_out,tasks_optimized
0,8.344763,0.011151,0.003014,0.010418,8.316582,"{'time': 93, 'latitude': 91, 'longitude': 180}",512,1,6.09 MB,4 B,513,1098,1098
1,8.237521,0.011947,0.001875,0.002899,8.213991,"{'time': 93, 'latitude': 182, 'longitude': 360}",128,1,24.37 MB,4 B,129,276,276
2,11.88077,0.008398,0.004415,0.020437,11.834581,"{'time': 93, 'latitude': 91, 'longitude': 90}",1024,1,3.05 MB,4 B,1025,2196,2196


### Distributed

Let's switch to Dask's distributed mode and process a whole year of data on 4 CPUs.

In [6]:
# Set up the client used for the distributed benchmark that follows.
# NOTE(review): presumably GadiClient returns a dask.distributed Client that
# becomes the default scheduler for later computations; confirm in climtas.nci.
import climtas.nci
client = climtas.nci.GadiClient()
# Bare last expression so the notebook displays the client/cluster summary.
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38883 instead


0,1
Client  Scheduler: tcp://127.0.0.1:35741  Dashboard: /proxy/38883/status,Cluster  Workers: 4  Cores: 4  Memory: 17.18 GB


In [7]:
# Benchmark `func` on every 2019 file matched by the glob, for three chunk
# layouts. The i-th entries of the three lists together make up the i-th layout.
climtas.profile.benchmark(
    '/g/data/ub4/era5/netcdf/surface/t2m/2019/t2m_era5_global_*.nc',
    variable='t2m',
    function=func,
    chunks={
        'time': [93, 93, 93],
        'latitude': [91, 91 * 2, 91],
        'longitude': [180, 180 * 2, 180 // 2],
    },
)

Unnamed: 0,time_total,time_open,time_function,time_optimize,time_load,chunks,nchunks_in,nchunks_out,chunksize_in,chunksize_out,tasks_in,tasks_out,tasks_optimized
0,59.922452,0.119471,0.018615,0.333104,58.995246,"{'time': 93, 'latitude': 91, 'longitude': 180}",6144,1,6.09 MB,4 B,12300,19332,19332
1,46.908512,0.172809,0.006854,0.056711,46.660048,"{'time': 93, 'latitude': 182, 'longitude': 360}",1536,1,24.37 MB,4 B,3084,4860,4860
2,95.761346,0.213163,0.034142,0.829632,93.819402,"{'time': 93, 'latitude': 91, 'longitude': 90}",12288,1,3.05 MB,4 B,24588,38640,38640
