In [39]:
from copy import copy
import math
from pathlib import Path
import time

import arrow
import xarray

In [2]:
# Root of the nowcast-green.201905 results tree; per-day directories are
# addressed relative to this path.
green_1905 = Path("/results2/SalishSea") / "nowcast-green.201905"

In [3]:
# Dask chunk sizes, keyed by dimension name, requested when the hourly results
# file is opened.
chunks = dict(time_counter=24, deptht=40, y=898, x=398)

# Names handed to open_dataset() as drop_variables so they are excluded from
# the opened dataset.
drop_vars = set((
    # bounds / geometry helpers
    "axis_nbounds",
    "nvertex",
    "bounds_lon",
    "bounds_lat",
    "area",
    "deptht_bounds",
    # auxiliary time variables
    "time_centered",
    "time_centered_bounds",
    "time_counter_bounds",
))

In [4]:
# Choose one model day and derive the two date stamps used in results paths:
# a YYYYMMDD stamp for file names and a lower-cased DDMMMYY directory name.
day = arrow.get("2014-01-01")
yyyymmdd = day.format('YYYYMMDD')
ddmmmyy = day.format('DDMMMYY').lower()
tracers = "ptrc_T"
# Path of the 1h file for the chosen tracer group inside that day's directory.
ds_file = green_1905.joinpath(
    ddmmmyy, f"SalishSea_1h_{yyyymmdd}_{yyyymmdd}_{tracers}.nc"
)
ds_file

PosixPath('/results2/SalishSea/nowcast-green.201905/01jan14/SalishSea_1h_20140101_20140101_ptrc_T.nc')

In [5]:
# Time how long it takes to open the results file as a chunked dataset.
# The interval is measured with time.perf_counter(), a monotonic high-resolution
# clock, so a system clock adjustment during the run cannot distort the figure.
t_start = time.perf_counter()
ds = xarray.open_dataset(ds_file, chunks=chunks, drop_variables=drop_vars)
print(f"load metadata via open_dataset(): {time.perf_counter() - t_start} s")

load metadata via open_dataset(): 0.13358736038208008 s


In [6]:
# Report the size of one full chunk of the first data variable: the number of
# elements in the requested chunk shape times the element size in bytes,
# divided by 1024**3. That quotient is in GiB, so it is labelled "GiB"
# rather than "Gb" (which reads as gigabits).
print(
    "chunk size: "
    f"{math.prod(chunks.values()) * ds[list(ds.data_vars)[0]].dtype.itemsize / 1024**3} GiB"
)

chunk size: 1.2781763076782227 Gb


In [7]:
# Define the daily mean over the hourly time axis. The dataset is dask-backed,
# so this step only builds the task graph; the averaging itself runs when the
# result is loaded. The timing therefore reflects graph construction only.
# time.perf_counter() is used because it is the monotonic clock intended for
# measuring intervals.
t_start = time.perf_counter()
day_avgs = ds.resample(time_counter="D").mean(
    dim="time_counter", skipna=True, keep_attrs=True
)
print(f"calc day averages: {time.perf_counter() - t_start} s")

calc day averages: 0.09916901588439941 s


In [8]:
# Bare last expression: renders the repr of the daily-average dataset so the
# per-variable array/chunk layout can be inspected.
day_avgs

Unnamed: 0,Array,Chunk
Bytes,1.43 MB,1.43 MB
Shape,"(898, 398)","(898, 398)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.43 MB 1.43 MB Shape (898, 398) (898, 398) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.43 MB,1.43 MB
Shape,"(898, 398)","(898, 398)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 MB,1.43 MB
Shape,"(898, 398)","(898, 398)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.43 MB 1.43 MB Shape (898, 398) (898, 398) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.43 MB,1.43 MB
Shape,"(898, 398)","(898, 398)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.18 MB 57.18 MB Shape (1, 40, 898, 398) (1, 40, 898, 398) Count 6 Tasks 1 Chunks Type float32 numpy.ndarray",1  1  398  898  40,

Unnamed: 0,Array,Chunk
Bytes,57.18 MB,57.18 MB
Shape,"(1, 40, 898, 398)","(1, 40, 898, 398)"
Count,6 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [9]:
# Baseline: execute the task graph with the default scheduler and pull the
# daily averages into memory.
# NOTE: load() fills day_avgs in place, so running this cell again without
# rebuilding day_avgs first would time an already-loaded dataset.
# Intervals are measured with the monotonic time.perf_counter() clock.
t_start = time.perf_counter()
day_avgs.load()
print(f"calc the task graph: {time.perf_counter() - t_start} s")

  x = np.divide(x1, x2, out)


calc the task graph: 65.76064157485962 s


In [9]:
# Benchmark: execute the task graph on the "threads" scheduler with 8 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 8
t_start = time.perf_counter()
day_avgs.load(scheduler="threads", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

  x = np.divide(x1, x2, out)


calc the task graph: 67.3717873096466 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 16 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 16
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 16.627882480621338 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 12 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 12
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 16.90312123298645 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 10 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 10
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 22.425220251083374 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 8 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 8
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 29.264289140701294 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 6 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 6
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 28.968526124954224 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 5 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 5
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 36.87586212158203 s


In [9]:
# Benchmark: execute the task graph on the "processes" scheduler with 4 workers.
# NOTE: load() fills day_avgs in place; rebuild day_avgs before this cell or the
# measurement covers an already-loaded dataset.
# time.perf_counter() is the monotonic clock intended for interval timing.
num_workers = 4
t_start = time.perf_counter()
day_avgs.load(scheduler="processes", num_workers=num_workers)
print(f"calc the task graph: {time.perf_counter() - t_start} s")

calc the task graph: 39.031991720199585 s


In [35]:
# Copy the descriptive attributes of the hourly dataset's time coordinate onto
# the daily-average time coordinate, then add the _NoFill marker. The mapping
# is applied first and the keyword second, so the attributes are inserted in
# the order time_origin, standard_name, long_name, _NoFill.
day_avgs.time_counter.attrs.update(
    {
        name: ds.time_counter.attrs[name]
        for name in ("time_origin", "standard_name", "long_name")
    },
    _NoFill="true",
)

In [10]:
# netCDF encoding for the daily-average file.
# Every data variable is zlib-compressed and stored one time step per chunk.
# The chunk shape is derived from each variable's own dimensions: length 1 along
# time_counter and the full extent elsewhere. For the (1, 40, 898, 398)
# variables this gives [1, 40, 898, 398], and a variable without a depth axis
# gets a chunk list of the right length instead of a mismatched 4-D one.
encoding = {
    var: {
        "zlib": True,
        "chunksizes": [
            1 if dim == "time_counter" else size
            for dim, size in zip(day_avgs[var].dims, day_avgs[var].shape)
        ],
    }
    for var in day_avgs.data_vars
}
# Time coordinate: stored as doubles in seconds since 1900-01-01 on the
# gregorian calendar, one value per chunk, with no fill value.
encoding["time_counter"] = {
    "dtype": "d",
    "calendar": "gregorian",
    "units": "seconds since 1900-01-01 00:00:00",
    "chunksizes": [1],
    "_FillValue": None,
}
# Time only the write, using the monotonic clock intended for interval timing.
t_start = time.perf_counter()
day_avgs.to_netcdf(Path("/tmp")/f"SalishSea_1d_{yyyymmdd}_{yyyymmdd}_{tracers}_dask.nc", encoding=encoding)
print(f"write day avgs to .nc file: {time.perf_counter() - t_start} s")

write day avgs to .nc file: 9.716030359268188 s


In [43]:
# Configuration for the batch run over several days and tracer groups.

# Dask chunk sizes, keyed by dimension name, used when opening the hourly files.
in_chunks = {
    "time_counter": 24,
    "deptht": 40,
    "y": 898,
    "x": 398,
}
# netCDF chunk sizes, keyed by dimension name, for the daily-average output.
out_chunks = {
    "time_counter": 1,
    "deptht": 40,
    "y": 898,
    "x": 398,
}
# Chunk shape for variables that have all four dimensions.
out_chunks_4d = list(out_chunks.values())
# Chunk shape for variables without the depth axis. The axis is dropped by
# dimension name rather than with list.remove(value): remove() deletes the first
# element *equal* to the depth chunk size, which would silently drop the wrong
# axis if another dimension ever had the same chunk size.
out_chunks_3d = [size for dim, size in out_chunks.items() if dim != "deptht"]
# Names handed to open_dataset() as drop_variables.
drop_vars = {
    "axis_nbounds",
    "nvertex",
    "bounds_lon",
    "bounds_lat",
    "area",
    "deptht_bounds",
    "time_centered",
    "time_centered_bounds",
    "time_counter_bounds",
}
# File-name suffixes of the results files processed for each day.
tracer_groups = ("grid_T", "carp_T", "ptrc_T")
# Root of the results tree holding the per-day directories.
green_1905 = Path("/results2/SalishSea/nowcast-green.201905")

In [45]:
# First and last model day handed to the day-by-day range in the batch loop.
start_date = arrow.get("2014-01-01")
end_date = arrow.get("2014-01-02")
# Worker count passed to the "processes" scheduler when loading daily averages.
num_workers = 12

In [46]:
# Batch job: for every day from start_date to end_date and every tracer group,
# reduce the hourly results file to a daily mean and write it into the same
# day directory, printing per-group, per-day and total elapsed times.
# Intervals are measured with time.perf_counter(), the monotonic clock intended
# for timing, so system clock adjustments cannot distort them.
t_start_total = time.perf_counter()
for day in arrow.Arrow.range("day", start_date, end_date):
    t_start_day = time.perf_counter()
    ddmmmyy = day.format('DDMMMYY').lower()
    yyyymmdd = day.format('YYYYMMDD')
    for tracer_group in tracer_groups:
        t_start_tracer = time.perf_counter()
        hour_avgs_file = green_1905/f"{ddmmmyy}"/f"SalishSea_1h_{yyyymmdd}_{yyyymmdd}_{tracer_group}.nc"
        day_avgs_file = green_1905/f"{ddmmmyy}"/f"SalishSea_1d_{yyyymmdd}_{yyyymmdd}_{tracer_group}.nc"
        # Open the hourly file in a context manager so its handle is closed as
        # soon as this group is done. Without it, every iteration leaves a file
        # open, which adds up over a long date range.
        with xarray.open_dataset(
            hour_avgs_file, chunks=in_chunks, drop_variables=drop_vars
        ) as hour_avgs:
            day_avgs = hour_avgs.resample(time_counter="D").mean(
                dim="time_counter", skipna=True, keep_attrs=True
            )
            # Execute the task graph now, while the source file is still open.
            day_avgs.load(scheduler="processes", num_workers=num_workers)
            # Copy the time coordinate's descriptive attributes from the
            # hourly dataset and add the _NoFill marker.
            day_avgs.time_counter.attrs.update({
                "time_origin": hour_avgs.time_counter.attrs["time_origin"],
                "standard_name": hour_avgs.time_counter.attrs["standard_name"],
                "long_name": hour_avgs.time_counter.attrs["long_name"],
                "_NoFill": "true",
            })
            # zlib-compress every data variable; 4-D variables use the 4-D
            # chunk shape, everything else the shape without the depth axis.
            encoding = {
                var: {
                    "zlib": True,
                    "chunksizes": out_chunks_4d if day_avgs[var].ndim == 4 else out_chunks_3d
                } for var in day_avgs.data_vars
            }
            encoding["time_counter"] = {
                "dtype": "d",
                "calendar": "gregorian",
                "units": "seconds since 1900-01-01 00:00:00",
                "chunksizes": [1],
                "_FillValue": None,
            }
            day_avgs.to_netcdf(day_avgs_file, encoding=encoding)
        print(f"{ddmmmyy} {tracer_group}: {time.perf_counter() - t_start_tracer} s")
    print(f"{ddmmmyy}: {time.perf_counter() - t_start_day} s")
print(f"total: {time.perf_counter() - t_start_total} s")

01jan14 grid_T: 14.716418266296387 s
01jan14 carp_T: 20.743958950042725 s
01jan14 ptrc_T: 25.615475177764893 s
01jan14: 61.07711887359619 s
02jan14 grid_T: 17.750022888183594 s
02jan14 carp_T: 20.65268349647522 s
02jan14 ptrc_T: 26.809048175811768 s
02jan14: 65.21391320228577 s
total: 126.29753756523132 s
