In [1]:
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
from dask import array as da
import numpy as np
from scipy import ndimage
import os
import stat

In [7]:
# Scratch directory that is later passed to the Dask workers through the
# 'local_directory' cluster option. Create it (and any missing parents) up
# front; an already existing directory is not an error.
local_directory = "/project/bioinformatics/Danuser_lab/Dean/dean/dask_temp/"
os.makedirs(name=local_directory, exist_ok=True)

In [None]:

# Keyword arguments expanded into SLURMCluster(**cluster_kwargs) below.
# One SLURM job == one node == one Dask worker process.
cluster_kwargs = dict(
    # Cores requested per job; with a single process these become the
    # worker's thread pool.
    cores=40,
    # Number of Python worker processes each job is split into.
    processes=1,
    # Memory requested per job.
    memory='220GB',
    # Worker scratch directory (created in the previous cell).
    local_directory=local_directory,
    # Network interface used for worker <-> scheduler communication.
    interface='ib0',
    # Maximum runtime of each SLURM job.
    walltime="36:00:00",
    job_name="multinode_warp",
    # SLURM queue/partition name. Other partitions noted by the original
    # author: 32GB, 128GB, 512GB.
    queue="256GB",
    death_timeout="600s",
    # Extra lines added to the generated job script as #SBATCH directives.
    job_extra_directives=[
        "--nodes=1",         # each worker job occupies exactly one node
        "--ntasks=1",        # one task launched per job
        "--mail-type=FAIL",  # e-mail notification on job failure only
        "--mail-user=kevin.dean@utsouthwestern.edu",
        "-o job_%j.out",     # stdout file, %j expands to the SLURM job id
        "-e job_%j.err",     # stderr file, %j expands to the SLURM job id
    ],
    # Options handed to the Dask scheduler; the dashboard listens on 8788.
    scheduler_options=dict(
        dashboard_address=":8788",
    ),
)

# Start the SLURM-backed Dask cluster and attach a client to it.
#
# `cluster` and `client` are pre-initialised to None so that the error path
# can tell which resources were actually created and release exactly those.
# Without this, a failure after SLURMCluster() succeeded would leave the
# scheduler (and any already submitted SLURM jobs) running.
cluster = None
client = None
try:
    cluster = SLURMCluster(**cluster_kwargs)
    cluster.scale(4)  # request 4 worker jobs (one node each)
    client = Client(cluster)

    # Worker jobs are started asynchronously by SLURM, so the count printed
    # here may still be 0 right after scale(). To block until the workers
    # have joined, call e.g. client.wait_for_workers(4, timeout=300) first.
    print(f"Connected workers: {len(client.scheduler_info()['workers'])}")

except Exception as e:
    print(f"Error connecting to cluster: {e}")
    # Release whatever was created before the failure, then re-raise so the
    # notebook stops here instead of continuing without a cluster.
    if client is not None:
        client.close()
    if cluster is not None:
        cluster.close()
    raise

# Difference-of-Gaussians (DoG) filtering of a Zarr volume on the Dask cluster.
#
# Reads component '0/0' of the input Zarr store, filters it with two Gaussian
# kernels (sigma=3 and sigma=10), subtracts the results and writes the
# difference to a new Zarr store. The client and cluster are always shut down
# afterwards, even if a step fails, so that no SLURM jobs are left running.

# Location of the data.
base_path = "/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling"
data_path = os.path.join(base_path, "cell5_fused_tp_0_ch_0.zarr")
save_path = os.path.join(base_path, 'example_4.zarr')

try:
    # Load the Zarr file with Dask (lazy; nothing is read yet).
    dask_data = da.from_zarr(data_path, component='0/0')
    data_shape = dask_data.shape  # shape before squeezing, kept for reference

    # Eliminate singleton dimensions, and rechunk the data.
    # The 3-tuple chunk spec assumes the squeezed array is 3-D — TODO confirm.
    dask_data = dask_data.squeeze()
    dask_data = dask_data.rechunk((32, 64, 64))

    # ndimage.gaussian_filter returns an array of the input dtype. For integer
    # (in particular unsigned) data the subtraction below would wrap around
    # wherever the broad blur exceeds the narrow one, so filter in floating
    # point. Data that is already floating point is left untouched.
    if not np.issubdtype(dask_data.dtype, np.floating):
        dask_data = dask_data.astype(np.float32)

    # Difference of Gaussian Filtering.
    # depth=40 matches the kernel radius of the widest filter: with SciPy's
    # default truncate=4.0 a sigma=10 kernel extends 40 voxels.
    # NOTE(review): depth (40) exceeds the chunk extent on the first axis
    # (32); depending on the Dask version map_overlap rechunks automatically
    # or raises — confirm, and consider larger chunks to cut overlap overhead.
    # NOTE(review): both results are Gaussian low-pass outputs; the variable
    # names are kept as-is, "high_pass" is the narrow (sigma=3) kernel and
    # "low_pass" the broad (sigma=10) kernel.
    high_pass_filtered = dask_data.map_overlap(
        ndimage.gaussian_filter, sigma=3, order=0, mode="nearest", depth=40)

    low_pass_filtered = dask_data.map_overlap(
        ndimage.gaussian_filter, sigma=10, order=0, mode="nearest", depth=40)

    dog_filtered = da.map_blocks(
        np.subtract, high_pass_filtered, low_pass_filtered)

    # Save the filtered data to a Zarr file (this triggers the computation).
    dog_filtered.to_zarr(save_path, overwrite=True)

    print("Filtering complete and saved to Zarr file.")

finally:
    # Close the client and cluster, on success and on failure alike, so the
    # SLURM allocations are released instead of idling until the walltime.
    client.close()
    cluster.close()
    print("Client and cluster closed.")

In [None]:
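# To view the Dask dashboard (port 8788, see dashboard_address above) from a
# local machine, open an SSH tunnel in a local terminal and then browse to
# http://localhost:8788 :
#
#   ssh -N -L 8788:localhost:8788 your-cluster-login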

        # "dashboard": {
        #     "session_token_expiration": 3600,
        #     "dashboard_address": ":8788",
        #     "host": "10.100.161.251",
        # }