# The calculation time of histograms with the use of different packages 

###  Loading the necessary packages

In [1]:
from dask_jobqueue import SLURMCluster # pip 
from dask.distributed import Client, progress 
import dask

import re
import matplotlib as mpl
# Define Agg as Backend for matplotlib when no X server is running
mpl.use('Agg')
import socket
import os
import importlib 

import inspect
import timeit

### Function, which reads the status of the user in the slurm queue 

In [None]:
def squeue_user(username = "$USER"):
    _squeue_user = os.system("squeue --user="+str(username))
    return _squeue_user 

### Storing the path to the current repository into the variable

In [2]:
with os.popen("pwd ") as f:
    _pwd = f.readline()
pwd = re.split(r'[\n]', _pwd)[0]

### Setting the slurm job

In [139]:

extra_args=[
    "--error="+str(pwd)+"/slurm/logs/dask-worker-%j.err",
    "--output="+str(pwd)+"/slurm/output/dask-worker-%j.out"
]
cluster = SLURMCluster(
    name='dask-cluster', 
    cores=256,    
    memory="500 GB", 
    project="bb1153",
    queue= "compute", 
    walltime='04:30:50',
)
client = Client(cluster)
print(cluster.job_script())

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p compute
#SBATCH -A bb1153
#SBATCH -n 1
#SBATCH --cpus-per-task=256
#SBATCH --mem=466G
#SBATCH -t 04:30:50

/home/b/b382267/mambaforge/envs/tropical-rainfall/bin/python -m distributed.cli.dask_worker tcp://136.172.124.7:33571 --nthreads 16 --nworkers 16 --memory-limit 29.10GiB --name dummy-name --nanny --death-timeout 60



Perhaps you already have a cluster running?
Hosting the HTTP server on port 34667 instead


### Loading the slurm job to the queue 

In [140]:
cluster.scale(jobs=1)

### Checking out slurm job in the queue 

In [144]:
squeue_user()

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           4509090   compute dask-wor  b382267  R    2:00:10      1 l40250


0

### Canceling the job if it is necessary. Set the job ID manually

In [143]:
Job_ID =  4510698
os.system("scancel " +str(Job_ID)) 

0

### Loading the extra function, which can be used in any diagnostic

In [21]:
while True:
    try:
        importlib.reload(src.shared_func)
        from  src.shared_func import time_interpreter,  animation
        break
    except NameError:
        import src.shared_func
        from  src.shared_func import time_interpreter,  animation
        break

### Setting the path to the diagnostic repository 

In [16]:
import sys
sys.path.append('..')

### Loading the tropical precipitation module

In [64]:
while True:
    try:
        importlib.reload(src.tr_pr_mod)
        from  src.tr_pr_mod import TR_PR_Diagnostic
        break
    except NameError:
        import src.tr_pr_mod
        from  src.tr_pr_mod import TR_PR_Diagnostic
        break

### Setting the class attributes

In [65]:
diag = TR_PR_Diagnostic()

diag.num_of_bins = 15
diag.first_edge = 0
diag.width_of_bin = 2000*10**(-6)/diag.num_of_bins

last_edge = diag.first_edge  + diag.num_of_bins*diag.width_of_bin

### Importing the aqua module 

In [134]:
import aqua

In [50]:
from aqua import Reader
from aqua.reader import catalogue

In [41]:
configdir = '../../../config/'
diagname  = 'tr_pr'
machine   = 'levante'

In [None]:
catalogue()

# Reading the data to test the timing of different packages

### Firstly, reading the data without regriding

In [53]:
reader = Reader(model="ICON", exp="ngc2009", configdir=configdir, source="atm_2d_ml_R02B09")
ICON_2009 = reader.retrieve()

In [54]:
ICON_2009

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.82 TiB 24.00 MiB Shape (37009, 20971520) (6, 1048576) Dask graph 123380 chunks in 2 graph layers Data type float32 numpy.ndarray",20971520  37009,

Unnamed: 0,Array,Chunk
Bytes,2.82 TiB,24.00 MiB
Shape,"(37009, 20971520)","(6, 1048576)"
Dask graph,123380 chunks in 2 graph layers,123380 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [59]:
#import xarray as xr 
#import matplotlib.pyplot as plt
#import numpy as np 

In [56]:
ICON_2009_small = ICON_2009['pr'][0:3,:]
ICON_2009_small = ICON_2009_small.compute()

### Checking the timestep of loaded dataset

In [58]:
time_interpreter(ICON_2009)

time_interpreter(ICON_2009_small) 

'30m'

### Numpy.package (Numpy.digitize + Numpy.count_nonzero)

In [66]:
%timeit -r 2 -n 1 -o diag.hist_np_digitize(ICON_2009_small,  preprocess = False)

6.29 s ± 1.01 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 6.29 s ± 1.01 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [67]:
result = _
#vars(result)
time_np_digitize, time_np_digitize_stdev =  result.average, result.stdev 

In [68]:
hist_digitize = diag.hist_np_digitize(ICON_2009_small,  preprocess = False)

### fast_histogram package (fast_histogram.histogram1d)

In [69]:
%timeit -r 2 -n 1 -o diag.hist1d_fast(ICON_2009_small,  preprocess = False)

235 ms ± 2.25 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 235 ms ± 2.25 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [70]:
result = _
time_fast, time_fast_stdev =  result.average, result.stdev 

In [71]:
hist_fast  = diag.hist1d_fast(ICON_2009_small,  preprocess = False)

### Numpy package (Numpy.histogram)

In [72]:
%timeit -r 2 -n 1 -o diag.hist1d_np(ICON_2009_small,  preprocess = False)

508 ms ± 111 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 508 ms ± 111 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [73]:
result = _
time_np, time_np_stdev =  result.average, result.stdev 

In [74]:
hist_np = diag.hist1d_np(ICON_2009_small,  preprocess = False)

In [75]:
ICON_2009_small_prep = ICON_2009_small.stack(total=['time', 'cell'])

###  matplotlib.pyplot package (matplotlib.pyplot.hist)

In [76]:
%timeit -r 2 -n 1 -o diag.hist1d_pyplot(ICON_2009_small_prep,  preprocess = False)

1.73 s ± 90.3 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 1.73 s ± 90.3 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [77]:
result = _
time_pyplt, time_pyplt_stdev =  result.average, result.stdev 

In [78]:
hist_pyplt = diag.hist1d_pyplot(ICON_2009_small_prep,  preprocess = False)

## Dask packages

In [79]:
import dask.array as da

In [80]:
ICON_2009_small_prep_dask = da.from_array(ICON_2009_small_prep)

### dask_histogram.boost package (dask_histogram.boost.Histogram)

In [81]:
%timeit -r 2 -n 1 -o diag.dask_boost(ICON_2009_small_prep_dask,  preprocess = False)

dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]
dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]
9.29 s ± 4.27 s per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 9.29 s ± 4.27 s per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [82]:
result = _
time_boost, time_boost_stdev =  result.average, result.stdev 

In [83]:
hist_boost = diag.dask_boost(ICON_2009_small_prep_dask,  preprocess = False)

dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]


### dask_histogram package  (dask_histogram.factory)

In [84]:
%timeit -r 2 -n 1 -o diag.dask_factory(ICON_2009_small_prep_dask,  preprocess = False)

1.49 s ± 314 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 1.49 s ± 314 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [85]:
result = _
time_factory, time_factory_stdev =  result.average, result.stdev 

In [86]:
hist_factory = diag.dask_factory(ICON_2009_small_prep_dask,  preprocess = False)

In [None]:
# Check the scaling 

# Loading the regrided data

###  lat + lon coordinates

In [88]:
reader = Reader(model="ICON", exp="ngc2009",  configdir=configdir, source="atm_2d_ml_R02B09", regrid="r200")

ICON_2009_reg = reader.retrieve()

In [89]:
ICON_2009_reg_small = ICON_2009_reg['pr'][0:3,:]
ICON_2009_reg_small = ICON_2009_reg_small.compute()

In [90]:
time_interpreter(ICON_2009_reg_small) 

'30m'

### Numpy.package (Numpy.digitize + Numpy.count_nonzero)

In [91]:
%timeit -r 2 -n 1 -o diag.hist_np_digitize(ICON_2009_reg_small,  preprocess = False)

6.32 s ± 10.6 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 6.32 s ± 10.6 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [92]:
result = _
time_digitize_reg, time_digitize_reg_stdev =  result.average, result.stdev 

In [93]:
hist_digitize_reg = diag.hist_np_digitize(ICON_2009_reg_small,  preprocess = False)

### fast_histogram package (fast_histogram.histogram1d)

In [94]:
%timeit -r 2 -n 1 -o  diag.hist1d_fast(ICON_2009_reg_small,  preprocess = False)

247 ms ± 5.39 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 247 ms ± 5.39 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [95]:
result = _
time_fast_reg, time_fast_reg_stdev =  result.average, result.stdev 

In [96]:
hist_fast_reg = diag.hist1d_fast(ICON_2009_reg_small,  preprocess = False)

### Numpy package (Numpy.histogram)

In [97]:
%timeit -r 2 -n 1 -o  diag.hist1d_np(ICON_2009_reg_small,  preprocess = False)

436 ms ± 22.3 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 436 ms ± 22.3 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [98]:
result = _
time_np_reg, time_np_reg_stdev =  result.average, result.stdev 

In [99]:
hist_np_reg = diag.hist1d_np(ICON_2009_reg_small,  preprocess = False)

In [100]:
ICON_2009_reg_small_prep = ICON_2009_reg_small.stack(total=['time', 'cell'])

###  matplotlib.pyplot package (matplotlib.pyplot.hist)

In [101]:
%timeit -r 2 -n 1 -o  diag.hist1d_pyplot(ICON_2009_reg_small_prep,  preprocess = False)

1.67 s ± 5.75 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 1.67 s ± 5.75 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [102]:
result = _
time_pyplt_reg, time_pyplt_reg_stdev =  result.average, result.stdev 

In [103]:
hist_pyplt_reg = diag.hist1d_pyplot(ICON_2009_reg_small_prep,  preprocess = False)

In [104]:
ICON_2009_reg_small_prep_dask = da.from_array(ICON_2009_reg_small_prep)

### dask_histogram package  (dask_histogram.factory)

In [105]:
%timeit -r 2 -n 1 -o  diag.dask_boost(ICON_2009_reg_small_prep_dask,  preprocess = False)

dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]
dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]
4.33 s ± 190 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 4.33 s ± 190 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [106]:
result = _
time_boost_reg, time_boost_reg_stdev =  result.average, result.stdev 

In [107]:
hist_boost_reg = diag.dask_boost(ICON_2009_reg_small_prep_dask,  preprocess = False)

dask.array<to-dask-array, shape=(15,), dtype=float64, chunksize=(15,), chunktype=numpy.ndarray> [dask.array<array, shape=(16,), dtype=float64, chunksize=(16,), chunktype=numpy.ndarray>]


### dask_histogram.boost package (dask_histogram.boost.Histogram)

In [108]:
%timeit -r 2 -n 1 -o  diag.dask_factory(ICON_2009_reg_small_prep_dask,  preprocess = False)

1.35 s ± 123 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


<TimeitResult : 1.35 s ± 123 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)>

In [109]:
result = _
time_factory_reg, time_factory_reg_stdev =  result.average, result.stdev 

In [110]:
hist_factory_reg = diag.dask_factory(ICON_2009_reg_small_prep_dask,  preprocess = False)

In [113]:
time_set1 = [ time_boost_reg, time_factory_reg, time_fast_reg,  time_np_reg, time_pyplt_reg,  time_digitize_reg]

time_set2 = [ time_boost, time_factory, time_fast,  time_np, time_pyplt,  time_np_digitize ]

fig = plt.figure(figsize=(12,6))

barWidth = 0.2
br1 = np.arange(len(time_set1))
br2 = [x + barWidth for x in br1]

plt.bar(br1, time_set1, color ='tab:red',  width = barWidth,
        edgecolor ='grey', label = "ICON, regrided") #'aqua, 3.10.9')

plt.bar(br2, time_set2, color ='tab:green',  width = barWidth,
        edgecolor ='grey', label = "ICON") #'aqua, 3.10.9')
#plt.bar(br1, time_set1_3_9_16, color ='red', alpha =0.6, width = barWidth, bottom = time_set1_3_8_16,
#        edgecolor ='grey', label ='CMIP, 3.9.16')
 
# Adding Xticks
plt.xlabel('Methods', fontweight ='bold', fontsize = 15)
plt.ylabel('Calculation time per element', fontweight ='bold', fontsize = 15)
plt.xticks([r + barWidth for r in range(len(time_set1))],
        ['boost', 'factory', 'fast', 'np', 'pyplt', 'digitize'], fontsize=14)  #, 'left'
plt.xticks(fontsize=14)         

plt.title('Python=3.10.10', fontsize=15)

plt.legend(fontsize=14)


plt.savefig("./figures/Calculation_time_rep2.png",
            bbox_inches ="tight",
            pad_inches = 1,
            transparent = True,
            facecolor ="w",
            edgecolor ='w',
            orientation ='landscape')


In [131]:
hist_digitize_reg



(<xarray.DataArray (bin: 15)>
 array([62657269,   119221,    43543,    21621,    13658,     9689,
            7086,     5303,     4263,     3534,     2761,     2251,
            2086,     1822,     1621])
 Coordinates:
   * bin      (bin) float64 0.0 0.0001333 0.0002667 ... 0.0016 0.001733 0.001867,
 62914560,
 6.358278512954712)

In [132]:
hist_fast

In [116]:
""" """
def check_equality():
    result = _
    if np.count_nonzero(result == True) == result.size:
        return 0
    else:
        return 1

In [122]:
hist_fast_reg == hist_fast


In [123]:
check_equality()

0

In [124]:
hist_np_reg == hist_np

In [125]:
check_equality()

0

In [130]:

fig = plt.figure()
ax = fig.add_subplot(111)

_pdf = True

diag.hist_plot(hist_fast_reg,           pdf=_pdf,   color = 'tab:red',     label='fast')
diag.hist_plot(hist_np_reg,             pdf=_pdf,   color = 'tab:green',   label='np')
diag.hist_plot(hist_pyplt_reg,          pdf=_pdf,   color = 'tab:blue',    label='pyplt')
#diag.hist_plot(hist_digitize_reg,       pdf=_pdf,   color = 'tab:blue',    label='pyplt')

diag.hist_plot(hist_fast,           pdf=_pdf,   color = 'tab:red',     label='fast')
diag.hist_plot(hist_np,             pdf=_pdf,   color = 'tab:green',   label='np')
diag.hist_plot(hist_pyplt,          pdf=_pdf,   color = 'tab:blue',    label='pyplt')
#diag.hist_plot(hist_digitize,       pdf=_pdf,   color = 'tab:blue',    label='pyplt')



ax.grid()

plt.savefig("./figures/pdf_hists_reg.png",
            bbox_inches ="tight",
            pad_inches = 1,
            transparent = True,
            facecolor ="w",
            edgecolor ='w',
            orientation ='landscape')

hist_plot function in the process 

hist_plot function in the process 

hist_plot function in the process 

hist_plot function in the process 

hist_plot function in the process 

hist_plot function in the process 

