# Demonstration of the tropical precipitation module on different datasets

###  Loading the necessary packages

In [1]:
from dask_jobqueue import SLURMCluster # pip 
from dask.distributed import Client, progress 
import dask

import re
import matplotlib as mpl
# Define Agg as Backend for matplotlib when no X server is running
mpl.use('Agg')
import socket
import os
import importlib

import inspect
import timeit
import sys

### Function that shows the user's jobs in the SLURM queue

In [None]:
#!pip install aqua

In [2]:
def squeue_user(username = "$USER"):
    """Show the SLURM queue entries of one user.

    Runs ``squeue --user=<username>`` through ``os.system``, so the queue
    table is written to the standard output of the process rather than
    returned.

    Parameters
    ----------
    username : str, optional
        Value placed after ``--user=``. The default ``"$USER"`` is handed to
        the shell unchanged, which expands it to the current user.

    Returns
    -------
    int
        The value returned by ``os.system`` for the ``squeue`` command.
    """
    return os.system(f"squeue --user={username!s}")

### Storing the path of the current working directory in a variable

In [3]:
# Absolute path of the directory the kernel is currently running in. Later
# cells use it to build the SLURM log/output paths and to extend ``sys.path``.
# ``os.getcwd()`` returns the path directly, so no shell has to be spawned
# and no trailing newline has to be stripped.
# NOTE(review): unlike the shell's ``pwd``, ``os.getcwd()`` may report the
# physical path when the directory was entered through a symlink.
pwd = os.getcwd()

### Setting the slurm job

In [4]:
# Directories that receive the stderr / stdout files of the dask workers.
slurm_log_dir = str(pwd)+"/slurm/logs"
slurm_out_dir = str(pwd)+"/slurm/output"

# SLURM writes to the --error/--output paths but does not create missing
# parent directories, so make sure they exist before any job is submitted.
for _slurm_dir in (slurm_log_dir, slurm_out_dir):
    os.makedirs(_slurm_dir, exist_ok=True)

# Extra ``#SBATCH`` directives appended to the generated job script;
# ``%j`` is replaced by SLURM with the job id.
extra_args=[
    "--error="+slurm_log_dir+"/dask-worker-%j.err",
    "--output="+slurm_out_dir+"/dask-worker-%j.out"
]

# Description of one SLURM job hosting dask workers. Nothing is submitted
# here; jobs are only requested once ``cluster.scale(...)`` is called.
# NOTE(review): newer dask-jobqueue releases rename ``project`` to ``account``
# and ``job_extra`` to ``job_extra_directives``; switch the keywords when the
# environment is upgraded.
cluster = SLURMCluster(
    name='dask-cluster',
    cores=16,                # emitted as --cpus-per-task in the job script
    memory="100 GB",         # total memory requested per job
    project="bb1153",        # emitted as the -A (account) directive
    queue= "compute",        # emitted as the -p (partition) directive
    walltime='04:30:50',     # emitted as the -t directive
    job_extra=extra_args,
)

# Connect this notebook session to the cluster's scheduler.
client = Client(cluster)

# Show the batch script that will be submitted for every job.
print(cluster.job_script())



#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p compute
#SBATCH -A bb1153
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem=94G
#SBATCH -t 04:30:50
#SBATCH --error=/work/bb1153/b382267/AQUA/diagnostics/tropical_rainfall/notebooks/slurm/logs/dask-worker-%j.err
#SBATCH --output=/work/bb1153/b382267/AQUA/diagnostics/tropical_rainfall/notebooks/slurm/output/dask-worker-%j.out

/home/b/b382267/mambaforge/envs/tropical-rainfall/bin/python -m distributed.cli.dask_worker tcp://136.172.124.7:39317 --nthreads 4 --nworkers 4 --memory-limit 23.28GiB --name dummy-name --nanny --death-timeout 60



Perhaps you already have a cluster running?
Hosting the HTTP server on port 41111 instead


### Submitting the SLURM job to the queue

In [5]:
cluster.scale(jobs=1)

### Checking our slurm job in the queue 

In [18]:
squeue_user()

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           4722053   compute dask-wor  b382267  R       1:45      1 l20500


0

### Canceling the job if necessary (set the job ID manually)

In [7]:
# Id of the SLURM job to cancel. Replace it by hand with the JOBID printed
# by ``squeue_user()`` before running this cell.
Job_ID = 4721900

# Hand the id to ``scancel``; the cell displays the value returned by
# ``os.system`` for that command.
os.system(f"scancel {Job_ID!s}")

0

### Setting the path to the diagnostic repository 

In [8]:
# Put the parent of the current working directory on the import path so the
# ``src`` package imported below can be found (presumably it lives one level
# above this notebook folder — confirm against the repository layout).
sys.path.append(str(pwd)+'/../')
import src.shared_func
import src.tr_pr_mod

### Loading the shared helper functions, which can be used in any diagnostic

In [150]:
# Re-import the shared helpers so that edits to ``src/shared_func.py`` are
# picked up without restarting the kernel.
try:
    importlib.reload(src.shared_func)
except (NameError, AttributeError):
    # ``src`` is not bound yet (NameError) or has no ``shared_func`` attribute
    # yet (AttributeError): fall back to a plain first import.
    # Both types must be listed as a tuple; ``NameError and AttributeError``
    # is a boolean expression that evaluates to ``AttributeError`` alone.
    import src.shared_func

# Bind the helper functions used by the cells below to the (re)loaded module.
from src.shared_func import (
    time_interpreter,
    animation_creator,
    image_creator,
    xarray_attribute_update,
    data_size,
)

### Loading the tropical precipitation module

In [149]:
# Re-import the tropical precipitation module so that edits to
# ``src/tr_pr_mod.py`` are picked up without restarting the kernel.
try:
    importlib.reload(src.tr_pr_mod)
except (NameError, AttributeError):
    # ``src`` is not bound yet (NameError) or has no ``tr_pr_mod`` attribute
    # yet (AttributeError): fall back to a plain first import.
    # Both types must be listed as a tuple; ``NameError and AttributeError``
    # is a boolean expression that evaluates to ``AttributeError`` alone.
    import src.tr_pr_mod

# Bind the diagnostic class to the (re)loaded module.
from src.tr_pr_mod import TR_PR_Diagnostic

In [11]:
# Relative path of the AQUA configuration directory; passed as ``configdir``
# to ``catalogue`` and ``Reader`` in the cells below.
configdir = '../../../config/'
# NOTE(review): ``diagname`` and ``machine`` are not referenced by the cells
# visible here — presumably consumed further down; confirm or remove.
diagname  = 'tr_pr'
machine   = 'levante'

### Setting the class attributes

In [151]:
# Create the diagnostic object without arguments and configure its histogram
# binning through attributes.
diag = TR_PR_Diagnostic()

# 15 equal-width bins starting at 0; the width is chosen so that all bins
# together span 1e-4 (presumably in the units of the precipitation data —
# TODO confirm).
diag.num_of_bins = 15
diag.first_edge = 0
diag.width_of_bin = 1*10**(-4)/diag.num_of_bins

# Upper edge of the last bin: first edge plus the total width of all bins.
last_edge = diag.first_edge  + diag.num_of_bins*diag.width_of_bin

### Importing the aqua module 

In [13]:
import aqua

In [14]:
from aqua import Reader
from aqua.reader import catalogue

In [15]:
catalogue(configdir=configdir)

IFS	tco3999-ng5	2.8km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
	- interpolated_sp_ci	
	- lra-r100-day	
	- lra-r100-mon	
IFS	tco2559-ng5	4km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
	- lra-r100-day	
	- lra-r100-mon	
IFS	tco1279-orca025	9km baseline, coupled to NEMO, deep conv ON
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- lra-r100-day	
	- lra-r100-mon	
IFS	test-tco2559	4km experiment, coupled with FESOM
	- ICMGG_atm2d	2d output
	- ICMU_atm3d	3d output

FESOM	tco3999-ng5	2.5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- np	nearest-neighbor interpolation to lat-lon grid
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	original 2d output
	- original_3d	original 3d output
FESOM	tco2559-ng

levante:
  args:
    path: ../../../config//levante/catalog.yaml
  description: ''
  driver: intake.catalog.local.YAMLFileCatalog
  metadata: {}


# ERA5 data

In [16]:
# Open the ERA5 "monthly_acc" source from the catalogue found under
# ``configdir`` and retrieve it with ``fix=False``.
# NOTE(review): ``fix=False`` presumably returns the data without AQUA's
# variable/unit fixes — confirm against the Reader documentation.
reader = Reader(model="ERA5", exp="era5", source="monthly_acc",configdir=configdir)
ERA5_data = reader.retrieve(fix=False)

In [None]:
ERA5_data

### Size of the entire dataset

In [20]:
data_size(ERA5_data)

279713280

### Time step of the dataset

In [None]:
time_interpreter(ERA5_data)

### Selecting a small part of the dataset for tests

In [58]:
# Keep only the 'tp' variable and positions 10..49 along its first dimension
# (presumably time — TODO confirm); the ``:`` keeps the next dimension whole.
ERA5_data_small = ERA5_data['tp'][10:50,:]

### Size of the selected chunk

In [59]:
data_size(ERA5_data_small)

21683200

### Attribute update

In [None]:
#ERA5_data_small = xarray_attribute_update(ERA5_data_small, ERA5_data)

### Load the selected chunk into memory

In [60]:
ERA5_data_small = ERA5_data_small.compute()

In [61]:
ERA5_data_small

### Units of precipitation rate

In [50]:
ERA5_data_small.attrs['units']

'm'

In [95]:
time_interpreter(ERA5_data_small) 

'M'

In [63]:
ERA5_data_small

### Name of coordinates

In [64]:
diag.coordinate_names(ERA5_data_small)

('latitude', 'longitude')

### Fastest histogram

In [65]:
hist_fast_ERA5  = diag.hist1d_fast(ERA5_data_small,  preprocess = False)
hist_fast_ERA5

### Plot of obtained histogram 

In [66]:
diag.hist_plot(hist_fast_ERA5, pdf = True, smooth = False, label='ERA5', plot_title = "ERA5, precipitation rate for one timestep")

## Plot of mean and median value of precipitation rate

In [152]:
diag.mean_and_median_plot(ERA5_data_small, savelabel = 'ERA5')

#  ERA5 retrieved 

In [68]:
# Same ERA5 "monthly_acc" source as in the previous section, but retrieved
# with the default options of ``retrieve()`` (no ``fix=False``).
# NOTE: this rebinds ``reader`` and ``ERA5_data``; earlier cells that are
# re-run after this one will operate on this dataset instead.
reader = Reader(model="ERA5", exp="era5", source="monthly_acc",configdir=configdir)
ERA5_data = reader.retrieve()

In [71]:
ERA5_data

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 95 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 95 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 95 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 95 graph layers,1 chunks in 95 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
