In [1]:
import glob
import re

# `plt` must be the pyplot interface: the top-level `matplotlib` package does not
# expose plot/subplots/figure.
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import xarray as xr
import intake
import intake_esm
import pandas as pd

In [4]:
from dask_jobqueue import PBSCluster
from distributed import Client

In [10]:
######## File paths ################
# Scratch area; the Dask cluster's spill and log directories are created under it.
lustre_scratch = "/lustre/desc1/scratch/harshah"
# Root of the GDEX data tree. The trailing slash matters: the paths below are built
# by plain string concatenation.
gdex_data = "/gdex/data/"
# gdex_url      = 'https://data.gdex.ucar.edu/'
# Directory of profiler wind files, derived from gdex_data so that changing the root
# moves both datasets together. It ends with a slash because file names and glob
# patterns are appended to it directly.
eol_data = (
    gdex_data
    + "special_projects/pythia_2025/eol-cookbook/m2hats_iss2_data/prof449Mhz_30min_winds/"
)
#########
# JSON file describing the ERA5 intake catalog.
era5_catalog = (
    gdex_data
    + "special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json"
)
print(era5_catalog)

/gdex/data/special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json


In [7]:
# Template for the PBS job behind each Dask worker: a single process on one core
# with 4 GiB, submitted to the 'casper' queue with a three-hour walltime.
# Worker spill files and job logs are written under lustre_scratch.
cluster = PBSCluster(
    job_name="dask-eol-25",
    queue="casper",
    walltime="3:00:00",
    resource_spec="select=1:ncpus=1:mem=4GB",
    cores=1,
    processes=1,
    memory="4GiB",
    local_directory=f"{lustre_scratch}/dask/spill",
    log_directory=f"{lustre_scratch}/dask/logs/",
    # Alternative network interface, currently disabled: interface = 'ib0'
    interface="ext",
)

In [8]:
# Attach a distributed Client to the `cluster` object so work in this session is
# sent to that cluster's workers.
client = Client(cluster)

In [9]:
# Ask for the workers, block until every one of them has connected, then end the
# cell with the cluster object so its widget (including the dashboard URL) is shown.
n_workers = 5
cluster.scale(n=n_workers)
client.wait_for_workers(n_workers)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 5
Total threads: 5,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.96:45699,Workers: 5
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 5
Started: Just now,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.177:40997,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/38523/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.177:34873,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-0km9n3c0,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-0km9n3c0
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 127.75 MiB,Spilled bytes: 0 B
Read bytes: 757.18 MiB,Write bytes: 673.36 kiB

0,1
Comm: tcp://128.117.208.177:37475,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/39119/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.177:35427,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-e9msd8ot,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-e9msd8ot
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 129.27 MiB,Spilled bytes: 0 B
Read bytes: 751.29 MiB,Write bytes: 671.16 kiB

0,1
Comm: tcp://128.117.208.174:37959,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/35219/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:46061,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-8327tg3e,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-8327tg3e
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 128.22 MiB,Spilled bytes: 0 B
Read bytes: 644.34 MiB,Write bytes: 514.94 kiB

0,1
Comm: tcp://128.117.208.177:45159,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/45171/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.177:45339,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-n9otjjle,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-n9otjjle
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 127.90 MiB,Spilled bytes: 0 B
Read bytes: 784.85 MiB,Write bytes: 681.06 kiB

0,1
Comm: tcp://128.117.208.175:44873,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33453/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:37879,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-s5e1abet,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-s5e1abet
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.50 MiB,Spilled bytes: 0 B
Read bytes: 18.75 MiB,Write bytes: 4.37 MiB


### Load 449 MHz profiler wind data

In [25]:
%%time
prof449_wind = xr.open_mfdataset(eol_data + '*.nc',concat_dim = 'time',combine='nested')

ValueError: cannot reindex or align along dimension 'height' because of conflicting dimension sizes: {97, 61, 46, 55}

In [29]:
# Open one file by itself (prof449.20230927.winds.30.nc) and display the resulting
# dataset so its dimensions and variables can be inspected.
prof449Mhz_wind_test = xr.open_dataset(f"{eol_data}prof449.20230927.winds.30.nc")
prof449Mhz_wind_test

### Load ERA5 data

In [18]:
%%time
# Open the ERA5 catalog from the JSON path held in `era5_catalog`.
# (Comments must stay below %%time: a cell magic has to be the first line of the cell.)
era5_cat = intake.open_esm_datastore(era5_catalog)
# Ending the cell with the catalog object displays its summary.
era5_cat

  df = pd.read_csv(


CPU times: user 1.86 s, sys: 235 ms, total: 2.1 s
Wall time: 2.22 s


Unnamed: 0,unique
era_id,1
datatype,2
level_type,1
step_type,7
table_code,4
param_code,164
variable,212
long_name,212
units,33
year,85


In [23]:
# List the distinct values in the 'variable' column of the catalog's DataFrame.
era5_cat.df['variable'].unique()

array(['PV', 'CRWC', 'CSWC', 'Z', 'T', 'U', 'V', 'Q', 'W', 'VO', 'D', 'R',
       'O3', 'CLWC', 'CIWC', 'CC', 'ALUVP', 'ALUVD', 'ALNIP', 'ALNID',
       'CI', 'ASN', 'RSN', 'SSTK', 'ISTL1', 'ISTL2', 'ISTL3', 'ISTL4',
       'SWVL1', 'SWVL2', 'SWVL3', 'SWVL4', 'CAPE', 'LAILV', 'LAIHV',
       'TCLW', 'TCIW', 'SP', 'TCW', 'TCWV', 'STL1', 'SD', 'CHNK', 'MSL',
       'BLH', 'TCC', 'VAR_10U', 'VAR_10V', 'VAR_2T', 'VAR_2D', 'STL2',
       'STL3', 'LCC', 'MCC', 'HCC', 'SRC', 'TCO3', 'IEWS', 'INSS', 'ISHF',
       'IE', 'SKT', 'STL4', 'TSN', 'FAL', 'FSR', 'FLSR', 'LBLT', 'LTLT',
       'LSHF', 'LICT', 'LICD', 'TCRW', 'TCSW', 'U10N', 'V10N', 'VAR_100U',
       'VAR_100V', 'LMLT', 'LMLD', 'VIMA', 'VIT', 'VIKE', 'VITHE',
       'VIPIE', 'VIPILE', 'VITOE', 'VIEC', 'VIMAE', 'VIMAN', 'VIKEE',
       'VIKEN', 'VITHEE', 'VITHEN', 'VIWVE', 'VIWVN', 'VIGE', 'VIGN',
       'VITOEE', 'VITOEN', 'VIOZE', 'VIOZN', 'VILWD', 'VIIWD', 'VIMAD',
       'VIKED', 'VITHED', 'VIWVD', 'VIGD', 'VITOED', 'VIOZD', 'VILWE

In [None]:
# Narrow the ERA5 catalog to entries whose variable is 'VAR_2T', whose frequency is
# 'hourly' and whose year is 2000, then display the resulting sub-catalog.
# NOTE(review): 'frequency' is not among the columns visible in the catalog summary
# output — confirm the column exists before relying on this filter.
temp_cat = era5_cat.search(
    variable="VAR_2T",
    frequency="hourly",
    year=2000,
)
temp_cat