## Make Diagnostic Plots for NA-CORDEX Zarr Stores

In [None]:
import xarray as xr

import shutil 
import os
from functools import reduce
import pprint
import json
from operator import mul
import random

import numpy as np
import pandas as pd


### Use Dask to Speed up Computations

In [None]:
import dask
from ncar_jobqueue import NCARCluster

# Per-job resource request for the Dask workers.
# NOTE(review): in dask-jobqueue, `processes` is documented as the number of
# worker processes each job is split into (not a per-core count), and
# `cores`/`memory` as per-job totals -- confirm NCARCluster follows the same
# semantics.
#
# Configurations tried previously:
#   works fine:
#     NCARCluster(cores=15, processes=1, memory='100GB', project='STDD0003')
#   also works, but occasionally hangs near the end:
#     NCARCluster(cores=10, processes=1, memory='50GB', project='STDD0003')
cluster = NCARCluster(
    cores=20,
    processes=1,
    memory='20GB',
    project='STDD0003',
)

# Ask the scheduler for 20 batch jobs with the resources requested above.
cluster.scale(jobs=20)

from distributed import Client
from distributed.utils import format_bytes

# Attach a client to the cluster; it is kept in `client` for later cells.
client = Client(cluster)

# Leave the cluster object as the last expression so the notebook renders it.
cluster

### Run first over a single Zarr Store.

In [None]:
# For now, make the Zarr output directory a global variable.
dirout = '/glade/scratch/bonnland/na-cordex/zarr'

from pathlib import Path
p = Path(dirout)

# Collect the candidate stores. rglob() yields paths in arbitrary filesystem
# order, so sort them to make "the first store" reproducible between runs.
# To consider every variable instead of only tasmax, use p.rglob("*.zarr").
stores = sorted(p.rglob("tasmax*.zarr"))

# Fail early with a clear message instead of hitting a NameError on `ds` below.
if not stores:
    raise FileNotFoundError(f'No tasmax*.zarr stores found under {dirout}')

# Only a single store is examined here.
store = stores[0]
print(f'Opening {store}...')

# Let any open failure propagate: swallowing it would leave `ds` undefined, or
# (on a re-run) silently bound to a previously opened store while `store`
# names a different one, which would mislabel the plots produced later.
ds = xr.open_zarr(store.as_posix(), consolidated=True)
print('\n')
ds

In [None]:
# Report the length of the member_id coordinate (its first dimension).
print(len(ds['member_id']))

In [None]:
# Names of all data variables in the opened store; one figure is made per name.
data_vars = list(ds.data_vars)

# Values of the member_id coordinate, and how many of them there are.
member_names = ds.coords['member_id'].values
numEnsembleMembers = member_names.shape[0]

# Last expression of the cell, so the notebook displays the member names.
member_names

In [None]:
# Display the opened dataset's summary in the notebook output.
ds

In [None]:
import matplotlib.pyplot as plt

# Generate one figure per data variable (may take a while, since every panel
# triggers a reduction over the full spatial grid).
# Layout: one row per ensemble member, one column per spatial statistic.
stat_names = ('min', 'max', 'mean', 'std')
numPlotRows = numEnsembleMembers
numPlotCols = len(stat_names)

sizeFactor = 3 #2 #4
figWidth = 20 * sizeFactor
figHeight = 30 * sizeFactor

# `linestyle` and `markersize` are only needed for the alternative
# point-marker rendering:
#     ax.plot(ds.time, data_agg, linestyle, markersize=markersize)
linestyle = 'k.'
linewidth = 1 #0.5
markersize = 1

for var in data_vars:

    # squeeze=False keeps `axs` two-dimensional even when the store holds a
    # single ensemble member; otherwise axs[index, col] raises IndexError.
    fig, axs = plt.subplots(numPlotRows, numPlotCols,
                            figsize=(figWidth, figHeight), squeeze=False)

    for index, mem_id in enumerate(member_names):
        data_slice = ds[var].sel(member_id=mem_id)

        for col, stat in enumerate(stat_names):
            # Reduce over the lat/lon dimensions with the named xarray method
            # (min/max/mean/std) and plot the result against ds.time.
            data_agg = getattr(data_slice, stat)(dim=['lat', 'lon'])
            ax = axs[index, col]
            ax.plot(ds.time, data_agg, linewidth=linewidth)
            ax.set_title(f'{stat}({var}), {mem_id}')

    # Use the figure's own methods rather than pyplot's implicit "current
    # figure" so the title and output file always belong to this figure.
    fig.suptitle(str(store), fontsize=50)
    fig.savefig(f'{var}_ts.pdf')



In [68]:
# Disconnect the client first so it is not left attached to a scheduler that
# no longer exists, then shut down the cluster itself.
client.close()
cluster.close()