## Make Diagnostic Plots for NA-CORDEX Zarr Stores

In [None]:
import xarray as xr

#import shutil 
#import os
#from functools import reduce
import pprint
import json
#from operator import mul
#import random

#import numpy as np
#import pandas as pd


### Use Dask to Speed up Computations

In [None]:
import dask
from distributed import Client
from distributed.utils import format_bytes
from ncar_jobqueue import NCARCluster

# Configurations tried previously (kept for reference):
#   cores=15, processes=1, memory='100GB', project='STDD0003'  -> works fine.
#   cores=10, processes=1, memory='50GB',  project='STDD0003'  -> also works, but
#                                                                 occasionally hangs near the end.
#
# NOTE(review): num_jobs is used both as the `cores` value of each job and as the
# number of jobs requested from the scheduler -- confirm that coupling is intended.
# NOTE(review): the original remark here said "processes is processes PER CORE";
# dask-jobqueue appears to document `processes` as the number of worker processes
# each job is split into -- confirm against the NCARCluster backend in use.
num_jobs = 30
cluster = NCARCluster(
    cores=num_jobs,
    processes=1,
    memory='10GB',
    project='STDD0003',
)
cluster.scale(jobs=num_jobs)

# Attach a client so subsequent computations are sent to this cluster.
client = Client(cluster)

# Keep the cluster as the final expression so the notebook displays it.
cluster

### Run first over a single Zarr Store.

In [None]:
# For now, make the Zarr output directory a global variable.
dirout = '/glade/scratch/bonnland/na-cordex/zarr'

from pathlib import Path
p = Path(dirout)

# Only stores whose name starts with "tasmax" are considered here; use the
# pattern "*.zarr" instead to search every store under dirout.
stores = list(p.rglob("tasmax*.zarr"))

# Fail fast with a clear message instead of leaving `store`/`ds` undefined
# (or silently reusing values from an earlier run of this cell).
if not stores:
    raise FileNotFoundError(f'No Zarr stores matching "tasmax*.zarr" found under {dirout}')

# This notebook works on a single store: the first match.
store = stores[0]
print(f'Opening {store}...')

# Let any error from opening the store propagate; every later cell needs `ds`,
# so continuing after a failed open would only produce a confusing NameError
# or plots of stale data.
ds = xr.open_zarr(store.as_posix(), consolidated=True)
print('\n')

# Display the dataset summary as the cell output.
ds

### Gather Variable Names from the Zarr Store

In [None]:
# Ensemble member labels, taken from the 'member_id' coordinate of the opened store.
member_names = ds.coords['member_id'].values
numEnsembleMembers = member_names.size

# Names of all data variables in the store.
data_vars = list(ds.data_vars)

### Create Time Series Plots from Zarr Store

In [None]:
import matplotlib.pyplot as plt

# Generate one PDF of time series per data variable: one row per ensemble
# member, one column per spatial statistic (reduced over 'lat' and 'lon').
#
# With 30 workers, expect 1 minute walltime for computation and 1-2 minutes for plot rendering.
#

# Each name is the DataArray reduction method used for that column; it is also
# used to build the column title.
statNames = ('min', 'max', 'mean', 'std')

numPlotRows = numEnsembleMembers
numPlotCols = len(statNames)

figWidth = 200 #100
figHeight = 80

linestyle = 'k.'
linewidth = 0.5
markersize = 0.5

for var in data_vars:

    # squeeze=False keeps `axs` two-dimensional even when the store holds a
    # single ensemble member, so axs[row, col] indexing always works.
    fig, axs = plt.subplots(numPlotRows, numPlotCols, figsize=(figWidth, figHeight),
                            sharey='col', squeeze=False)

    for index, mem_id in enumerate(member_names):
        data_slice = ds[var].sel(member_id=mem_id)

        for col, stat in enumerate(statNames):
            # Evaluate the lazy reduction once; the result is drawn twice below
            # (as a line and as point markers), which would otherwise trigger
            # the same computation twice.
            data_agg = getattr(data_slice, stat)(dim=['lat', 'lon']).compute()

            ax = axs[index, col]
            ax.plot(ds.time, data_agg, linewidth=linewidth)
            ax.plot(ds.time, data_agg, linestyle, markersize=markersize)
            ax.set_ylabel(mem_id, fontsize=15)

    # Column titles go on the top row only.
    for col, stat in enumerate(statNames):
        axs[0, col].set_title(f'{stat}({var})', fontsize=40)

    plt.suptitle(store, fontsize=50)
    plt.tight_layout(pad=20.2, w_pad=5.5, h_pad=5.5)
    plt.savefig(f'{var}_ts.pdf')



### Create Spatial Plots from Zarr Store

In [None]:
import math

import matplotlib.pyplot as plt

# Generate one PDF of maps per data variable: one row per ensemble member,
# one column per statistic reduced over 'time'.
#
# With 30 workers, expect 1 minute walltime for computation and 1-2 minutes for plot rendering.
#

# Each name is the DataArray reduction method used for that column; it is also
# used to build the column title.
statNames = ('min', 'max', 'mean', 'std')

numPlotRows = numEnsembleMembers
numPlotCols = len(statNames)

figWidth = 17
figHeight = 35


for var in data_vars:

    # squeeze=False keeps `axs` two-dimensional even when the store holds a
    # single ensemble member, so axs[row, col] and axs[:, col] always work.
    fig, axs = plt.subplots(numPlotRows, numPlotCols, figsize=(figWidth, figHeight),
                            constrained_layout=True, squeeze=False)

    # One representative image per column, used to build that column's colorbar.
    columnImages = []

    for col, stat in enumerate(statNames):
        # Reduce over time for all members together and evaluate once, so the
        # colour limits for the whole column are known before drawing.
        data_agg = getattr(ds[var], stat)(dim='time').compute()

        # Use the same colour limits for every panel in the column; otherwise
        # each panel autoscales on its own and the single colorbar under the
        # column would describe only the last member drawn.
        vmin = float(data_agg.min())
        vmax = float(data_agg.max())
        if not (math.isfinite(vmin) and math.isfinite(vmax)):
            # No usable range (e.g. all values missing): fall back to autoscaling.
            vmin = vmax = None

        for index, mem_id in enumerate(member_names):
            image = axs[index, col].imshow(data_agg.sel(member_id=mem_id), origin='lower',
                                           vmin=vmin, vmax=vmax)
            axs[index, col].set_ylabel(mem_id, fontsize=8)

        columnImages.append(image)
        axs[0, col].set_title(f'{stat}({var})', fontsize=15)

    # Add the colorbars after all panels exist, one per column.
    for col, image in enumerate(columnImages):
        plt.colorbar(image, ax=axs[:, col], location='bottom', shrink=0.7)

    plt.suptitle(store, fontsize=20)
    plt.savefig(f'{var}_maps.pdf')


### Release the workers.

In [None]:
# Disconnect the client first so it is not left attached to a cluster that has
# already been shut down, then release the cluster and its workers.
client.close()
cluster.close()