In [25]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import glob
import pandas as pd

In [26]:
# Setup your PBSCluster
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
# Per-job resources requested for each dask worker
ncores=1
nmem='10GB'

# Keyword arguments handed to PBSCluster, gathered in one place
pbs_kwargs=dict(
    cores=ncores,               # number of cores
    memory=nmem,                # amount of memory
    processes=1,                # number of processes
    queue='casper',             # queue to submit to
    local_directory='$TMPDIR',  # local directory setting
    resource_spec=f'select=1:ncpus={ncores}:mem={nmem}',  # PBS resource request
    project='P93300641',        # project ID
    walltime='01:00:00',        # wall time limit
    #interface='ib0',           # interface to use (disabled)
)
cluster=PBSCluster(**pbs_kwargs)

# Scale the cluster to 10 workers
cluster.scale(10)

# Attach a client to the cluster
client=Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 43779 instead
  f"Port {expected} is already in use.\n"


In [27]:
def get_files(case,keys=None,id='oaat',tape='h0',root='/glade/derecho/scratch/linnia/'):
    """Collect the sorted history-file list of every ensemble member of a case.

    Parameters
    ----------
    case : str
        'HIST' selects directories named
        ``BNF_v2.n01_ctsm5.3.012_transient_<key>``; any other value selects
        ``BNF_v2.n01_ctsm5.3.012_transient_<case>_<key>``.
    keys : sequence of str, optional
        Ensemble keys to look up. When omitted or empty, the keys are
        discovered by globbing ``root`` for entries whose key starts with
        ``id``.
    id : str
        Key prefix, used only when the keys are discovered.
    tape : str
        Tape tag; files matching ``<dir>/run/*.<tape>.*`` are collected.
    root : str
        Directory that holds the case directories (with or without a
        trailing '/').

    Returns
    -------
    files : list of list of str
        One sorted file list per accepted key.
    goodkeys : list
        The accepted keys, in the same order as ``files``.

    Notes
    -----
    The first key that has at least one file sets the expected file count.
    Keys with no files, or with a different count, are printed and skipped,
    so an empty file list is never returned.
    """
    prefix='BNF_v2.n01_ctsm5.3.012_transient_'
    if case!='HIST':
        prefix+=case+'_'
    base=root if root.endswith('/') else root+'/'

    if keys is None or len(keys)==0:
        # The key is the last '_'-separated token of the path, minus any extension
        keys=[d.split('_')[-1].split('.')[0] for d in sorted(glob.glob(base+prefix+id+'*'))]

    files=[]
    goodkeys=[]
    nfiles=0  # expected file count; stays 0 until a key with files is seen
    for k in keys:
        f=sorted(glob.glob(base+prefix+k+'/run/*.'+tape+'.*'))
        if f and not nfiles:
            nfiles=len(f)
        if f and len(f)==nfiles:
            files.append(f)
            goodkeys.append(k)
        else:
            # Missing or incomplete output: report it and leave the key out
            print(case,k,len(f),'files')
    return files,goodkeys
        

In [34]:
# Table of range-test entries; its 'key' and 'param' columns are read in this notebook
df=pd.read_csv('ctsm6oaat_rangetests_key.csv')
# Values of the 'key' column, one per table row
keys=df['key'].values

In [36]:
# h1-tape file lists for the HIST case, plus the keys whose file lists were accepted
files, goodkeys = get_files(
    'HIST',
    keys=keys,
    tape='h1',
)

In [37]:
def pp(ds):
    """Return ``ds`` reduced to the single variable 'TLAI'."""
    wanted=['TLAI']
    return ds[wanted]

In [38]:
# files is a list of per-key file lists: the outer level is concatenated
# along 'ens', the inner level along 'time'; pp keeps only TLAI from each file
ds=xr.open_mfdataset(
    files,
    combine='nested',
    concat_dim=['ens','time'],
    parallel=True,
    preprocess=pp,
)
# Label the 'ens' axis with the keys that get_files accepted
ds['ens']=goodkeys
# Take pfts1d_itype_veg from the first file of the first key
first_file=files[0][0]
tmp=xr.open_dataset(first_file)
ds['pft']=tmp.pfts1d_itype_veg

In [39]:
# Maximum TLAI over time, flagged True where it exceeds 0.1
peak_tlai=ds.TLAI.max(dim='time')
alive=(peak_tlai>0.1).compute() #slow

In [40]:
# pft indices left out of the comparison (less important pfts)
nixes=[0,9,15,16]
# Boolean selector over 17 pft slots; True means the pft is kept
ixpft=np.ones(17,dtype=bool)
ixpft[nixes]=False
# Sum of the alive flags within each pft group
surv=alive.groupby('pft').sum()
# The 'oaat0000' member is the reference the others are divided by
sdef=surv.sel(ens='oaat0000')
# Ratio to the reference, restricted to the kept pfts
ratio=surv/sdef
spft=ratio.isel(pft=ixpft)

In [43]:
# For each threshold, list the ensemble members in which at least one kept
# pft has a survival ratio below the threshold, with the parameter name and
# the '_'-joined list of affected pfts.
for sthresh in [0.5,0.6,0.7,0.75,0.8,0.9]:
    # True for members with one or more pfts below the threshold
    ixdead=(spft<sthresh).sum(dim='pft')>0
    rows=[]
    for ee in ds.ens[ixdead].values:
        ixk=df.key==ee
        param=df.param[ixk].values[0]
        x=spft.sel(ens=ee)
        deadpfts=x.pft[x<sthresh].values
        dstr='_'.join(str(int(p)) for p in deadpfts)
        rows.append(ee+','+param+','+dstr+'\n')
    # Write each file once, in 'w' mode, so re-running the cell replaces the
    # file instead of appending a second header and duplicate rows. As
    # before, no file is produced when no member falls below the threshold.
    if rows:
        with open('skeys/oaat_dead_rangetests'+str(int(100*sthresh))+'percent.txt','w') as f:
            f.write('key,param,dead_pfts\n')
            f.writelines(rows)
    

In [44]:
# NOTE(review): this cell repeats, statement for statement, the survival-ratio
# computation of an earlier cell.
nixes=[0,9,15,16] #less important pfts
# Start with all 17 pft slots selected, then deselect the listed ones
ixpft=np.full(17,True)
ixpft[nixes]=False
# Per-pft sum of the alive flags, and the same for the 'oaat0000' reference member
surv=alive.groupby('pft').sum()
sdef=surv.sel(ens='oaat0000')
# Ratio to the reference for the selected pfts only
spft=(surv/sdef).isel(pft=ixpft)