In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import glob

In [3]:
import pandas as pd

In [26]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client
def get_cluster(workers=30,project = 'P93300641',memory='10GB',walltime='01:00:00'):
    """Start a PBS-backed dask cluster and return a client connected to it.

    Each PBS job is configured with a single core and a single process
    (``cores=1, processes=1``) on the ``casper`` queue.

    Parameters
    ----------
    workers : int
        Number of workers requested through ``cluster.scale``.
    project : str
        Account string passed to PBS as ``account``.
    memory : str
        Memory per job. The same value is used for the dask ``memory``
        setting and for the ``mem=`` field of the PBS ``resource_spec`` so
        the two cannot drift apart.
    walltime : str
        Wall-clock limit of each job, in ``HH:MM:SS`` form.

    Returns
    -------
    dask.distributed.Client
        Client attached to the newly created cluster.
    """
    cluster = PBSCluster(
        cores=1, # The number of cores you want
        memory=memory, # Amount of memory
        processes=1, # How many processes
        queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
        local_directory='$TMPDIR', # Use your local directory
        resource_spec='select=1:ncpus=1:mem='+memory, # Specify resources
        account=project, # Input your project ID here
        walltime=walltime, # Amount of wall time
    )

    # Scale up
    cluster.scale(workers)

    # Setup your client
    client = Client(cluster)
    return client

In [17]:
def get_files(case,keys=None,id='oaat',tape='h0'):
    """Collect the sorted output-file lists for a set of ensemble keys.

    Parameters
    ----------
    case : str
        Which experiment to look up. ``'postSASU'`` reads from the postsasu
        post-processing directory; any other value reads from the transient
        run directories, with ``'HIST'`` using no case infix in the directory
        name.
    keys : sequence of str, optional
        Ensemble keys to look up. When omitted or empty, the keys are
        discovered by globbing the case directory and taking the part of each
        match after the last ``'_'`` and before the first ``'.'``.
    id : str
        Key prefix used only when discovering keys for a non-postSASU case.
    tape : str
        History tape tag (e.g. ``'h0'``) matched in the file names; not used
        for ``'postSASU'``.

    Returns
    -------
    files : list of list of str
        One sorted file list per accepted key.
    goodkeys : list
        The accepted keys, in the same order as ``files``.

    Notes
    -----
    The first key that has at least one file sets the expected file count.
    Keys with no files, or with a different count, are printed and skipped.
    """
    if case=='postSASU':
        s='/glade/derecho/scratch/djk2120/postp/ppe/ctsm5.3.0/postsasu/'
    else:
        s='/glade/derecho/scratch/linnia/'

    # Directory-name prefix shared by key discovery and the per-key lookup.
    if case=='HIST':
        prefix='ctsm5.3.0_transient_'
    else:
        prefix='ctsm5.3.0_transient_'+case+'_'

    if keys is None or len(keys)==0:
        if case=='postSASU':
            c='*'
        else:
            c=prefix+id+'*'
        keys=[d.split('_')[-1].split('.')[0] for d in sorted(glob.glob(s+c))]

    # File pattern appended to each key's directory name.
    if case=='postSASU':
        m='*'
    else:
        m='/run/*.'+tape+'.*'

    files=[]
    nfiles=0
    goodkeys=[]
    for k in keys:
        f=sorted(glob.glob(s+prefix+k+m))
        # Only a non-empty list may set the reference count; otherwise a
        # leading key with no files would be accepted as "complete".
        if f and not nfiles:
            nfiles=len(f)
        if f and len(f)==nfiles:
            files.append(f)
            goodkeys.append(k)
        else:
            print(case,k,len(f),'files')
    return files,goodkeys
        
        

In [27]:
# Launch the PBS dask workers with get_cluster's defaults and keep the client handle.
client=get_cluster()

In [37]:
# Key table: this notebook reads its 'key', 'param' and 'minmax' columns.
df = pd.read_csv('mkey.csv')
# Plain array of the ensemble keys, in table order.
keys = df['key'].values

In [38]:
# Gather the h1-tape file lists for the HIST case, one list per key from mkey.csv.
# Keys whose file count does not match are printed and left out of goodkeys.
files,goodkeys=get_files('HIST',keys=keys,tape='h1')

HIST oaat0081 0 files
HIST oaat0087 0 files


In [39]:
def pp(ds):
    """Preprocessing hook: reduce ``ds`` to just its ``TLAI`` variable."""
    wanted = ['TLAI']
    return ds[wanted]

In [40]:
# Combine the nested file lists into one dataset: the outer list is
# concatenated along 'ens', each inner list along 'time'. pp is applied to
# every file before combining.
ds = xr.open_mfdataset(
    files,
    combine='nested',
    concat_dim=['ens', 'time'],
    parallel=True,
    preprocess=pp,
)
# Label the ensemble axis with the keys whose file lists were accepted.
ds['ens'] = goodkeys
# pp drops everything except TLAI, so take the vegetation-type variable
# from the first raw file instead.
tmp = xr.open_dataset(files[0][0])
ds['pft'] = tmp['pfts1d_itype_veg']

In [56]:
# True wherever the maximum TLAI over the time dimension exceeds 0.1.
# .compute() evaluates the lazily loaded data now so later cells reuse the result.
alive=(ds.TLAI.max(dim='time')>0.1).compute()

In [91]:
# Positions to leave out of the 17-entry selection mask; every other
# position stays selected.
nixes = [0, 9, 15, 16]
ixpft = np.ones(17, dtype=bool)
ixpft[nixes] = False

In [82]:
# Number of alive entries per pft value (summing booleans counts the True ones).
surv = alive.groupby('pft').sum()
# Counts for the member used as the reference, 'oaat0102'.
sdef = surv.sel({'ens': 'oaat0102'})
# Survivor count relative to the reference, restricted to the pfts kept by ixpft.
spft = (surv / sdef).isel({'pft': ixpft})

In [144]:
# For each survival threshold, find the ensemble members in which at least one
# retained pft has a survivor ratio (spft) below the threshold, and write them
# to skeys/dead_<NN>percent.txt with the parameter, its min/max tag and the
# affected pfts joined by '_'.
#
# The file is written once per threshold in 'w' mode, so re-running the cell
# replaces its contents instead of appending a second header and duplicate
# rows. A threshold with no affected members yields a header-only file.
for sthresh in [0.5,0.6,0.7,0.75,0.8,0.9]:
    ixdead=(spft<sthresh).sum(dim='pft')>0
    rows=[]
    for ee in ds.ens[ixdead].values:
        ixk=df.key==ee
        param=df.param[ixk].values[0]
        minmax=df.minmax[ixk].values[0]
        x=spft.sel(ens=ee)
        deadpfts=x.pft[x<sthresh].values
        dstr='_'.join(str(int(p)) for p in deadpfts)
        rows.append(ee+','+param+','+minmax+','+dstr+'\n')
    # round() keeps the percent label exact even if 100*sthresh lands just
    # below an integer in floating point.
    fname='skeys/dead_'+str(int(round(100*sthresh)))+'percent.txt'
    with open(fname,'w') as f:
        f.write('key,param,minmax,dead_pfts\n')
        f.writelines(rows)
    