In [1]:
import numpy as np
from numpy import empty
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import pftnames
import cftime

79


###### Python notebook to convert PFT-formatted CLM output to gridded output, using area maps for the crop functional types (CFTs). The land-use map is used to obtain the correct crop area; otherwise inactive crops are lumped together with active crop areas.
###### Adapted from: https://github.com/NCAR/ctsm_python_gallery/blob/master/notebooks/PFT-Gridding.ipynb

In [7]:
# --------------------- input by user ---------------------
expname = 'exp13'
BASEDIR = '/glade/u/home/sroos/scratch/'
CASE = f'ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_{expname}'
# History-stream tag that appears in the CLM output file names: 'h3', 'h4'
run = 'h3'
vars = [
    'TV', 'TVDAY', 'TVMAX', 'TVMIN', 'TVNIGHT',
    'GPP', 'ELAI',
    'GRAINC_TO_FOOD', 'GRAINC_TO_FOOD_ANN',
    'HSF', 'HW', 'CPHASE',
]
vars_sel = ['ELAI', 'HSF', 'HW']
# Index -4 of the list above is 'GRAINC_TO_FOOD_ANN'.
varname = vars[-4]

# --------------------- assign directories -----------------
# NOTE: BASEDIR already ends with '/', so `case` contains a double slash.
case = f'{BASEDIR}/cases_HS_exps/{CASE}/'
model_out = path_out = case
apply_cropmask = True

In [8]:
# Display the resolved case directory so the path can be checked before any files are read or written.
case

'/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/'

In [9]:
# Land-use time-series file (per its name: 1.9x2.5 grid, SSP2-4.5, 1850-2100, 78 PFTs).
# Its PCT_CFT variable is used further down to build the annual crop mask.
landuse_path = '/glade/campaign/cesm/cesmdata/inputdata/lnd/clm2/surfdata_esmf/ctsm5.2.0/landuse.timeseries_1.9x2.5_SSP2-4.5_1850-2100_78pfts_c240216.nc'
lucdat = xr.open_dataset(landuse_path)

In [10]:
# --------------------- define temporal resolution ---------------------
# Map the history-stream tag `run` to:
#   tsteps   - name of the time step ('month', 'days' or 'years')
#   ntimes   - 1-based index of the time steps within one year (12, 365 or 1 entries)
#   timename - label for that within-year axis
# An unknown tag raises instead of only printing: continuing would leave
# `ntimes`/`timename` undefined (or stale from an earlier execution) and the
# reshape in the gridding cells would then fail or silently use the wrong length.
if run in ('h0', 'h2'):
    tsteps = 'month'
    ntimes = np.arange(1, 13, 1)
    timename = 'nmonth'
elif run in ('h1', 'h3'):
    tsteps = 'days'
    ntimes = np.arange(1, 366, 1)
    timename = 'ndays'
elif run == 'h4':
    tsteps = 'years'
    ntimes = np.arange(1, 2, 1)
    timename = 'nyears'
else:
    raise ValueError(
        f"ERROR: no valid CLM input dataset (run={run!r}; "
        "expected one of 'h0', 'h1', 'h2', 'h3', 'h4')"
    )

In [11]:
# Years to process; the stop value is exclusive, so this covers 1990 through 2014.
year_range = np.arange(start=1990, stop=2015)

In [12]:
# Grid the PFT-level (1-D vector) CLM history output.
# For every variable in `vars_sel` and every year in `year_range`:
#   1. scatter the 1-D PFT vector onto a (time, pft, lat, lon) grid,
#   2. optionally keep only the crop PFTs and mask them with the transient
#      land-use map (PCT_CFT > 0) of the matching year,
#   3. write one NetCDF file per variable and year.

# PFT indices that are matched against the CFT axis of the land-use file.
cft_slice = slice(15, 79)
pft_names = np.array(pftnames.pftname)

for varname in vars_sel:
    print(varname)
    for year in year_range:
        fname = f'{BASEDIR}{CASE}/run/{CASE}.clm2.{run}.{year}-01-01-00000.nc'
        nc_outname = f'{path_out}{CASE}.{run}_cropmask_{varname}_{year}_pft.nc'

        # daily values are end of day --> represent day before
        # The context manager closes the file again; the loop opens one file per
        # variable and year, so handles would otherwise pile up.
        with xr.open_dataset(fname) as data1:
            lat = data1.lat
            lon = data1.lon
            var = data1[varname]

            # Grid position and type of every entry of the 1-D PFT vector. ixy/jxy
            # are converted to 0-based numpy indices by subtracting 1.
            col_idx = data1.pfts1d_ixy.values.astype(int) - 1
            row_idx = data1.pfts1d_jxy.values.astype(int) - 1
            pft_idx = data1.pfts1d_itype_veg.values.astype(int)

            ntim = data1.time.size
            npft = int(pft_idx.max()) + 1

            # Start from NaN rather than np.empty(): slots the 1-D vector never
            # fills must read as missing, not as uninitialised memory.
            gridded = np.full((ntim, npft, lat.size, lon.size), np.nan)
            gridded[:, pft_idx, row_idx, col_idx] = var.values

            grid_dims = xr.DataArray(gridded, dims=("time", "pft", "lat", "lon"), name=varname)
            grid_dims = grid_dims.assign_coords(time=data1.time[:], pft=pft_names,
                                                lat=lat.values, lon=lon.values)
            # NOTE(review): `landfrac == True` keeps only cells whose land fraction is
            # exactly 1, so partially-land cells are masked out - confirm this is intended.
            grid_dims = grid_dims.where(data1.landfrac == True)
            years = np.unique(grid_dims.indexes['time'].year)
            nyears = len(years)

            if apply_cropmask:
                # Annual transient crop map used as mask for the CFTs. Only the years
                # present in this file are read from the land-use record.
                pctcft = lucdat.PCT_CFT
                lu_years = pctcft['time'].values
                first = np.flatnonzero(lu_years == years[0])
                last = np.flatnonzero(lu_years == years[-1])
                if first.size == 0 or last.size == 0:
                    raise ValueError(
                        f'years {years[0]}-{years[-1]} of {fname} are not covered by the land-use file')
                # Percentages are irrelevant here: True wherever the CFT has a non-zero share.
                crop_present = (pctcft.isel(time=slice(first[0], last[0] + 1)) > 0.0).rename(
                    {'time': 'year', 'cft': 'pft', 'lsmlat': 'lat', 'lsmlon': 'lon'})

                # Split time into (year, doy) so that each year is filtered by its own mask.
                reshaped = grid_dims.values.reshape(nyears, len(ntimes), *grid_dims.shape[1:])
                grid_renamed = xr.DataArray(reshaped, dims=("year", "doy", "pft", "lat", "lon"))
                grid_renamed = grid_renamed.assign_coords(year=years, doy=ntimes, pft=pft_names,
                                                          lat=lat.values, lon=lon.values)

                # Subset the CLM grid to the PFTs covered by the mask and give the mask
                # the same pft/year labels so both line up; the mask broadcasts over doy.
                sub_clm = grid_renamed.isel(pft=cft_slice)
                crop_present = crop_present.assign_coords(pft=sub_clm.pft.values,
                                                          year=sub_clm.year.values)
                sub_clm = sub_clm.where(crop_present)

                # convert back to time,pft,lat,lon dimensions
                sub_clm = sub_clm.stack(time=("year", "doy"))
                sub_clm = sub_clm.assign_coords(time=("time", data1.time.values))
                sub_clm = sub_clm.transpose("time", ...)
            else:
                sub_clm = grid_dims

            # Replace the PFT names by integer ids 0..n-1 and keep the names, in id
            # order, in the description. Both are derived from the PFTs actually present
            # in `sub_clm`, so ids and names stay consistent with and without the crop mask.
            pft_labels = [str(label) for label in sub_clm.pft.values]
            sub_clm = sub_clm.assign_coords(pft=np.arange(len(pft_labels)))
            sub_clm.pft.attrs["long_name"] = "Plant Functional Type"
            sub_clm.pft.attrs["description"] = ", ".join(pft_labels)

            # save output (written once, so the fill-value encoding is not lost to a second write)
            sub_clm.name = varname
            encoding = {
                varname: {
                    "dtype": "float64",
                    "_FillValue": -9999.0
                }
            }
            sub_clm.to_netcdf(nc_outname, encoding=encoding)
        print(nc_outname)

ELAI
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI_1990_pft.nc
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI_1991_pft.nc
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI_1992_pft.nc
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI_1993_pft.nc
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI_1994_pft.nc
/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h3_cropmask_ELAI

In [6]:
# Annual (h4) output: grid GRAINC_TO_FOOD_ANN from the single file starting in 1980.
# The cell only applies to the 'h4' stream, so it is skipped for any other `run`.
# The variable is pinned here instead of read from the global `varname`, which the
# per-year loop overwrites; the output file name is hard-wired to this variable.
if run == 'h4':
    h4_varname = 'GRAINC_TO_FOOD_ANN'
    # PFT indices that are matched against the CFT axis of the land-use file.
    h4_cft_slice = slice(15, 79)

    fname = f'{BASEDIR}{CASE}/run/{CASE}.clm2.{run}.1980-01-01-00000.nc'
    nc_outname = f'{path_out}{CASE}.{run}_cropmask_{h4_varname}_pft.nc'

    # The context manager closes the input file once everything has been written.
    with xr.open_dataset(fname) as data1:
        lat = data1.lat
        lon = data1.lon
        var = data1[h4_varname]
        pfts = np.array(pftnames.pftname)

        # Grid position and type of every entry of the 1-D PFT vector. ixy/jxy
        # are converted to 0-based numpy indices by subtracting 1.
        col_idx = data1.pfts1d_ixy.values.astype(int) - 1
        row_idx = data1.pfts1d_jxy.values.astype(int) - 1
        pft_idx = data1.pfts1d_itype_veg.values.astype(int)

        ntim = data1.time.size
        npft = int(pft_idx.max()) + 1

        # Start from NaN rather than np.empty(): slots the 1-D vector never
        # fills must read as missing, not as uninitialised memory.
        gridded = np.full((ntim, npft, lat.size, lon.size), np.nan)
        gridded[:, pft_idx, row_idx, col_idx] = var.values

        grid_dims = xr.DataArray(gridded, dims=("time", "pft", "lat", "lon"), name=h4_varname)
        grid_dims = grid_dims.assign_coords(time=data1.time[:], pft=pfts,
                                            lat=lat.values, lon=lon.values)
        # NOTE(review): `landfrac == True` keeps only cells whose land fraction is
        # exactly 1, so partially-land cells are masked out - confirm this is intended.
        grid_dims = grid_dims.where(data1.landfrac == True)
        years = np.unique(grid_dims.indexes['time'].year)
        nyears = len(years)

        if apply_cropmask:
            # Annual transient crop map used as mask for the CFTs. Only the years
            # present in this file are read from the land-use record.
            pctcft = lucdat.PCT_CFT
            lu_years = pctcft['time'].values
            first = np.flatnonzero(lu_years == years[0])
            last = np.flatnonzero(lu_years == years[-1])
            if first.size == 0 or last.size == 0:
                raise ValueError(
                    f'years {years[0]}-{years[-1]} of {fname} are not covered by the land-use file')
            # Percentages are irrelevant here: True wherever the CFT has a non-zero share.
            crop_present = (pctcft.isel(time=slice(first[0], last[0] + 1)) > 0.0).rename(
                {'time': 'year', 'cft': 'pft', 'lsmlat': 'lat', 'lsmlon': 'lon'})

            # Split time into (year, step-within-year) so each year is filtered by its own mask.
            reshaped = grid_dims.values.reshape(nyears, len(ntimes), *grid_dims.shape[1:])
            grid_renamed = xr.DataArray(reshaped, dims=("year", "doy", "pft", "lat", "lon"))
            grid_renamed = grid_renamed.assign_coords(year=years, doy=ntimes, pft=pfts,
                                                      lat=lat.values, lon=lon.values)

            # Subset the CLM grid to the PFTs covered by the mask and give the mask
            # the same pft/year labels so both line up; the mask broadcasts over doy.
            sub_clm = grid_renamed.isel(pft=h4_cft_slice)
            crop_present = crop_present.assign_coords(pft=sub_clm.pft.values,
                                                      year=sub_clm.year.values)
            sub_clm = sub_clm.where(crop_present)

            # convert back to time,pft,lat,lon dimensions
            sub_clm = sub_clm.stack(time=("year", "doy"))
            sub_clm = sub_clm.assign_coords(time=("time", data1.time.values))
            sub_clm = sub_clm.transpose("time", ...)
        else:
            sub_clm = grid_dims

        # Replace the PFT names by integer ids 0..n-1 and keep the names, in id
        # order, in the description. Both are derived from the PFTs actually present
        # in `sub_clm`, so ids and names stay consistent with and without the crop mask.
        pft_labels = [str(label) for label in sub_clm.pft.values]
        sub_clm = sub_clm.assign_coords(pft=np.arange(len(pft_labels)))
        sub_clm.pft.attrs["long_name"] = "Plant Functional Type"
        sub_clm.pft.attrs["description"] = ", ".join(pft_labels)

        # save output (written once, so the fill-value encoding is not lost to a second write)
        sub_clm.name = h4_varname
        encoding = {
            h4_varname: {
                "dtype": "float64",
                "_FillValue": -9999.0
            }
        }
        sub_clm.to_netcdf(nc_outname, encoding=encoding)
    print(nc_outname)
else:
    print(f"Skipping annual gridding: run is {run!r}, but this cell only applies to 'h4' output.")

/glade/u/home/sroos/scratch//cases_HS_exps/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13/ihist.e52.IHistClm50BgcCrop.f19_g17.2.HSF_exp13.h4_cropmask_GRAINC_TO_FOOD_ANN_pft.nc
