### Importing Libraries

In [1]:
%matplotlib inline
import numpy as np
from numpy import empty
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import metpy as metpy
import netCDF4
from netCDF4 import Dataset
#from ctsm_py import utils  #when using utils in the CTSM_py directory (work); also need to do pip install -e
#import esmlab  #note: need to make sure to install in current environment first
#note that CMIP6 doesn't have metpy; use conda env:miniconda-analysis

### Defining simulation information

In [2]:
# Simulation folder being processed; the trailing slash lets it be appended
# directly onto the archive root.
sim            = "S0_control/"
# Directory holding this simulation's history files (archive root + folder).
datadir        = "/glade/p/cgd/tss/people/dll/TRENDY2019_History/" + sim
# Remaining pieces of the file name: <simname><var>.<years>.nc
simname        = "TRENDY2019_S0_control_v2.clm2.h1."
var            = "GPP"
years          = "170001-201812"

In [3]:
# Show the full path of the history file assembled from the settings above.
print("".join([datadir, simname, var, ".", years, ".nc"]))

/glade/p/cgd/tss/people/dll/TRENDY2019_History/S0_control/TRENDY2019_S0_control_v2.clm2.h1.GPP.170001-201812.nc


In [4]:
# Open the single-variable history file: <datadir><simname><var>.<years>.nc
data1          = xr.open_dataset("".join([datadir, simname, var, ".", years, ".nc"]))
#Use below for testing
#data1          = xr.open_dataset("/glade/scratch/dll/archive/TRENDY2019_S0_constant_v2/lnd/hist/TRENDY2019_S0_constant_v2.clm2.h1.2018-12.nc")

### Reading in Land Use Change dataset for masking the real distribution of each crop type

In [5]:
# Land-use time-series surface dataset (per its file name: 0.9x1.25 grid,
# 78 PFTs, simulation years 1700-2018). PCT_CFT and PCT_CROP are read from it
# further down to build the crop-area weights.
lucdat    = xr.open_dataset("/glade/p/cesmdata/cseg/inputdata/lnd/clm2/surfdata_map/release-clm5.0.24/landuse.timeseries_0.9x1.25_hist_78pfts_TRENDY_simyr1700-2018_c190814.nc")

In [6]:
# For TRENDY use, not for CTSM_py commit
#note: HTOP ('theightpft') also requested, but info not in old script
# The five lists below are parallel: entry i of every list describes the same
# variable, so they must all have the same length.
vars_in = ['TLAI', 'FCTR', 'GPP', 'NPP', 'TOTVEGC', 'TV']
vars_out = ['lai','transpft','gpppft','npppft','cVegpft','tskinpft']
# 'kg C m-2' is the entry for TOTVEGC/cVegpft (a carbon stock, so no per-second
# rate). It was missing before, which left this list one short and paired 'K'
# with cVegpft instead of tskinpft.
units_out_list = ['None','W m-2','kg C m-2 s-1','kg C m-2 s-1','kg C m-2','K']
long_name_out_list = ['Leaf Area Index','Vegtype level transpiration','Vegtype level GPP','Vegtype level NPP','Vegtype level Carbon in Vegetation','Vegtype level Skin temperature']
# Multiplicative factor applied to the model values to reach the output units
# (1.e-3 for the three carbon variables).
unit_convert_list = [1,1,1.e-3,1.e-3,1.e-3,1]

### Reading in variables from data file

In [7]:
# NOTE: `years` rebinds the file-name string ("170001-201812") set in the
# simulation-information cell. Re-run that cell before re-running the cells
# that build the history-file path.
# np.arange excludes its stop value, so each stop is one past the last entry
# wanted (the previous stops of 2018 and 12 dropped the final year and month).
years          = np.arange(1700,2019,1)   # 1700..2018 inclusive, the span of the history file
month          = np.arange(1,13,1)        # 1..12

# Grid-level fields and coordinates from the history file.
area           = data1.area
landfrac       = data1.landfrac
lat            = data1.lat
lon            = data1.lon
time           = data1.time

# Per-PFT 1-D index vectors: grid column (ixy), grid row (jxy), PFT type and
# the weight of each PFT relative to its grid cell. ixy/jxy are treated as
# 1-based where the data are scattered onto the grid.
ixy            = data1.pfts1d_ixy
jxy            = data1.pfts1d_jxy
#coltype        = data1.pfts1d_itype_col
vegtype        = data1.pfts1d_itype_veg
cellwtd        = data1.pfts1d_wtgcell

# The PFT-level variable to be gridded.
gpp            = data1.GPP
#print(type(gpp))
#print(gpp.dims)
#print (gpp['time'].values)
#print(gpp)

### Reading in variables from LUC file

In [8]:
# Percent of the crop landunit held by each crop functional type (PCT_CFT) and
# percent of the grid cell that is crop landunit (PCT_CROP).
pctcft         = lucdat.PCT_CFT
pctcrop        = lucdat.PCT_CROP
print(pctcft)
print(pctcrop)

# Fraction of each grid cell covered by each crop type: both percentages are
# converted to fractions and multiplied. This step is slow because the full
# arrays are loaded, and the product carries no attributes (name, units).
cropwts        = (pctcft/100) * (pctcrop/100)
print(cropwts)

# Keep only cells where a crop type covers more than 0.5% of the grid cell.
# DataArray.where(cond) returns the original values where cond holds and NaN
# elsewhere (not a boolean array).
cropwtsmask    = cropwts.where(cropwts>0.005)
print(cropwtsmask)
# TODO: confirm the masked array matches the NCL result below.

#from NCL script -- only pulling values where >0.5% crop area for a certain crop type, set all other areas to fill
#cropwts0       =  where( cropwts.gt.0.005, cropwts, 0 )        ;masking out area of interest
#nonzeroarea    =  where( cropwts0.ne.0, cropwts0, cropwts0@_FillValue )

<xarray.DataArray 'PCT_CFT' (time: 319, cft: 64, lsmlat: 192, lsmlon: 288)>
[1128923136 values with dtype=float64]
Coordinates:
  * cft      (cft) int32 15 16 17 18 19 20 21 22 23 ... 71 72 73 74 75 76 77 78
  * time     (time) int32 1700 1701 1702 1703 1704 ... 2014 2015 2016 2017 2018
Dimensions without coordinates: lsmlat, lsmlon
Attributes:
    long_name:  percent crop functional type on the crop landunit (% of landu...
    units:      unitless
<xarray.DataArray 'PCT_CROP' (time: 319, lsmlat: 192, lsmlon: 288)>
[17639424 values with dtype=float64]
Coordinates:
  * time     (time) int32 1700 1701 1702 1703 1704 ... 2014 2015 2016 2017 2018
Dimensions without coordinates: lsmlat, lsmlon
Attributes:
    long_name:  total percent crop landunit
    units:      unitless
<xarray.DataArray (time: 319, cft: 64, lsmlat: 192, lsmlon: 288)>
array([[[[0., ..., 0.],
         ...,
         [0., ..., 0.]],

        ...,

        [[0., ..., 0.],
         ...,
         [0., ..., 0.]]],


       ...,

### Processing variables

In [None]:
# Convert grid-cell area to m^2. The result of convert_units must not be
# discarded: older MetPy releases convert in place and return None, while newer
# releases leave `area` untouched and return a new pint-backed DataArray.
# Handling both keeps `landarea` consistent with the 'm^2' label set below.
area_converted = area.metpy.convert_units('m^2')
if area_converted is not None:
    # Drop the pint wrapper so later xarray/numpy arithmetic sees plain floats.
    area = area_converted.metpy.dequantify()
landarea       = area*landfrac
landarea.attrs['units']   = 'm^2'

# Number of complete years in the monthly time axis.
num_years       = len(time)//12
print(num_years)
# Length of the 1-D PFT vector.
npftvector      = len(ixy)
#type(cellwtd)

In [None]:
# Per-PFT grid-cell weights, first as stored and then as a plain float array.
cellwt = cellwtd.values
cellwt_float = cellwt.astype(float)

# Lengths of the grid, PFT-vector and time axes.
nlat, nlon = lat.values.shape[0], lon.values.shape[0]
nvegtype = vegtype.values.shape[0]
ntim = time.values.shape[0]
ntim2 = ntim // 2

# Largest PFT type index present in the file, cast to an integer.
npft = np.max(vegtype).astype(int)

#print (npftvector)

In [None]:
# Quick look at the highest PFT index and the types of the size variables.
print(npft.values)
print(type(npft), type(ntim), sep="\n")

# PFT index coordinate: 0 .. npft inclusive.
pftlist = np.arange(npft.values + 1)
#print(pftlist)
#print(pftlist)

In [None]:
# PFT/CFT names ordered so that the list position equals the PFT type index
# (0..78). "winter_rye" and "irrigated_winter_rye" (indices 31 and 32) were
# missing, which left the list two entries short of the 15..78 crop index range
# and shifted every later crop name onto the wrong index.
pftname =   ["not_vegetated",
             "needleleaf_evergreen_temperate_tree",
             "needleleaf_evergreen_boreal_tree",
             "needleleaf_deciduous_boreal_tree",
             "broadleaf_evergreen_tropical_tree",
             "broadleaf_evergreen_temperate_tree",
             "broadleaf_deciduous_tropical_tree",
             "broadleaf_deciduous_temperate_tree",
             "broadleaf_deciduous_boreal_tree",
             "broadleaf_evergreen_shrub",
             "broadleaf_deciduous_temperate_shrub",
             "broadleaf_deciduous_boreal_shrub",
             "c3_arctic_grass",
             "c3_non-arctic_grass",
             "c4_grass",
             "unmanaged_c3_crop",
             "unmanaged_c3_irrigated",
             "corn",
             "irrigated_corn",
             "spring_wheat",
             "irrigated_spring_wheat",
             "winter_wheat",
             "irrigated_winter_wheat",
             "soybean",
             "irrigated_soybean",
             "barley",
             "irrigated_barley",
             "winter_barley",
             "irrigated_winter_barley",
             "rye",
             "irrigated_rye",
             "winter_rye",
             "irrigated_winter_rye",
             "cassava",
             "irrigated_cassava",
             "citrus",
             "irrigated_citrus",
             "cocoa",
             "irrigated_cocoa",
             "coffee",
             "irrigated_coffee",
             "cotton",
             "irrigated_cotton",
             "datepalm",
             "irrigated_datepalm",
             "foddergrass",
             "irrigated_foddergrass",
             "grapes",
             "irrigated_grapes",
             "groundnuts",
             "irrigated_groundnuts",
             "millet",
             "irrigated_millet",
             "oilpalm",
             "irrigated_oilpalm",
             "potatoes",
             "irrigated_potatoes",
             "pulses",
             "irrigated_pulses",
             "rapeseed",
             "irrigated_rapeseed",
             "rice",
             "irrigated_rice",
             "sorghum",
             "irrigated_sorghum",
             "sugarbeet",
             "irrigated_sugarbeet",
             "sugarcane",
             "irrigated_sugarcane",
             "sunflower",
             "irrigated_sunflower",
             "miscanthus",
             "irrigated_miscanthus",
             "switchgrass",
             "irrigated_switchgrass",
             "tropical_corn",
             "irrigated_tropical_corn",
             "tropical_soybean",
             "irrigated_tropical_soybean"]
#print(pftname)

In [None]:
print (vegtype.values[1])
# Target array with axes (time, pft, lat, lon). It is NaN-filled rather than
# left uninitialised: only the (pft, lat, lon) cells present in the 1-D PFT
# vector are written afterwards, and np.empty would leave arbitrary memory
# contents in every other cell, which would then be written to the output file.
gridded = np.full([ntim,npft.values+1,nlat,nlon], np.nan)

In [None]:
%%time
# Note: PFTs with zero grid-cell weight are not masked out here (the loop
# version guarded on cellwt_float[i] > 0). Perhaps it doesn't matter?

# 0-based target indices for every entry of the 1-D PFT vector; the
# column/row vectors are shifted down by one before use.
pft_idx = vegtype.values.astype(int)
row_idx = jxy.values.astype(int) - 1
col_idx = ixy.values.astype(int) - 1

# Scatter all time steps at once with advanced indexing.
gridded[:, pft_idx, row_idx, col_idx] = gpp.values


In [None]:
#%%time
#this is time intensive: ~6.5s to regrid 1 month; 2.5min for timeseries
#gridded_test = gridded
#for i in range(npftvector):
#    #gridded = empty([ntim,77,nlat,nlon])
#    #print ('i = ',i)
#    if (cellwt_float[i]>0):
#        col     = ixy.values[i].astype(int) - 1
#        row     = jxy.values[i].astype(int) - 1 
#        vegidx = vegtype.values[i].astype(int)
#        #print (gridded[:,vegidx, row, col])
#        #print (gpp.values[:,i])
#        gridded_test[:,vegidx, row, col] = gpp.values[:,i]
#        #print (i)

### Adding dimensions and coordinates

In [None]:
# Wrap the gridded array in a DataArray named after the variable, then attach
# the time, PFT-index, latitude and longitude coordinates.
grid_dims = xr.DataArray(gridded, dims=("time","pft","lat","lon"), name=var)
grid_dims = grid_dims.assign_coords(
    time=data1.time,
    pft=pftlist,
    lat=lat.values,
    lon=lon.values,
)
#print(grid_dims)

#for comparing to loop
#grid_dims_test = xr.DataArray(gridded_test, dims=("time","pft","lat","lon"))
#grid_dims_test = grid_dims_test.assign_coords(time=data1.time,pft=pftlist,lat=lat.values,lon=lon.values)
#grid_dims_test.name = var

### Writing netCDF file

In [None]:
# Write the gridded, PFT-resolved variable to a netCDF file on scratch.
grid_dims.to_netcdf("/glade/scratch/dll/griddedExample.nc")
#for comparing to loop
#grid_dims_test.to_netcdf(path="/glade/scratch/dll/gridded_loop_Example.nc")

To do:
- correct time dimension to start in Jan
- Add PFT names to the PFT dimension
- mask data by PFT distribution in the land surface dataset

In [None]:
#doesn't work yet... need to investigate
# Reshape the monthly (time, pft, lat, lon) series into
# (year, month, pft, lat, lon).
# Fixes relative to the earlier draft, which could not run:
#   * `gridded` is a NumPy array and has no `.values`; shapes come from grid_dims
#   * the reshaped array has five axes, so 'pft' is added to the dims
#   * `data`, `sims` and `sim_name` were never defined in this notebook
#   * the year axis starts at 1700 (the history file span), not 1850
first_year = 1700
# Use whole years only, so the reshape cannot fail on a partial final year.
monthly = grid_dims.values[:num_years * 12]
reshaped = monthly.reshape(num_years, 12, *monthly.shape[1:])

# 'month' is the position (1..12) within each block of 12 time steps; it equals
# the calendar month only once the time axis starts in January (see To do).
values = xr.DataArray(reshaped,
                      dims=('year','month','pft','lat','lon'),
                      coords={'year': np.arange(first_year, first_year+num_years),
                              'month': np.arange(1, 13),
                              'pft': grid_dims.pft.values,
                              'lat': grid_dims.lat.values,
                              'lon': grid_dims.lon.values})
# Carry the units over from the source variable when the file provides them.
if 'units' in gpp.attrs:
    values.attrs['units'] = gpp.attrs['units']

# Collect results per simulation, keyed by the simulation folder name without
# its trailing slash; the dict is created on first use.
sim_name = sim.rstrip('/')
if 'sims' not in globals():
    sims = {}
sims[sim_name] = values