In [1]:
import gc
import os
from glob import glob

import numpy as np
import xarray as xr

### STEP 1: build ESA CCI masks of grid cells that are 100% agricultural land, coarsened to ~1 km spatial resolution

In [2]:
#load in all ESA CCI landcover files
files = sorted(glob('/dodrio/scratch/users/vsc10951/sderoos/data/external/landcover_esa_cci_300m/*.nc'))

#settings
lat_factor = 3
lon_factor = 3
agr_cls    = [10,11,20] #agricultural classes CCI

In [3]:
for f in files[7:]:
    yr          = f.split('-')[-2]
    ds          = xr.open_dataset(f)
    esa_lc      = ds['lccs_class']
    #mask on agricultural classess
    ag_mask     = esa_lc.isin(agr_cls).astype(int)
    # coarsen to ~1 degree
    ag_sum      = ag_mask.coarsen(lat=lat_factor, lon=lon_factor, boundary='trim').sum()
    # all 9 pixels need to be agricultural: if sum ==9 set to 1, else 0
    ag_sum      = ag_sum.isin(9).astype(int)
    print(yr)

    ds.close()

    # Replace NaN with -9999
    ag_mask_filled = ag_sum.fillna(-9999).astype('int16')  # or int8 if preferred
    ag_mask_filled.name = 'agri_100pct'
    # Set encoding so -9999 is treated as _FillValue
    encoding = {
        'agri_100pct': {
            'dtype': 'int16',
            '_FillValue': -9999,
            'zlib': True  # optional compression
        }
    }
    
    # Save to NetCDF
    ag_mask_filled.to_netcdf(f'/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/esa_cci_1deg/{yr}_masked_1deg.nc', encoding=encoding)

    #  Cleanup
    del esa_lc, ag_mask,ag_sum, ag_mask_filled
    gc.collect()

### STEP 2: use CDO nearest-neighbour remapping so that the 1/120 degree mask grid matches the CGLS 1/112 degree grid

##### run _resample_cgls.sh_ or load CDO in python and use following CDO commands:

In [1]:
# years     = np.arange(1999,2015)
# grid_112.txt = cdo.griddes(input_CGLS.nc) 
# for y in years:
#     cdo.remapnn(grid_112.txt, "$FIN$","$FOUT$")

### STEP 3: mask out LAI using the saved masks

In [16]:
# select paths ESA_CCI mask and CGLS_LAI
path_lai  = '/dodrio/scratch/users/vsc10951/sderoos/data/external/lai_cgls_1km/'
path_mask = '/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/esa_cci_cgls_1km/'
years     = np.arange(1999,2015)

##### agricultural locations need to be consistent over time: all years need to be equal to 1

In [3]:
ds_mask = xr.open_dataset('/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/esa_cci_cgls_1km/agri_all1.nc')

In [4]:
ds_mask = ds_mask.where(ds_mask != -9999)
ds_mask = ds_mask.where(ds_mask != 0)

In [None]:
# ds_mask  = xr.open_dataset(f'{path_mask}99_14_masked.nc')
# ds_mask = (ds_mask['agri_100pct'] == 1).all(dim='time')
# ds_mask = ds_mask.astype(np.int8)
# print('here')

##### loop over years and save output

In [17]:
for year in years:
    print(year)
    fs_lai = sorted(glob(f'{path_lai}/*_{year}*.nc'))
    if len(fs_lai) != 36:
        print('warning: number of files seems incorrect')
    for flai in fs_lai:
        ds_lai  = xr.open_dataset(flai)
        datestamp = flai.split('_')[5]
        
        # Reset mask dimensions to match ds_lai
        ds_mask['time'] = ds_lai['time']
        ds_mask['lat']  = ds_lai['lat']
        ds_mask['lon']  = ds_lai['lon']

        # Replace mask fill values with NaN
        # ds_mask = ds_mask.where(ds_mask != -9999)
        # ds_mask = ds_mask.where(ds_mask != 0)

        ds_lai['LAI_masked'] = ds_lai['LAI'] * ds_mask['agri_100pct']
        

        # Save to NetCDF
        print(datestamp)
        ds_lai['LAI_masked'].to_netcdf(f'/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/masked_nan/CGLS_{datestamp}_masked.nc')

        # Close dataset
        ds_lai.close()

1999
199901100000
199901200000
199901310000
199902100000
199902200000
199902280000
199903100000
199903200000
199903310000
199904100000
199904200000
199904300000
199905100000
199905200000
199905310000
199906100000
199906200000
199906300000
199907100000
199907200000
199907310000
199908100000
199908200000
199908310000
199909100000
199909200000
199909300000
199910100000
199910200000
199910310000
199911100000
199911200000
199911300000
199912100000
199912200000
199912310000
2000
200001100000
200001200000
200001310000
200002100000
200002200000
200002290000
200003100000
200003200000
200003310000
200004100000
200004200000
200004300000
200005100000
200005200000
200005310000
200006100000
200006200000
200006300000
200007100000
200007200000
200007310000
200008100000
200008200000
200008310000
200009100000
200009200000
200009300000
200010100000
200010200000
200010310000
200011100000
200011200000
200011300000
200012100000
200012200000
200012310000
2001
200101100000
200101200000
200101310000
2001021000

### STEP 5: match GLASS resolution and apply the GLASS valid-pixel condition

In [2]:
files = sorted(glob('/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/masked/CGLS*.nc'))

In [42]:
#use masking of GLASS to mask out CGLS gridcells
ds_mask = xr.open_dataset('/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/masked_5km/masked_from_GLASS/GLASS_2014_mask_nn.nc')
ds_mask = ds_mask.max(dim='time')

In [43]:
# mask (can also run in loop but saving time) need to define ds_coarse for first time to run this
ds_mask['mask'] = xr.where(ds_mask['LAI'] > 0, 1, np.nan) #(ds_mask > 0).astype(int)
ds_mask['lat'] = ds_coarse['lat'] 
ds_mask['lon'] = ds_coarse['lon']

In [None]:
for f in files:
    timestamp = f.split('_')[2]
    print(timestamp)
    dir = '/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/masked/'
    ds  = xr.open_dataset(f'{dir}CGLS_{timestamp}_masked.nc')

    lat_factor = 6
    lon_factor = 6    

    #filter new dataset at 0.05 degrees
    ds_coarse     = xr.where(ds_mask['LAI'] > 0.0, ds_mask['LAI'], np.nan)
    ds_coarse     = ds.coarsen(lat=lat_factor, lon=lon_factor, boundary='trim').mean()
    ds_coarse     = ds_coarse * ds_mask['mask']

    # save output
    ds_coarse.to_netcdf(f'/dodrio/scratch/users/vsc10951/sderoos/data/processed/CGLS_LAI/masked_5km/CGLS_{timestamp}_masked.nc')

199901100000
199901200000
199901310000
199902100000
199902200000
199902280000
199903100000
199903200000
199903310000
199904100000
199904200000
199904300000
199905100000
199905200000
199905310000
199906100000
199906200000
199906300000
199907100000
199907200000
199907310000
199908100000
199908200000
199908310000
199909100000
199909200000
199909300000
199910100000
199910200000
199910310000
199911100000
199911200000
199911300000
199912100000
199912200000
199912310000
200001100000
200001200000
200001310000
200002100000
200002200000
200002290000
200003100000
200003200000
200003310000
200004100000
200004200000
200004300000
200005100000
200005200000
200005310000
200006100000
200006200000
200006300000
200007100000
200007200000
200007310000
200008100000
200008200000
200008310000
200009100000
200009200000
200009300000
200010100000
200010200000
200010310000
200011100000
200011200000
200011300000
200012100000
200012200000
200012310000
200101100000
200101200000
200101310000
200102100000
200102200000