# Processing CESM2-LENS PiC & lessmelt data

### Set up
#### Packages

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
from Processing_functions import FixLongitude, FixTime, FixGrid, InterPlevels
xr.set_options(display_expand_data=False);
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.diagnostics import ProgressBar
import random
import os

#### Filepaths & name variables

In [2]:
# Which control run to process: True selects the "_branch2" lessmelt case,
# False selects the standard CMIP6 piControl case.
lessmelt = True

# Case name, archive root and output directory all follow the run selection
if lessmelt:
    cesm2piC = 'b.e21.B1850.f09_g17.CMIP6-piControl.001_branch2'
    path_to_arch = "/glade/campaign/cgd/ppc/cesm2_tuned_albedo/"
    path_to_outdata = '/glade/work/glydia/processed_CESM2_lessmelt_data/'
else:
    cesm2piC = 'b.e21.B1850.f09_g17.CMIP6-piControl.001'
    path_to_arch = "/glade/campaign/collections/cmip/CMIP6/timeseries-cmip6/"
    path_to_outdata = '/glade/work/glydia/processed_CESM2_LENS_data/'

# Variable selection
comp = 'ice'   # model component: 'atm', 'ice' or 'ocn'
freq = 0       # 0: monthly, 1: daily
var_ind = 2    # position of the variable inside var_list[comp]

# Variables available per component
var_list = {
    'atm': ['TS', 'FLDS', 'CLOUD', 'FLNS', 'FSNS', 'FLNT', 'FSNT',
            'PSL', 'U', 'V', 'T', 'TREFHT', 'RESTOM', 'Z3'],
    'ice': ['aice', 'hi', 'hs'],
    'ocn': ['MOC'],
}
# Daily variables carry a '_d' suffix
var_ext = {0: '', 1: '_d'}
var = var_list[comp][var_ind] + var_ext[freq]

# History-stream tag used in file names, indexed by freq
h_ext = {
    'atm': ['.h0.'],
    'ice': ['.h.', '.h1.'],
    'ocn': ['.h.'],
}
# Model name used in file names
mod_com = {
    'atm': 'cam',
    'ice': 'cice',
    'ocn': 'pop',
}
# Archive sub-directory per output frequency, indexed by freq
time_path = {
    'atm': ['month_1'],
    'ice': ['month_1', 'day_1'],
    'ocn': ['month_1'],
}
# One flag per entry of var_list[comp]; True sends the variable to the
# pressure-level interpolation cell instead of the single-level cell
vert_lev = {
    'atm': [False, False, True, False, False, False, False,
            False, True, True, True, False, False, True],
    'ice': [False, False, False],
    'ocn': [False],
}

# NOTE(review): presumably pressure levels in hPa; not used by the cells shown here
plot_levels = [300, 500, 850, 925]

In [3]:
# Resources requested for each PBS job on the 'casper' queue; the job name
# carries the variable being processed so jobs are easy to tell apart.
worker_spec = dict(
    cores=1,
    memory='25GiB',
    queue='casper',
    walltime='04:00:00',
    account='UCUB0155',
    name='piControl_'+var,
)
cluster = PBSCluster(**worker_spec)

# Ask for 4*9 = 36 workers and connect a client to the cluster
cluster.scale(4*9)
client = Client(cluster)

In [4]:
# Display the client summary (connection, dashboard link, worker/thread/memory totals)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/40713/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/40713/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.98:40851,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/40713/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
## Chunking variables
# Time chunk length depends on the output frequency: 730 steps for daily
# data (freq == 1), 600 steps otherwise.
if freq == 1:
    time_ch = 365*2
else:
    time_ch = 600

# Dask chunk sizes per model component, keyed by that component's dimension names
chunks = dict(
    atm=dict(time=time_ch, lat=96, lon=144, lev=-1),
    ice=dict(time=time_ch, nj=192, ni=160),
    ocn=dict(time=time_ch, nlat=64, nlon=96, z_t=5),
)

### Load & modify data
#### Control data

In [6]:
# First model year of each processing block: 811, 861, ..., 1161
first_start, last_start, start_step = 811, 1161, 50
startyrs = np.arange(first_start, last_start + 1, start_step)
print(startyrs)

[ 811  861  911  961 1011 1061 1111 1161]


In [7]:
%%time
# Single-level fields (everything except RESTOM and the pressure-level variables):
# cut the control run into 50-year blocks, process each model year separately,
# and write one compressed NetCDF file per block.
if var != 'RESTOM' and not vert_lev[comp][var_ind]:
    filepath = path_to_arch+cesm2piC+'/'+comp+'/proc/tseries/'
    addon = 'Kayetal22/' if (lessmelt and comp == 'ice') else time_path[comp][freq]+'/'
    filepath = filepath+addon

    # Open every time-series file of this variable lazily through one glob, for
    # both runs. Opening only under `lessmelt` would leave `ds` undefined (or stale
    # from an earlier execution) when the standard piControl archive is selected.
    # NOTE(review): assumes the standard archive uses the same
    # <case>.<model><stream><var>.<dates>.nc naming as the other cells build — confirm on disk.
    filename = cesm2piC+'.'+mod_com[comp]+h_ext[comp][freq]+var+'.*.'+'nc'
    ds = dask.delayed(xr.open_mfdataset)(filepath+filename,chunks=chunks[comp])
    dsv = ds[var]

    add_cyclic = False  # forwarded unchanged to FixLongitude

    for startyr in startyrs:
        # 51 zero-padded year labels -> 50 one-year slices per block
        yr_range = np.array([str(i).zfill(4) for i in np.arange(startyr,startyr+51)])
        endyr = startyr+49

        # Date tag of the output file, indexed by freq
        yr_extn = {'out': ["."+str(startyr).zfill(4)+"01-"+str(endyr).zfill(4)+"12.", ".05010101-05741231."]}

        processed_list = []
        for i in range(0,len(yr_range)-1):
            if freq == 0:
                # Monthly data: 1 Feb of this year through 17 Jan of the next
                # NOTE(review): presumably because monthly means are stamped at the
                # end of the averaging period — confirm against FixTime.
                slice_start = yr_range[i]+'-02-01'
                slice_end = yr_range[i+1]+'-01-17'
            else:
                # Daily data: the calendar year
                slice_start = yr_range[i]+'-01-01'
                slice_end = yr_range[i]+'-12-31'

            ann_slice = dsv.sel(time=slice(slice_start,slice_end))
            print('sliced '+slice_start+' to '+slice_end)

            # Only monthly data goes through FixTime
            if freq == 0:
                ann_slice = FixTime(ann_slice)
                print('   fixed time')

            # Component-specific grid fix; each task stays lazy until dask.compute below
            if comp == 'ice':
                processed_list.append(dask.delayed(FixGrid)(ann_slice,'gx1v7'))
                print('   fixed CICE grid')
            elif comp == 'ocn':
                processed_list.append(ann_slice)
            else:
                processed_list.append(dask.delayed(FixLongitude)(ann_slice, add_cyclic))
                print('   fixed longitude')

        # NOTE(review): only monthly data is computed and written; for daily data
        # (freq == 1) the tasks above are built but never executed or saved —
        # confirm whether that is intended.
        if freq == 0:
            processed_comp = dask.compute(*processed_list)
            print('computed list')

            processed_out = xr.concat(processed_comp,dim='time').chunk({'time':111})
            print('concatenated data')

            processed_out.to_netcdf(path_to_outdata+cesm2piC+h_ext[comp][freq]+var+yr_extn['out'][freq]+'nc',
                                        format='NETCDF4',encoding={var: {"zlib": True, "complevel": 1}})
            print('wrote data to disk')

sliced 0811-02-01 to 0812-01-17
   fixed time
   fixed CICE grid
sliced 0812-02-01 to 0813-01-17
   fixed time
   fixed CICE grid
sliced 0813-02-01 to 0814-01-17
   fixed time
   fixed CICE grid
sliced 0814-02-01 to 0815-01-17
   fixed time
   fixed CICE grid
sliced 0815-02-01 to 0816-01-17
   fixed time
   fixed CICE grid
sliced 0816-02-01 to 0817-01-17
   fixed time
   fixed CICE grid
sliced 0817-02-01 to 0818-01-17
   fixed time
   fixed CICE grid
sliced 0818-02-01 to 0819-01-17
   fixed time
   fixed CICE grid
sliced 0819-02-01 to 0820-01-17
   fixed time
   fixed CICE grid
sliced 0820-02-01 to 0821-01-17
   fixed time
   fixed CICE grid
sliced 0821-02-01 to 0822-01-17
   fixed time
   fixed CICE grid
sliced 0822-02-01 to 0823-01-17
   fixed time
   fixed CICE grid
sliced 0823-02-01 to 0824-01-17
   fixed time
   fixed CICE grid
sliced 0824-02-01 to 0825-01-17
   fixed time
   fixed CICE grid
sliced 0825-02-01 to 0826-01-17
   fixed time
   fixed CICE grid
sliced 0826-02-01 to 0827

In [8]:
%%time
# Pressure-level variables: for every 50-year block, open the century file(s) that
# hold it, attach PS, process each model year (time fix, longitude fix,
# pressure-level interpolation) and finally save all blocks into one zarr store.
if var != 'RESTOM' and vert_lev[comp][var_ind]:
    filepath = path_to_arch+cesm2piC+'/'+comp+'/proc/tseries/'
    addon = 'Kayetal22/' if lessmelt else time_path[comp][freq]+'/'
    filepath = filepath+addon
    file_stem = cesm2piC+'.'+mod_com[comp]+h_ext[comp][freq]

    add_cyclic = False  # forwarded unchanged to FixLongitude

    slice_list = []
    for num, startyr in enumerate(startyrs):
        # 51 year labels -> 50 one-year slices, so consecutive blocks (which start
        # 50 years apart) do not overlap in time
        yr_range = np.array([str(i).zfill(4) for i in np.arange(startyr,startyr+51)])
        endyr = startyr+49

        # Date tags of the input file(s): one per century touched by the block.
        # Century 0 is stored as 0001-0099 and century 19 as 1900-2000.
        in_spans = []
        for century in sorted({startyr//100, endyr//100}):
            if century == 0:
                first, last = 1, 99
            elif century == 19:
                first, last = 1900, 2000
            else:
                first, last = century*100, century*100+99
            span = ["."+str(first).zfill(4)+"01-"+str(last).zfill(4)+"12.", ".05000101-05991231."][freq]
            if span not in in_spans:
                in_spans.append(span)

        var_files = [filepath+file_stem+var+span+'nc' for span in in_spans]
        ps_files = [filepath+file_stem+'PS'+span+'nc' for span in in_spans]
        for path in var_files:
            print(path)

        # Open dataset(s): the variable itself plus PS from the matching file(s)
        if len(var_files) == 1:
            ds = xr.open_dataset(var_files[0],chunks=chunks[comp])
            dsp = xr.open_dataset(ps_files[0],chunks=chunks[comp])
        else:
            ds = xr.open_mfdataset(var_files,chunks=chunks[comp])
            dsp = xr.open_mfdataset(ps_files,chunks=chunks[comp])

        # NOTE(review): PS is presumably required by InterPlevels — confirm in Processing_functions
        dsv = ds
        dsv['PS'] = dsp['PS']

        processed_list = []
        for i in range(0,len(yr_range)-1):
            # 1 Feb of this year through 17 Jan of the next
            slice_start = yr_range[i]+'-02-01'
            slice_end = yr_range[i+1]+'-01-17'
            ann_slice = dsv.sel(time=slice(slice_start,slice_end))
            print('sliced '+slice_start+' to '+slice_end)

            ann_slice = FixTime(ann_slice)
            print('   fixed time')

            ann_slice = FixLongitude(ann_slice, add_cyclic)
            print('   fixed longitude')

            ann_slice = InterPlevels(ann_slice, var)
            print('   interpolated p-levels')
            processed_list.append(ann_slice)

        processed_out = xr.concat(processed_list,dim='time').chunk({'time':111})
        slice_list.append(processed_out)
        print('concatenated slice '+str(num))

    if slice_list:
        # Name the store after the years it actually covers
        out_store = (path_to_outdata+cesm2piC+h_ext[comp][freq]+var
                     +'.'+str(startyrs[0]).zfill(4)+'01-'+str(startyrs[-1]+49).zfill(4)+'12.zarr')
        for i, block in enumerate(slice_list):
            if i == 0:
                block.to_zarr(out_store, group=var)
                print('saved initial zarr store')
            else:
                print('saving slice '+str(i+1))
                # Blocks are consecutive in time, so extend the existing 'time' dimension
                block.to_zarr(out_store, append_dim='time', mode='a-', group=var)
    print('wrote data to disk')
    
       

CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 11.2 µs


In [9]:
%%time

# RESTOM: computed as FSNT - FLNT, then processed in 50-year blocks and written
# as one compressed NetCDF file per block.
# `var` can only equal 'RESTOM' for comp == 'atm' with freq == 0 (daily names get a
# '_d' suffix) and its vert_lev flag is False, so only the monthly
# single-level atmosphere path is needed here.
if var == 'RESTOM':
    filepath = path_to_arch+cesm2piC+'/'+comp+'/proc/tseries/'
    addon = 'Kayetal22/' if lessmelt else time_path[comp][freq]+'/'
    filepath = filepath+addon
    file_stem = cesm2piC+'.'+mod_com[comp]+h_ext[comp][freq]

    add_cyclic = False  # forwarded unchanged to FixLongitude

    for startyr in startyrs:
        # 51 zero-padded year labels -> 50 one-year slices per block
        yr_range = np.array([str(i).zfill(4) for i in np.arange(startyr,startyr+51)])
        endyr = startyr+49

        # Date tag of the output file, indexed by freq
        yr_extn = {'out': ["."+str(startyr).zfill(4)+"01-"+str(endyr).zfill(4)+"12.", ".05010101-05741231."]}

        # Date tags of the input file(s): one per century touched by the block.
        # Century 0 is stored as 0001-0099 and century 19 as 1900-2000.
        in_spans = []
        for century in sorted({startyr//100, endyr//100}):
            if century == 0:
                first, last = 1, 99
            elif century == 19:
                first, last = 1900, 2000
            else:
                first, last = century*100, century*100+99
            in_spans.append("."+str(first).zfill(4)+"01-"+str(last).zfill(4)+"12.")

        fsnt_files = [filepath+file_stem+'FSNT'+span+'nc' for span in in_spans]
        flnt_files = [filepath+file_stem+'FLNT'+span+'nc' for span in in_spans]
        for path in fsnt_files:
            print(path)

        # Open datasets lazily
        if len(fsnt_files) == 1:
            ds_fsnt = dask.delayed(xr.open_dataset)(fsnt_files[0],chunks=chunks[comp])
            ds_flnt = dask.delayed(xr.open_dataset)(flnt_files[0],chunks=chunks[comp])
        else:
            ds_fsnt = dask.delayed(xr.open_mfdataset)(fsnt_files,chunks=chunks[comp])
            ds_flnt = dask.delayed(xr.open_mfdataset)(flnt_files,chunks=chunks[comp])

        dsv = (ds_fsnt['FSNT']-ds_flnt['FLNT']).rename(var)

        processed_list = []
        for i in range(0,len(yr_range)-1):
            # Separate names for the slice bounds keep the block-level
            # startyr/endyr integers intact.
            slice_start = yr_range[i]+'-02-01'
            slice_end = yr_range[i+1]+'-01-17'
            ann_slice = dsv.sel(time=slice(slice_start,slice_end))
            print('sliced '+slice_start+' to '+slice_end)

            fixedtime_data = dask.delayed(FixTime)(ann_slice)
            print('   fixed time')

            fixedgrid_data = dask.delayed(FixLongitude)(fixedtime_data, add_cyclic)
            processed_list.append(fixedgrid_data)
            print('   fixed longitude')

        processed_comp = dask.compute(*processed_list)
        print('computed list')

        processed_out = xr.concat(processed_comp,dim='time').chunk({'time':111})
        print('concatenated data')

        processed_out.to_netcdf(path_to_outdata+cesm2piC+h_ext[comp][freq]+var+yr_extn['out'][freq]+'nc',
                                    format='NETCDF4',encoding={var: {"zlib": True, "complevel": 1}})
        print('wrote data to disk')

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 6.68 µs


In [10]:
# Shut down the dask client created during setup now that processing is finished
client.shutdown()