# Processing PiC_UVnudge single run data

### Set up
#### Packages

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
from Processing_functions import FixLongitude, FixTime, FixGrid, InterPlevels
xr.set_options(display_expand_data=False);
xr.set_options(keep_attrs=True);
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.diagnostics import ProgressBar
import cftime

In [2]:
## Run identification and year span (begyr..endyr, both included in the output name)
#tst_nums = np.arange(1,4)
begyr, endyr = 1161, 1210
run_name = 'moremelt_rsnw0'
piC_UVnudge_name = f'b.e21.B1850cmip6.f09_g17.{run_name}'

## Root of the model archive that is read from
path_to_arch = "/glade/derecho/scratch/glydia/archive/"
# path_to_arch = "/glade/campaign/univ/ucub0155/glydia/"

## Variable selection: model component plus position in that component's list
comp = 'atm'
var_ind = 11
var_list = dict(
    atm=['TS', 'FLDS', 'CLOUD', 'FLNS', 'FSNS', 'FLNT', 'FSNT', 'PSL',
         'U', 'V', 'T', 'TREFHT',
         'Target_U', 'Target_V', 'Target_T', 'RESTOM', 'Z3'],
    ice=['aice', 'hi', 'hs', 'fswdn', 'fswup'],
    ocn=['MOC'],
)
var = var_list[comp][var_ind]

## File-name pieces, keyed by component
h_ext = dict(atm='.h0.', ice='.h.', ocn='.h.')
mod_com = dict(atm='cam', ice='cice', ocn='pop')
time_path = 'month_1'
yr_extn_out = f".{begyr:04d}01-{endyr:04d}12."
yr_extn_in = ".*."

## Flags parallel to var_list: True for variables handled as 3D data
## (those are interpolated to pressure levels when processed)
_has_levels = {'CLOUD', 'U', 'V', 'T', 'Target_U', 'Target_V', 'Target_T', 'Z3'}
vert_lev = {
    component: [name in _has_levels for name in names]
    for component, names in var_list.items()
}

## Directory the processed files are written to
path_to_outdata = f'/glade/work/glydia/Arctic_controls_processed_data/processed_{run_name}_data/'

#### Filepaths & name variables

In [3]:
# Start a PBS-backed Dask cluster and attach a client to it.
# Job resources are collected first so they are easy to scan and tweak.
pbs_options = dict(
    cores=1,
    memory='25GiB',
    queue='casper',
    walltime='02:00:00',
    account='UCUB0155',
    name=f'piControl_{var}',  # job name carries the variable being processed
)
cluster = PBSCluster(**pbs_options)
cluster.scale(4*9)
client = Client(cluster)

In [4]:
# Show the client summary (dashboard link plus worker / thread / memory totals).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/42009/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/42009/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.98:38313,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/glydia/Arctic_breakdown/proxy/42009/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
## Chunk sizes handed to xr.open_mfdataset, keyed by model component;
## the inner keys are the dimension names used on that component's grid.
time_ch = 600
chunks = dict(
    atm=dict(time=time_ch, lat=96, lon=144, lev=-1),
    ice=dict(time=time_ch, nj=192, ni=160),
    ocn=dict(time=time_ch, nlat=64, nlon=96, z_t=5),
)

### Load & modify data
#### Control data

In [6]:
%%time

# Build the lazy (dask.delayed) per-year processing graph for `var`.
# Nothing is read or computed here; the work runs when the list is computed.

# Zero-padded year labels begyr .. endyr+1. Consecutive pairs delimit one
# slice each, so there is one slice per year in begyr .. endyr.
yr_range = np.array([str(i).zfill(4) for i in np.arange(begyr, endyr+2)])

tst_name = piC_UVnudge_name+'.001'

## Load data
# Open dataset lazily; the glob in `filename` matches every time-series file
# for this variable.
filepath = path_to_arch+tst_name+'/'+comp+'/proc/tseries/'+time_path+'/'
filename = tst_name+h_ext[comp]+var+yr_extn_in+'nc'
print(filepath+filename)
ds = dask.delayed(xr.open_mfdataset)(filepath+filename,chunks=chunks[comp])

add_cyclic = False # DO NOT ADD CYCLIC UNTIL AFTER PROCESSING PLOT DATA

# With add_cyclic the single variable is passed on, otherwise the whole dataset.
dsv = ds[var] if add_cyclic else ds

processed_list = []
# NOTE: the loop variables deliberately do NOT reuse the names `begyr`/`endyr`.
# Rebinding the integer config value `endyr` to a string here would make this
# cell fail on re-run (`endyr+2`) and corrupt any later use of `endyr`.
for slice_start_yr, slice_end_yr in zip(yr_range[:-1], yr_range[1:]):
    # If monthly data: take Feb 1 of one year through Jan 17 of the next.
    # NOTE(review): presumably the monthly time stamps sit at the end of the
    # averaging period and FixTime shifts them back - confirm in FixTime.
    ann_slice = dsv.sel(time=slice(slice_start_yr+'-02-01',slice_end_yr+'-01-17'))
    print('sliced '+slice_start_yr+'-02-01 to '+slice_end_yr+'-01-17')

    fixedtime_data = dask.delayed(FixTime)(ann_slice)
    print('   fixed time')

    if comp == 'ice':
        # Sea-ice output: regrid helper is told the grid name 'gx1v7'.
        fixedgrid_data = dask.delayed(FixGrid)(fixedtime_data,'gx1v7')
        processed_list.append(fixedgrid_data)
        print('   fixed CICE grid')
    elif comp == 'ocn':
        # Ocean output only gets the time fix.
        processed_list.append(fixedtime_data)
    else:
        fixedgrid_data = dask.delayed(FixLongitude)(fixedtime_data, add_cyclic)
        # If 3D data, interpolate to pressure levels
        if vert_lev[comp][var_ind]:
            addplev_data = dask.delayed(InterPlevels)(fixedgrid_data, var)
            processed_list.append(addplev_data)
        else:
            processed_list.append(fixedgrid_data)
        print('   fixed longitude')

# Execute the delayed graph once; both output formats need the computed pieces.
# The result is a tuple with one entry per processed year.
processed_comp = dask.compute(*processed_list)
print('computed list')

# Shared output path stem; only the final extension differs between formats.
out_base = path_to_outdata+piC_UVnudge_name+h_ext[comp]+var+yr_extn_out

if not vert_lev[comp][var_ind]:
    # 2D variable: join all years along time and write one compressed netCDF.
    processed_out = xr.concat(processed_comp,dim='time').chunk({'time':111})
    print('concatenated data')

    processed_out.to_netcdf(out_base+'nc',
                            format='NETCDF4',encoding={var: {"zlib": True, "complevel": 1}})
    print('wrote data to disk')

else:
    # 3D variable: create the zarr store from the first year, then append the
    # remaining years one at a time along the time dimension.
    zarr_store = out_base+'zarr'
    processed_comp[0].to_zarr(zarr_store, group=var)
    print('saved initial zarr store')
    # processed_comp has len(yr_range)-1 entries, so iterate over its own
    # length; entry i holds the slice that started in yr_range[i].
    for i in range(1, len(processed_comp)):
        yr = str(yr_range[i])
        print('   saving year '+yr)

        # Append to the SAME store as the initial write (yr_extn_out path).
        # NOTE(review): only appended years are restricted to the calendar
        # year here; the first year is written unrestricted - confirm intent.
        processed_comp[i].sel(time=slice(yr+'-01-01',yr+'-12-31')).to_zarr(
            zarr_store, append_dim='time', mode='a-', group=var)
    print('wrote data to disk')

/glade/derecho/scratch/glydia/archive/b.e21.B1850cmip6.f09_g17.moremelt_rsnw0.001/atm/proc/tseries/month_1/b.e21.B1850cmip6.f09_g17.moremelt_rsnw0.001.h0.TREFHT.*.nc
sliced 1161-02-01 to 1162-01-17
   fixed time
   fixed longitude
sliced 1162-02-01 to 1163-01-17
   fixed time
   fixed longitude
sliced 1163-02-01 to 1164-01-17
   fixed time
   fixed longitude
sliced 1164-02-01 to 1165-01-17
   fixed time
   fixed longitude
sliced 1165-02-01 to 1166-01-17
   fixed time
   fixed longitude
sliced 1166-02-01 to 1167-01-17
   fixed time
   fixed longitude
sliced 1167-02-01 to 1168-01-17
   fixed time
   fixed longitude
sliced 1168-02-01 to 1169-01-17
   fixed time
   fixed longitude
sliced 1169-02-01 to 1170-01-17
   fixed time
   fixed longitude
sliced 1170-02-01 to 1171-01-17
   fixed time
   fixed longitude
sliced 1171-02-01 to 1172-01-17
   fixed time
   fixed longitude
sliced 1172-02-01 to 1173-01-17
   fixed time
   fixed longitude
sliced 1173-02-01 to 1174-01-17
   fixed time
   fixed