# Processing Wind nudging data

### Set up
#### Packages

In [None]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
from cartopy.util import add_cyclic_point
from Processing_functions import FixLongitude, FixTime, CalcStatforDim, CalcStatbyGrpDim, Ensemble, FixGrid
import pop_tools

#### Filepaths & name variables

In [2]:
## Ensemble member numbers (1 through 10) and a label for this test type
tst_nums = np.arange(1, 11)
tst_type = "nudge_ensemble"

## Case-name prefixes for the two experiments
control = "b.e22.B1850.f09_g17.control_test_nudge."
rfn263K = "b.e22.B1850.f09_g17.cri263K_test_nudge."

## Time averaging type
# 0: Monthly, 1: Yearly, 2: Seasonal, 3: All data
time_avg = 3

## Ensemble mean ('Mean') or all members (any other value)
ens_type = "Mean"

## Filtering switch, plus the tag that goes into output file names
filter = False
if filter:
    filter_str = "filtered"
else:
    filter_str = "non_filtered"

## Offset (selects the April-to-March window instead of the calendar year)
offset = False

## Filepaths
path_to_arch = "/glade/derecho/scratch/glydia/archive/"
path_to_data = "/atm/hist/"
filename_ext = ".cam.h1"
# Alternative settings kept for reference:
# path_to_data = "/ice/hist/"
# filename_ext = ".cice.h"
path_to_outdata = "/glade/work/glydia/Arctic_CRI_processed_data/processed_wind_nudging_coupled_ensemble_data/"

## Variables available for processing; the first entry is the one processed
var_list = np.array([
    "FLDS", "FLUT", "T", "TS", "CLOUD", "TGCLDLWP", "LCC",
    "U", "V", "Target_U", "Target_V", "Target_T",
])
var = var_list[0]

In [3]:
## Chunk sizes for the lat, lon and lev dimensions
la_chunk, lo_chunk, le_chunk = 64, 96, 4

In [4]:
%%time
## Translate the time-averaging choice into the label used in output file
## names (time_str), the groupby key (grp_str) and the time chunk size (tm_chunk)
# Monthly
if time_avg == 0:
    time_str = 'Month'
    grp_str = "time.month"
    tm_chunk = 30
    concat_str = 'month'

# Seasonal
elif time_avg == 2:
    time_str = 'Season'
    grp_str = "time.season"
    tm_chunk = 30
    concat_str = 'season'

# All-data average
elif time_avg == 3:
    time_str = 'All_data'
    tm_chunk = -1
    grp_str = ''

# Any other value (including 1, Yearly) has no settings defined here; fail now
# with a clear message rather than with a NameError in a later cell.
else:
    raise ValueError(
        f"Unsupported time_avg={time_avg!r}; "
        "expected 0 (Monthly), 2 (Seasonal) or 3 (All data)"
    )

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.11 µs


In [5]:
%%time

## Index labelling each ensemble member by its test number
ens_index = pd.Index(data=tst_nums, name="ensemble_member")

CPU times: user 0 ns, sys: 121 µs, total: 121 µs
Wall time: 125 µs


In [6]:
def LCC(ds):
    """Derive a liquid-containing-cloud (LCC) flag from ``TGCLDLWP``.

    Parameters
    ----------
    ds : dataset providing a ``'TGCLDLWP'`` variable.

    Returns
    -------
    Array named ``'LCC'`` that is 1 where ``TGCLDLWP >= 0.005`` and 0
    elsewhere, with ``units`` and ``long_name`` attributes set. The array is
    returned without being computed here.
    """
    ds_lwp = ds['TGCLDLWP']

    da = xr.where(ds_lwp >= 0.005, 1, 0).rename('LCC')

    # The metadata belongs on the array itself; ``da.LCC`` is not a valid
    # attribute of the array and raised AttributeError.
    da.attrs['units'] = 'frequency'
    da.attrs['long_name'] = 'Liquid-containing cloud frequency'

    # No bare ``da.compute()``: it returns a new object, so calling it without
    # keeping the result only repeats work.
    return da

In [7]:
def LWP(ds):
    """Return ``TGCLDLWP`` scaled by 1000 and labelled with units ``g/m2``.

    Parameters
    ----------
    ds : dataset providing a ``'TGCLDLWP'`` variable.

    Returns
    -------
    New array holding ``ds['TGCLDLWP'] * 1000`` with the source attributes
    copied over and ``units`` set to ``'g/m2'``. ``ds`` itself is left
    untouched.
    """
    src = ds['TGCLDLWP']

    # Multiply out of place: an in-place ``*=`` would also rescale the
    # variable stored in ``ds``, which filter_func thresholds afterwards.
    # NOTE(review): those thresholds look like kg/m2 values -- confirm.
    da = src * 1000

    # Carry the source metadata over explicitly, then fix the units. The
    # attributes live on the array itself; ``da.TGCLDLWP`` raised
    # AttributeError.
    da.attrs = dict(src.attrs)
    da.attrs['units'] = 'g/m2'

    # No bare ``da.compute()``: its result was discarded, so it only cost time.
    return da

In [8]:
def filter_func(ds, da):
    """Mask ``da`` wherever the surface-temperature / liquid-water criteria fail.

    Parameters
    ----------
    ds : dataset providing ``TS`` and ``TGCLDLWP``.
    da : array to be masked.

    Returns
    -------
    ``da`` with values kept only where ``TS <= 273`` and
    ``0.001 <= TGCLDLWP <= 0.09``; every other position is replaced by the
    default fill of ``where`` (missing values). Nothing is computed here.
    """
    ds_ts = ds.TS
    ds_lwp = ds.TGCLDLWP

    # One combined mask replaces three successive ``where`` calls.
    # NOTE(review): thresholds presumably in K and kg/m2 -- confirm.
    keep = (ds_ts <= 273) & (ds_lwp <= 0.09) & (ds_lwp >= 0.001)

    # The former bare ``da.compute()`` is dropped: its result was discarded.
    return da.where(keep)

In [None]:
def _load_member(optics, member):
    """Open one ensemble member and return the selected variable for one year."""
    tst_name = optics+str(member).zfill(3)
    path_i = path_to_arch+tst_name+path_to_data+tst_name+filename_ext+"*.nc"
    ds = xr.open_mfdataset(
        paths=path_i,
        chunks={'time': tm_chunk, 'lat': la_chunk, 'lon': lo_chunk, 'lev': le_chunk},
    )

    if var == 'LCC':
        # Derived liquid-containing-cloud flag
        dsv = LCC(ds)
    elif var == 'TGCLDLWP':
        # Liquid water path with converted units
        dsv = LWP(ds)
    else:
        dsv = ds[var]

    if filter:
        dsv = filter_func(ds, dsv)

    # Load the data exactly once. The earlier bare ``dsv.compute()`` calls
    # discarded their result, so the first of them loaded the variable and
    # then threw it away.
    dsv = dsv.compute()

    if var in ('aice', 'hi'):
        dsv = FixTime(dsv)
        dsv = FixGrid(dsv, 'gx1v7')
    else:
        dsv = FixLongitude(dsv)

    # Keep a single year: April-March when offset is set, else January-December
    if offset:
        period = slice('0001-04-01', '0002-03-31')
    else:
        period = slice('0001-01-01', '0001-12-31')

    return dsv.loc[dict(time=period)]


def LoadModEns(optics):
    """Load every ensemble member of one experiment and reduce it to statistics.

    Reads the notebook-level settings ``tst_nums``, ``var``, ``filter``,
    ``offset``, ``ens_type``, ``grp_str``, ``ens_index``, the chunk sizes and
    the path pieces.

    Parameters
    ----------
    optics : str
        Case-name prefix; the zero-padded member number is appended to it.

    Returns
    -------
    (ds_avg, ds_std, n_ds) as produced by ``CalcStatbyGrpDim`` when
    ``ens_type == 'Mean'`` and by ``CalcStatforDim`` otherwise.
    """
    ## Load data
    # Load ensemble members
    ds_list = []

    for i in tst_nums:
        print('ensemble member: '+str(i))
        member_data = _load_member(optics, i)
        print('   made changes to ds')
        ds_list.append(member_data)

    # Stack the members along the 'ensemble_member' index
    dsv = xr.concat(ds_list, ens_index)

    # If doing ensemble mean
    if ens_type == 'Mean':
        ds_avg, ds_std, n_ds = CalcStatbyGrpDim(dsv, 'ensemble_member', grp_str, 'ensemble_member', 'time', 'ensemble_member')
    # If doing all ensemble members
    else:
        ds_avg, ds_std, n_ds = CalcStatforDim(dsv, grp_str, 'time')

    return ds_avg, ds_std, n_ds

In [10]:
def SaveEns(data_avg, data_std, data_n, optics):
    """Write the average, standard deviation and count to NetCDF4 files.

    One file is written per statistic to
    ``path_to_outdata + optics + var + '.<stat>.' + ens_type + '.' + time_str
    + '.' + filter_str + '.nc'`` with ``<stat>`` being ``avg``, ``std`` or ``n``.

    Parameters
    ----------
    data_avg, data_std, data_n : objects providing ``compute()`` and
        ``to_netcdf()``; the three results returned by ``LoadModEns``.
    optics : str
        Case-name prefix used at the start of each output file name.
    """
    for stat, data in (('avg', data_avg), ('std', data_std), ('n', data_n)):
        # Keep the computed object and write that one. A bare
        # ``data.compute()`` discards its result, which would leave
        # ``to_netcdf`` to evaluate the data a second time.
        computed = data.compute()
        print('computed '+stat)

        out_path = f"{path_to_outdata}{optics}{var}.{stat}.{ens_type}.{time_str}.{filter_str}.nc"
        computed.to_netcdf(out_path, format='NETCDF4')

### Load & modify data
#### Control data

In [11]:
%%time

# Load all control ensemble members and reduce them to (avg, std, n)
ds_avg_control, ds_std_control, ds_n_control = LoadModEns(control)

print('processed all ensemble members')

ensemble member: 1
   made changes to ds
ensemble member: 2
   made changes to ds
ensemble member: 3
   made changes to ds
ensemble member: 4
   made changes to ds
ensemble member: 5
   made changes to ds
ensemble member: 6
   made changes to ds
ensemble member: 7
   made changes to ds
ensemble member: 8
   made changes to ds
ensemble member: 9
   made changes to ds
ensemble member: 10
   made changes to ds
processed all ensemble members
CPU times: user 11.3 s, sys: 14 s, total: 25.3 s
Wall time: 1min 39s


In [12]:
%%time

# Write the three control statistics to NetCDF files under path_to_outdata
SaveEns(ds_avg_control, ds_std_control, ds_n_control, control)

computed avg
computed std
computed n
CPU times: user 12 ms, sys: 0 ns, total: 12 ms
Wall time: 41.8 ms


#### CRI263K data

In [13]:
%%time

# Load all cri263K ensemble members and reduce them to (avg, std, n)
ds_avg_cri263K, ds_std_cri263K, ds_n_cri263K = LoadModEns(rfn263K)

print('processed all ensemble members')

ensemble member: 1
   made changes to ds
ensemble member: 2
   made changes to ds
ensemble member: 3
   made changes to ds
ensemble member: 4
   made changes to ds
ensemble member: 5
   made changes to ds
ensemble member: 6
   made changes to ds
ensemble member: 7
   made changes to ds
ensemble member: 8
   made changes to ds
ensemble member: 9
   made changes to ds
ensemble member: 10
   made changes to ds
processed all ensemble members
CPU times: user 10.2 s, sys: 15.6 s, total: 25.8 s
Wall time: 1min 41s


In [14]:
%%time

# Write the three cri263K statistics to NetCDF files under path_to_outdata
SaveEns(ds_avg_cri263K, ds_std_cri263K, ds_n_cri263K, rfn263K)

computed avg
computed std
computed n
CPU times: user 10.9 ms, sys: 0 ns, total: 10.9 ms
Wall time: 121 ms
