# Processing Wind nudging data

### Set up
#### Packages

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
from cartopy.util import add_cyclic_point
from Processing_functions import FixLongitude, FixTime, CalcStatforDim, CalcStatbyGrpDim, Ensemble

#### Filepaths & name variables

In [2]:
## Ensemble member numbers to process and the label for this experiment type
tst_nums = np.arange(1, 4)
tst_type = 'nudge_ensemble'

## Case-name prefixes for the two experiments
control = 'f.e22.F1850.f09_f09_mg17.control_test_nudge_long.'
rfn263K = 'f.e22.F1850.f09_f09_mg17.cri263K_test_nudge_long.'

## Time averaging type
# 0: Monthly, 1: Yearly, 2: Seasonal, 3: All data
time_avg = 3

## Ensemble handling: 'Mean' selects the ensemble mean, any other value keeps all members
ens_type = 'Mean'

## Filtering switch and the matching tag that goes into output file names
filter = False
if filter:
    filter_str = 'filtered'
else:
    filter_str = 'non_filtered'

## Filepaths
path_to_arch = "/glade/derecho/scratch/glydia/archive/"
path_to_data = "/atm/hist/"
filename_ext = ".cam.h0"
path_to_outdata = '/glade/work/glydia/Arctic_CRI_processed_data/processed_wind_nudging_ensemble_data/'

## Variables available for processing; the first entry is the one selected
var_list = np.array([
    'FLDS', 'T', 'TS', 'CLOUD', 'OPTS_MAT',
    'U', 'V', 'Target_U', 'Target_V', 'Target_T',
])
var = var_list[0]

In [3]:
## Chunk sizes applied to the lat, lon and lev dimensions when opening the data
la_chunk, lo_chunk, le_chunk = 64, 96, 4

In [4]:
%%time
## Select the time-averaging mode and assign the matching settings
# Each entry maps a time_avg code to (time_str, grp_str, tm_chunk, concat_str):
#   time_str   - tag that goes into the output file names
#   grp_str    - grouping key passed to the statistics helpers ('' = no grouping)
#   tm_chunk   - chunk size along 'time' when opening the data (-1 = one chunk)
#   concat_str - 'month' / 'year' / 'season' label (None for modes without one)
_time_avg_settings = {
    0: ('Month', 'time.month', 1, 'month'),        # Monthly
    1: ('Year', 'time.year', 12, 'year'),          # Yearly
    2: ('Season', 'time.season', 3, 'season'),     # Seasonal
    3: ('All_data', '', -1, None),                 # All-data average
    4: ('Timeseries', '', 1, None),                # All-data timeseries
}

# Fail here with a clear message instead of leaving time_str / tm_chunk
# undefined (or stale from an earlier run) and hitting a NameError later.
if time_avg not in _time_avg_settings:
    raise ValueError(
        'Unsupported time_avg=' + repr(time_avg)
        + '; expected one of ' + str(sorted(_time_avg_settings))
    )

time_str, grp_str, tm_chunk, concat_str = _time_avg_settings[time_avg]

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.82 µs


In [5]:
%%time

## Index with one entry per ensemble member number, named for the new ensemble axis
ens_index = pd.Index(data=tst_nums, name='ensemble_member')

CPU times: user 165 µs, sys: 0 ns, total: 165 µs
Wall time: 167 µs


In [6]:
def filter_func(ds, da):
    """Mask ``da`` wherever the dataset's OPTS_MAT field is below 0.5.

    Parameters:
        ds: opened dataset; only its ``OPTS_MAT`` attribute is read.
        da: array to mask; must provide a ``.where`` method.

    Returns:
        The result of ``da.where(ds.OPTS_MAT >= 0.5)``. ``da`` itself is
        not modified.
    """
    # No .compute() here: compute() returns a new object, so calling it and
    # discarding the result only repeated the work without changing what is
    # returned. The caller decides when to evaluate.
    return da.where(ds.OPTS_MAT >= 0.5)

In [7]:
def PS(da):
    """Divide ``da`` by 100 and label the result with units of hPa.

    Parameters:
        da: array-like with an ``attrs`` mapping.

    Returns:
        A new array holding ``da / 100`` whose attributes are those of
        ``da`` with ``'units'`` set to ``'hPa'``. The input is left
        untouched.
    """
    # Out-of-place division: the original `da /= 100` rescaled the caller's
    # array as a side effect of the call.
    converted = da / 100

    # Arithmetic may drop attributes, so carry them over explicitly and
    # build a fresh dict so the input's attrs are never modified.
    converted.attrs = {**da.attrs, 'units': 'hPa'}

    # No discarded .compute(): it returned a new object that was thrown away.
    return converted

In [8]:
def LoadModEns(optics):
    """Load one variable for every ensemble member and reduce it to statistics.

    Reads the notebook-level settings ``tst_nums``, ``var``, ``filter``,
    ``ens_type``, ``time_avg``, ``grp_str``, ``ens_index``, the chunk sizes
    and the file-path pieces.

    Parameters:
        optics: case-name prefix; the zero-padded member number is appended
            to it to build each member's archive directory and file names.

    Returns:
        ``(ds_avg, ds_std, n_ds)`` as returned by the statistics helpers,
        except when ``ens_type`` is not ``'Mean'`` and ``time_avg == 4``:
        then the concatenated member data is returned on its own.
        NOTE(review): with ``ens_type == 'Mean'`` and ``time_avg == 4`` a
        3-tuple is returned, so callers must not assume a single array
        whenever ``time_avg == 4``.
    """
    ## Load data
    # Load ensemble members
    ds_list = []

    for i in tst_nums:
        print('ensemble member: '+str(i))

        # Open dataset; the context manager closes the files as soon as the
        # values needed from them are in memory
        tst_name = optics+str(i).zfill(3)
        path_i = path_to_arch+tst_name+path_to_data+tst_name+filename_ext+"*.nc"
        with xr.open_mfdataset(paths=path_i,chunks={'time':tm_chunk,'lat':la_chunk,'lon':lo_chunk,'lev':le_chunk}) as ds:
            dsv = ds[var]

            if filter:
                dsv = filter_func(ds, dsv)

            if var == 'PS':
                dsv = PS(dsv)

            # Single evaluation per member. The earlier bare `dsv.compute()`
            # discarded its result, so the data was read twice.
            dsv = dsv.compute()

        dsv = FixLongitude(dsv)

        dsv = FixTime(dsv)

        print('   made changes to ds')

        ds_list.append(dsv)

    # Stack the members along the new ensemble axis; every member is already
    # in memory, so no further compute is needed
    dsv = xr.concat(ds_list, ens_index)

    # Note to self: likely need three layers of grouping for this one - might need to write new function

    # If doing ensemble mean
    if ens_type == 'Mean':
        # If doing timeseries
        if time_avg == 4:
            ds_avg, ds_std, n_ds = CalcStatforDim(dsv, '', 'ensemble_member')

        elif time_avg < 2:
            ds_avg, ds_std, n_ds = CalcStatbyGrpDim(dsv, 'ensemble_member', grp_str, 'ensemble_member','time','ensemble_member')

        else:
            ds_avg, ds_std, n_ds = CalcStatbyGrpDim(dsv, 'time.year', grp_str, 'year','time',['year','ensemble_member'])

        return ds_avg, ds_std, n_ds

    # If doing all ensemble members
    else:
        # If doing timeseries: hand back the member data unchanged
        if time_avg == 4:
            return dsv

        elif time_avg < 2:
            ds_avg, ds_std, n_ds = CalcStatforDim(dsv, grp_str, 'time')

        else:
            ds_avg, ds_std, n_ds = CalcStatbyGrpDim(dsv, 'time.year', grp_str, 'year', 'time','year')

        return ds_avg, ds_std, n_ds

In [9]:
def SaveEns(optics, data_avg, data_std=None, data_n=None):
    """Write the processed statistics to NetCDF4 files.

    The average is always written. Unless a timeseries is being processed
    (``time_avg < 4``), the standard deviation and the count are written
    too. File names are built from the notebook-level ``path_to_outdata``,
    ``var``, ``ens_type``, ``time_str`` and ``filter_str``.

    Parameters:
        optics: case-name prefix used at the start of each file name.
        data_avg: average to save.
        data_std: standard deviation; required when ``time_avg < 4``.
        data_n: count; required when ``time_avg < 4``.

    Raises:
        ValueError: if ``time_avg < 4`` and ``data_std`` or ``data_n`` is
            missing. Raised before anything is written.
    """
    def _out_path(stat):
        # One place for the naming scheme shared by the avg/std/n files
        return path_to_outdata+optics+var+'.'+stat+'.'+ens_type+'.'+time_str+'.'+filter_str+'.nc'

    needs_spread = time_avg < 4

    # Check up front rather than failing on a None attribute lookup after
    # the average file has already been written
    if needs_spread and (data_std is None or data_n is None):
        raise ValueError('data_std and data_n are required when time_avg < 4')

    # to_netcdf evaluates whatever is still lazy, so the separate
    # .compute() calls (whose results were discarded) are not needed
    data_avg.to_netcdf(_out_path('avg'), format='NETCDF4')
    print('computed avg')

    # If not doing timeseries
    if needs_spread:
        data_std.to_netcdf(_out_path('std'), format='NETCDF4')
        print('computed std')

        data_n.to_netcdf(_out_path('n'), format='NETCDF4')
        print('computed n')

### Load & modify data
#### Control data

In [10]:
%%time
# LoadModEns returns a bare array only for the all-members timeseries; the
# ensemble-mean timeseries (ens_type == 'Mean', time_avg == 4) still returns
# (avg, std, n). Unpack on the actual result so a tuple is never stored
# under the avg name and later passed to SaveEns as if it were an array.
control_result = LoadModEns(control)
if isinstance(control_result, tuple):
    ds_avg_control_list, ds_std_control_list, ds_n_control_list = control_result
else:
    ds_avg_control_list = control_result

print('processed all ensemble members')

ensemble member: 1
   made changes to ds
ensemble member: 2
   made changes to ds
ensemble member: 3
   made changes to ds
processed all ensemble members
CPU times: user 1min 29s, sys: 26.5 s, total: 1min 55s
Wall time: 4min 55s


In [11]:
%%time
# Timeseries mode saves the average only; every other mode also saves std and n
control_outputs = (
    (ds_avg_control_list,)
    if time_avg == 4
    else (ds_avg_control_list, ds_std_control_list, ds_n_control_list)
)
SaveEns(control, *control_outputs)

computed avg
computed std
computed n
CPU times: user 11.2 ms, sys: 2.11 ms, total: 13.3 ms
Wall time: 88 ms


#### CRI263K data

In [12]:
%%time
# LoadModEns returns a bare array only for the all-members timeseries; the
# ensemble-mean timeseries (ens_type == 'Mean', time_avg == 4) still returns
# (avg, std, n). Unpack on the actual result so a tuple is never stored
# under the avg name and later passed to SaveEns as if it were an array.
cri263K_result = LoadModEns(rfn263K)
if isinstance(cri263K_result, tuple):
    ds_avg_cri263K_list, ds_std_cri263K_list, ds_n_cri263K_list = cri263K_result
else:
    ds_avg_cri263K_list = cri263K_result

print('processed all ensemble members')

ensemble member: 1
   made changes to ds
ensemble member: 2
   made changes to ds
ensemble member: 3
   made changes to ds
processed all ensemble members
CPU times: user 1min 29s, sys: 23.4 s, total: 1min 52s
Wall time: 4min 31s


In [13]:
%%time
# Timeseries mode saves the average only; every other mode also saves std and n
cri263K_outputs = (
    (ds_avg_cri263K_list,)
    if time_avg == 4
    else (ds_avg_cri263K_list, ds_std_cri263K_list, ds_n_cri263K_list)
)
SaveEns(rfn263K, *cri263K_outputs)

computed avg
computed std
computed n
CPU times: user 11.9 ms, sys: 0 ns, total: 11.9 ms
Wall time: 56.3 ms
