# Processing data from 40-year F1850 run

### Set up
#### Packages

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
from cartopy.util import add_cyclic_point
from Processing_functions import FixLongitude, FixTime, CalcStatforDim, CalcStatbyGrpDim

#### Filepaths & variables

In [28]:
## Run-type label (not referenced by the other cells visible in this notebook)
tst_type = 'extended'

## Case-name prefixes; the zero-padded three-digit run number is appended
## to these when the history files are located
control = 'f.e22.F1850.f09_f09_mg17.control_test.'
rfn240K = 'f.e22.F1850.f09_f09_mg17.cri240K_test.'
rfn263K = 'f.e22.F1850.f09_f09_mg17.cri263K_test.'
rfn273K = 'f.e22.F1850.f09_f09_mg17.cri273K_test.'

## Time averaging mode
# 0: Monthly, 1: Yearly, 2: Seasonal, 3: All data, 4: Timeseries
time_avg = 3

## Filtering switch and the tag it contributes to output file names
filter = False
if filter:
    filter_str = 'filtered'
else:
    filter_str = 'non_filtered'

## Filepaths
# Archive roots (scratch and campaign storage)
path_to_arch = "/glade/derecho/scratch/glydia/archive/"
path_to_camp = "/glade/campaign/univ/ucub0137/archive/"
# Sub-directory and file-name infix of the history files
path_to_data = "/atm/hist/"
filename_ext = ".cam.h0."
# Destination directory for the processed NetCDF files
path_to_outdata = '/glade/work/glydia/Arctic_CRI_processed_data/processed_free_evolving_data/'

## Variables available for processing; the last entry is the one processed
var_list = np.array(['FLDS', 'T', 'TS', 'CLOUD', 'LCC', 'TGCLDLWP', 'FLUT'])
var = var_list[len(var_list) - 1]

In [3]:
## Chunk sizes used when opening the history files
# latitude, longitude and vertical-level chunk lengths, in that order
la_chunk, lo_chunk, le_chunk = 64, 96, 4

In [4]:
%%time
## Select the time-averaging mode and assign the matching settings
# Each entry maps a `time_avg` code to
#   (time_str, grp_str, tm_chunk, concat_str)
# time_str   : tag used in output file names
# grp_str    : grouping key handed to the statistics helpers ('' = no grouping)
# tm_chunk   : chunk length along the time dimension
#              (-1 presumably means a single chunk spanning all times - TODO confirm)
# concat_str : name of the grouped dimension (None when there is none)
_TIME_AVG_SETTINGS = {
    0: ('Month', 'time.month', 1, 'month'),      # Monthly
    1: ('Year', 'time.year', 12, 'year'),        # Yearly
    2: ('Season', 'time.season', 3, 'season'),   # Seasonal
    3: ('All_data', '', -1, None),               # All-data average
    4: ('Timeseries', '', 1, None),              # All-data timeseries
}

# Fail loudly on an unknown mode; otherwise values left over from an
# earlier run of this cell would silently be reused.
if time_avg not in _TIME_AVG_SETTINGS:
    raise ValueError(
        'time_avg must be one of ' + str(sorted(_TIME_AVG_SETTINGS))
        + ', got ' + repr(time_avg)
    )

# Every name is assigned for every mode so no stale value can survive.
time_str, grp_str, tm_chunk, concat_str = _TIME_AVG_SETTINGS[time_avg]

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.96 µs


In [5]:
def LCC(ds):
    """Build the liquid-containing-cloud (LCC) indicator from `TGCLDLWP`.

    Parameters
    ----------
    ds : dataset-like
        Must provide a 'TGCLDLWP' variable (presumably in kg/m2, given that
        the LWP helper multiplies it by 1000 to get g/m2 - TODO confirm).

    Returns
    -------
    Array named 'LCC' holding 1 where TGCLDLWP >= 0.005 and 0 elsewhere
    (missing values compare False and therefore give 0). The result is
    left lazy; the caller decides when to load it.
    """
    ds_lwp = ds['TGCLDLWP']

    # Boolean mask cast to integers: 1 = liquid-containing cloud present
    da = (ds_lwp >= 0.005).astype(int)
    da = da.rename('LCC')

    # Attributes live on the array itself; `da.LCC` does not exist on an
    # array that is merely *named* 'LCC' and would raise AttributeError.
    da.attrs['units'] = 'frequency'
    da.attrs['long_name'] = 'Liquid-containing cloud frequency'

    return da

In [6]:
def LWP(ds):
    """Return `TGCLDLWP` scaled by 1000 and labelled with units 'g/m2'.

    Parameters
    ----------
    ds : dataset-like
        Must provide a 'TGCLDLWP' variable.

    Returns
    -------
    A new array holding TGCLDLWP * 1000 with the source attributes carried
    over and 'units' set to 'g/m2'. `ds` itself is left untouched so that
    later reads of `ds.TGCLDLWP` (e.g. for threshold filtering) still see
    the original values. The result is left lazy.
    """
    source = ds['TGCLDLWP']

    # Out-of-place multiply: an in-place `*=` can write through to the
    # variable stored in `ds`.
    da = source * 1000

    # Attributes belong to the array itself (`da.TGCLDLWP` does not exist).
    da.attrs = {**source.attrs, 'units': 'g/m2'}

    return da

In [7]:
def filter_func(ds, da):
    """Mask `da` wherever the surface-temperature / liquid-water-path test fails.

    Parameters
    ----------
    ds : dataset-like
        Must provide `TS` and `TGCLDLWP`.
    da : array-like
        Data to mask.

    Returns
    -------
    `da` with values kept only where TS <= 273 and
    0.001 <= TGCLDLWP <= 0.09; everything else becomes missing.
    The result is left lazy; the caller decides when to load it.
    """
    ds_ts = ds.TS
    ds_lwp = ds.TGCLDLWP

    # One combined mask instead of three successive passes over the data
    keep = (ds_ts <= 273) & (ds_lwp <= 0.09) & (ds_lwp >= 0.001)

    # No `.compute()` here: its result was previously thrown away, which
    # evaluated the whole graph for nothing.
    return da.where(keep)

In [24]:
def LoadMod(optics, cri263):
    """Load the selected variable for one experiment and reduce it in time.

    Relies on the notebook-level settings `var`, `filter`, `time_avg`,
    `grp_str`, the chunk sizes and the path fragments.

    Parameters
    ----------
    optics : str
        Case-name prefix; the zero-padded three-digit run number is appended.
    cri263 : bool
        True  -> read only run 002, from `path_to_arch`.
        False -> read runs 002 and 003, from `path_to_camp`.

    Returns
    -------
    time_avg < 2        : (ds_avg, ds_std, n_ds) from `CalcStatforDim`
    time_avg == 4       : the concatenated time series itself
    otherwise (2 or 3)  : (ds_avg, ds_std, n_ds) from `CalcStatbyGrpDim`
    """
    ## Decide which runs to read and where they live
    if cri263:
        tst_nums = np.arange(2,3)
        path_to_dir = path_to_arch
    else:
        tst_nums = np.arange(2,4)
        path_to_dir = path_to_camp

    ## Load and preprocess each run
    ds_list = []
    for i in tst_nums:
        print('case: '+str(i))

        # Open dataset
        tst_name = optics+str(i).zfill(3)
        path_i = path_to_dir+tst_name+path_to_data+tst_name+filename_ext+"*.nc"
        print(path_i)
        ds = xr.open_mfdataset(paths=path_i,chunks={'time':tm_chunk,'lat':la_chunk,'lon':lo_chunk,'lev':le_chunk})

        if var == 'LCC':
            # Derive the liquid-containing-cloud indicator
            dsv = LCC(ds)
        elif var == 'TGCLDLWP':
            # Change units on LWP
            dsv = LWP(ds)
        else:
            dsv = ds[var]

        if filter:
            dsv = filter_func(ds, dsv)

        # Load into memory exactly once, after all lazy steps are queued.
        # `.compute()` returns a new object, so calling it without keeping
        # the result only repeats the file reads.
        dsv = dsv.compute()

        dsv = FixLongitude(dsv, True)

        dsv = FixTime(dsv)

        # Offset extended runs by 20 years
        # NOTE(review): 20*365 days equals 20 years only on a 365-day
        # calendar - confirm against what FixTime produces.
        if i == 3:
            dsv = dsv.assign_coords(dict(time=(dsv.time+timedelta(days=20*365))))

        print('   made changes to ds')

        ds_list.append(dsv)

    ## Join the runs along time
    dsv = xr.concat(ds_list, dim='time')

    print('   combined cases')

    # If needing to group data
    if time_avg < 2:
        ds_avg, ds_std, n_ds = CalcStatforDim(dsv, grp_str, 'time')

        return ds_avg, ds_std, n_ds

    # If doing timeseries
    elif time_avg == 4:
        return dsv

    # If doing all-data average or seasonal
    else:
        ds_avg, ds_std, n_ds = CalcStatbyGrpDim(dsv, 'time.year', grp_str, 'year', 'time', 'year')

        return ds_avg, ds_std, n_ds

In [9]:
def Save(data_avg, data_std, data_n, optics):
    """Write the processed statistics for one experiment to NetCDF files.

    Files are written to `path_to_outdata` and named
    `<optics><var>.<stat>.<time_str>.<filter_str>.nc`, with `<stat>` being
    'avg', 'std' or 'n'.

    Parameters
    ----------
    data_avg : object providing `.compute()` and `.to_netcdf()`
        Always written.
    data_std, data_n : same kind of object
        Written only when `time_avg < 4`; ignored in timeseries mode.
    optics : str
        Case-name prefix used at the start of each file name.
    """
    def _write(data, stat):
        # Keep the computed object so that what is written is exactly what
        # was just evaluated (the result of `.compute()` must be assigned).
        data = data.compute()
        print('computed '+stat)

        data.to_netcdf(path_to_outdata+optics+var+'.'+stat+'.'+time_str+'.'+filter_str+'.nc', format='NETCDF4')

    _write(data_avg, 'avg')

    # If not doing timeseries, also store the spread and the sample count
    if time_avg < 4:
        _write(data_std, 'std')
        _write(data_n, 'n')

### Load & modify data
#### Control data

In [29]:
%%time
# Control experiment: cri263=False selects runs 002 and 003 on campaign storage
ds_avg_control, ds_std_control, ds_n_control = LoadMod(optics=control, cri263=False)

print('processed all data')

case: 2
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.control_test.002/atm/hist/f.e22.F1850.f09_f09_mg17.control_test.002.cam.h0.*.nc
   made changes to ds
case: 3
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.control_test.003/atm/hist/f.e22.F1850.f09_f09_mg17.control_test.003.cam.h0.*.nc
   made changes to ds
   combined cases
processed all data
CPU times: user 33.9 s, sys: 1min 20s, total: 1min 54s
Wall time: 5min 29s


In [30]:
%%time

# Write the control statistics to disk
Save(
    data_avg=ds_avg_control,
    data_std=ds_std_control,
    data_n=ds_n_control,
    optics=control,
)

computed avg
computed std
computed n
CPU times: user 11.4 ms, sys: 0 ns, total: 11.4 ms
Wall time: 21.7 ms


#### CRI240K data

In [31]:
%%time
# CRI240K experiment: cri263=False selects runs 002 and 003 on campaign storage
ds_avg_cri240K, ds_std_cri240K, ds_n_cri240K = LoadMod(optics=rfn240K, cri263=False)

print('processed all data')

case: 2
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.cri240K_test.002/atm/hist/f.e22.F1850.f09_f09_mg17.cri240K_test.002.cam.h0.*.nc
   made changes to ds
case: 3
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.cri240K_test.003/atm/hist/f.e22.F1850.f09_f09_mg17.cri240K_test.003.cam.h0.*.nc
   made changes to ds
   combined cases
processed all data
CPU times: user 33.3 s, sys: 1min 20s, total: 1min 53s
Wall time: 5min 24s


In [32]:
%%time

# Write the CRI240K statistics to disk
Save(
    data_avg=ds_avg_cri240K,
    data_std=ds_std_cri240K,
    data_n=ds_n_cri240K,
    optics=rfn240K,
)

computed avg
computed std
computed n
CPU times: user 4.96 ms, sys: 6.14 ms, total: 11.1 ms
Wall time: 23.4 ms


#### CRI263K data

In [33]:
%%time
# CRI263K experiment: cri263=True selects run 002 only, read from scratch
ds_avg_cri263K, ds_std_cri263K, ds_n_cri263K = LoadMod(optics=rfn263K, cri263=True)

print('processed all data')

case: 2
/glade/derecho/scratch/glydia/archive/f.e22.F1850.f09_f09_mg17.cri263K_test.002/atm/hist/f.e22.F1850.f09_f09_mg17.cri263K_test.002.cam.h0.*.nc
   made changes to ds
   combined cases
processed all data
CPU times: user 33.2 s, sys: 12.5 s, total: 45.6 s
Wall time: 1min 55s


In [34]:
%%time

# Write the CRI263K statistics to disk
Save(
    data_avg=ds_avg_cri263K,
    data_std=ds_std_cri263K,
    data_n=ds_n_cri263K,
    optics=rfn263K,
)

computed avg
computed std
computed n
CPU times: user 13.4 ms, sys: 369 µs, total: 13.8 ms
Wall time: 211 ms


#### CRI273K data

In [35]:
%%time
# CRI273K experiment: cri263=False selects runs 002 and 003 on campaign storage
ds_avg_cri273K, ds_std_cri273K, ds_n_cri273K = LoadMod(optics=rfn273K, cri263=False)

print('processed all data')

case: 2
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.cri273K_test.002/atm/hist/f.e22.F1850.f09_f09_mg17.cri273K_test.002.cam.h0.*.nc
   made changes to ds
case: 3
/glade/campaign/univ/ucub0137/archive/f.e22.F1850.f09_f09_mg17.cri273K_test.003/atm/hist/f.e22.F1850.f09_f09_mg17.cri273K_test.003.cam.h0.*.nc
   made changes to ds
   combined cases
processed all data
CPU times: user 32.9 s, sys: 1min 18s, total: 1min 51s
Wall time: 5min 12s


In [36]:
%%time

# Write the CRI273K statistics to disk
Save(
    data_avg=ds_avg_cri273K,
    data_std=ds_std_cri273K,
    data_n=ds_n_cri273K,
    optics=rfn273K,
)

computed avg
computed std
computed n
CPU times: user 4.16 ms, sys: 6.65 ms, total: 10.8 ms
Wall time: 22.3 ms
