In [1]:
import sys
import itertools
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
# import wrf


# import personal modules
# Path to modules
sys.path.append('../modules')
# Import my modules
from utils import select_months_ds
from statistical_tests import ttest_1samp_new

In [2]:
## project directories used by this notebook
path_to_data = '/expanse/nfs/cw3e/cwp140/'
## output files (numerical results, intermediate datafiles) -- read & write
path_to_out = '../out/'
## figures
path_to_figs = '../figs/'

In [4]:
## load AR timestamps within the baja region
fname = '../out/bbox_dates_baja.csv'
## add a 'day' column (timestamp normalized to midnight), then order rows by
## the 'datetime' column exactly as it was read from the CSV
df = (
    pd.read_csv(fname)
    .assign(day=lambda frame: pd.to_datetime(frame['datetime']).dt.normalize())
    .sort_values(by=['datetime'])
)
## keep a single row per calendar day (the first one in sorted order)
new = df.drop_duplicates('day')
## array of unique AR days used to subset the gridded data below
ar_dates = new['day'].values

In [5]:
## show the de-duplicated table (one row per day) as this cell's output
new

Unnamed: 0.1,Unnamed: 0,datetime,day
7273,10,2000-02-23 23:00:00,2000-02-23
7272,9,2000-02-24 00:00:00,2000-02-24
5217,19,2000-04-20 22:00:00,2000-04-20
5215,17,2000-04-21 00:00:00,2000-04-21
5511,34,2000-06-17 01:00:00,2000-06-17
...,...,...,...
6220,567,2023-03-22 05:00:00,2023-03-22
7506,16,2023-06-19 02:00:00,2023-06-19
5372,709,2023-07-31 07:00:00,2023-07-31
5355,692,2023-08-01 00:00:00,2023-08-01


In [6]:
def compute_horizontal_composites(varname, anomaly, ar_dates, ssn,
                                  path_to_data='/expanse/nfs/cw3e/cwp140/downloads/ERA5/',
                                  out_path='/home/dnash/DATA/preprocessed/ERA5_composites/'):
    """Composite a daily ERA5 field over AR days in one season and write it to netCDF.

    Parameters
    ----------
    varname : str
        Variable name used to build the input and output file names
        (e.g. '700z', 'ivt').
    anomaly : bool
        If truthy, read the filtered daily anomaly files, run
        ``ttest_1samp_new`` against a zero population mean, and write both the
        composite and the t-value mask. Otherwise read the daily files and
        write the plain time mean.
    ar_dates : array-like of datetime64
        Days to select from the dataset's ``time`` coordinate. They are passed
        straight to ``Dataset.sel``.
    ssn : str
        Season label: one of 'DJF', 'MAM', 'JJA', 'SON', 'NDJFMA', 'MJJASO'.
    path_to_data : str, optional
        Root directory of the ERA5 input files (must end with a path separator,
        since file names are appended by string concatenation).
    out_path : str, optional
        Root directory for the composite files (same trailing-separator rule).
        The '<varname>/baja/' sub-directory is not created by this function.

    Returns
    -------
    a_mean
        For ``anomaly`` truthy, the first value returned by ``ttest_1samp_new``;
        otherwise ``ds.mean('time')``.

    Raises
    ------
    ValueError
        If ``ssn`` is not one of the supported season labels.
    """
    ## (start_mon, end_mon) for each supported season label
    season_months = {
        'DJF': (12, 2),
        'MAM': (3, 5),
        'JJA': (6, 8),
        'SON': (9, 11),
        'NDJFMA': (11, 4),
        'MJJASO': (5, 10),
    }
    ## fail fast, before any file is opened, instead of hitting an unbound
    ## start_mon/end_mon further down
    if ssn not in season_months:
        raise ValueError(
            "Unknown season {0!r}; expected one of {1}".format(ssn, list(season_months))
        )
    start_mon, end_mon = season_months[ssn]

    ## pick the input files
    if anomaly:
        fname_pattern = path_to_data + '{0}/anomalies/daily_filtered_anomalies_{0}_*.nc'.format(varname)
    else:
        fname_pattern = path_to_data + '{0}/daily/era5_namerica_025dg_daily_{0}_*.nc'.format(varname)

    ## open lazily, subset, pull the subset into memory, then close the files
    ## so repeated calls do not accumulate open file handles
    with xr.open_mfdataset(fname_pattern, engine='netcdf4', combine='by_coords') as ds_all:
        ## subset to AR dates
        ds = ds_all.sel(time=ar_dates)
        ## subset to start_month and end_month
        ds = select_months_ds(ds, start_mon, end_mon, 'time')
        ds = ds.load()

    if anomaly:
        ## one-sample t-test of the anomalies against a zero population mean
        popmean = np.zeros([len(ds.latitude), len(ds.longitude)])
        ndays = len(ds.time)  # number of days left after both subsets
        print(ndays)
        a_mean, tval_mask = ttest_1samp_new(a=ds, popmean=popmean, dim='time', n=ndays)

        ## write composite and t-value mask to netCDF
        out_fname = out_path + '{0}/baja/filtered_anomaly_composite_{0}_{1}.nc'.format(varname, ssn)
        a_mean.to_netcdf(path=out_fname, mode='w', format='NETCDF4')

        out_fname = out_path + '{0}/baja/filtered_anomaly_composite_tvals_{0}_{1}.nc'.format(varname, ssn)
        tval_mask.to_netcdf(path=out_fname, mode='w', format='NETCDF4')
    else:
        ## plain composite: mean over the selected days
        a_mean = ds.mean('time')
        out_fname = out_path + '{0}/baja/composite_{0}_{1}.nc'.format(varname, ssn)
        a_mean.to_netcdf(path=out_fname, mode='w', format='NETCDF4')

    return a_mean

In [7]:
%%time
## run the composite for every (variable, season, anomaly flag) combination
varname_lst = ['700z', 'ivt']
ssn_lst = ['DJF', 'MAM', 'JJA', 'SON', 'NDJFMA', 'MJJASO']
anom_lst = [True, False]

## cartesian product: the anomaly flag varies fastest, the variable slowest
option_lst = list(itertools.product(varname_lst, ssn_lst, anom_lst))
for varname, ssn, anomaly in option_lst:
    print('Anomaly:', anomaly, 'Season:', ssn, 'Variable:', varname)
    tmp = compute_horizontal_composites(varname, anomaly, ar_dates, ssn)

Anomaly: True Season: DJF Variable: 700z
159
Anomaly: False Season: DJF Variable: 700z
Anomaly: True Season: MAM Variable: 700z
79
Anomaly: False Season: MAM Variable: 700z
Anomaly: True Season: JJA Variable: 700z
53
Anomaly: False Season: JJA Variable: 700z
Anomaly: True Season: SON Variable: 700z
214
Anomaly: False Season: SON Variable: 700z
Anomaly: True Season: NDJFMA Variable: 700z
271
Anomaly: False Season: NDJFMA Variable: 700z
Anomaly: True Season: MJJASO Variable: 700z
234
Anomaly: False Season: MJJASO Variable: 700z
Anomaly: True Season: DJF Variable: ivt
159
Anomaly: False Season: DJF Variable: ivt
Anomaly: True Season: MAM Variable: ivt
79
Anomaly: False Season: MAM Variable: ivt
Anomaly: True Season: JJA Variable: ivt
53
Anomaly: False Season: JJA Variable: ivt
Anomaly: True Season: SON Variable: ivt
214
Anomaly: False Season: SON Variable: ivt
Anomaly: True Season: NDJFMA Variable: ivt
271
Anomaly: False Season: NDJFMA Variable: ivt
Anomaly: True Season: MJJASO Variable: 