In [1]:
import sys
import itertools
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
from functools import partial
# import wrf


# import personal modules
# Path to modules
sys.path.append('../modules')
# Import my modules
from utils import select_months_ds, generate_ptlst_from_start_end
from statistical_tests import ttest_1samp_new

In [2]:
## directory layout used by the notebook
path_to_data = '/expanse/nfs/cw3e/cwp140/'  # data root
path_to_out = '../out/'    # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'  # figures

In [3]:
## region to analyse -- one of 'san_juan', 'baja', 'gulf_of_mexico'
region = 'baja'

## per-region settings:
##   'ext'      -- bounding box (presumably [lon_min, lon_max, lat_min, lat_max] -- confirm)
##   'start_pt' -- first point of the transect line, [lat, lon] judging by the values
##   'end_pt'   -- last point of the transect line, same ordering as 'start_pt'
d = {
    'baja': dict(ext=[-115., -110., 27., 32.],
                 start_pt=[33.0, -118.],
                 end_pt=[29.0, -110.]),

    'san_juan': dict(ext=[-109., -105., 37., 39.],
                     start_pt=[38.0, -112.],
                     end_pt=[38.0, -102.]),

    'gulf_of_mexico': dict(ext=[-99., -93., 25., 31.],
                           start_pt=[28.0, -100.],
                           end_pt=[31.0, -93.]),
}

## the helper receives element [1] of each point before element [0], so unpack first
start_lat, start_lon = d[region]['start_pt']
end_lat, end_lon = d[region]['end_pt']
coord_pairs = generate_ptlst_from_start_end(start_lon, start_lat, end_lon, end_lat, pairs=False)
print(coord_pairs)


(array([33.  , 32.75, 32.5 , 32.25, 32.  , 31.75, 31.5 , 31.25, 31.  ,
       30.75, 30.5 , 30.25, 30.  , 29.75, 29.5 , 29.25, 29.  ]), array([-118. , -117.5, -117. , -116.5, -116. , -115.5, -115. , -114.5,
       -114. , -113.5, -113. , -112.5, -112. , -111.5, -111. , -110.5,
       -110. ]))


In [4]:
## load ar dates within region
## (`region` and `path_to_out` come from the configuration cells above, so the
##  dates always match the region used for the cross-section line)
fname = path_to_out + 'line_dates_{0}.csv'.format(region)
df = pd.read_csv(fname)

## parse the timestamps once; 'day' is the timestamp truncated to midnight
timestamps = pd.to_datetime(df['datetime'])
df['day'] = timestamps.dt.normalize()

## order rows chronologically using the parsed timestamps, not the raw strings
df = df.loc[timestamps.sort_values().index]

## keep the first row of each calendar day -> one entry per unique AR day
new = df.drop_duplicates('day')
ar_dates = new['day'].values
ar_dates

array(['2000-02-23T00:00:00.000000000', '2000-02-24T00:00:00.000000000',
       '2000-04-20T00:00:00.000000000', '2000-04-21T00:00:00.000000000',
       '2000-06-17T00:00:00.000000000', '2000-06-18T00:00:00.000000000',
       '2000-07-06T00:00:00.000000000', '2000-08-30T00:00:00.000000000',
       '2000-09-20T00:00:00.000000000', '2000-09-21T00:00:00.000000000',
       '2000-09-22T00:00:00.000000000', '2000-10-09T00:00:00.000000000',
       '2000-10-10T00:00:00.000000000', '2000-10-11T00:00:00.000000000',
       '2000-10-21T00:00:00.000000000', '2000-10-27T00:00:00.000000000',
       '2001-01-25T00:00:00.000000000', '2001-01-27T00:00:00.000000000',
       '2001-02-07T00:00:00.000000000', '2001-02-28T00:00:00.000000000',
       '2001-04-05T00:00:00.000000000', '2001-04-19T00:00:00.000000000',
       '2001-04-20T00:00:00.000000000', '2001-08-19T00:00:00.000000000',
       '2002-01-29T00:00:00.000000000', '2002-01-30T00:00:00.000000000',
       '2002-09-05T00:00:00.000000000', '2002-09-06

In [15]:
def compute_vertical_composites(varname, anomaly, ar_dates, ssn, region,
                                path_to_data='/expanse/nfs/cw3e/cwp140/preprocessed/ERA5/cross_section/',
                                out_path='/home/dnash/DATA/preprocessed/ERA5_composites/cross_section/'):
    """Build a seasonal composite of the cross-section data over AR days and save it.

    The daily files for `region` are opened, restricted to `ar_dates`, restricted
    again to the months of `ssn`, and then averaged over time. The result is
    written to netCDF in `out_path` and also returned.

    Parameters
    ----------
    varname : str
        Label inserted into the output file names. It does not influence which
        input files are read (the non-anomaly input pattern always uses 'uvwq').
    anomaly : bool
        If true, read the filtered daily anomalies, run `ttest_1samp_new`
        against a population mean of zero and write both the mean and the
        t-value output. If false, read the daily fields and write their
        plain time mean.
    ar_dates : array-like of datetime64
        Days to keep. They are passed to `Dataset.sel(time=...)`, so every
        entry is expected to exist on the dataset's time axis.
    ssn : str
        One of 'DJF', 'MAM', 'JJA', 'SON', 'NDJFMA', 'MJJASO'.
    region : str
        Region name inserted into the input and output file names.
    path_to_data : str, optional
        Directory holding the 'anomalies/' and 'daily/' inputs. It is joined by
        plain string concatenation, so it must end with a path separator.
    out_path : str, optional
        Directory the composites are written to (must end with a path separator).

    Returns
    -------
    xarray.Dataset
        `ds.mean('time')` when `anomaly` is false; otherwise the first value
        returned by `ttest_1samp_new` (presumably the time mean -- confirm
        against that helper).

    Raises
    ------
    ValueError
        If `ssn` is not one of the supported season labels.
    """
    ## (start_mon, end_mon) for every supported season label
    season_months = {
        'DJF': (12, 2),
        'MAM': (3, 5),
        'JJA': (6, 8),
        'SON': (9, 11),
        'NDJFMA': (11, 4),
        'MJJASO': (5, 10),
    }
    ## fail early, before any file is opened, instead of hitting an
    ## unbound start_mon/end_mon further down
    if ssn not in season_months:
        raise ValueError("Unknown season {0!r}; expected one of: {1}".format(
            ssn, ', '.join(season_months)))
    start_mon, end_mon = season_months[ssn]

    ## pick the input files
    if anomaly:
        fname_pattern = path_to_data + 'anomalies/daily_filtered_anomalies_{0}_*.nc'.format(region)
    else:
        fname_pattern = path_to_data + 'daily/era5_{0}_025dg_daily_uvwq_*.nc'.format(region)

    ## the context manager closes the underlying netCDF files once the
    ## selected subset has been loaded into memory
    with xr.open_mfdataset(fname_pattern, engine='netcdf4', combine='by_coords') as ds_all:
        ## subset to AR dates
        ds = ds_all.sel(time=ar_dates)
        ## subset to start_month and end_month
        ds = select_months_ds(ds, start_mon, end_mon, 'time')
        ds = ds.load()

    if anomaly:
        ## run students t-test against a zero population mean
        popmean = np.zeros([len(ds.level), len(ds.location)])
        ndays = len(ds.time)  # number of unique days
        # calculate t-value based on ndays
        a_mean, tval_mask = ttest_1samp_new(a=ds, popmean=popmean, dim='time', n=ndays)

        ## write to netCDF
        out_fname = out_path + 'filtered_anomaly_composite_{0}_{1}_{2}.nc'.format(varname, ssn, region)
        a_mean.to_netcdf(path=out_fname, mode='w', format='NETCDF4')

        out_fname = out_path + 'filtered_anomaly_composite_tvals_{0}_{1}_{2}.nc'.format(varname, ssn, region)
        tval_mask.to_netcdf(path=out_fname, mode='w', format='NETCDF4')
    else:
        a_mean = ds.mean('time')
        out_fname = out_path + 'composite_{0}_{1}_{2}.nc'.format(varname, ssn, region)
        a_mean.to_netcdf(path=out_fname, mode='w', format='NETCDF4')

    return a_mean

In [16]:
%%time
## run every (variable, season, anomaly flag) combination
varname_lst = ['uvwq']
ssn_lst = ['DJF', 'MAM', 'JJA', 'SON', 'NDJFMA', 'MJJASO']
anom_lst = [True, False]

## cartesian product, ordered variable -> season -> anomaly flag
option_lst = list(itertools.product(varname_lst, ssn_lst, anom_lst))
for varname, ssn, anomaly in option_lst:
    print('Anomaly:', anomaly, 'Season:', ssn, 'Variable:', varname)
    tmp = compute_vertical_composites(varname, anomaly, ar_dates, ssn, region)

Anomaly: True Season: DJF Variable: uvwq
<xarray.Dataset>
Dimensions:    (location: 17, level: 23, time: 159)
Coordinates:
    longitude  (location) float32 -118.0 -117.5 -117.0 ... -111.0 -110.5 -110.0
    latitude   (location) float32 33.0 32.75 32.5 32.25 ... 29.5 29.25 29.0
  * level      (level) int32 1000 975 950 925 900 875 ... 350 300 250 225 200
  * time       (time) datetime64[ns] 2000-02-23 2000-02-24 ... 2023-01-17
    dayofyear  (time) int64 54 55 25 27 38 59 29 30 ... 361 364 365 1 15 16 17
Dimensions without coordinates: location
Data variables:
    q          (time, level, location) float64 0.008633 0.009625 ... 0.00282
    u          (time, level, location) float64 -0.06073 -0.8752 ... 17.97 16.71
    v          (time, level, location) float64 4.944 4.787 2.889 ... 23.12 22.5
    w          (time, level, location) float64 -0.006118 -0.1147 ... 0.1008
<xarray.Dataset>
Dimensions:    (location: 17, level: 23)
Coordinates:
    longitude  (location) float32 -118.0 -117.5 -