In [4]:
import sys

import xarray as xr
import numpy as np
import pandas as pd
# import wrf
import yaml

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import matplotlib.patches as mpatches
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# import personal modules
# Path to modules
sys.path.append('../modules')
# Import my modules
from utils import roundPartial, generate_ptlst_from_start_end
from plotter import draw_basemap

In [2]:
# Root of the input data tree; the PRISM and ERA5-trajectory files are read from 'preprocessed/' below it.
# NOTE(review): absolute, machine-specific path -- adjust when running on another system.
path_to_data = '/expanse/nfs/cw3e/cwp140/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [3]:
region = 'gulf_of_mexico' ## 'san_juan' 'baja' 'gulf_of_mexico'
# import configuration file for case study choice
yaml_doc = 'domains.yml'
# the context manager closes the file handle as soon as the YAML is parsed
with open(yaml_doc) as yaml_file:
    d = yaml.safe_load(yaml_file)

#### FOR BBOX METHOD ####
## build 0.25-degree lat/lon axes spanning the region's bounding box
## ext[0:2] bound the longitudes and ext[2:4] bound the latitudes;
## the +0.25 on the stop value keeps the upper edge inside the range
ext = d[region]['ext']
lats = np.arange(ext[2], ext[3]+0.25, 0.25)
lons = np.arange(ext[0], ext[1]+0.25, 0.25)

print(lats)
print(lons)

NameError: name 'yaml' is not defined

In [None]:
## create a simple map showing every domain's bounding box and transect
nrows = 1
ncols = 1
mapcrs = ccrs.PlateCarree()
datacrs = ccrs.PlateCarree()

fig = plt.figure(figsize=(5,5))

## Use gridspec to set up a plot with a series of subplots that is
## n-rows by n-columns
gs = GridSpec(nrows, ncols, height_ratios=[1], width_ratios = [1], wspace=0.025, hspace=0.05)
## use gs[rows index, columns index] to access grids

## add basemap
## kept in its own name so the region extent `ext` set in the config cell is not overwritten
map_ext = [-140., -90., 20, 50]
dx = np.arange(map_ext[0], map_ext[1]+10, 10)
dy = np.arange(map_ext[2], map_ext[3]+10, 10)
ax = fig.add_subplot(gs[0, 0], projection=mapcrs)
ax = draw_basemap(ax, extent=map_ext, xticks=dx, yticks=dy, grid= True, left_lats=True, right_lats=False)
ax.add_feature(cfeature.STATES, edgecolor='0.4', linewidth=0.8)
ax.set_extent(map_ext, datacrs)

for domain in ['san_juan', 'baja', 'gulf_of_mexico']:
    ## red rectangle: the domain's bounding box (ext[0:2] are x values, ext[2:4] are y values)
    dom_ext = d[domain]['ext']
    ax.add_patch(mpatches.Rectangle(xy=[dom_ext[0], dom_ext[2]],
                                    width=dom_ext[1]-dom_ext[0],
                                    height=dom_ext[3]-dom_ext[2],
                                    fill=False,
                                    edgecolor='r',
                                    linewidth=0.75,
                                    transform=datacrs,
                                    zorder=199))

    ## blue line: the domain's transect; element [1] of each point is plotted as x, element [0] as y
    start_pt = d[domain]['start_pt']
    end_pt = d[domain]['end_pt']
    ax.plot([start_pt[1], end_pt[1]],
            [start_pt[0], end_pt[0]], color="blue",
            transform=datacrs, zorder=3)

# Show
plt.show()

In [None]:
## PRISM watershed precipitation file -- it supplies the list of HUC8 IDs
fname = path_to_data + 'preprocessed/PRISM/PRISM_HUC8_CO_sp.nc'
PRISM = xr.open_dataset(fname)
HUC8_lst = PRISM.HUC8.values

## final trajectory dataset: one netCDF file per HUC8 ID, opened in HUC8_lst order
traj_template = path_to_data + 'preprocessed/ERA5_trajectories/combined_extreme_AR/PRISM_HUC8_{0}.nc'
ds_lst = [xr.open_dataset(traj_template.format(huc8_id)) for huc8_id in HUC8_lst]

## stack the per-watershed datasets along a new "HUC8" index
huc8_index = pd.Index(HUC8_lst, name="HUC8")
ds = xr.concat(ds_lst, huc8_index)
ds

In [None]:
def flatten(xss):
    """Concatenate the items of every iterable in ``xss`` into one flat list (one level deep)."""
    flat = []
    for xs in xss:
        flat.extend(xs)
    return flat
    
def find_time_bbox(ERA5, lats, lons):
    """Return the time stamps at which a trajectory lies on a bounding-box grid point.

    Every trajectory position is snapped to 0.25-degree spacing with
    ``roundPartial``. A position is a hit when its snapped longitude equals an
    entry of ``lons`` and its snapped latitude equals an entry of ``lats``.

    Parameters
    ----------
    ERA5 : object with 1-D ``time``, ``lat`` and ``lon`` members (read via ``.values``)
        One trajectory; the three arrays must have the same length.
    lats, lons : array-like of float
        Grid latitudes / longitudes that make up the bounding box.

    Returns
    -------
    list of str
        One ``'%Y-%m-%d %H'`` string per hit, in trajectory order; empty when
        nothing matches. A value that occurs several times in ``lats`` or
        ``lons`` yields one entry per occurrence.

    Raises
    ------
    ValueError
        If the trajectory arrays are not one-dimensional or differ in length.
    """
    traj_times = np.asarray(ERA5.time.values)

    # snap the trajectory positions onto the 0.25-degree grid used by lats/lons
    grid_lon = np.asarray(roundPartial(ERA5.lon.values, 0.25))
    grid_lat = np.asarray(roundPartial(ERA5.lat.values, 0.25))

    if not (traj_times.ndim == grid_lon.ndim == grid_lat.ndim == 1):
        raise ValueError('find_time_bbox expects 1-D time/lat/lon arrays')

    # Broadcast comparison replaces the point x lon x lat Python loop:
    # per trajectory point, count how many grid longitudes / latitudes it equals.
    lon_hits = (grid_lon[:, None] == np.asarray(lons)[None, :]).sum(axis=1)
    lat_hits = (grid_lat[:, None] == np.asarray(lats)[None, :]).sum(axis=1)

    # number of (lon, lat) grid combinations matched by each trajectory point
    n_hits = lon_hits * lat_hits

    ## times of the trajectory while it is inside the bbox
    hit_times = np.repeat(traj_times, n_hits)
    return [pd.to_datetime(str(t)).strftime('%Y-%m-%d %H') for t in hit_times]

def find_time_line(ERA5, lats, lons, pairs=None):
    """Return the trajectory points that fall on a transect.

    Every trajectory position is snapped to 0.25-degree spacing with
    ``roundPartial``. A point is a hit when its snapped (lat, lon) equals one
    of the transect's coordinate pairs.

    Parameters
    ----------
    ERA5 : object with 1-D ``time``, ``level``, ``lat`` and ``lon`` members (read via ``.values``)
        One trajectory.
    lats, lons : any
        Unused; kept so existing calls ``find_time_line(ERA5, lats, lons)`` keep working.
    pairs : sequence of (lat, lon), optional
        Transect points. When omitted, the notebook-level ``coord_pairs`` is
        used, which must then be defined before the call.

    Returns
    -------
    list of xarray.Dataset
        One single-point selection per (trajectory point, pair) match, ordered
        by trajectory position and then by pair position. Each holds
        ``traj_lons``, ``traj_lats``, ``time`` and ``level`` together with the
        snapped ``lon``/``lat`` coordinates. Empty when nothing matches.
    """
    if pairs is None:
        # backward compatibility: this function used to read the global directly
        pairs = coord_pairs

    ## create a dataset of the trajectory points that match ERA5 spacing
    t = xr.DataArray(ERA5.time.values, dims=['location'], name='time')
    lev = xr.DataArray(ERA5.level.values, dims=['location'], name='level')

    # trajectory lat/lons snapped to ERA5 spacing
    grid_lon = xr.DataArray(roundPartial(ERA5.lon.values, 0.25), dims=['location'])
    grid_lat = xr.DataArray(roundPartial(ERA5.lat.values, 0.25), dims=['location'])

    # raw trajectory positions, carrying the snapped values as coordinates
    traj_lons = xr.DataArray(ERA5.lon.values, dims=("location"), coords={"lon": grid_lon}, name='traj_lons')
    traj_lats = xr.DataArray(ERA5.lat.values, dims=("location"), coords={"lat": grid_lat}, name='traj_lats')

    # dataset with the trajectory lat/lons and the closest ERA5 lat/lons as coords
    z = xr.merge([traj_lons, traj_lats, t, lev])

    # (n_pairs, 2) array whose columns are lat, lon; reshape keeps an empty list valid
    pair_arr = np.asarray(pairs, dtype=float).reshape(-1, 2)

    # on_line[i, j] is True when trajectory point i sits exactly on pair j
    on_line = ((z.lat.values[:, None] == pair_arr[None, :, 0])
               & (z.lon.values[:, None] == pair_arr[None, :, 1]))

    # np.nonzero walks the matrix row by row, i.e. by point and then by pair
    point_idx, _ = np.nonzero(on_line)

    ## the full dataset row of the trajectory wherever it crosses the transect
    return [z.sel(location=int(i)) for i in point_idx]

In [None]:
#### FOR CROSS SECTION METHOD ####
## points along the transect from start_pt to end_pt of the chosen region
start_pt = d[region]['start_pt']
end_pt = d[region]['end_pt']
coord_pairs = generate_ptlst_from_start_end(start_pt[1], start_pt[0], end_pt[1], end_pt[0], pairs=True)
print(coord_pairs)

df_lst = []
HUC8_final = []  # HUC8 IDs with at least one trajectory point on the transect
## loop through HUC8s and start_dates
for HUC8 in HUC8_lst:
    # subset to the current HUC8
    ## keep only trajectories associated with ARs
    tmp = ds.sel(HUC8=HUC8)
    tmp = tmp.where(tmp.tARget > 0, drop=True)

    ## collect the transect hits of every start_date of the current subbasin
    t_lst = flatten(
        find_time_line(tmp.sel(start_date=st_date), lats, lons)
        for st_date in tmp.start_date.values
    )
    if len(t_lst) == 0:
        continue

    if len(t_lst) == 1:
        ## a single hit has no dimension to concatenate along, so add a length-1 "time" dim
        hits = t_lst[0].expand_dims(dim={"time": 1})
    else:
        hits = xr.concat(t_lst, dim='time')

    df_full = hits.to_dataframe()
    df_full['HUC8'] = HUC8
    HUC8_final.append(HUC8)
    df_lst.append(df_full)

## now we have one dataframe per HUC8 with the trajectory points that fell on the transect
if not df_lst:
    raise ValueError('no AR trajectory point falls on the {0} transect'.format(region))
df = pd.concat(df_lst)

## create a column with the coord pair
df['coord_pair'] = list(zip(df.lat, df.lon))

## save as CSV line_dates_region-name_full.csv
fname_out = path_to_out + 'line_dates_{0}_full.csv'.format(region)
df.to_csv(fname_out)

## make a copy of the df but keep only time/index
## (built without the name `d`, which holds the domain configuration read from the YAML file)
times_df = pd.DataFrame({'datetime': df.index.values})
times_df = times_df.drop_duplicates(subset=['datetime'])

## save as CSV line_dates_region-name_tARget.csv
fname_out = path_to_out + 'line_dates_{0}_tARget.csv'.format(region)
times_df.to_csv(fname_out)

In [None]:
## display the dataframe of transect hits assembled by the cross-section cell
df

In [None]:
## display the transect points returned by generate_ptlst_from_start_end
coord_pairs

In [None]:
## lat and level of every trajectory point that fell on the transect
x = df.lat.values
y = df.level.values

# Draw a combo scatterplot with density contours on one explicit axes
f, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(x=x, y=y, s=5, color=".15", ax=ax)
# sns.histplot(x=x, y=y, bins=50, pthresh=.1, cmap="mako")
sns.kdeplot(x=x, y=y, levels=5, color="k", linewidths=1, ax=ax)
ax.set(xlabel='lat', ylabel='level')
# flip the vertical axis so that larger level values are drawn at the bottom
ax.invert_yaxis()


In [None]:
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.interpolate import griddata

# # Create a grid for interpolation
# xi = np.arange(df.lat.values.min(), df.lat.values.max()+0.25, 0.25)
# yi = np.linspace(200, 1000, 25)
# xi, yi = np.meshgrid(xi, yi)
# z = np.sin(x * 2 * np.pi) + np.cos(y * 2 * np.pi)

# z


In [None]:
# z = np.ones(x.shape)

In [None]:
# # Interpolate the data onto the grid
# zi = griddata((x, y), z, (xi, yi), method='cubic')

# # Create the contour plot
# plt.contourf(xi, yi, zi, cmap='viridis')
# plt.colorbar()
# plt.gca().invert_yaxis()
# plt.gca().invert_xaxis()

In [None]:
#### FOR BBOX METHOD ####
## separate names from the cross-section cell, so its `df` / `df_lst` stay
## intact for the display and plotting cells that read them
bbox_df_lst = []
## loop through HUC8s and start_dates
for HUC8 in HUC8_lst:
    # subset to the current HUC8
    ## keep only trajectories associated with ARs
    tmp = ds.sel(HUC8=HUC8)
    tmp = tmp.where(tmp.tARget > 0, drop=True)

    ## collect the bbox hit times of every start_date of the current subbasin
    t_lst = flatten(
        find_time_bbox(tmp.sel(start_date=st_date), lats, lons)
        for st_date in tmp.start_date.values
    )
    bbox_df_lst.append(pd.DataFrame(data={'date': t_lst}))  # put dates into a df

## now we have one dataframe per HUC8 with the dates when AR trajectories were within bbox
## concat them
bbox_df = pd.concat(bbox_df_lst)
## swap to datetime
bbox_df['datetime'] = pd.to_datetime(bbox_df['date'])

## remove duplicates, then drop the string column
bbox_df = bbox_df.drop_duplicates(subset=['datetime'])
bbox_df = bbox_df.drop(['date'], axis=1)

## save as CSV bbox_dates_region-name_tARget.csv
fname_out = path_to_out + 'bbox_dates_{0}_tARget.csv'.format(region)
bbox_df.to_csv(fname_out)