In [1]:
# Standard Python modules
import os, sys
import glob
import numpy as np
import pandas as pd
import xarray as xr
import re
import seaborn as sns
import cartopy
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colorbar import Colorbar # different way to handle colorbar
import matplotlib.ticker as mticker
import cmocean.cm as cmo
# cartopy
import cartopy.crs as ccrs
from cartopy.mpl.geoaxes import GeoAxes
import cartopy.feature as cfeature

# extras
%matplotlib inline
import geopandas
import shapely.geometry

# import personal modules
# Path to modules
sys.path.append('../modules')
# Import my modules
from utils import roundPartial, find_closest_MERRA2_lon
from plotter import draw_basemap, plot_terrain


# render floats in pandas tables with thousands separators and two decimals
pd.set_option("display.float_format", "{:,.2f}".format)

ERROR 1: PROJ: proj_create_from_database: Open of /home/dnash/miniconda3/envs/SEAK-clim/share/proj failed


In [2]:
# Directory configuration for this notebook (all three keep their trailing slash,
# since paths are built by plain string concatenation).
path_to_data, path_to_out, path_to_figs = (
    '/data/projects/Comet/cwp140/',  # base directory of the input data files
    '../out/',                       # output files (numerical results, intermediate datafiles) -- read & write
    '../figs/',                      # figures
)

In [3]:
## Rutz AR dataset (MERRA2, US-West file)
fname = path_to_data + 'preprocessed/MERRA2/MERRA2_Rutz_US-West.nc'
ar = xr.open_dataset(fname)

## AR scale dataset (MERRA2, US-West file)
fname = path_to_data + 'preprocessed/MERRA2/MERRA2_ARScale_US-West.nc'
arscale = xr.open_dataset(fname)

## watershed trajectories: expose longitude/latitude/time as the coordinates
## `lon`/`lat`/`time`, then drop the original latitude/longitude variables
fname = '/home/dnash/comet_data/preprocessed/ERA5_trajectories/PRISM_HUC8_14080107.nc'
ERA5 = (
    xr.open_dataset(fname)
    .pipe(lambda ds: ds.assign_coords({"lon": ds.longitude, "lat": ds.latitude, "time": ds.time}))
    .drop_vars(["latitude", "longitude"])
)
ERA5

In [7]:
def combine_IVT_and_trajectory(ERA5, ivt_dir='/data/downloaded/Reanalysis/ERA5/IVT', lookback_days=3):
    """Interpolate daily ERA5 IVT files onto one trajectory and merge the result in.

    Parameters
    ----------
    ERA5 : xarray.Dataset
        A single trajectory. The function reads its scalar ``start_date`` and
        its ``lat``, ``lon`` and ``time`` values.
    ivt_dir : str, optional
        Directory holding the daily files, laid out as
        ``<ivt_dir>/<YYYY>/ERA5_IVT_<YYYYMMDD>.nc``.
    lookback_days : int, optional
        Number of days before ``start_date`` to load (the window is inclusive
        of ``start_date`` itself).

    Returns
    -------
    xarray.Dataset
        ``ERA5`` merged with the variables of the IVT files interpolated to the
        trajectory's lat/lon/time (per the original notes these are IVT, uIVT,
        vIVT and IWV -- TODO confirm against the files).
    """
    ## time window covered by the trajectory
    end_date = ERA5.start_date.values
    start_date = end_date - np.timedelta64(lookback_days, 'D')
    print(start_date, end_date)

    ## one daily ERA5 file per day in the window
    dates = pd.date_range(start=start_date, end=end_date, freq='1D')
    filenames = [
        '{0}/{1}/ERA5_IVT_{1}{2:02}{3:02}.nc'.format(ivt_dir, d.year, d.month, d.day)
        for d in dates
    ]

    ## interpolate IVT to trajectory points; load the result into memory *inside*
    ## the context manager so the source files can be closed right away (this
    ## function is called once per trajectory, so unclosed handles add up)
    with xr.open_mfdataset(filenames, combine='by_coords', parallel=False) as ivt_files:
        ivt = ivt_files.interp(lat=ERA5.lat, lon=ERA5.lon, time=ERA5.time).compute()

    ## merge the interpolated fields into the trajectory ds
    return xr.merge([ivt, ERA5])

def combine_arscale_and_trajectory(ERA5, arscale, rutz_ar=None):
    """Tag a trajectory with the AR scale / AR flag at its first matching grid point.

    Every trajectory point is snapped to the MERRA2 grid
    (``find_closest_MERRA2_lon`` for longitude, ``roundPartial(..., 0.5)`` for
    latitude) and compared with the lon/lat pairs stored along the ``location``
    dimension of ``arscale`` (per the original notes these are the west-coast
    points). The earliest trajectory point that matches exactly is used.

    Parameters
    ----------
    ERA5 : xarray.Dataset
        A single trajectory with 1-D ``lat``, ``lon`` and ``time`` values.
    arscale : xarray.Dataset
        Has ``lon``/``lat`` along ``location``, a ``time`` axis and an
        ``ar_scale`` variable.
    rutz_ar : xarray.Dataset, optional
        Dataset with an ``AR`` variable, indexed like ``arscale``. When omitted,
        the notebook-level ``ar`` dataset is used (the original behaviour).

    Returns
    -------
    xarray.Dataset
        ``ERA5`` with scalar ``ar_scale`` and ``ar`` variables added; both are
        NaN when no trajectory point matches any ``arscale`` location.
    """
    if rutz_ar is None:
        # backward-compatible fallback to the dataset loaded earlier in the notebook
        rutz_ar = ar

    ## trajectory times and positions snapped to the MERRA2 grid spacing
    traj_times = ERA5.time.values
    snapped_lons = np.asarray([find_closest_MERRA2_lon(lon) for lon in ERA5.lon.values])
    snapped_lats = np.asarray(roundPartial(ERA5.lat.values, 0.5))

    ## lon/lat of every candidate location in the AR scale dataset
    target_lons = np.asarray(arscale.lon.values)
    target_lats = np.asarray(arscale.lat.values)

    ## compare every trajectory point (rows) with every location (columns)
    is_match = (
        (snapped_lons[:, np.newaxis] == target_lons[np.newaxis, :])
        & (snapped_lats[:, np.newaxis] == target_lats[np.newaxis, :])
    )
    # argwhere walks row-major, so the first hit is the earliest trajectory
    # point and, for that point, the lowest location position
    matches = np.argwhere(is_match)

    if len(matches) == 0:
        ## the trajectory never hits one of the locations: set both values to nan
        return ERA5.assign(ar_scale=np.nan, ar=np.nan)

    ## take the first time the trajectory hits one of the locations
    idx = (int(matches[0][0]), int(matches[0][1]))
    print(idx)
    time_match = traj_times[idx[0]]

    ## AR scale at that location; idx[1] is a position, so select with isel
    ## (sel would treat it as a label if `location` carries an index)
    arscale_val = arscale.isel(location=idx[1])
    # nearest time, since (per the original notes) ERA5 is hourly and MERRA2 3-hourly
    arscale_val = arscale_val.sel(time=time_match, method='nearest').ar_scale.values
    print(arscale_val)

    ## whether a Rutz et al. AR was detected at the same place and time
    ar_val = rutz_ar.isel(location=idx[1])
    ar_val = ar_val.sel(time=time_match, method='nearest').AR.values
    print(ar_val)

    ## put both values into the trajectory dataset
    return ERA5.assign(ar_scale=arscale_val, ar=ar_val)

In [8]:
%%time
ds_lst = []
## loop through all trajectories for that watershed
for i, st_date in enumerate(ERA5.start_date.values):
    tmp = ERA5.sel(start_date=st_date)
    ## combine IVT data   
    tmp = combine_IVT_and_trajectory(tmp)
    ## add arscale
    tmp = combine_arscale_and_trajectory(tmp, arscale)
    ds_lst.append(tmp)
    
## merge final dataset
final_ds = xr.concat(ds_lst, dim="start_date")
final_ds

2000-10-21T00:00:00.000000000 2000-10-24T00:00:00.000000000
2001-08-11T00:00:00.000000000 2001-08-14T00:00:00.000000000
2002-09-05T00:00:00.000000000 2002-09-08T00:00:00.000000000
(45, 63)
0.0
0.0
2002-09-08T00:00:00.000000000 2002-09-11T00:00:00.000000000
2002-11-06T00:00:00.000000000 2002-11-09T00:00:00.000000000
(21, 56)
3.0
1.0
2003-02-23T00:00:00.000000000 2003-02-26T00:00:00.000000000
2003-02-25T00:00:00.000000000 2003-02-28T00:00:00.000000000
(35, 58)
0.0
0.0
2003-03-14T00:00:00.000000000 2003-03-17T00:00:00.000000000
(25, 55)
0.0
0.0
2003-09-07T00:00:00.000000000 2003-09-10T00:00:00.000000000
(31, 56)
0.0
0.0
2003-11-10T00:00:00.000000000 2003-11-13T00:00:00.000000000
2003-12-31T00:00:00.000000000 2004-01-03T00:00:00.000000000
(24, 54)
0.0
0.0
2004-03-31T00:00:00.000000000 2004-04-03T00:00:00.000000000
2004-04-01T00:00:00.000000000 2004-04-04T00:00:00.000000000
2004-09-02T00:00:00.000000000 2004-09-05T00:00:00.000000000
(69, 51)
0.0
0.0
2004-09-17T00:00:00.000000000 2004-09-20T

In [10]:
## write the combined dataset for this watershed to a NetCDF4 file
out_fname = '/home/dnash/comet_data/preprocessed/ERA5_trajectories/final/PRISM_HUC8_14080107.nc'
final_ds.to_netcdf(out_fname, mode='w', format='NETCDF4')