In [None]:
#!/usr/bin/env python

# CONUS404_pr-bin_preprocessor.ipynb

In [1]:
'''File name: CONUS404_pr-changes.ipynb
    Author: Andreas Prein
    E-mail: prein@ucar.edu
    Date created: 19.05.2022
    Date last modified: 19.05.2022

    ############################################################## 
    Purpose:

    - Read in hourly precipitation data from CONUS404 
    - Save the data at lower precision to make it more easily accessible
    - Calculate changes in the hourly precipitation distribution 

'''

'File name: CONUS404_pr-changes.ipynb\n    Author: Andreas Prein\n    E-mail: prein@ucar.edu\n    Date created: 19.05.2022\n    Date last modified: 19.05.2022\n\n    ############################################################## \n    Purpose:\n\n    - Read in hourly precipitation data from CONUS404 \n    - Save the data at lower precision to make it more easily accessible\n    - Calculate changes in the hourly precipitation distribution \n\n'

In [62]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from netCDF4 import Dataset
import glob
import os
from pdb import set_trace as stop
from scipy.ndimage.filters import gaussian_filter
from scipy.ndimage import median_filter
from scipy.ndimage import label
from matplotlib import cm
from scipy import ndimage
import random
import scipy
import pickle
import datetime
import pandas as pd
import subprocess
from calendar import monthrange
import pandas as pd
import datetime
import sys 
import shapefile as shp
import matplotlib.path as mplPath
from scipy.stats import norm
import matplotlib.gridspec as gridspec
# from mpl_toolkits.basemap import Basemap, cm
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.gridspec as gridspec
from pylab import *
import string
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import shapefile
from calendar import monthrange
from tqdm import tqdm
import xarray as xr

import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import shapely.geometry as sgeom
from matplotlib.colors import LogNorm
from cartopy.feature import NaturalEarthFeature
import cartopy.feature as cf

from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim,
                 cartopy_ylim, latlon_coords)

def read_shapefile(sf):
    """
    Turn a shapefile reader (pyshp) into a Pandas dataframe.

    One row is produced per record. The columns are the attribute
    fields of the shapefile plus an extra 'coords' column that holds
    the list of points of the matching shape.
    """
    # Every entry of sf.fields starts with the field name; the first entry
    # is skipped (presumably pyshp's deletion-flag placeholder - it has no
    # counterpart in the records).
    all_names = [entry[0] for entry in sf.fields]
    attribute_names = all_names[1:]

    attribute_rows = sf.records()
    geometry = [shape.points for shape in sf.shapes()]

    table = pd.DataFrame(data=attribute_rows, columns=attribute_names)
    return table.assign(coords=geometry)

#### speed up interpolation
import scipy.interpolate as spint
import scipy.spatial.qhull as qhull
import numpy as np

def interp_weights(xy, uv,d=2):
    """Precompute the vertices and barycentric weights for linear interpolation.

    The source points are triangulated once; for every target point the
    enclosing simplex and the barycentric coordinates inside it are
    returned so that many fields on the same grids can be interpolated
    cheaply afterwards with ``interpolate``.

    Parameters
    ----------
    xy : ndarray, shape (n_source, d)
        Coordinates of the source points.
    uv : ndarray, shape (n_target, d)
        Coordinates of the target points.
    d : int, optional
        Number of spatial dimensions (default 2).

    Returns
    -------
    vertices : ndarray of int, shape (n_target, d + 1)
        Indices into the source points of the simplex used for each target.
    weights : ndarray of float, shape (n_target, d + 1)
        Barycentric weights of those vertices. Rows belonging to targets
        outside the convex hull of ``xy`` are NaN, so interpolated values
        there become NaN instead of silently using an unrelated simplex.
    """
    # Public import path; the ``scipy.spatial.qhull`` namespace is deprecated.
    from scipy.spatial import Delaunay

    tri = Delaunay(xy)
    simplex = tri.find_simplex(uv)
    # find_simplex returns -1 outside the hull; np.take would then pick the
    # LAST simplex, so those rows are invalidated further below.
    vertices = np.take(tri.simplices, simplex, axis=0)
    temp = np.take(tri.transform, simplex, axis=0)
    # Row d of the transform holds the offset vector, rows :d the inverse map.
    delta = uv - temp[:, d]
    bary = np.einsum('njk,nk->nj', temp[:, :d, :], delta)
    weights = np.hstack((bary, 1 - bary.sum(axis=1, keepdims=True)))
    weights[simplex < 0] = np.nan
    return vertices, weights

def interpolate(values, vtx, wts):
    """Interpolate ``values`` with precomputed vertices and weights.

    ``vtx`` indexes into the flattened ``values``; each row of picked
    values is combined with the matching row of ``wts`` as a weighted
    sum, giving one interpolated number per row.
    """
    corner_values = np.take(values, vtx)
    return np.einsum('nj,nj->n', corner_values, wts)

def deiscretice_timeseries(DATA,
                          bucked_size):
    """Discretise a time series into whole multiples of ``bucked_size``.

    At every time step the value is split into the largest multiple of
    ``bucked_size`` that fits (this is stored in the result) and a
    remainder, which is carried over and added to the next time step.
    A NaN value produces NaN in the result and carries nothing over.

    Parameters
    ----------
    DATA : ndarray
        Time series with time on the first axis. A 1D series is treated
        as a single column. The input array is left unmodified.
    bucked_size : float
        Size of one bucket.

    Returns
    -------
    ndarray
        Discretised series. 1D input is returned with shape (n_time, 1);
        otherwise the shape matches ``DATA``. Floating dtypes are kept,
        other dtypes are promoted to float so NaN can be represented.
    """
    # Work on a private copy: the carry-over below rewrites the following
    # time step, which must not leak back into the caller's array.
    work = np.array(DATA, copy=True)
    if not np.issubdtype(work.dtype, np.floating):
        work = work.astype(float)
    if work.ndim == 1:
        # make data 2D
        work = work[:, None]

    Discrete_timeseries = np.empty_like(work)
    n_steps = work.shape[0]
    for tt in tqdm(range(n_steps)):
        # divmod is a ufunc, so it handles the whole time slice at once.
        INT, REST = np.divmod(work[tt], bucked_size)
        Discrete_timeseries[tt] = INT * bucked_size
        if tt != n_steps - 1:
            # Only finite cells hand their remainder to the next step.
            FIN = ~np.isnan(INT)
            work[tt + 1, FIN] = work[tt + 1, FIN] + REST[FIN]
    return Discrete_timeseries

In [4]:
##############################################################
#                READ CONUS404 CONSTANT FIELDS
# Names of the variables that are read from the constants file.
sLon='XLONG'
sLat='XLAT'
sOro='HGT'
sLSM='LANDMASK'
sPlotDir = ''
GEO_EM_D1 = '/glade/campaign/ncar/USGS_Water/CONUS404/wrfconstants_d01_1979-10-01_00:00:00.nc4'

# The context manager closes the file even if one of the reads fails.
with Dataset(GEO_EM_D1, mode='r') as ncid: # open the netcdf
    # squeeze drops length-1 axes of the stored fields
    Lon=np.squeeze(ncid.variables[sLon][:])
    Lat=np.squeeze(ncid.variables[sLat][:])
    Height4=np.squeeze(ncid.variables[sOro][:])
    LSM=np.squeeze(ncid.variables[sLSM][:])

In [51]:
# Folder with the monthly input files and root folder for the processed output.
DataFolder = '/glade/campaign/mmm/c3we/prein/CONUS404/data/MonthlyData/'
SaveFolder = '/glade/campaign/mmm/c3we/prein/CONUS404/data/CONUS404_processed_data/'

# Processing period (first and last hour, both inclusive).
# NOTE(review): the stored output of the hourly read cell reports 480
# monthly files, so it was produced with a longer period than the one set
# here - confirm the intended period before re-running.
StartDay = datetime.datetime(1981, 1, 1, 0)
StopDay = datetime.datetime(1981, 12, 31, 23)
TimeHH = pd.date_range(StartDay, end=StopDay, freq='1h')
# Month-end stamps, one per month. An offset object is used instead of the
# 'M' string alias: the alias is deprecated since pandas 2.2 (renamed 'ME'),
# while 'ME' is unknown to older versions; the offset yields the same stamps
# on both.
TimeMM = pd.date_range(StartDay, end=StopDay, freq=pd.offsets.MonthEnd())
Years = np.unique(TimeMM.year)


### READ HOURLY CONUS404 PRECIPITATION DATA

In [6]:
# All hourly fields are held in memory at once; float16 keeps the array
# small at the cost of precision.
CONUS404_hourly_pr = np.zeros((len(TimeHH), Lon.shape[0], Lon.shape[1]), dtype=np.float16)
for mm in tqdm(range(len(TimeMM))):
    YYYY = TimeMM[mm].year
    MM = TimeMM[mm].month
    # hours of the full period that belong to the current monthly file
    rgiHours = (TimeHH.year == YYYY) & (TimeHH.month == MM)
    File_act = DataFolder + 'PREC_ACC_NC_'+str(YYYY)+str(MM).zfill(2)+'_CONUS404.nc'
    # The context manager closes the file even if the read or the
    # assignment fails (e.g. unexpected number of time steps).
    with Dataset(File_act, mode='r') as ncid: # open the netcdf
        CONUS404_hourly_pr[rgiHours,:,:] = np.array(np.squeeze(ncid.variables['PREC_ACC_NC'][:]), dtype=np.float16)

100%|██████████| 480/480 [11:48:35<00:00, 88.57s/it]   


### Calculate annual CDFs (sort of)

In [None]:
# Number of consecutive sorted hourly values that are averaged into one bin.
Subsample = 10
Seasons = ['annual','DJF','MAM','JJA','SON']
# Calendar months of each entry in Seasons. Months are selected within one
# calendar year, so 'DJF' combines January, February and December of the
# same year.
rgiSeasons = [range(1,13,1),
                 [1,2,12],
                 [3,4,5],
                 [6,7,8],
                 [9,10,11]]

# Make sure both output folders exist before the first file is written.
BinnedFolder = SaveFolder+'CONUS404_bined_pr/'
MaxFolder = SaveFolder+'CONUS404_an-seas_max-PR/'
os.makedirs(BinnedFolder, exist_ok=True)
os.makedirs(MaxFolder, exist_ok=True)

for yy in range(len(Years)):
    print('work on '+str(Years[yy]))
    for se in range(len(Seasons)):
        rgiTime = (TimeHH.year == Years[yy]) & np.isin(TimeHH.month, rgiSeasons[se])
        nHours = int(np.sum(rgiTime))
        if nHours < Subsample:
            # Not enough data for a single bin (e.g. season outside the
            # loaded period); skip instead of failing on an empty selection.
            print('    skip '+Seasons[se]+': fewer than '+str(Subsample)+' hourly steps')
            continue
        TimeAct = TimeHH[rgiTime]
        FileTag = str(TimeAct[0].year)+'_'+Seasons[se]+'.nc'
        SaveFile = BinnedFolder+'PREC_ACC_NC_'+FileTag
        SaveFileMax = MaxFolder+'Max_PREC_ACC_NC_'+FileTag

        # Each product is checked on its own so that a run interrupted
        # between the two writes is completed on the next execution.
        write_binned = not os.path.exists(SaveFile)
        write_max = not os.path.exists(SaveFileMax)
        if not (write_binned or write_max):
            continue
        print('    '+Seasons[se])

        # Boolean-mask indexing already returns a copy of the selection.
        Data_year = CONUS404_hourly_pr[rgiTime,:,:]
        sort_data = np.sort(Data_year, axis=0)

        if write_binned:
            # Use the largest multiple of Subsample that fits; surplus
            # values are dropped from the low end of the sorted data.
            whole_div = (nHours // Subsample) * Subsample
            binned_pr = np.mean(np.reshape(sort_data[-whole_div:,:,:], (whole_div // Subsample, Subsample, sort_data.shape[1], sort_data.shape[2])), axis=1)

            # ---------------------------------------------
            # write data to netcdf
            da = xr.DataArray(
                data=np.array(binned_pr, dtype=np.float32),
                dims=["percentile", "x", "y"],
                coords=dict(
                    percentile = np.linspace(0,100,binned_pr.shape[0]),
                    lon=(["x", "y"], Lon),
                    lat=(["x", "y"], Lat),
                ),
                attrs=dict(
                    description="sorted precipitation bin averages",
                    units="mm h-1",
                ),
            )

            ds = da.to_dataset(name='precipitation')
            ds.to_netcdf(path=SaveFile, mode='w')

        if write_max:
            # ---------------------------------------------
            # Also, save the annual/seasonal maximum
            # (last entry along the sorted time axis)
            da = xr.DataArray(
                data=np.array(sort_data[-1,:,:], dtype=np.float32),
                dims=["x", "y"],
                coords=dict(
                    lon=(["x", "y"], Lon),
                    lat=(["x", "y"], Lat),
                ),
                attrs=dict(
                    description="maximum precipitation",
                    units="mm h-1",
                ),
            )

            ds = da.to_dataset(name='max_precipitation')
            ds.to_netcdf(path=SaveFileMax, mode='w')

work on 1980
work on 1981
    annual
