In [1]:
#!/usr/bin/env python

# COOP_Station_preprocessor.ipynb

In [2]:
''' 
   COOP_Station_preprocessor.ipynb

   Read COOP data and save hourly precipitation for all stations 
   with minimum completeness for the evaluation of climate change trends
   and CONUS404 output
   
'''

' \n   COOP_Station_preprocessor.ipynb\n\n   Read COOP data and save hourly precipitation for all stations \n   with minimum completness for the evaluation of climate change trends\n   and CONUS404 output\n   \n'

In [30]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from netCDF4 import Dataset
import glob
import os
from pdb import set_trace as stop
from scipy.ndimage.filters import gaussian_filter
from scipy.ndimage import median_filter
from scipy.ndimage import label
from matplotlib import cm
from scipy import ndimage
import random
import scipy
import pickle
import datetime
import pandas as pd
import subprocess
from calendar import monthrange
import pandas as pd
import datetime
import sys 
import shapefile as shp
import matplotlib.path as mplPath
from scipy.stats import norm
import matplotlib.gridspec as gridspec
# from mpl_toolkits.basemap import Basemap, cm
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.gridspec as gridspec
from pylab import *
import string
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import shapefile
from calendar import monthrange
from tqdm import tqdm


# # fix pickle load issue
# np_load_old = np.load
# # modify the default parameters of np.load
# np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)


def read_shapefile(sf):
    """
    Convert a pyshp reader object into a Pandas dataframe.

    The attribute records become the rows, the field names (all but the
    first entry of ``sf.fields``) become the columns, and an extra
    'coords' column carries the point list of each shape.
    """
    # Field names, skipping the leading entry of the field list.
    column_names = [entry[0] for entry in sf.fields][1:]
    attribute_rows = sf.records()
    geometry = [shape.points for shape in sf.shapes()]
    frame = pd.DataFrame(data=attribute_rows, columns=column_names)
    return frame.assign(coords=geometry)

#### speed up interpolation
import scipy.interpolate as spint
import scipy.spatial.qhull as qhull
import numpy as np

def interp_weights(xy, uv,d=2):
    """
    Precompute linear (barycentric) interpolation weights.

    A Delaunay triangulation of the source points ``xy`` is built once; for
    every target point in ``uv`` the enclosing simplex and the barycentric
    coordinates inside it are returned, so that ``interpolate`` can be
    applied repeatedly to different value fields at low cost.

    Parameters
    ----------
    xy : array, shape (n_source, d)
        Coordinates of the source points.
    uv : array, shape (n_target, d)
        Coordinates of the target points.
    d : int, optional
        Number of spatial dimensions (default 2).

    Returns
    -------
    vertices : int array, shape (n_target, d + 1)
        Indices into the source points of the enclosing simplex corners.
    weights : float array, shape (n_target, d + 1)
        Barycentric weights of those corners. Rows belonging to target
        points outside the convex hull of ``xy`` are NaN.
    """
    # Public import path; scipy.spatial.qhull is a deprecated namespace.
    from scipy.spatial import Delaunay

    tri = Delaunay(xy)
    simplex = tri.find_simplex(uv)
    vertices = np.take(tri.simplices, simplex, axis=0)
    temp = np.take(tri.transform, simplex, axis=0)
    delta = uv - temp[:, d]
    bary = np.einsum('njk,nk->nj', temp[:, :d, :], delta)
    weights = np.hstack((bary, 1 - bary.sum(axis=1, keepdims=True)))
    # find_simplex flags points outside the hull with -1; np.take would then
    # silently use the last simplex. Mark those rows invalid instead so the
    # interpolated value becomes NaN rather than a meaningless extrapolation.
    weights[simplex < 0] = np.nan
    return vertices, weights

def interpolate(values, vtx, wts):
    """
    Apply precomputed interpolation weights to a field of values.

    ``values`` is indexed (flattened) with the vertex indices ``vtx`` and the
    result is combined row by row with the weights ``wts``; one interpolated
    number is returned per row.
    """
    corner_values = np.take(values, vtx)
    return np.einsum('nj,nj->n', corner_values, wts)

###  USER MODIFY SECTION

In [4]:
# Evaluation period: 1979-10-01 00:00 through 2020-09-30 23:00.
StartDay = datetime.datetime(year=1979, month=10, day=1, hour=0)
StopDay = datetime.datetime(year=2020, month=9, day=30, hour=23)

# Time axes covering the evaluation period at hourly, daily and monthly steps.
rgdTimeFULL = pd.date_range(start=StartDay, end=StopDay, freq='h')
rgdTimeFULLDD = pd.date_range(start=StartDay, end=StopDay, freq='d')
rgdTimeFULLMM = pd.date_range(start=StartDay, end=StopDay, freq='m')

# Distinct calendar years present on the hourly axis.
Years = np.unique(rgdTimeFULL.year)

# Directory in which the preprocessed station data is stored.
SaveDir = '/glade/campaign/mmm/c3we/prein/Papers/2021_Hist-Ext-PR-Changes/data/'

### Read in the coordinates and create basin mask

#### Read Hourly_PR_Stations-DSI-3240_v2

In [32]:
# Cache file with the preprocessed COOP station data. If it exists it is
# loaded; otherwise the station files are processed and the cache is written.
# NOTE(review): "CCOP" looks like a typo for "COOP"; the name is kept as-is so
# that already existing cache files are still found.
CO_SAVE = SaveDir+'CCOP_stations_1979-2020.npz'

if not os.path.exists(CO_SAVE):
    print('    Process and save COOP data')

    COdir = '/glade/campaign/mmm/c3we/prein/observations/Hourly_PR_Stations-DSI-3240_v2/data/'
    sStationFile = '/glade/campaign/mmm/c3we/prein/observations/Hourly_PR_Stations-DSI-3240_v2/original_data/station-inventory/HPD_v02r02_stationinv_c20201104.csv'
    # Hourly axis used to index the station files.
    # NOTE(review): assumes every station file holds hourly values for
    # 1948-01-01 00:00 ... 2020-12-31 23:00 -- confirm against the files.
    TimeHHCO_full = pd.date_range(datetime.datetime(1948, 1, 1,0), end=datetime.datetime(2020, 12, 31,23), freq='h')

    # Core time for evaluation: boolean selector on the station-file axis
    TimeSel = np.isin(TimeHHCO_full, rgdTimeFULL)

    # read in station location and altitude
    df = pd.read_csv(sStationFile)
    StnID = np.array(df['StnID'])
    LatCO = np.array(df['Lat'])
    LonCO = np.array(df['Lon'])
    AltCO = np.array(df['Elev'])
    # (lon, lat) tuple for every station
    rgrGridCells = list(zip(LonCO.ravel(), LatCO.ravel()))

    # The four decades 1980-89, 1990-99, 2000-09, 2010-19 on the evaluation
    # axis; computed once because they do not depend on the station.
    DecadeMasks = [(rgdTimeFULL.year >= 1980+ii*10) & (rgdTimeFULL.year <= 1989+ii*10) for ii in range(4)]
    DecadeHours = [np.sum(mask) for mask in DecadeMasks]

    # -----------------------
    # read precipitation data
    rgrCOPR = np.full((len(rgdTimeFULL), len(LatCO)), np.nan)
    # Despite its name this holds the fraction of NON-missing hours per
    # decade (rows) and station (columns).
    RatioMissing = np.full((4, len(LatCO)), np.nan)
    for st in tqdm(range(len(LatCO))):
        FILEact = COdir+StnID[st]+'.nc'
        # Context manager closes the file even if reading fails.
        with Dataset(FILEact, mode='r') as ncid:
            PRact = np.squeeze(ncid.variables['PR'][TimeSel])
        # Turn masked entries (if the reader returned a masked array) into NaN
        # explicitly instead of leaking the raw fill values into the output.
        PRact = np.ma.filled(PRact.astype(float), np.nan)
        # Values outside 0..500 are treated as invalid. This is done before
        # the completeness ratio so that the ratio matches the saved data.
        # NOTE(review): units presumably mm per hour -- confirm.
        PRact[(PRact < 0) | (PRact > 500)] = np.nan
        RatioMissing[:,st] = [np.sum(~np.isnan(PRact[mask]))/nhours
                              for mask, nhours in zip(DecadeMasks, DecadeHours)]
        rgrCOPR[:,st] = PRact

    np.savez(CO_SAVE,
            CO_DATA=rgrCOPR,
            StnIDCO=StnID,
            LonSTCO=LonCO,
            LatSTCO=LatCO,
            AltSTCO=AltCO,
            TimeHH=rgdTimeFULL,
            RatioMissing=RatioMissing)

    # Expose the same names the cache-loading branch defines, so the cells
    # that follow work no matter which branch was executed.
    CO_DATA = rgrCOPR
    StnIDCO = StnID
    LonSTCO = LonCO
    LatSTCO = LatCO
    AltSTCO = AltCO
    TimeHHCO = rgdTimeFULL
else:
    print('    Read preprocessed COOP data')
    # allow_pickle is required for the object-dtype arrays in the cache;
    # only load cache files written by this notebook.
    DATA = np.load(CO_SAVE, allow_pickle=True)
    CO_DATA=DATA['CO_DATA']
    StnIDCO=DATA['StnIDCO']
    LonSTCO=DATA['LonSTCO']
    LatSTCO=DATA['LatSTCO']
    AltSTCO=DATA['AltSTCO']
    TimeHHCO=pd.to_datetime(DATA['TimeHH'])
    RatioMissing=DATA['RatioMissing']


    Process and save COOP data


100%|██████████| 1983/1983 [2:18:48<00:00,  4.20s/it]  


NameError: name 'LonST' is not defined

In [1]:
CO_SAVE  # echo the cache path; raises NameError unless the cell defining CO_SAVE has been run in this kernel

NameError: name 'CO_SAVE' is not defined