# Use mapping file created in step 1 to download meteorology data for gridpoints within basin shapefile + buffer distance

In [4]:
# data processing
import os
import pandas as pd, numpy as np, dask, json
import ogh
import geopandas as gpd
import ogh_xarray_landlab as oxl
import xarray as xr

# data migration library
from utilities import hydroshare

# plotting and shape libraries
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

In [5]:
# Project directory that later cells read from and write to.
# The HydroShare lookup is disabled below; a fixed local path is used instead.
#hs=hydroshare.hydroshare()
#homedir = hs.getContentPath(os.environ["HS_RES_ID"])
homedir = '/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion'
# make homedir the working directory so relative file names resolve against it
os.chdir(homedir)
print('Data will be saved to:'+homedir)

Data will be saved to:/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion


##  open meta_file for use in functions below

In [6]:
# initialize ogh_meta
meta_file = dict(ogh.ogh_meta())
sorted(meta_file.keys())

['dailymet_bclivneh2013',
 'dailymet_livneh2013',
 'dailymet_livneh2015',
 'dailyvic_livneh2013',
 'dailyvic_livneh2015',
 'dailywrf_bcsalathe2014',
 'dailywrf_salathe2014',
 'hourlywrf_pnnl']

In [7]:
#check characteristics of data
meta_file['dailywrf_salathe2014']['start_date']

'1950-01-01'

## Re-establish the paths to the mapping file

In [8]:
# map the mapping files generated in usecase1: one for the hourly PNNL WRF grid,
# one for the daily Livneh/Salathe (VIC) grid
# NOTE(review): this comment previously named Sauk-Suiattle, Elwha and Upper Rio Salado,
# which does not match the file names or watershed labels used here -- confirm the region
mappingfile_PNNL = os.path.join(homedir,'Temp_P_mappingfile_i.csv')

mappingfile_VIC = os.path.join(homedir, 'Temp_VIC_mappingfile.csv') 

# one summary column per mapping file; rows list the elevation range and number
# of grid cells available for each dataset in the catalog
t1 = ogh.mappingfileSummary(listofmappingfiles = [mappingfile_PNNL,mappingfile_VIC], 
                            listofwatershednames = ['Puyallup_PNNL','Puyallup_VIC'],
                            meta_file=meta_file)

t1

Watershed,Puyallup_PNNL,Puyallup_VIC
Median elevation in meters [range](Number of gridded cells),1613[997-1857] (n=34),1603[866-1958] (n=37)
dailymet_bclivneh2013,0,1603[866-1958] (n=37)
dailymet_livneh2013,0,1603[866-1958] (n=37)
dailymet_livneh2015,0,1603[866-1958] (n=37)
dailyvic_livneh2013,0,1603[866-1958] (n=37)
dailyvic_livneh2015,0,1603[866-1958] (n=37)
dailywrf_bcsalathe2014,0,1623[866-1958] (n=34)
dailywrf_salathe2014,0,1623[866-1958] (n=34)
hourlywrf_pnnl,1613[997-1857] (n=34),0


### convert mapping file to dataframe: maptable

In [9]:
maptable_p, nstations_p = ogh.mappingfileToDF_PNNL(mappingfile_PNNL)
maptable_p
nstations_p

Number of gridded data files:34
Minimum elevation: 997.522521973m
Mean elevation: 1518.119246539206m
Maximum elevation: 1857.13256836m


34

In [10]:
maptable_v, nstations_v = ogh.mappingfileToDF(mappingfile_VIC)
maptable_v
nstations_v

Number of gridded data files:34
Minimum elevation: 866.0m
Mean elevation: 1541.0882352941176m
Maximum elevation: 1958.0m


34

In [12]:
maptable_p.head()
#maptable_v


Unnamed: 0,FID,SN,WE,LAT,LONG_,ELEV,hourlywrf_pnnl
0,0,57.0,60.0,48.464809,-120.792419,1566.227539,/home/jovyan/work/notebooks/GriddedMetDataDown...
1,1,57.0,61.0,48.469715,-120.711258,1740.342163,/home/jovyan/work/notebooks/GriddedMetDataDown...
2,2,57.0,62.0,48.474552,-120.630096,1806.5271,/home/jovyan/work/notebooks/GriddedMetDataDown...
3,3,58.0,59.0,48.513668,-120.881042,1689.336548,/home/jovyan/work/notebooks/GriddedMetDataDown...
4,4,58.0,60.0,48.518616,-120.799835,1700.479858,/home/jovyan/work/notebooks/GriddedMetDataDown...


## Download grid points in mapping file from PNNL 2018 netCDF files, convert to ascii
#### Because of a memory problem with dataset.to_dataframe(), use chunks: 1981 to 1987, 1988 to 1994, 1995 to 2001, 2002 to 2008, 2009 to 2015

In [13]:
#chunks = [('1981-01-01','1987-12-31'),('1988-01-01','1994-12-31'),('1995-01-01','2001-12-31'),('2002-01-01','2008-12-31'),('2009-01-01','2015-12-31')]
#chunks = [('2001-01-01','2005-12-31'),('2006-01-01','2010-12-31'),('2011-01-01','2015-12-31')]
chunks = [('2006-01-01','2010-12-31')]

In [14]:
type(chunks)
chunks


[('2006-01-01', '2010-12-31')]

### Define grid cells to be clipped from NetCDF files

In [15]:
# Bounding box of the mapped grid cells, expressed as integer west-east (x)
# and south-north (y) grid indices taken from the PNNL mapping table.
spatialbounds = dict(
    minx=maptable_p['WE'].min().astype(np.int64),
    maxx=maptable_p['WE'].max().astype(np.int64),
    miny=maptable_p['SN'].min().astype(np.int64),
    maxy=maptable_p['SN'].max().astype(np.int64),
)
spatialbounds

{'minx': 56, 'maxx': 65, 'miny': 57, 'maxy': 62}

#### check directory that data will be saved to

In [16]:
datadir = homedir
datadir

'/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion'

In [None]:
%%time
for chunk in chunks:

    #update datadir to save downloaded data in appropriate location
    #use get_x_hourlywrf_PNNL2018_dir if only directory needed
    outputfiles = oxl.get_x_hourlywrf_PNNL2018(homedir=datadir,
                                                spatialbounds=spatialbounds,
                                                subdir='PNNL/' + chunk[0] + 'to' + chunk[1] +'/',
                                                nworkers=40,
                                                start_date=chunk[0],
                                                end_date=chunk[1],
                                                file_prefix='sp_',
                                                replace_file=True)


    outputlist = oxl.netcdf_to_ascii_PNNL2018(homedir=homedir,
                                 subdir='PNNL/ASCII/' + chunk[0] + 'to' + chunk[1] +'/',
                                 netcdfs=outputfiles,
                                 mappingfile=mappingfile_PNNL,
                                 catalog_label='hourlywrf_pnnl',
                                 meta_file=meta_file)
                                 #time_nm = '20060101to20101231')

## Download Salathe and Livneh ascii files that correspond to gridpoints in mappingfile

In [None]:
%%time

# One call per daily dataset in the catalog (Livneh 2013/2015 meteorology and VIC
# output, Salathe 2014 WRF and their bias-corrected variants); each call is given
# the project directory and the VIC mapping file.
ogh.getDailyMET_livneh2013(homedir, mappingfile_VIC)
ogh.getDailyMET_bcLivneh2013(homedir, mappingfile_VIC)
ogh.getDailyMET_livneh2015(homedir, mappingfile_VIC)
ogh.getDailyVIC_livneh2013(homedir, mappingfile_VIC)
ogh.getDailyVIC_livneh2015(homedir, mappingfile_VIC)
ogh.getDailyWRF_salathe2014(homedir, mappingfile_VIC)
ogh.getDailyWRF_bcsalathe2014(homedir, mappingfile_VIC)

### Update mapping file summary

In [17]:
t1 = ogh.mappingfileSummary(listofmappingfiles = [mappingfile_PNNL,mappingfile_VIC], 
                            listofwatershednames = ['Temp_PNNL','Temp_VIC'],
                            meta_file=meta_file)

t1
#t1.to_csv(os.path.join(homedir, 'watershed_table.txt'), sep='\t', header=True, index=True)



Watershed,Temp_PNNL,Temp_VIC
Median elevation in meters [range](Number of gridded cells),1613[997-1857] (n=34),1603[866-1958] (n=37)
dailymet_bclivneh2013,0,1603[866-1958] (n=37)
dailymet_livneh2013,0,1603[866-1958] (n=37)
dailymet_livneh2015,0,1603[866-1958] (n=37)
dailyvic_livneh2013,0,1603[866-1958] (n=37)
dailyvic_livneh2015,0,1603[866-1958] (n=37)
dailywrf_bcsalathe2014,0,1623[866-1958] (n=34)
dailywrf_salathe2014,0,1623[866-1958] (n=34)
hourlywrf_pnnl,1613[997-1857] (n=34),0


## Combine and summarize individual ascii files into a dictionary: ltm

#### Livneh 2013

In [18]:
# catalog entries for the two daily datasets that are compared below
dr1 = meta_file['dailymet_livneh2013']     # Livneh et al., 2013
dr2 = meta_file['dailywrf_salathe2014']    # Salathe et al., 2014

# time window covered by both datasets, as a (start, end) pair
dr = ogh.overlappingDates(date_set1=(dr1['start_date'], dr1['end_date']),
                          date_set2=(dr2['start_date'], dr2['end_date']))
dr


('1950-01-01', '2010-12-31')

In [20]:
%%time
# start the ltm dictionary from the Livneh et al. (2013) daily files listed in the VIC mapping file
ltm = ogh.gridclim_dict(mappingfile=mappingfile_VIC,
                        metadata=meta_file,
                        dataset='dailymet_livneh2013',
                        subset_start_date='1915-01-01',
                        subset_end_date='2011-12-31')

# compute sums and mean monthly and yearly sums of precipitation over the
# overlapping time window dr
ltm = ogh.aggregate_space_time_sum(df_dict=ltm,
                                   suffix='PRECIP_dailymet_livneh2013',
                                   start_date=dr[0],
                                   end_date=dr[1])

DatetimeIndex(['1915-01-01', '1915-01-02', '1915-01-03', '1915-01-04',
               '1915-01-05', '1915-01-06', '1915-01-07', '1915-01-08',
               '1915-01-09', '1915-01-10',
               ...
               '2011-12-22', '2011-12-23', '2011-12-24', '2011-12-25',
               '2011-12-26', '2011-12-27', '2011-12-28', '2011-12-29',
               '2011-12-30', '2011-12-31'],
              dtype='datetime64[ns]', length=35429, freq='D')
DatetimeIndex(['1915-01-01', '1915-01-02', '1915-01-03', '1915-01-04',
               '1915-01-05', '1915-01-06', '1915-01-07', '1915-01-08',
               '1915-01-09', '1915-01-10',
               ...
               '2011-12-22', '2011-12-23', '2011-12-24', '2011-12-25',
               '2011-12-26', '2011-12-27', '2011-12-28', '2011-12-29',
               '2011-12-30', '2011-12-31'],
              dtype='datetime64[ns]', length=35429, freq='D')
Number of data files within elevation range (866.0-1958.0 m): 37
PRECIP dataframe reading comple

#### Salathe 2014

In [21]:
%%time
ltm = ogh.gridclim_dict(mappingfile=mappingfile_VIC,
                        metadata=meta_file,
                        dataset='dailywrf_salathe2014',
                        subset_start_date='1915-01-01',
                        subset_end_date='2011-12-31',
                        df_dict = ltm)

# compute sums and mean monthly an yearly sums
ltm = ogh.aggregate_space_time_sum(df_dict=ltm,
                                   suffix='PRECIP_dailywrf_salathe2014',
                                   start_date='1950-01-01',
                                   end_date='2010-12-31')

DatetimeIndex(['1950-01-01', '1950-01-02', '1950-01-03', '1950-01-04',
               '1950-01-05', '1950-01-06', '1950-01-07', '1950-01-08',
               '1950-01-09', '1950-01-10',
               ...
               '2010-12-22', '2010-12-23', '2010-12-24', '2010-12-25',
               '2010-12-26', '2010-12-27', '2010-12-28', '2010-12-29',
               '2010-12-30', '2010-12-31'],
              dtype='datetime64[ns]', length=22280, freq='D')
DatetimeIndex(['1915-01-01', '1915-01-02', '1915-01-03', '1915-01-04',
               '1915-01-05', '1915-01-06', '1915-01-07', '1915-01-08',
               '1915-01-09', '1915-01-10',
               ...
               '2011-12-22', '2011-12-23', '2011-12-24', '2011-12-25',
               '2011-12-26', '2011-12-27', '2011-12-28', '2011-12-29',
               '2011-12-30', '2011-12-31'],
              dtype='datetime64[ns]', length=35429, freq='D')
Number of data files within elevation range (866.0-1958.0 m): 34
PRECIP dataframe reading comple

In [22]:
#check data sets stored in ltm dictionary
ltm.keys()

dict_keys(['PRECIP_dailymet_livneh2013', 'TMAX_dailymet_livneh2013', 'TMIN_dailymet_livneh2013', 'WINDSPD_dailymet_livneh2013', 'meanalldailysum_PRECIP_dailymet_livneh2013', 'monthsum_PRECIP_dailymet_livneh2013', 'meanbymonthsum_PRECIP_dailymet_livneh2013', 'meanmonthsum_PRECIP_dailymet_livneh2013', 'meanallmonthsum_PRECIP_dailymet_livneh2013', 'yearsum_PRECIP_dailymet_livneh2013', 'meanbyyearsum_PRECIP_dailymet_livneh2013', 'meanyearsum_PRECIP_dailymet_livneh2013', 'meanallyearsum_PRECIP_dailymet_livneh2013', 'PRECIP_dailywrf_salathe2014', 'TMAX_dailywrf_salathe2014', 'TMIN_dailywrf_salathe2014', 'WINDSPD_dailywrf_salathe2014', 'meanalldailysum_PRECIP_dailywrf_salathe2014', 'monthsum_PRECIP_dailywrf_salathe2014', 'meanbymonthsum_PRECIP_dailywrf_salathe2014', 'meanmonthsum_PRECIP_dailywrf_salathe2014', 'meanallmonthsum_PRECIP_dailywrf_salathe2014', 'yearsum_PRECIP_dailywrf_salathe2014', 'meanbyyearsum_PRECIP_dailywrf_salathe2014', 'meanyearsum_PRECIP_dailywrf_salathe2014', 'meanallyear

#### PNNL 2018

### combine chunks then add to ltm dictionary

#### chunks used to download data:

In [24]:
#chunks = [('2001-01-01','2005-12-31')]
#chunks = [('2001-01-01','2005-12-31'),('2006-01-01','2010-12-31'),('2011-01-01','2015-12-31')]
type(chunks)
chunks


[('2006-01-01', '2010-12-31')]

#### Create dictionary Ldict: each chunk has a key in dictionary. Within each key are all of the variables from the PNNL dataset

In [25]:
%%time
Ldict ={}
for ind, chunk in enumerate(chunks):
    
    homedir = '/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion/PNNL/ASCII/'+ chunk[0] + 'to' + chunk[1] +'/'
    os.chdir(homedir)

    %%time
    file_start_date = chunk[0]
    file_end_date = chunk[1] + ' 23:00:00'
    subset_start_date = file_start_date
    subset_end_date =file_end_date
    #change time step in metafile to hourly
    Ldict[ind] = ogh.gridclim_dict(mappingfile=mappingfile_PNNL,
                            metadata=meta_file,
                            dataset='hourlywrf_pnnl',
                            file_start_date = file_start_date,
                            file_end_date = file_end_date,
                            subset_start_date=subset_start_date,
                            subset_end_date=subset_end_date)

CPU times: user 10 µs, sys: 0 ns, total: 10 µs
Wall time: 21 µs
DatetimeIndex(['2006-01-01 00:00:00', '2006-01-01 01:00:00',
               '2006-01-01 02:00:00', '2006-01-01 03:00:00',
               '2006-01-01 04:00:00', '2006-01-01 05:00:00',
               '2006-01-01 06:00:00', '2006-01-01 07:00:00',
               '2006-01-01 08:00:00', '2006-01-01 09:00:00',
               ...
               '2010-12-31 14:00:00', '2010-12-31 15:00:00',
               '2010-12-31 16:00:00', '2010-12-31 17:00:00',
               '2010-12-31 18:00:00', '2010-12-31 19:00:00',
               '2010-12-31 20:00:00', '2010-12-31 21:00:00',
               '2010-12-31 22:00:00', '2010-12-31 23:00:00'],
              dtype='datetime64[ns]', length=43824, freq='H')
DatetimeIndex(['2006-01-01 00:00:00', '2006-01-01 01:00:00',
               '2006-01-01 02:00:00', '2006-01-01 03:00:00',
               '2006-01-01 04:00:00', '2006-01-01 05:00:00',
               '2006-01-01 06:00:00', '2006-01-01 07:00:00',


In [26]:
#look at how variables are organized in the dataframes stored in dictionary Ldict:

dty = (list(Ldict[0].keys())) #variables in PNNL data
Ldict_c = Ldict #NOTE: plain assignment makes Ldict_c an alias of Ldict, not a copy -- a change made through either name is seen through both
#Ldict[0]['T2_hourlywrf_pnnl'].head()
dty
Ldict.keys()


dict_keys([0])

### for each variable, concatenate the chunks into a single time series, add the time series to the ltm dictionary


In [None]:
# For every PNNL variable: flatten each chunk's column labels, then join the
# chunks in chunk order into one time series stored as ltm[var].
for var in dty:
    chunk_frames = []
    for k in sorted(Ldict):
        # Shallow copy: shares the underlying data (no extra memory for the
        # values) but lets the column labels be replaced without altering the
        # frame that stays in Ldict.
        df = Ldict[k][var].copy(deep=False)
        # Keep only the first element of tuple column labels. Labels that are
        # already flat are left alone, so re-running this cell is harmless.
        df.columns = pd.Index([c[0] if isinstance(c, tuple) else c for c in df.columns])
        chunk_frames.append(df)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    # A single chunk is stored as-is to avoid copying a large frame.
    ltm[var] = chunk_frames[0] if len(chunk_frames) == 1 else pd.concat(chunk_frames)

In [None]:
#the cell above removes the multi-level column index of the PNNL dataset; it is added back in the next notebook
ltm.keys()
ltm['T2_hourlywrf_pnnl'].head()

### Save the ltm dictionary to a pickle file that can be easily opened in the same form in the next notebook

In [30]:
homedir = '/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion'
os.chdir(homedir)

In [31]:
pwd

'/home/jovyan/work/notebooks/GriddedMetDataDownload/projects/TemperatureSensorRegion'

In [None]:
import pickle #use a pickle file to retain the dictionary file structure
# NOTE(review): the file name says 2001to2015 while `chunks` above covers
# 2006-2010 only -- confirm the name before relying on it downstream.
# The context manager closes the file even if pickle.dump raises.
with open("TempSensorArea_2001to2015.pkl","wb") as f: #write binary
    pickle.dump(ltm,f)

In [32]:
#save Ldict to examine chunks before concatenating
import pickle #use pick file to retain dictionary file structure
f = open("TempSensorArea_2006to2010_SepartePereods.pkl","wb") #write binary
pickle.dump(Ldict,f)
f.close() #pickle files must be closed
 

### Define wateryear summary functions

In [None]:
def WaterYear_Accum(df):
    """
    Basin-average water-year totals.

    Every timestamp is assigned to a water year (WY): January-September keep
    their calendar year, October-December count toward the next one (so
    2006-10-01 belongs to WY 2007). Each column is summed within each water
    year, then the columns are averaged to give one value per water year.

    Parameters
    ----------
    df : pandas.DataFrame
        Time series with a DatetimeIndex and one column per grid point, e.g.
        ltm['PREC_ACC_NC_hourlywrf_pnnl'] or any other dataframe in a
        dictionary created by ogh.gridclim_dict. It is not modified.

    Returns
    -------
    pandas.Series
        Indexed by water year (index name 'WY'); the mean across columns of
        the per-column water-year sums.
    """
    #step 1, water year of every timestamp (Oct-Dec roll into the next year)
    timestamps = df.index
    water_year = np.asarray(timestamps.year) + (np.asarray(timestamps.month) >= 10)

    #step 2, sum values in each column (grid point) by WY.
    #Grouping on an external key, rather than adding a 'WY' column and moving
    #the dates into a column, leaves the caller's dataframe untouched and keeps
    #the datetime values out of the aggregation.
    WYs_Sta = df.groupby(water_year).sum()
    WYs_Sta.index.name = 'WY'

    #step 3, take mean of all grid cells as basin value
    WYs_basin = WYs_Sta.mean(axis=1)

    return(WYs_basin)


def WaterYear_Mean(df):
    """
    Basin-average water-year means.

    Every timestamp is assigned to a water year (WY): January-September keep
    their calendar year, October-December count toward the next one (so
    2006-10-01 belongs to WY 2007). Each column is averaged within each water
    year, then the columns are averaged to give one value per water year.

    Parameters
    ----------
    df : pandas.DataFrame
        Time series with a DatetimeIndex and one column per grid point, e.g.
        ltm['T2_hourlywrf_pnnl'] or any other dataframe in a dictionary
        created by ogh.gridclim_dict. It is not modified.

    Returns
    -------
    pandas.Series
        Indexed by water year (index name 'WY'); the mean across columns of
        the per-column water-year means.
    """
    #step 1, water year of every timestamp (Oct-Dec roll into the next year)
    timestamps = df.index
    water_year = np.asarray(timestamps.year) + (np.asarray(timestamps.month) >= 10)

    #step 2, average values in each column (grid point) by WY.
    #Grouping on an external key, rather than adding a 'WY' column and moving
    #the dates into a column, leaves the caller's dataframe untouched and keeps
    #the datetime values out of the aggregation.
    WYm_Sta = df.groupby(water_year).mean()
    WYm_Sta.index.name = 'WY'

    #step 3, take mean of all grid cells as basin value
    WYm_basin = WYm_Sta.mean(axis=1)

    return(WYm_basin)

In [None]:
# Basin-level water-year summaries of three hourly PNNL variables held in ltm.
PNNL_hp = ltm['PREC_ACC_NC_hourlywrf_pnnl']
#water year accumulated precip
WYp = WaterYear_Accum(PNNL_hp)
#water year mean temp
PNNL_hT = ltm['T2_hourlywrf_pnnl']
WYt = WaterYear_Mean(PNNL_hT)
#water year mean solar radiation W/m2
PNNL_hRs = ltm['SWDOWN_hourlywrf_pnnl']
WYs = WaterYear_Mean(PNNL_hRs)

In [None]:
WYp
type(WYt)