## Calculating delta pressure scalers for the 1 ts emulator

In [1]:
import os
from getpass import getpass

import hf_hydrodata as hf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import subsettools as st
import yaml
from parflow import Run
from parflow.tools.fs import mkdir
from parflow.tools.io import read_pfb, read_clm, write_pfb
from parflow.tools.settings import set_working_directory

In [2]:
# Register the HydroData PIN.
# Credentials are read from the environment (HYDRODATA_EMAIL / HYDRODATA_PIN) so
# they are never stored in the notebook; any value that is not set is prompted for.
email = os.environ.get('HYDRODATA_EMAIL') or input('HydroData email: ')
pin = os.environ.get('HYDRODATA_PIN') or getpass('HydroData PIN: ')
# Only the email is echoed: cell output is saved with the notebook, so the PIN
# must not be printed.
print('Registering ' + email + ' for HydroData download')
hf.register_api_pin(email, pin)

Registering lecondon@email.arizona.edu (PIN=1234) for HydroData download


In [3]:
# Fetch the CONUS2 domain mask from HydroData, then report its shape and its sum
options = {"dataset": "conus2_domain", "variable": "mask"}
mask = hf.get_gridded_data(options)
print(mask.shape, np.sum(mask), sep="\n")


(3256, 4442)
7852823.0


In [4]:
# Constants that control which pressure files are read.
# WY 2003 is the only transient year available for CONUS2.

interval = 13  # read every `interval` hours; prime, so we don't grab the same time of day consistently
wy = 2003      # year (WY) that appears in the pressure file names
nz = 10        # number of layers in CONUS2
hend = 8760    # last hour to read; lower it for short test runs, 8760 does the entire year

## Calculate the mean hourly pressure difference for every layer
Note: to speed this up, pressure differences are sampled only every `interval` hours rather than at every hour.

In [5]:
# Mean hourly pressure change for every layer, sampled every `interval` hours.
#
# For each sampled hour h the change is press(h) - press(h-1). Only cells where
# mask == 1 contribute. A running per-layer sum is accumulated so that only the
# two pressure files for the current sample are held at once.
press_template = (
    "/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/run_inputs/"
    "spinup.wy{wy}.out.press.{hour:05d}.pfb"
)

# Loop-invariant pieces of the mask, computed once instead of once per layer per file
active = mask == 1
n_active = np.count_nonzero(active)  # N per sample = number of cells that are actually summed

# Initialize the running totals
pdif_sum = np.zeros(nz)
hour_count = 0

# The first sample is hour interval + 5; later samples follow every `interval` hours
for wy_hour in range(interval + 5, hend + 1, interval):
    print(wy_hour)

    p1 = read_pfb(press_template.format(wy=wy, hour=wy_hour))
    p0 = read_pfb(press_template.format(wy=wy, hour=wy_hour - 1))
    pdif = p1 - p0

    # Index the two horizontal axes with the 2D boolean mask for all layers at once:
    # the result has one row per layer, which is then reduced to one sum per layer.
    pdif_sum += pdif[:nz, active].sum(axis=1)
    hour_count += 1

# Fail loudly instead of silently producing NaN from a 0/0 division
if hour_count == 0 or n_active == 0:
    raise ValueError(
        f"Nothing to average: {hour_count} sampled hours "
        f"(interval={interval}, hend={hend}) and {n_active} active mask cells"
    )

pdif_mean = pdif_sum / (n_active * hour_count)
print(pdif_sum)
print(pdif_mean)

18
31
44
57
70
83
96
109
122
135
148
161
174
187
200
213
226
239
252
265
278
291
304
317
330
343
356
369
382
395
408
421
434
447
460
473
486
499
512
525
538
551
564
577
590
603
616
629
642
655
668
681
694
707
720
733
746
759
772
785
798
811
824
837
850
863
876
889
902
915
928
941
954
967
980
993
1006
1019
1032
1045
1058
1071
1084
1097
1110
1123
1136
1149
1162
1175
1188
1201
1214
1227
1240
1253
1266
1279
1292
1305
1318
1331
1344
1357
1370
1383
1396
1409
1422
1435
1448
1461
1474
1487
1500
1513
1526
1539
1552
1565
1578
1591
1604
1617
1630
1643
1656
1669
1682
1695
1708
1721
1734
1747
1760
1773
1786
1799
1812
1825
1838
1851
1864
1877
1890
1903
1916
1929
1942
1955
1968
1981
1994
2007
2020
2033
2046
2059
2072
2085
2098
2111
2124
2137
2150
2163
2176
2189
2202
2215
2228
2241
2254
2267
2280
2293
2306
2319
2332
2345
2358
2371
2384
2397
2410
2423
2436
2449
2462
2475
2488
2501
2514
2527
2540
2553
2566
2579
2592
2605
2618
2631
2644
2657
2670
2683
2696
2709
2722
2735
2748
2761
2774
2787
2800
2813
282

### Calculate the standard deviation of hourly pressure differences
Dividing by N (the population standard deviation) to match the NumPy default (`ddof=0`).

In [None]:
# Standard deviation of the hourly pressure change for every layer (dividing by N).
#
# Loops over the same sampled hours as the mean calculation and accumulates, per
# layer, the sum of (pdif - pdif_mean)^2 over the cells where mask == 1.
# Requires `pdif_mean` to have been computed already.
press_template = (
    "/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/run_inputs/"
    "spinup.wy{wy}.out.press.{hour:05d}.pfb"
)

# Loop-invariant pieces of the mask, computed once instead of once per layer per file
active = mask == 1
n_active = np.count_nonzero(active)  # N per sample = number of cells that are actually summed

# Initialize the running totals
numerator = np.zeros(nz)  # running sum of squared deviations, one entry per layer
hour_count = 0

# The first sample is hour interval + 5; later samples follow every `interval` hours
for wy_hour in range(interval + 5, hend + 1, interval):
    print(wy_hour)

    p1 = read_pfb(press_template.format(wy=wy, hour=wy_hour))
    p0 = read_pfb(press_template.format(wy=wy, hour=wy_hour - 1))
    pdif = p1 - p0

    # calculate a running sum of (pdif - pdif_mean)^2 for every layer
    for z in range(nz):
        # Despite its name, this holds the squared deviation of layer z from its mean
        pdif_mean_z = (pdif[z, :, :] - pdif_mean[z]) ** 2
        numerator[z] += np.sum(pdif_mean_z[active])

    hour_count += 1

# Fail loudly instead of silently producing NaN from a 0/0 division
if hour_count == 0 or n_active == 0:
    raise ValueError(
        f"Nothing to average: {hour_count} sampled hours "
        f"(interval={interval}, hend={hend}) and {n_active} active mask cells"
    )

pdif_stdev = (numerator / (n_active * hour_count)) ** 0.5
print(pdif_stdev)
print(pdif_mean)

18
31
44
57
70
83
96
109
122
135
148
161
174
187
200
213
226
239
252
265
278
291
304
317
330
343
356
369
382
395
408
421
434
447
460
473
486
499
512
525
538
551
564
577
590
603
616
629
642
655
668
681
694
707
720
733
746
759
772
785
798
811
824
837
850
863
876
889
902
915
928
941
954
967
980
993
1006
1019
1032
1045
1058
1071
1084
1097
1110
1123
1136
1149
1162
1175
1188
1201
1214
1227
1240
1253
1266
1279
1292
1305
1318
1331
1344
1357
1370
1383
1396
1409
1422
1435
1448
1461
1474
1487
1500
1513
1526
1539
1552
1565
1578
1591
1604
1617
1630
1643
1656
1669
1682
1695
1708
1721
1734
1747
1760
1773
1786
1799
1812
1825
1838
1851
1864
1877
1890
1903
1916
1929
1942
1955
1968
1981
1994
2007
2020
2033
2046
2059
2072
2085
2098
2111
2124
2137
2150
2163
2176
2189
2202
2215
2228
2241
2254
2267
2280
2293
2306
2319
2332
2345
2358
2371
2384
2397
2410
2423
2436
2449
2462
2475
2488
2501
2514
2527
2540
2553
2566
2579
2592
2605
2618
2631
2644
2657
2670
2683
2696
2709
2722
2735
2748
2761
2774
2787
2800
2813
282

### Make a dataframe and save as a csv

In [None]:
# Write the per-layer scalers (mean and standard deviation of the hourly pressure
# change) to a CSV file and to a YAML file.
# NOTE(review): 'differnce' in the file name is misspelled; it is left unchanged
# because other code may already look for this exact name -- confirm before renaming.
fout = 'pressure_differnce_scalers_' + str(interval) + 'hour.csv'
row_names = ['layer_' + str(val) for val in range(nz)]
df = pd.DataFrame({'Name': row_names, 'Mean': pdif_mean, 'stdev': pdif_stdev})
# 'Name' is kept as a regular column: with index=False an index would not be written
df.to_csv(fout, index=False)

# Save as YAML, one record per layer. Values are converted to built-in floats so
# the file contains plain numbers whatever scalar type the arrays hold.
records = [
    {'Name': name, 'Mean': float(mean), 'stdev': float(stdev)}
    for name, mean, stdev in zip(row_names, pdif_mean, pdif_stdev)
]
with open('pressure_scalers.yml', 'w') as file:
    yaml.safe_dump(records, file, sort_keys=False)

## Cross checking the mean and standard deviation calculations
Generally you should keep this turned off. To run it, set `interval=1` and `hend` to something small so that all of the pressure differences can easily be held in memory.

In [None]:
# Cross checking the mean and stdev calculation for one layer against np.mean / np.std
## Note to run this set hend to something small and interval to 1 so you can hold it all in memory
## and make sure to run the top part with these settings first.
## For my tests I used hend=6
## hend must be at least interval + 5, the first hour sampled by the cells above.

layer_test = 9  # Pick a layer to test

# Use exactly the hours that the mean/stdev loops sample (start at interval + 5,
# step by interval); comparing a different set of hours would make the results
# disagree even when the running calculation is correct.
test_hours = range(interval + 5, hend + 1, interval)

# One 2D slab per sampled hour, sized from the mask rather than from a variable
# left over from an earlier loop
pdif_test = np.zeros((len(test_hours),) + mask.shape)
print(pdif_test.shape)

# Read in the pressure files and make a matrix of differences
for i, wy_hour in enumerate(test_hours):
    print(wy_hour, i)
    fin1 = f"/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/run_inputs/spinup.wy{wy}.out.press.{wy_hour:05d}.pfb"
    fin0 = f"/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/run_inputs/spinup.wy{wy}.out.press.{(wy_hour-1):05d}.pfb"
    temp_dif = read_pfb(fin1) - read_pfb(fin0)
    pdif_test[i, :, :] = temp_dif[layer_test, :, :]

# Calculate the mean and Sdev and compare
# Indexing the two horizontal axes with the 2D mask keeps the mask == 1 cells of
# every hour, so no 3D copy of the mask is needed.
pdif_active = pdif_test[:, mask == 1]
stdev_test = np.std(pdif_active)
mean_test = np.mean(pdif_active)

print("Calculation Comparison for Layer", layer_test)
print(stdev_test, pdif_stdev[layer_test], (stdev_test-pdif_stdev[layer_test]))
print(mean_test, pdif_mean[layer_test], (mean_test-pdif_mean[layer_test]))