In [2]:

import numpy as np
from matplotlib import pyplot as plt
from scipy import io
import os
import sys
from optimal_interpolation import OImerge
import calendar
import auxiliary as au
from auxiliary_merge import *

########################################################################################################################

# Processing period given as [first year, last year].
# Alternative: read the two years from the command line.
# y1 = int(sys.argv[3])
# y2 = int(sys.argv[4])
# year = [y1, y2]
year = [1980, 1980]
print('year:', year)

########################################################################################################################

# General settings
weightmode = 'BMA'  # name of the method used to merge the reanalysis products (part of the input file names)
vars = ['prcp', 'tmean', 'trange']
hwsize = 2  # half window size: a 5X5 window supports the estimation at the center grid
lontar = np.arange(-180 + 0.05, -50, 0.1)
lattar = np.arange(85 - 0.05, 5, -0.1)

# Space in which prcp is handled:
#   "Gaussian": prcp is transformed into a normal distribution
#   "Actual":   prcp is kept in its actual space
# "Gaussian" is not a good choice: station prcp regression using box-cox shows large underestimation.
prcp_space = 'Actual'

### Local Mac settings
# inputs
path_bac = '/Users/localuser/Research/EMDNA/merge'  # directory with the data used as background
path_obs = '/Users/localuser/Research/EMDNA/regression'  # directory with the data used as observation
near_file_GMET = '/Users/localuser/Research/EMDNA/regression/weight_nearstn.npz'  # near stations of stations/grids
file_mask = './DEM/NA_DEM_010deg_trim.mat'
FileStnInfo = '/Users/localuser/GMET/pyGMET_NA/stnlist_whole.txt'
gmet_stndatafile = '/Users/localuser/Research/EMDNA/stndata_whole.npz'

# outputs (once generated they can serve as inputs as well)
path_oimerge = '/Users/localuser/Research/EMDNA/oimerge'

### Local Mac settings


# ### Plato settings
# # input files/paths
# path_bac = '/datastore/GLOBALWATER/CommonData/EMDNA/ReanalysisCorrMerge/Reanalysis_merge'
# path_obs = '/datastore/GLOBALWATER/CommonData/EMDNA/PyGMETout'
# near_file_GMET = '/datastore/GLOBALWATER/CommonData/EMDNA/PyGMETout/weight.npz'
# file_mask = '/datastore/GLOBALWATER/CommonData/EMDNA/DEM/NA_DEM_010deg_trim.mat'
# FileStnInfo = '/home/gut428/GMET/eCAI_EMDNA/StnGridInfo/stnlist_whole.txt'
# gmet_stndatafile = '/datastore/GLOBALWATER/CommonData/EMDNA/stndata_whole.npz'

# # output files/paths (can also be used as inputs once generated)
# path_oimerge = '/home/gut428/OImerge'
# ### Plato settings

# File names derived from the directories above: the station regression file and
# one corrected/merged reanalysis file per variable (same order as `vars`).
file_regression_stn = path_obs + '/daily_regression_stn.npz'
file_corrmerge_stn = [
    path_bac + '/mergecorr_stn_' + varname + '_GWRQM_' + weightmode + '.npz'
    for varname in vars
]

########################################################################################################################

# basic processing
# Domain mask taken from the 'DEM' field of the mask file: every non-NaN pixel
# is set to 1 (valid), NaN pixels are left unchanged.
mask = io.loadmat(file_mask)['DEM']
valid_pixels = ~np.isnan(mask)
mask[valid_pixels] = 1
nrows, ncols = mask.shape

# date information for 1979-2018; date_number['mm'] is used further down to
# select the time steps of a given month
date_list, date_number = m_DateList(1979, 2018, 'ByYear')

# station list: IDs, station information and number of stations
stnID, stninfo = au.readstnlist(FileStnInfo)
nstn = len(stnID)

year: [1980, 1980]


In [20]:
# Load every station-level input needed by the OI merge for variable vars[v] and
# build the background series `reafinal_stn` (best reanalysis choice per station and month).
# NOTE(review): range(1) restricts the run to vars[0] ('prcp'); this looks like a
# debugging restriction - confirm before running all variables.
for v in range(1):
    print('OI merge at stations:', vars[v])
    filemerge_stn = path_oimerge + '/OImerge_stn_GWRQMBMA_' + vars[v] + '.npz'
#     if os.path.isfile(filemerge_stn):
#         continue

    # load station original observations
    # (np.load on an .npz file returns a handle on the archive; the `with` block closes it
    # once the arrays have been read, instead of leaking the file descriptor)
    with np.load(gmet_stndatafile) as datatemp:
        observation_stn = datatemp[vars[v]+'_stn']

    # load station regression estimates (obs)
    with np.load(file_regression_stn) as datatemp:
        regression_stn = datatemp[vars[v]]

    # load corrected/merged reanalysis data at all station points (those are totally independent with station observations)
    # and find the best choice
    with np.load(file_corrmerge_stn[v]) as datatemp:
        reamerge_stn_all = datatemp['reamerge_stn']
        reacorr_stn_all = datatemp['reacorr_stn']
    # note: this overwrites the module-level nstn with the station count of the reanalysis file
    reanum, nstn, ntimes = np.shape(reacorr_stn_all)
    reafinal_stn = np.nan * np.zeros([nstn, ntimes], dtype=np.float32)
    for m in range(12):
        indm =  (date_number['mm'] == m + 1)  # time steps belonging to month m + 1
        # column 0: RMSE of the merged product; columns 1..reanum: RMSE of each corrected product
        rearmse = np.zeros([nstn, reanum + 1])
        rearmse[:, 0] = calmetric(reamerge_stn_all[:, indm], observation_stn[:, indm], metname='RMSE')
        for i in range(reanum):
            # [i, :, indm] puts the time axis first (integer and mask indices separated by a slice),
            # hence the transpose back to (station, time)
            rearmse[:, i + 1] = calmetric(reacorr_stn_all[i, :, indm].T, observation_stn[:, indm], metname='RMSE')
        # np.argmin returns the position of the first NaN when a row contains one, so a candidate
        # whose RMSE is NaN would be selected as the "best" one. Rank NaN scores last instead.
        # Rows that are NaN everywhere still resolve to index 0 (the merged product), as before.
        rearmse_ranked = np.where(np.isnan(rearmse), np.inf, rearmse)
        bestchoice = np.argmin(rearmse_ranked, axis=1)
        for i in range(nstn):
            if bestchoice[i] > 0:
                # an individual corrected product beats the merged one at this station/month
                reafinal_stn[i, indm] = reacorr_stn_all[bestchoice[i] - 1, i, indm]
            else:
                reafinal_stn[i, indm] = reamerge_stn_all[i, indm]

    # load near station information (prcp has its own set; the other variables share the temperature set)
    with np.load(near_file_GMET) as datatemp:
        if vars[v] == 'prcp':
            near_loc = datatemp['near_stn_prcpLoc']
            near_weight = datatemp['near_stn_prcpWeight']
        else:
            near_loc = datatemp['near_stn_tempLoc']
            near_weight = datatemp['near_stn_tempWeight']


OI merge at stations: prcp


In [59]:
    # start OI merging
    oimerge_stn = np.zeros([nstn, ntimes])
    for m in range(12):
#         print('month', m + 1)
        indm = (date_number['mm'] > m + 1)
        nday = sum(indm)

        # use optimal interpolation to get OI-merged estimate at each station points
        for i in range(4000,4001):
#             if np.mod(i,2000)==0:
#                 print('station',i,nstn)

            if not np.mod(i, 5)==0:
                continue

            if np.isnan(observation_stn[i, 0]):
                continue

            near_loci = near_loc[i, :]
            near_loci = near_loci[near_loci > -1]

            b_tar = reafinal_stn[i, indm]
            o_tar = regression_stn[i, indm]
            b_near = reafinal_stn[near_loci,:][:, indm]
            o_near = regression_stn[near_loci,:][:, indm]

            tar_err_b = b_tar - observation_stn[i, indm]
            near_err_b = b_near - observation_stn[near_loci,:][:, indm]
            near_err_o = o_near - observation_stn[near_loci,:][:, indm]
            weight = OImerge(tar_err_b[31:], near_err_b[:,31:], near_err_o[:,31:], eye_o=0)
            if np.any(np.isnan(weight)) or np.any(abs(weight) > 2):
                weight = near_weight[i, 0:len(near_loci)]
                weight = weight / np.sum(weight)

            diff = o_near - b_near
            merge_est = b_tar.copy()
            for id in range(nday):
                merge_est[id] = merge_est[id] + np.dot(weight, diff[:, id])

            oimerge_stn[i, indm] = merge_est

In [60]:
# Metrics at station i (the index left over from the OI loop) against the observations,
# using the time steps from index 366 on. Printed in this order:
# regression estimate, reanalysis background, OI-merged estimate.
for estimate in (regression_stn, reafinal_stn, oimerge_stn):
    print(au.metric(observation_stn[i,366:], estimate[i,366:]))

[0.62822921 1.21617925 2.6009481  5.28557121]
[0.43289948 0.87695855 2.91936135 6.61388205]
[0.64924751 1.19372947 2.57689691 5.06135172]


In [51]:
# Same comparison as in the previous cell: metrics of the regression estimate, the
# reanalysis background and the OI-merged estimate at station i, from time index 366 on.
reference = observation_stn[i,366:]
candidates = [regression_stn, reafinal_stn, oimerge_stn]
for candidate in candidates:
    print(au.metric(reference, candidate[i,366:]))

[0.62822921 1.21617925 2.6009481  5.28557121]
[0.43289948 0.87695855 2.91936135 6.61388205]
[0.65349947 1.20496713 2.57704736 5.0521345 ]
