In [3]:
import platform
import sys
import datetime
import netCDF4
import numpy as np
import importlib
import time
from tqdm import tqdm

# Root folders used by this notebook.
# NOTE(review): these are absolute, user-specific paths, so the notebook only
# runs unchanged on the machine it was written on.
base_FP = '/home/subin/data'
cpuserver_data_FP = '/home/subin/cpuserver_data'
# Extend the module search path before the project imports below; presumably
# the develop_HydroAI package lives in this folder — TODO confirm.
sys.path.append(base_FP + '/python_modules')

# Project modules (they rely on the sys.path entry added above) and plotting.
import develop_HydroAI.HydroAI.TC_like as hTCL
import develop_HydroAI.HydroAI.Vectorization as hVec
import matplotlib.pyplot as plt
# Re-execute the project modules so that edits to their source are picked up
# without restarting the kernel. The trailing ';' keeps the notebook from
# echoing the module object returned by reload().
importlib.reload(hTCL);
importlib.reload(hVec);

# Suppresses every warning for the rest of the session. This also hides numpy
# RuntimeWarnings (division by zero, log10 of non-positive values).
import warnings
warnings.filterwarnings("ignore")

In [4]:
def _nan_masked_copy(arr, nan_mask):
    '''Return a floating-point copy of ``arr`` with NaN written where ``nan_mask`` is True.'''
    out = np.array(arr, copy=True, subok=True)
    if not np.issubdtype(out.dtype, np.floating):
        # NaN cannot be stored in integer/boolean arrays.
        out = out.astype(float)
    out[nan_mask] = np.nan
    return out


def ETC_vec(X, Y, Z, nod_th=30, corr_th=0):
    '''Vectorized triple-collocation (TC) metrics computed WITHOUT rescaling the data.

    Variant of ``TCA_vec_old`` that skips the scaling step: all statistics are
    taken directly from the (jointly NaN-masked) inputs.

    Parameters
    ----------
    X, Y, Z : array_like
        Three collocated datasets of identical shape. The number of valid
        samples is counted along axis 2, so the inputs need at least three
        dimensions (the demo in this notebook uses ``(2, 2, N)``).
        The inputs are NOT modified; masking is applied to internal copies.
    nod_th : int, optional
        Minimum number of jointly valid (non-NaN) samples along axis 2.
        Cells with fewer samples are flagged in ``condition_n_valid``.
    corr_th : float, optional
        Cells where any pairwise correlation is below this value are flagged
        in ``condition_corr``.

    Returns
    -------
    VAR_err, SNR, SNRdb, R, fMSE : dict
        Each maps ``'x'``, ``'y'``, ``'z'`` to an array, where for product x

        * ``VAR_err = covXX - covXY*covXZ/covYZ``
        * ``SNR     = (covXY*covXZ/covYZ) / VAR_err``
        * ``SNRdb   = 10*log10(SNR)``
        * ``R       = 1 / (1 + 1/SNR)``
        * ``fMSE    = 1 / (1 + SNR)``

        (and the analogous permutations for y and z).
    flags : dict
        Boolean arrays ``condition_corr``, ``condition_n_valid``,
        ``condition_fMSE`` (fMSE outside [0, 1]) and
        ``condition_negative_vars_err``. The flags are only reported; the
        metric arrays themselves are returned unmasked.

    Notes
    -----
    ``hVec.cov_corr_three`` must return a mapping with the keys ``covXX``,
    ``covYY``, ``covZZ``, ``covXY``, ``covXZ``, ``covYZ``, ``corrXY``,
    ``corrXZ`` and ``corrYZ``. Because no scaling is applied, it is called
    once and the result serves both the metrics and the correlation flag.
    '''

    # 0. Joint NaN mask: a sample is dropped from all three datasets as soon as
    #    it is missing in any one of them. Work on copies so that the caller's
    #    arrays (e.g. notebook globals reused by later cells) stay untouched.
    combined_nan_mask = np.isnan(X) | np.isnan(Y) | np.isnan(Z)
    Xs = _nan_masked_copy(X, combined_nan_mask)
    Ys = _nan_masked_copy(Y, combined_nan_mask)
    Zs = _nan_masked_copy(Z, combined_nan_mask)

    # 1. Covariances and correlations of the masked, unscaled data.
    stats = hVec.cov_corr_three(Xs, Ys, Zs)
    covXX, covYY, covZZ = stats['covXX'], stats['covYY'], stats['covZZ']
    covXY, covXZ, covYZ = stats['covXY'], stats['covXZ'], stats['covYZ']
    corrXY, corrXZ, corrYZ = stats['corrXY'], stats['corrXZ'], stats['corrYZ']

    # 2. Vectorized TC calculation.
    # Degenerate cells (zero covariance, non-positive SNR) legitimately yield
    # inf/NaN here and are reported through the flags, so numpy's floating
    # point warnings are silenced locally.
    with np.errstate(divide='ignore', invalid='ignore'):
        var_Xserr = covXX - covXY*covXZ/covYZ
        var_Yserr = covYY - covXY*covYZ/covXZ
        var_Zserr = covZZ - covXZ*covYZ/covXY

        SNR_Xs = (covXY * covXZ / covYZ) / var_Xserr
        SNR_Ys = (covXY * covYZ / covXZ) / var_Yserr
        SNR_Zs = (covXZ * covYZ / covXY) / var_Zserr

        fMSE_Xs = 1 / (1 + SNR_Xs)
        fMSE_Ys = 1 / (1 + SNR_Ys)
        fMSE_Zs = 1 / (1 + SNR_Zs)

        R_XXs = 1 / (1 + 1/SNR_Xs)
        R_YYs = 1 / (1 + 1/SNR_Ys)
        R_ZZs = 1 / (1 + 1/SNR_Zs)

        SNRdb_Xs = 10 * np.log10(SNR_Xs)
        SNRdb_Ys = 10 * np.log10(SNR_Ys)
        SNRdb_Zs = 10 * np.log10(SNR_Zs)

    VAR_err = {'x': var_Xserr, 'y': var_Yserr, 'z': var_Zserr}
    SNR = {'x': SNR_Xs, 'y': SNR_Ys, 'z': SNR_Zs}
    SNRdb = {'x': SNRdb_Xs, 'y': SNRdb_Ys, 'z': SNRdb_Zs}
    R = {'x': R_XXs, 'y': R_YYs, 'z': R_ZZs}
    fMSE = {'x': fMSE_Xs, 'y': fMSE_Ys, 'z': fMSE_Zs}

    # 3. Quality flags.
    # flag 1: at least one pairwise correlation below the threshold
    condition_corr = (corrXY < corr_th) | (corrXZ < corr_th) | (corrYZ < corr_th)
    # flag 2: too few jointly valid samples along axis 2
    condition_n_valid = np.sum(~np.isnan(Xs) & ~np.isnan(Ys) & ~np.isnan(Zs), axis=2) < nod_th
    # flag 3: fMSE outside its valid range [0, 1]
    condition_fMSE = (fMSE_Xs < 0) | (fMSE_Ys < 0) | (fMSE_Zs < 0) | (fMSE_Xs > 1) | (fMSE_Ys > 1) | (fMSE_Zs > 1)
    # flag 4: negative error-variance estimate
    condition_negative_vars_err = (var_Xserr < 0) | (var_Yserr < 0) | (var_Zserr < 0)

    flags = {'condition_corr': condition_corr,
            'condition_n_valid': condition_n_valid,
            'condition_fMSE': condition_fMSE,
            'condition_negative_vars_err': condition_negative_vars_err}

    return VAR_err, SNR, SNRdb, R, fMSE, flags

In [5]:
# Fix the global legacy RNG so the synthetic sample is reproducible.
np.random.seed(0)

# Samples per grid cell; the 2 x 2 grid built below needs 4*N draws per series.
N = 1000

# Shared "truth" signal: zero mean, unit standard deviation.
T = np.random.normal(loc=0, scale=1, size=4*N)

# Independent zero-mean noise for the three products, drawn in this order with
# standard deviations 0.5, 0.7 and 0.9 (error variances 0.25, 0.49, 0.81).
e1, e2, e3 = (np.random.normal(loc=0, scale=sigma, size=4*N) for sigma in (0.5, 0.7, 0.9))

# Each observed product is the truth plus its own noise.
D1, D2, D3 = T + e1, T + e2, T + e3

# Arrange every product as a 3D array of shape (2, 2, N): a 2 x 2 grid of cells
# with N samples along the last axis.
X, Y, Z = (series.reshape((2, 2, N)) for series in (D1, D2, D3))

In [8]:
# Run the no-scaling triple collocation on the synthetic products and unpack
# the six result containers.
etc_outputs = ETC_vec(X, Y, Z)
VAR_err_TCA, SNR_TCA, SNRdb_TCA, R_TCA, fMSE_TCA, flags_TCA = etc_outputs

# Report the estimated error variance of each product; the values should be
# close to the noise variances used to build the synthetic data.
print('ETC_vec: without scaling')
for heading, product_key in (('\n1.New version\n', 'x'),
                             ('\n2.New version\n', 'y'),
                             ('\n3.New version\n', 'z')):
    print(heading, VAR_err_TCA[product_key])

ETC_vec: without scaling

1.New version
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

2.New version
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

3.New version
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]
