In [1]:
import platform
import sys
import datetime
import netCDF4
import numpy as np
import importlib
import time
from tqdm import tqdm

# Root folders used by this notebook.
# NOTE(review): these are absolute, user-specific paths; adjust them before
# running on another machine.
base_FP = '/home/subin/data'
cpuserver_data_FP = '/home/subin/cpuserver_data'
# Make the project packages under <base_FP>/python_modules importable.
sys.path.append(base_FP + '/python_modules')

import develop_HydroAI.HydroAI.TC_like as hTCL
import develop_HydroAI.HydroAI.Vectorization as hVec
import matplotlib.pyplot as plt
# Reload the project modules so that edits made on disk are picked up without
# restarting the kernel (the trailing ';' suppresses the cell output).
importlib.reload(hTCL);
importlib.reload(hVec);

import warnings
# NOTE(review): this silences every warning for the whole session, including
# NumPy RuntimeWarnings (division by zero, invalid value) raised by the
# triple-collocation formulas below.
warnings.filterwarnings("ignore")
# Define your directory where to save nc files
nc_save_dir = cpuserver_data_FP + '/extracted_nc'

### New Functions for TCA

In [12]:
def ETC_vec(X, Y, Z, nod_th=30, corr_th=0):
    '''Vectorized triple collocation on the unscaled data.

    Variant of TCA_vec_old that skips the rescaling step: all statistics are
    computed directly from X, Y and Z.

    Parameters
    ----------
    X, Y, Z : array_like
        Collocated data sets of identical shape. The valid-sample count is
        taken along axis 2, so 3-D input with the samples on the last axis is
        expected (presumably (rows, cols, time) -- confirm against
        hVec.cov_corr_three).
    nod_th : int, optional
        Minimum number of jointly valid samples; pixels with fewer samples
        are marked in flags['condition_n_valid'].
    corr_th : float, optional
        Pixels where any pairwise correlation is below this threshold are
        marked in flags['condition_corr'].

    Returns
    -------
    VAR_err, SNR, SNRdb, R, fMSE : dict
        Dictionaries with keys 'x', 'y', 'z' holding the error variance,
        signal-to-noise ratio, SNR in dB, 1 / (1 + 1/SNR) and 1 / (1 + SNR)
        of each data set.
    flags : dict
        Boolean quality flags 'condition_corr', 'condition_n_valid',
        'condition_fMSE' and 'condition_negative_vars_err'.

    Notes
    -----
    The inputs are no longer modified: the NaN-synchronised data are written
    to new arrays instead of being assigned back into X, Y and Z.
    '''

    # 0. A sample is unusable as soon as one of the three data sets is NaN.
    #    np.where allocates new arrays, so the caller's X, Y, Z stay intact.
    combined_nan_mask = np.isnan(X) | np.isnan(Y) | np.isnan(Z)
    Xs = np.where(combined_nan_mask, np.nan, X)
    Ys = np.where(combined_nan_mask, np.nan, Y)
    Zs = np.where(combined_nan_mask, np.nan, Z)

    # 1. Covariances and correlations. The data are not rescaled, so a single
    #    call provides everything (the original called the helper twice on
    #    the same arrays).
    #    NOTE(review): assumes hVec.cov_corr_three returns a dict with the
    #    keys read below; .get() yields None for a missing key.
    cov_corr_results = hVec.cov_corr_three(Xs, Ys, Zs)

    covXXs = cov_corr_results.get('covXX')
    covYYs = cov_corr_results.get('covYY')
    covZZs = cov_corr_results.get('covZZ')
    covXYs = cov_corr_results.get('covXY')
    covXZs = cov_corr_results.get('covXZ')
    covYZs = cov_corr_results.get('covYZ')

    corrXY = cov_corr_results.get('corrXY')
    corrXZ = cov_corr_results.get('corrXZ')
    corrYZ = cov_corr_results.get('corrYZ')

    # 2. Vectorized TC calculation: signal variance seen by each data set and
    #    the remaining error variance.
    sig_Xs = covXYs * covXZs / covYZs
    sig_Ys = covXYs * covYZs / covXZs
    sig_Zs = covXZs * covYZs / covXYs

    var_Xserr = covXXs - sig_Xs
    var_Yserr = covYYs - sig_Ys
    var_Zserr = covZZs - sig_Zs

    # TC numbers
    SNR_Xs = sig_Xs / var_Xserr
    SNR_Ys = sig_Ys / var_Yserr
    SNR_Zs = sig_Zs / var_Zserr

    fMSE_Xs = 1 / (1 + SNR_Xs)
    fMSE_Ys = 1 / (1 + SNR_Ys)
    fMSE_Zs = 1 / (1 + SNR_Zs)

    R_XXs = 1 / (1 + 1/SNR_Xs)
    R_YYs = 1 / (1 + 1/SNR_Ys)
    R_ZZs = 1 / (1 + 1/SNR_Zs)

    SNRdb_Xs = 10 * np.log10(SNR_Xs)
    SNRdb_Ys = 10 * np.log10(SNR_Ys)
    SNRdb_Zs = 10 * np.log10(SNR_Zs)

    VAR_err = {'x': var_Xserr, 'y': var_Yserr, 'z': var_Zserr}
    SNR = {'x': SNR_Xs, 'y': SNR_Ys, 'z': SNR_Zs}
    SNRdb = {'x': SNRdb_Xs, 'y': SNRdb_Ys, 'z': SNRdb_Zs}
    R = {'x': R_XXs, 'y': R_YYs, 'z': R_ZZs}
    fMSE = {'x': fMSE_Xs, 'y': fMSE_Ys, 'z': fMSE_Zs}

    # 3. Quality flags
    # flag 1: a pairwise correlation below the threshold
    condition_corr = (corrXY < corr_th) | (corrXZ < corr_th) | (corrYZ < corr_th)

    # flag 2: too few jointly valid samples (after step 0 the NaN pattern of
    # Xs, Ys and Zs is exactly combined_nan_mask)
    condition_n_valid = np.sum(~combined_nan_mask, axis=2) < nod_th
    # flag 3: fMSE outside [0, 1]
    condition_fMSE = (fMSE_Xs < 0) | (fMSE_Ys < 0) | (fMSE_Zs < 0) | (fMSE_Xs > 1) | (fMSE_Ys > 1) | (fMSE_Zs > 1)
    # flag 4: a negative error variance
    condition_negative_vars_err = (var_Xserr < 0) | (var_Yserr < 0) | (var_Zserr < 0)

    flags = {'condition_corr': condition_corr,
            'condition_n_valid': condition_n_valid,
            'condition_fMSE': condition_fMSE,
            'condition_negative_vars_err': condition_negative_vars_err}

    return VAR_err, SNR, SNRdb, R, fMSE, flags


In [13]:
def TCA_vec(X, Y, Z, nod_th=30, corr_th=0):
    """Extended triple collocation for every time series of a gridded stack.

    The last axis of X, Y and Z holds the samples; all leading axes are
    treated as independent pixels. For each pixel the samples where any of
    the three data sets is NaN are dropped, the 3 x 3 sample covariance
    matrix is estimated and the ETC error variances, squared correlation
    coefficients and signal-to-noise ratios are derived from it.

    Parameters
    ----------
    X, Y, Z : array_like
        Collocated data sets of identical shape (..., T). 1-D input (a single
        time series) is accepted and yields 0-d results.
    nod_th : int, optional
        Minimum number of jointly valid samples. Pixels with fewer samples
        (or fewer than 2, which np.cov cannot handle) are left as NaN.
    corr_th : float, optional
        Threshold applied to the squared correlation coefficients for
        flags['condition_corr'].

    Returns
    -------
    VAR_err, SNR, SNRdb, R, fMSE : dict
        Dictionaries with keys 'x', 'y', 'z' and arrays of shape
        X.shape[:-1]. R holds the *squared* correlation coefficient rho^2
        and fMSE is 1 - rho^2.
    flags : dict
        Boolean arrays 'condition_corr', 'condition_n_valid',
        'condition_fMSE' and 'condition_negative_vars_err'.

    Notes
    -----
    * The inputs are not modified; NaN synchronisation works on copies.
    * rho^2 is computed directly as signal variance / total variance instead
      of sqrt(...)**2. For a valid pixel the value is the same up to
      rounding, but a negative ratio (violated TC assumptions) now stays
      negative instead of turning into NaN, so condition_corr and
      condition_fMSE can flag it, as they do in ETC_vec.
    * Pixels that are skipped (too few samples or a rank-deficient
      covariance matrix) keep NaN in every output.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    Z = np.asarray(Z)
    original_shape = X.shape[:-1]  # grid shape without the sample axis
    n_obs = X.shape[-1]

    # 0. A sample is unusable as soon as one data set is NaN. np.where builds
    #    new arrays, so the caller's data are left untouched.
    combined_nan_mask = np.isnan(X) | np.isnan(Y) | np.isnan(Z)

    # Flatten to (n_series, T). The row count is taken from the reshaped
    # array: np.prod(()) is the float 1.0, which broke 1-D input.
    X_flat = np.where(combined_nan_mask, np.nan, X).reshape(-1, n_obs)
    Y_flat = np.where(combined_nan_mask, np.nan, Y).reshape(-1, n_obs)
    Z_flat = np.where(combined_nan_mask, np.nan, Z).reshape(-1, n_obs)
    n_series = X_flat.shape[0]

    # Results default to NaN so skipped pixels are recognisable
    errVar_ETC = np.full((n_series, 3), np.nan)
    rho2_ETC = np.full((n_series, 3), np.nan)
    snr = np.full((n_series, 3), np.nan)
    snr_db = np.full((n_series, 3), np.nan)

    # np.cov needs at least two samples to return finite values
    min_obs = max(nod_th, 2)

    for i in range(n_series):
        y = np.column_stack((X_flat[i], Y_flat[i], Z_flat[i]))  # (T, 3)
        y = y[~np.isnan(y).any(axis=1)]  # keep jointly valid samples only

        if y.shape[0] < min_obs:
            continue  # not enough data

        Q_hat = np.cov(y, rowvar=False)

        if np.linalg.matrix_rank(Q_hat) < 3:
            continue  # singular covariance matrix

        try:
            total_var = np.array([Q_hat[0, 0], Q_hat[1, 1], Q_hat[2, 2]])
            # Variance of the common signal as seen by each data set
            signal_var = np.array([
                Q_hat[0, 1] * Q_hat[0, 2] / Q_hat[1, 2],
                Q_hat[0, 1] * Q_hat[1, 2] / Q_hat[0, 2],
                Q_hat[0, 2] * Q_hat[1, 2] / Q_hat[0, 1],
            ])
            row_rho2 = signal_var / total_var
            row_snr = row_rho2 / (1 - row_rho2)
            row_snr_db = 10 * np.log10(row_snr)
        except FloatingPointError:
            # Only raised when NumPy is configured with np.seterr(...='raise');
            # by default invalid operations yield inf/NaN instead.
            continue

        rho2_ETC[i, :] = row_rho2
        errVar_ETC[i, :] = total_var - signal_var
        snr[i, :] = row_snr
        snr_db[i, :] = row_snr_db

    # Reshape results back to the grid shape
    VAR_err = {
        'x': errVar_ETC[:, 0].reshape(original_shape),
        'y': errVar_ETC[:, 1].reshape(original_shape),
        'z': errVar_ETC[:, 2].reshape(original_shape)
    }
    SNR = {
        'x': snr[:, 0].reshape(original_shape),
        'y': snr[:, 1].reshape(original_shape),
        'z': snr[:, 2].reshape(original_shape)
    }
    SNRdb = {
        'x': snr_db[:, 0].reshape(original_shape),
        'y': snr_db[:, 1].reshape(original_shape),
        'z': snr_db[:, 2].reshape(original_shape)
    }
    R = {
        'x': rho2_ETC[:, 0].reshape(original_shape),
        'y': rho2_ETC[:, 1].reshape(original_shape),
        'z': rho2_ETC[:, 2].reshape(original_shape)
    }
    fMSE = {
        'x': (1 - rho2_ETC[:, 0]).reshape(original_shape),
        'y': (1 - rho2_ETC[:, 1]).reshape(original_shape),
        'z': (1 - rho2_ETC[:, 2]).reshape(original_shape)
    }

    # Quality flags (comparisons with NaN are False, so skipped pixels are
    # only marked by condition_n_valid)
    condition_corr = ((R['x'] < corr_th) | (R['y'] < corr_th) | (R['z'] < corr_th))
    condition_n_valid = np.sum(~combined_nan_mask, axis=-1) < nod_th
    condition_fMSE = ((fMSE['x'] < 0) | (fMSE['y'] < 0) | (fMSE['z'] < 0) |
                      (fMSE['x'] > 1) | (fMSE['y'] > 1) | (fMSE['z'] > 1))
    condition_negative_vars_err = (errVar_ETC < 0).any(axis=1).reshape(original_shape)

    flags = {
        'condition_corr': condition_corr,
        'condition_n_valid': condition_n_valid,
        'condition_fMSE': condition_fMSE,
        'condition_negative_vars_err': condition_negative_vars_err
    }

    return VAR_err, SNR, SNRdb, R, fMSE, flags

In [14]:
def TCA_vec_cannot_calculate_nan(X, Y, Z, nod_th=30, corr_th=0):
    '''Earlier TCA_vec variant that only evaluates gap-free time series.

    A pixel is evaluated only when none of its samples is NaN in any of the
    three data sets; the pixel-wise computation is the same extended triple
    collocation as in TCA_vec.

    Parameters
    ----------
    X, Y, Z : array_like
        Collocated data sets of identical shape (..., T), samples on the
        last axis.
    nod_th : int, optional
        Minimum number of samples T required to evaluate a pixel.
    corr_th : float, optional
        Threshold applied to the squared correlation coefficients for
        flags['condition_corr'].

    Returns
    -------
    VAR_err, SNR, SNRdb, R, fMSE : dict
        Dictionaries with keys 'x', 'y', 'z' and arrays of shape
        X.shape[:-1]. R holds the squared correlation coefficient rho^2 and
        fMSE is 1 - rho^2.
    flags : dict
        Boolean arrays 'condition_corr', 'condition_n_valid',
        'condition_fMSE' and 'condition_negative_vars_err', all of shape
        X.shape[:-1].

    Notes
    -----
    * Pixels containing a NaN now return NaN. Previously their rows were
      removed before the results were reshaped to the input grid, so any NaN
      made the reshape fail.
    * Gap-free pixels that are skipped (T < nod_th or a rank-deficient
      covariance matrix) keep 0 in every output (hence fMSE == 1), as
      before.
    * The inputs are not modified, and the sample axis is addressed as the
      last axis instead of the hard-coded axis 2.
    * rho^2 is computed directly as signal variance / total variance, so a
      negative ratio stays negative and can be flagged instead of becoming
      NaN.
    '''
    X = np.asarray(X)
    Y = np.asarray(Y)
    Z = np.asarray(Z)
    original_shape = X.shape[:-1]  # grid shape without the sample axis
    n_obs = X.shape[-1]

    # 0. Joint NaN pattern of the three data sets
    combined_nan_mask = np.isnan(X) | np.isnan(Y) | np.isnan(Z)

    # 1. Flatten to (n_series, T); reshape does not modify the inputs
    X_flat = X.reshape(-1, n_obs)
    Y_flat = Y.reshape(-1, n_obs)
    Z_flat = Z.reshape(-1, n_obs)
    n_series = X_flat.shape[0]

    # Only time series without a single NaN are evaluated
    gap_free = ~combined_nan_mask.reshape(-1, n_obs).any(axis=1)

    # NaN marks pixels that cannot be evaluated; gap-free pixels start at 0,
    # which is what a skipped pixel has always returned.
    errVar_ETC = np.full((n_series, 3), np.nan)
    rho2_ETC = np.full((n_series, 3), np.nan)
    snr = np.full((n_series, 3), np.nan)
    snr_db = np.full((n_series, 3), np.nan)
    for result in (errVar_ETC, rho2_ETC, snr, snr_db):
        result[gap_free] = 0.0

    # Every gap-free series has exactly n_obs samples, so the sample-count
    # check is the same for all of them (np.cov needs at least 2 samples).
    if n_obs >= max(nod_th, 2):
        for i in range(n_series):
            if not gap_free[i]:
                continue

            y = np.column_stack((X_flat[i], Y_flat[i], Z_flat[i]))  # (T, 3)
            Q_hat = np.cov(y, rowvar=False)

            if np.linalg.matrix_rank(Q_hat) < 3:
                continue  # singular covariance matrix

            try:
                total_var = np.array([Q_hat[0, 0], Q_hat[1, 1], Q_hat[2, 2]])
                # Variance of the common signal as seen by each data set
                signal_var = np.array([
                    Q_hat[0, 1] * Q_hat[0, 2] / Q_hat[1, 2],
                    Q_hat[0, 1] * Q_hat[1, 2] / Q_hat[0, 2],
                    Q_hat[0, 2] * Q_hat[1, 2] / Q_hat[0, 1],
                ])
                row_rho2 = signal_var / total_var
                row_snr = row_rho2 / (1 - row_rho2)
                row_snr_db = 10 * np.log10(row_snr)
            except FloatingPointError:
                # Only raised when NumPy is configured with
                # np.seterr(...='raise'); otherwise inf/NaN is produced.
                continue

            rho2_ETC[i, :] = row_rho2
            errVar_ETC[i, :] = total_var - signal_var
            snr[i, :] = row_snr
            snr_db[i, :] = row_snr_db

    # Prepare outputs on the original grid
    VAR_err = {
        'x': errVar_ETC[:, 0].reshape(original_shape),
        'y': errVar_ETC[:, 1].reshape(original_shape),
        'z': errVar_ETC[:, 2].reshape(original_shape)
    }
    SNR = {
        'x': snr[:, 0].reshape(original_shape),
        'y': snr[:, 1].reshape(original_shape),
        'z': snr[:, 2].reshape(original_shape)
    }
    SNRdb = {
        'x': snr_db[:, 0].reshape(original_shape),
        'y': snr_db[:, 1].reshape(original_shape),
        'z': snr_db[:, 2].reshape(original_shape)
    }
    R = {
        'x': rho2_ETC[:, 0].reshape(original_shape),
        'y': rho2_ETC[:, 1].reshape(original_shape),
        'z': rho2_ETC[:, 2].reshape(original_shape)
    }
    fMSE = {
        'x': (1 - rho2_ETC[:, 0]).reshape(original_shape),
        'y': (1 - rho2_ETC[:, 1]).reshape(original_shape),
        'z': (1 - rho2_ETC[:, 2]).reshape(original_shape)
    }

    # 3. Quality flags, all on the original grid
    condition_corr = ((R['x'] < corr_th) | (R['y'] < corr_th) | (R['z'] < corr_th))
    condition_n_valid = np.sum(~combined_nan_mask, axis=-1) < nod_th
    condition_fMSE = ((fMSE['x'] < 0) | (fMSE['y'] < 0) | (fMSE['z'] < 0) |
                      (fMSE['x'] > 1) | (fMSE['y'] > 1) | (fMSE['z'] > 1))
    condition_negative_vars_err = (errVar_ETC < 0).any(axis=1).reshape(original_shape)

    flags = {
        'condition_corr': condition_corr,
        'condition_n_valid': condition_n_valid,
        'condition_fMSE': condition_fMSE,
        'condition_negative_vars_err': condition_negative_vars_err
    }

    return VAR_err, SNR, SNRdb, R, fMSE, flags

In [15]:
def ETC(D1, D2, D3, nod_th=30, corr_th=0):
    """
    Extended Triple Collocation (ETC) is a technique for estimating the
    variance of the noise error (errVar) and correlation coefficients (rho)
    of three measurement systems (e.g., satellite, in-situ, and model-based products)
    with respect to the unknown true value of the variable being measured
    (e.g., soil moisture, wind speed).

    INPUTS
    D1, D2, D3: 1-D arrays of observations from the three measurement systems.
    They must be of the same length. Samples where any of the three is NaN
    are dropped before the statistics are computed.
    nod_th: minimum number of jointly valid samples; only used for
    flags['condition_n_valid'] (the statistics are computed regardless).
    corr_th: threshold on the squared correlation coefficients used for
    flags['condition_corr'].

    OUTPUTS
    VAR_err, SNR, SNRdb, R, fMSE: dictionaries with keys 'x', 'y', 'z'
    (for D1, D2, D3) holding the error variance, the signal-to-noise ratio,
    the SNR in dB, the squared correlation coefficient rho^2 and 1 - rho^2.
    flags: dictionary with the boolean entries 'condition_corr',
    'condition_n_valid', 'condition_fMSE' and 'condition_negative_vars_err'.

    RAISES
    ValueError if the inputs differ in length, no data are left after
    removing NaNs, a data set has zero variance, or the covariance values
    are invalid.

    NOTES
    rho^2 is computed directly as signal variance / total variance instead
    of sqrt(...)**2. For valid data the value is the same up to rounding,
    but a negative ratio (a violated ETC assumption) now stays negative
    instead of turning into NaN, so the negative-rho^2 warning and the
    fMSE / correlation flags can actually fire.

    REFERENCE
    McColl, K.A., J. Vogelzang, A.G. Konings, D. Entekhabi, M. Piles, A. Stoffelen (2014).
    Extended Triple Collocation: Estimating errors and correlation coefficients with respect
    to an unknown target. Geophysical Research Letters 41:6229-6236.
    """
    # Convert inputs to numpy arrays
    D1 = np.asarray(D1)
    D2 = np.asarray(D2)
    D3 = np.asarray(D3)

    # Check that all inputs have the same length
    if not (len(D1) == len(D2) == len(D3)):
        raise ValueError('Error: Input data D1, D2, D3 must be of the same length.')

    # Combine the data into a single array
    y = np.column_stack((D1, D2, D3))  # Shape: (N, 3)

    # Remove any rows with NaNs
    if np.isnan(y).any():
        y = y[~np.isnan(y).any(axis=1)]

    # Catch errors in inputs
    if y.shape[1] != 3:
        raise ValueError('Error: Input data must result in an N x 3 array after removing NaNs.')

    if y.size == 0:
        raise ValueError('Error: No data left after removing NaNs.')

    # Check that each column has non-zero variance
    if np.var(y[:, 0]) == 0 or np.var(y[:, 1]) == 0 or np.var(y[:, 2]) == 0:
        raise ValueError('Error: The sample variance of each dataset must be non-zero.')

    # Estimate covariance matrix of the three measurement systems
    Q_hat = np.cov(y, rowvar=False)
    total_var = np.array([Q_hat[0, 0], Q_hat[1, 1], Q_hat[2, 2]])

    try:
        # Variance of the common signal as seen by each measurement system
        signal_var = np.array([
            Q_hat[0, 1] * Q_hat[0, 2] / Q_hat[1, 2],
            Q_hat[0, 1] * Q_hat[1, 2] / Q_hat[0, 2],
            Q_hat[0, 2] * Q_hat[1, 2] / Q_hat[0, 1],
        ])
        # Squared correlation with the unknown truth
        rho2_ETC = signal_var / total_var
    except (ZeroDivisionError, FloatingPointError, ValueError) as exc:
        # NumPy only raises here when configured with np.seterr(...='raise')
        raise ValueError('Error: Calculation of correlation coefficients failed due to invalid covariance values.') from exc

    # Compute error variances
    errVar_ETC = total_var - signal_var

    # Check for negative error variances
    if np.any(errVar_ETC < 0):
        print('Warning: At least one calculated errVar is negative. This can happen if the sample size is too small or if one of the assumptions of ETC is violated.')

    # Check for negative squared correlation coefficients
    if np.any(rho2_ETC < 0):
        print('Warning: At least one calculated squared correlation coefficient is negative. This can happen if the sample size is too small or if one of the assumptions of ETC is violated.')

    # ======================================================
    # Compute SNR (linear and in dB)
    snr = rho2_ETC / (1 - rho2_ETC)
    snr_db = 10 * np.log10(snr)

    # Prepare outputs
    VAR_err = {
        'x': errVar_ETC[0],
        'y': errVar_ETC[1],
        'z': errVar_ETC[2]
    }
    SNR = {
        'x': snr[0],
        'y': snr[1],
        'z': snr[2]
    }
    SNRdb = {
        'x': snr_db[0],
        'y': snr_db[1],
        'z': snr_db[2]
    }
    R = {
        'x': rho2_ETC[0],
        'y': rho2_ETC[1],
        'z': rho2_ETC[2]
    }
    fMSE = {
        'x': 1 - rho2_ETC[0],
        'y': 1 - rho2_ETC[1],
        'z': 1 - rho2_ETC[2]
    }

    # Set the flags
    # flag 1: squared correlation coefficient below the threshold
    condition_corr = (R['x'] < corr_th) | (R['y'] < corr_th) | (R['z'] < corr_th)
    # flag 2: too few jointly valid observations (axis=0 because the inputs are 1-D)
    condition_n_valid = np.sum(~np.isnan(D1) & ~np.isnan(D2) & ~np.isnan(D3), axis=0) < nod_th
    # flag 3: fMSE outside [0, 1]
    condition_fMSE = (fMSE['x'] < 0) | (fMSE['y'] < 0) | (fMSE['z'] < 0) | (fMSE['x'] > 1) | (fMSE['y'] > 1) | (fMSE['z'] > 1)
    # flag 4: a negative error variance
    condition_negative_vars_err = (errVar_ETC < 0).any(axis=0)

    flags = {'condition_corr': condition_corr,
            'condition_n_valid': condition_n_valid,
            'condition_fMSE': condition_fMSE,
            'condition_negative_vars_err': condition_negative_vars_err}

    return VAR_err, SNR, SNRdb, R, fMSE, flags

### Check values by switching X and Y

In [2]:
import numpy as np

# Synthetic triple-collocation experiment: three noisy observations of one
# common signal. The estimated error variances should come out close to
# 0.25, 0.49 and 0.81.
np.random.seed(0)  # For reproducibility

N = 1000  # Number of data points per pixel

# Generate the true signal T; 4*N samples so that it can later be split into
# a 2 x 2 grid of N-sample time series.
T = np.random.normal(0, 1, 4*N)  # Mean 0, standard deviation 1

# Generate independent zero-mean errors for each dataset
e1 = np.random.normal(0, 0.5, 4*N)  # Error variance 0.25
e2 = np.random.normal(0, 0.7, 4*N)  # Error variance 0.49
e3 = np.random.normal(0, 0.9, 4*N)  # Error variance 0.81

# Generate the datasets (truth + additive error)
D1 = T + e1
D2 = T + e2
D3 = T + e3

In [3]:
# Reshape the flat series to 3D arrays with shape (2, 2, N): a 2 x 2 grid of
# pixels with N samples each along the last axis.
X = D1.reshape(2, 2, N)
Y = D2.reshape(2, 2, N)
Z = D3.reshape(2, 2, N)

In [118]:
# Previous TCA_vec_old code (scaled version from the HydroAI package).
# Print the error variance maps of the three data sets.
VAR_err_TCA_old, SNR_TCA_old, SNRdb_TCA_old, R_TCA_old, fMSE_TCA_old, flags_TCA_old = hTCL.TCA_vec_old(X, Y, Z)
print('\n1. Old version (scaled)\n', VAR_err_TCA_old['x'])
print('\n2. Old version (scaled)\n', VAR_err_TCA_old['y'])
print('\n3. Old version (scaled)\n', VAR_err_TCA_old['z'])

# Same call with X and Y switched. For an order-independent method only the
# 'x' and 'y' results would trade places; in the recorded output the values
# themselves change (including 'z'), i.e. the scaled version depends on the
# argument order.
VAR_err_TCA_old, SNR_TCA_old, SNRdb_TCA_old, R_TCA_old, fMSE_TCA_old, flags_TCA_old = hTCL.TCA_vec_old(Y, X, Z)
print('\n1. Old version (scaled, switched X and Y)\n', VAR_err_TCA_old['x'])
print('\n2. Old version (scaled, switched X and Y)\n', VAR_err_TCA_old['y'])
print('\n3. Old version (scaled, switched X and Y)\n', VAR_err_TCA_old['z'])



1. Old version (scaled)
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

2. Old version (scaled)
 [[0.44447355 0.62894485]
 [0.49474468 0.47151674]]

3. Old version (scaled)
 [[0.8314466  0.8088448 ]
 [0.74024721 0.77950874]]

1. Old version (scaled, switched X and Y)
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

2. Old version (scaled, switched X and Y)
 [[0.27478365 0.16193482]
 [0.26951835 0.27070513]]

3. Old version (scaled, switched X and Y)
 [[0.86274897 0.69277219]
 [0.69998163 0.79281253]]


In [8]:
# Unscaled variant of the old code, taken from the HydroAI package
# (hTCL.ETC_vec).
print('This function is renamed as ETC_vec')
VAR_err_TCA_old, SNR_TCA_old, SNRdb_TCA_old, R_TCA_old, fMSE_TCA_old, flags_TCA_old = hTCL.ETC_vec(X, Y, Z)
print('\n1.Old version (unscaled)\n', VAR_err_TCA_old['x'])
print('\n2. Old version (unscaled)\n', VAR_err_TCA_old['y'])
print('\n3.Old version (unscaled)\n', VAR_err_TCA_old['z'])

# Switch X and Y: in the recorded output the 'x' and 'y' maps simply trade
# places and 'z' is unchanged, i.e. the unscaled version is order-independent.
VAR_err_TCA_old, SNR_TCA_old, SNRdb_TCA_old, R_TCA_old, fMSE_TCA_old, flags_TCA_old = hTCL.ETC_vec(Y, X, Z)
print('\n1. Old version (unscaled, switched X and Y)\n', VAR_err_TCA_old['x'])
print('\n2. Old version (unscaled, switched X and Y)\n', VAR_err_TCA_old['y'])
print('\n3. Old version (unscaled, switched X and Y)\n', VAR_err_TCA_old['z'])


This function is renamed as ETC_vec

1.Old version (unscaled)
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

2. Old version (unscaled)
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

3.Old version (unscaled)
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]

1. Old version (unscaled, switched X and Y)
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

2. Old version (unscaled, switched X and Y)
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

3. Old version (unscaled, switched X and Y)
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]


In [5]:
# New covariance-matrix based implementation from the HydroAI package
# (hTCL.TCA_vec); expected to reproduce the unscaled results.
VAR_err_TCA, SNR_TCA, SNRdb_TCA, R_TCA, fMSE_TCA, flags_TCA = hTCL.TCA_vec(X, Y, Z)
print('\n1. New version\n', VAR_err_TCA['x'])
print('\n2. New version\n', VAR_err_TCA['y'])
print('\n3. New version\n', VAR_err_TCA['z'])

# Switch X and Y: only the 'x' and 'y' maps should trade places.
VAR_err_TCA2, SNR_TCA2, SNRdb_TCA2, R_TCA2, fMSE_TCA2, flags_TCA2 = hTCL.TCA_vec(Y, X, Z)
print('\n1. New version (Switched X and Y)\n', VAR_err_TCA2['x'])
print('\n2. New version (Switched X and Y)\n', VAR_err_TCA2['y'])
print('\n3. New version (Switched X and Y)\n', VAR_err_TCA2['z'])


1. New version
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

2. New version
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

3. New version
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]

1. New version (Switched X and Y)
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

2. New version (Switched X and Y)
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

3. New version (Switched X and Y)
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]


In [6]:
# Scalar reference implementation (hTCL.ETC) applied to the complete flat
# series D1, D2, D3 (all 4*N samples at once, not per pixel).
errVar_ETC, SNR_ETC, SNRdb_ETC, R_ETC, fMSE_ETC, flags_ETC = hTCL.ETC(D1, D2, D3)
print('\nETC\n', errVar_ETC['x'])
print('\nETC\n', errVar_ETC['y'])
print('\nETC\n', errVar_ETC['z'])
#print(D1[:10])
#print(D2[:10])

# Switch D1 and D2: the 'x' and 'y' error variances should trade places.
errVar_ETC, SNR_ETC, SNRdb_ETC, R_ETC, fMSE_ETC, flags_ETC = hTCL.ETC(D2, D1, D3)
print('\nETC (Switched D1 and D2)\n', errVar_ETC['x'])
print('\nETC (Switched D1 and D2)\n', errVar_ETC['y'])
print('\nETC (Switched D1 and D2)\n', errVar_ETC['z'])

#print(D1[:10])
#print(D2[:10])


ETC
 0.2522540373117278

ETC
 0.48744442691353373

ETC
 0.794874726108981

ETC (Switched D1 and D2)
 0.48744442691353373

ETC (Switched D1 and D2)
 0.2522540373117278

ETC (Switched D1 and D2)
 0.794874726108981


In [7]:
# Apply the scalar hTCL.ETC to each pixel of the 2 x 2 grid separately and
# collect the error variances, so that they can be compared with the maps
# returned by the vectorized functions.
ETC_total_x = []
ETC_total_y = []
ETC_total_z = []
# Row-major loop over the grid, so reshape(2, 2) below restores the layout
for i in range(2):
    for j in range(2):
        errVar_ETC, SNR_ETC, SNRdb_ETC, R_ETC, fMSE_ETC, flags_ETC = hTCL.ETC(X[i, j, :], Y[i, j, :], Z[i, j, :])
        ETC_total_x.append(errVar_ETC['x'])
        ETC_total_y.append(errVar_ETC['y'])
        ETC_total_z.append(errVar_ETC['z'])

print('\n1. ETC\n', np.array(ETC_total_x).reshape(2,2))
print('\n2. ETC\n', np.array(ETC_total_y).reshape(2,2))
print('\n3. ETC\n', np.array(ETC_total_z).reshape(2,2))

# Switch test: same per-pixel loop with X and Y exchanged
ETC_total_x = []
ETC_total_y = []
ETC_total_z = []
# Row-major loop over the grid, as above
for i in range(2):
    for j in range(2):
        errVar_ETC, SNR_ETC, SNRdb_ETC, R_ETC, fMSE_ETC, flags_ETC = hTCL.ETC(Y[i, j, :], X[i, j, :], Z[i, j, :])
        ETC_total_x.append(errVar_ETC['x'])
        ETC_total_y.append(errVar_ETC['y'])
        ETC_total_z.append(errVar_ETC['z'])

print('\n1. ETC (Switched X and Y)\n', np.array(ETC_total_x).reshape(2,2))      
print('\n2. ETC (Switched X and Y)\n', np.array(ETC_total_y).reshape(2,2))
print('\n3. ETC (Switched X and Y)\n', np.array(ETC_total_z).reshape(2,2))


1. ETC
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

2. ETC
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

3. ETC
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]

1. ETC (Switched X and Y)
 [[0.46120713 0.53868863]
 [0.46783316 0.47956407]]

2. ETC (Switched X and Y)
 [[0.26481391 0.18906668]
 [0.28502206 0.26616256]]

3. ETC (Switched X and Y)
 [[0.8428336  0.77998994]
 [0.74911562 0.81002849]]


### Check efficiency

In [20]:
import time
import numpy as np

# Assuming you have already defined TCA_vec_old, TCA_vec, and ETC functions
# and have your input data X, Y, Z, D1, D2, D3 ready

def time_function(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` once and measure how long it takes.

    Args:
    func (callable): Function to time.
    *args, **kwargs: Arguments forwarded to ``func`` unchanged.

    Returns:
    tuple: ``(elapsed_seconds, result)`` where ``elapsed_seconds`` is a float
    and ``result`` is whatever ``func`` returned.

    The elapsed time is taken from ``time.perf_counter()``, a monotonic
    high-resolution clock intended for measuring short durations, instead of
    the wall clock ``time.time()``.
    """
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    end_time = time.perf_counter()
    return end_time - start_time, result

# Number of iterations for more accurate timing
n_iterations = 100

# Time ETC_vec (the notebook-level definition, unscaled old version):
# one call processes the whole 2 x 2 grid.
total_time_old = 0
for _ in range(n_iterations):
    execution_time, _ = time_function(ETC_vec, X, Y, Z)
    total_time_old += execution_time
avg_time_old = total_time_old / n_iterations

# Time TCA_vec (the notebook-level definition): one call per grid.
total_time_new = 0
for _ in range(n_iterations):
    execution_time, _ = time_function(TCA_vec, X, Y, Z)
    total_time_new += execution_time
avg_time_new = total_time_new / n_iterations

# Time ETC: the scalar function is called once per pixel, so each iteration
# accumulates four calls and the average is again the time per 2 x 2 grid.
total_time_etc = 0
for _ in range(n_iterations):
    for i in range(2):
        for j in range(2):
            execution_time, _ = time_function(ETC, X[i, j, :], Y[i, j, :], Z[i, j, :])
            total_time_etc += execution_time
avg_time_etc = total_time_etc / n_iterations

print(f"Average execution time for ETC_vec: {avg_time_old:.6f} seconds")
print(f"Average execution time for TCA_vec: {avg_time_new:.6f} seconds")
print(f"Average execution time for ETC: {avg_time_etc:.6f} seconds")


Average execution time for ETC_vec: 0.000560 seconds
Average execution time for TCA_vec: 0.000548 seconds
Average execution time for ETC: 0.000575 seconds


In [17]:
def generate_test_data_with_nans(N, nan_percentage=0.1, seed=0):
    """
    Generate synthetic triple-collocation test datasets with NaN values.

    Three noisy observations of one common standard-normal signal are
    created (error standard deviations 0.5, 0.7 and 0.9), arranged as a
    2 x 2 grid of N-sample time series, and the same randomly chosen samples
    are set to NaN in all three datasets.

    Args:
    N (int): Number of data points per pixel
    nan_percentage (float): Fraction of samples to replace by NaN, given as
        a probability between 0 and 1 (0.1 means about 10 %)
    seed (int): Seed of the random generator. The default 0 reproduces the
        data obtained by calling np.random.seed(0) first.

    Returns:
    X, Y, Z (np.array): 3D arrays with shape (2, 2, N) containing the test data

    A private np.random.RandomState is used, so calling this function does
    not reseed or advance NumPy's global random generator.
    """
    # Legacy generator: same stream as np.random.seed(seed) followed by
    # np.random.* calls, but without touching the global state.
    rng = np.random.RandomState(seed)

    # Generate the true signal T
    T = rng.normal(0, 1, 4*N)  # Mean 0, standard deviation 1

    # Generate errors for each dataset
    e1 = rng.normal(0, 0.5, 4*N)  # Error variance 0.25
    e2 = rng.normal(0, 0.7, 4*N)  # Error variance 0.49
    e3 = rng.normal(0, 0.9, 4*N)  # Error variance 0.81

    # Generate the datasets and reshape them to 3D arrays with shape (2, 2, N)
    X = (T + e1).reshape(2, 2, N)
    Y = (T + e2).reshape(2, 2, N)
    Z = (T + e3).reshape(2, 2, N)

    # Add NaN values: one shared mask, so the gaps coincide in X, Y and Z
    nan_mask = rng.random_sample(X.shape) < nan_percentage
    X[nan_mask] = np.nan
    Y[nan_mask] = np.nan
    Z[nan_mask] = np.nan

    return X, Y, Z

# Generate new test data with NaN values (about 10 % of the samples)
N = 1000
X_nan, Y_nan, Z_nan = generate_test_data_with_nans(N, nan_percentage=0.1)
# Show the first ten samples of pixel (0, 0); the NaNs appear at the same
# positions in all three datasets because one mask is shared.
print(X_nan[0,0,:10])
print(Y_nan[0,0,:10])
print(Z_nan[0,0,:10])

[ 2.07072693  1.3220072   1.11428348  2.80911701  0.99839206 -0.62371051
  0.96528148         nan  0.33061376 -0.71752656]
[ 2.46254368  0.58221372  1.62628391  3.2741463   0.55208139 -1.52504786
 -0.03145512         nan -0.90985364  0.47818249]
[ 2.6150099   0.7807884  -0.07937401  2.05652701  2.72840393  0.45024271
  1.66224402         nan -0.70772905 -0.21654611]


In [19]:
# Number of iterations for more accurate timing
n_iterations = 100

# Time ETC_vec (the notebook-level definition, unscaled old version) on the
# data with NaNs: one call processes the whole 2 x 2 grid.
total_time_old = 0
for _ in range(n_iterations):
    execution_time, _ = time_function(ETC_vec, X_nan, Y_nan, Z_nan)
    total_time_old += execution_time
avg_time_old = total_time_old / n_iterations

# Time TCA_vec (the notebook-level definition): one call per grid.
total_time_new = 0
for _ in range(n_iterations):
    execution_time, _ = time_function(TCA_vec, X_nan, Y_nan, Z_nan)
    total_time_new += execution_time
avg_time_new = total_time_new / n_iterations

# Time ETC: the scalar function is called once per pixel, so each iteration
# accumulates four calls and the average is again the time per 2 x 2 grid.
total_time_etc = 0
for _ in range(n_iterations):
    for i in range(2):
        for j in range(2):
            execution_time, _ = time_function(ETC, X_nan[i, j, :], Y_nan[i, j, :], Z_nan[i, j, :])
            total_time_etc += execution_time
avg_time_etc = total_time_etc / n_iterations

print(f"Average execution time for ETC_vec: {avg_time_old:.6f} seconds")
print(f"Average execution time for TCA_vec: {avg_time_new:.6f} seconds")
print(f"Average execution time for ETC: {avg_time_etc:.6f} seconds")

Average execution time for ETC_vec: 0.000634 seconds
Average execution time for TCA_vec: 0.000553 seconds
Average execution time for ETC: 0.000688 seconds
