# RSCPN

This notebook shows how to solve the problem of corrupted traffic data recovery using RSCPN, the robust extension of SCPN considering unstructured, random anomalies. Note that the parameters used in this example may not necessarily optimal. For an in-depth discussion of RSCPN, please refer to our paper.

## Preparations

In [6]:
import numpy as np

def ten2mat(tensor, mode):
    return np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1), order = 'F')

def mat2ten(mat, tensor_size, mode):
    index = list()
    index.append(mode)
    for i in range(tensor_size.shape[0]):
        if i != mode:
            index.append(i)
    return np.moveaxis(np.reshape(mat, list(tensor_size[index]), order = 'F'), 0, mode)

def compute_mape(var, var_hat):
    return np.sum(np.abs(var - var_hat) / var) / var.shape[0]

def compute_rmse(var, var_hat):
    return  np.sqrt(np.sum((var - var_hat) ** 2) / var.shape[0])

def compute_mae(var, var_hat):
    return np.sum(np.abs(var-var_hat)) / var.shape[0]


def Gradient_Descent(lamda, p, sigma, Iter=10):
    '''
    Parameters:
        lamda: 1/rho
        p: hyper-parameter p
        sigma: the i-th singular value of matrix G
        Iter: the number of iterations in solving x*
    Output:
        x*: float type
    '''
    x_k = sigma
    for k in range(Iter):
        x_k1 = sigma - lamda * p * (x_k) ** (p - 1)
        x_k = x_k1

    return x_k

def opt_x(G, lamda, p, tau):
    '''
    Parameters:
        G: 2-d matrix
        lamda: 1/rho
        p: hyper-parameter p
        tau: hyper-parameter tau (compared with singular value)
    Output:
        W: transformed G (with singular matrix changed)
    '''

    [Q, sigma, R] = np.linalg.svd(G, full_matrices=0)
    delta = np.zeros(sigma.shape)  # singular vector

    v = (2 * lamda * (1 - p)) ** (1 / (2 - p))
    v1 = v + lamda * p * (v ** (p - 1))

    for i in range(len(sigma)):
        s = sigma[i]
        if s >= v1:
            x_ = Gradient_Descent(lamda, p, s)
        else:
            x_ = 0

        tau_ = ((1 / (2 * lamda)) * ((x_ - s) ** 2) + x_ ** p) ** (1 / p)
        if tau <= tau_:
            delta[i] = s
        else:
            delta[i] = x_

    return np.matmul(np.matmul(Q, np.diag(delta)), R)

def add_outlier(sparse_tensor, s, γ):
    '''add the outlier corruption the observations (sparse_tensor)
       s: mean magnitude of outliers
       λ: corruption level
    '''
    np.random.seed(1000)
    ## position of obervations
    pos_observe = np.where(sparse_tensor != 0)
    ## copy the observations
    corrsparse_tensor = sparse_tensor.copy()
    ## size of observations
    obser_size = corrsparse_tensor[pos_observe].shape
    ## generate the corruption term
    corr = np.random.uniform(low = -1*s, high= s, size = obser_size)
    ## randomly sampled the sparse corruption term with fraction of γ
    corr_term = np.multiply(corr, np.round(np.random.rand(obser_size[0]) - 0.5 + γ ))
    ## add the sparse corruption to the obervations
    corrsparse_tensor[pos_observe] = corrsparse_tensor[pos_observe] + corr_term
    ## []+
    corrsparse_tensor = np.maximum(corrsparse_tensor,0)
    return corrsparse_tensor

## Main function

In [7]:
def RSCPN(dense_tensor, cor_sparse_tensor, lam, alpha, rho, p, tau, epsilon, pos_missing, pos_test, maxiter=100):
    '''
    Parameters:
        dense_tensor: the original tensor (without mask) ∈ (M * I * J)
        sparse_tensor: the observed tensor (with mask) ∈ (M * I * J)
        alpha: the weight for each tensor mode, [0.3, 0.3, 0.3]
        rho: learning rate
        p: hyper-parameter p
        tau: hypter-parameter tau
        epsilon: the threshold for residual
        maxiter: the maximum iterations, default = 200
    Output:
        return a predicted tensor, the same size with dense_tensor
    '''

    dim = np.array(cor_sparse_tensor.shape)  # dim = [M, I, J]

    #     min-max normalize
    min_value = ten2mat(dense_tensor, 0).min(axis=1)
    max_value = ten2mat(dense_tensor, 0).max(axis=1)
    cor_sparse_tensor = (cor_sparse_tensor - min_value[:, np.newaxis, np.newaxis]) / (
                max_value[:, np.newaxis, np.newaxis] - min_value[:, np.newaxis, np.newaxis])
    cor_sparse_tensor[pos_missing] = 0

    # initialization
    M = cor_sparse_tensor.copy()  # (M * I * J), M = mat2ten(Z, dim, 0)
    X = cor_sparse_tensor.copy()
    X3 = np.zeros(np.insert(dim, 0, len(dim)))  # 4-D tensor
    T3 = np.zeros(np.insert(dim, 0, len(dim)))  # 4-D tenosr
    E = np.zeros(dim)
    E3 = np.zeros(np.insert(dim, 0, len(dim)))
    it = 0

    # update iteratively
    while True:
        rho = min(rho * 1.4, 1e5)

        # update X 4-D tensor
        X_t = X.copy()
        for k in range(len(dim)):
            X3[k] = mat2ten(
                opt_x(ten2mat(M - E3[k] - T3[k] / rho, k), 1 / rho, p, tau[k]),
                dim,
                k)

        # update M
        M[pos_missing] = np.mean(X3 + E3 + T3 / rho, axis=0)[pos_missing]

        # update E
        for k in range(len(dim)):
            H = M - X3[k] - T3[k] / rho
            E3[k] = np.multiply(np.sign(H), np.maximum(0, np.abs(H) - lam / rho))

        X = np.einsum('k, kmnt -> mnt', alpha, X3)  # (M * I * J)
        E = np.einsum('k, kmnt -> mnt', alpha, E3)  # (M * I * J)

        # update T 4-D tensor
        T3 = T3 + rho * (X3 + E3 - np.broadcast_to(M, np.insert(dim, 0, len(dim))))

        # denormalize
        X = X * (max_value[:, np.newaxis, np.newaxis] - min_value[:, np.newaxis, np.newaxis]) + min_value[:, np.newaxis, np.newaxis]
        E = E * (max_value[:, np.newaxis, np.newaxis] - min_value[:, np.newaxis, np.newaxis]) + min_value[:, np.newaxis, np.newaxis]

        # check tolerance
        tol = np.sqrt(np.sum((X - X_t) ** 2)) / np.sqrt(np.sum(X_t ** 2))

        it += 1

        # terminate
        if (tol < epsilon) or (it >= maxiter):
            break

    # accuracy
    print('Total iteration: {}'.format(it + 1))
    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_tensor[pos_test], X[pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_tensor[pos_test], X[pos_test])))
    print('Imputation MAE: {:.6}'.format(compute_mae(dense_tensor[pos_test], X[pos_test])))
    print()

    return X, E

## A toy example on PEMS volume dataset (40% random missing scenarios)

In [8]:
import scipy.io

tensor = scipy.io.loadmat('dataset/PEMS08/pems08_288_62_170.mat')

dense_tensor = tensor['T']
dense_tensor = dense_tensor.transpose(2,1,0)
print(dense_tensor.shape)
dim = dense_tensor.shape
np.random.seed(1)

missing_rate = 0.4
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)

pos_missing = np.where(sparse_tensor == 0)  # all missing values, including original missing and mask missing, = (M * IJ)
pos_test = np.where((dense_tensor != 0) & (sparse_tensor == 0))  # only include mask missing, = (M * I * J)

dense_tensor.max()

(170, 62, 288)


1147

In [10]:
for cor_rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
    cor_sparse_tensor = add_outlier(sparse_tensor, 100, cor_rate)
    alpha = np.ones(3) / 3
    p = 0.5
    lamda = 1e5
    rho = 1/lamda
    epsilon = 1e-3
    maxiter = 100
    lam = 1/((dim[0] * dim[2]) ** 0.5)
    if cor_rate in [0.1, 0.2]:
        tau = [10, 10, 10]
    elif cor_rate in [0.3, 0.4]:
        tau = [30, 30, 30]
    else:
        tau = [50, 50, 50]
    
    print('cor_rate = %.2f' % cor_rate)

    tensor_hat, E_hat = RSCPN(dense_tensor, cor_sparse_tensor, lam, alpha, rho, p, tau, epsilon, pos_missing, pos_test, maxiter)


cor_rate = 0.10
Total iteration: 33
Imputation MAPE: 0.0862123
Imputation RMSE: 22.3963
Imputation MAE: 12.9094

cor_rate = 0.20
Total iteration: 33
Imputation MAPE: 0.087379
Imputation RMSE: 22.3248
Imputation MAE: 13.0158

cor_rate = 0.30
Total iteration: 33
Imputation MAPE: 0.0924497
Imputation RMSE: 23.3016
Imputation MAE: 13.516

cor_rate = 0.40
Total iteration: 32
Imputation MAPE: 0.0962132
Imputation RMSE: 23.3616
Imputation MAE: 13.9809

cor_rate = 0.50
Total iteration: 32
Imputation MAPE: 0.101351
Imputation RMSE: 23.4729
Imputation MAE: 14.2306



## License

This work is released under the MIT license.