In [1]:
%matplotlib inline
import numpy as np
import pandas as pd


In [2]:
def generate_data_matrix(spatial_size, temporal_size, miss_ratio=0):
    data_matrix = np.random.randint(1,5,size=(spatial_size,temporal_size))
    origin = data_matrix.copy()
    k = round(spatial_size*temporal_size*miss_ratio)
    ri = np.random.choice(spatial_size*temporal_size,k,replace=False)
    data_matrix.flat[ri] = 0
    return origin,data_matrix

In [3]:
def is_diagonal(matrix):
    result = matrix.T == matrix
    if type(result) == bool:
        return result
    else:
        return result.all()

In [4]:
def get_distance_matrix(data_matrix,linear=False):
    if not linear:
        spatial_length = data_matrix.shape[0]
        # print(spatial_length)
        dis_matrix = np.random.rand(spatial_length,spatial_length)
        dis_matrix += dis_matrix.T
        np.fill_diagonal(dis_matrix,0)
        return dis_matrix
    else:
        spatial_length = data_matrix.shape[0]
        dis_matrix = np.zeros((spatial_length,spatial_length))
        for i in range(spatial_length):
            dis_matrix[i][i:] = np.arange(0,spatial_length-i)
        dis_matrix = dis_matrix + dis_matrix.T
        return dis_matrix
origin,dm = generate_data_matrix(4,3)
get_distance_matrix(dm,True)



array([[0., 1., 2., 3.],
       [1., 0., 1., 2.],
       [2., 1., 0., 1.],
       [3., 2., 1., 0.]])

In [5]:
def get_time_interval_matrix(data_matrix):
    temporal_length = data_matrix.shape[1]
    print(temporal_length)
    ti_matrix = np.random.rand(temporal_length,temporal_length)
    


In [6]:
origin,dm = generate_data_matrix(5,4,0.3)
print(origin)
print(dm)
sum = 0 
for i in dm.flatten():
    if i == 0:
        sum +=1
print(sum)

[[3 2 2 2]
 [3 4 3 1]
 [3 1 4 1]
 [3 4 4 4]
 [3 2 2 3]]
[[3 2 0 2]
 [3 0 3 0]
 [3 1 0 1]
 [3 4 0 4]
 [3 0 2 3]]
6


In [7]:
# Global Spatial View - IDW (Inverse distance weighting)
# Global means using all sensors in the sensor network
# Local means using a subset of sensors in the sensor network

def idw(data_matrix,alpha=1,distance_matrix=None):
    return_matrix = data_matrix.copy().astype(float)
    # return_matrix = data_matrix
    if distance_matrix is not None:
        spaital_length,temporal_length = data_matrix.shape

        for i in range(temporal_length):
            tmp = data_matrix.T[i]
            # print(tmp)
            zero_index = np.where(tmp == 0)[0]
            true_index = np.where(tmp != 0)[0]
            res_array = np.zeros_like(zero_index,dtype=float)
            for j,mis in enumerate(zero_index):                
                dis_array = np.zeros_like(true_index,dtype=np.float)
                for ii,solid in enumerate(true_index):
                    dis_array[ii] = distance_matrix[mis][solid]
                dis_array = np.power(dis_array,-alpha)
                res_array[j] = np.dot(dis_array,tmp[true_index]) / np.sum(dis_array)
                if np.isinf(res_array[j]) or np.isnan(res_array[j]):
                    res_array[j] = 0

            return_matrix.T[i][zero_index] = res_array
    else:
        pass
    return return_matrix

origin,dm = generate_data_matrix(5,4,0.2)
ds_matrix = get_distance_matrix(dm)
res = idw(dm,distance_matrix=ds_matrix)
print(res-dm)
# print(dm)


[[3.13346882 0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [3.31580956 0.         0.         2.48541443]
 [0.         0.         2.04910325 0.        ]
 [0.         0.         0.         0.        ]]


In [8]:
# Global Temporal View -SES(Simple Exponential Smoothing)

def ses(data_matrix,beta=0.5,time_interval_array=None):
    return_matrix = data_matrix.copy().astype(float)
    
    spatial_length,temporal_length = data_matrix.shape
    for i in range(spatial_length):
        tmp = data_matrix[i]
        # print(tmp)
        zero_index = np.where(tmp == 0)[0]
        true_index = np.where(tmp != 0)[0]
        res_array = np.zeros_like(zero_index,dtype=float)
        # print('-')
        for j,mis in enumerate(zero_index):
            # print('--')
            time_interval = abs(true_index-mis)
            # print(time_interval)
            # coef_array = time_interval
            coef_array = np.power(1-beta,time_interval-1)*beta
            res_array[j] = np.dot(coef_array,tmp[true_index]) / (np.sum(coef_array))
            if np.isinf(res_array[j]) or np.isnan(res_array[j]):
                res_array[j] = 0
            # print('res',res_array[j])
        # print(zero_index)
        return_matrix[i][zero_index] = res_array
    return return_matrix

origin,dm = generate_data_matrix(5,4,0.2)
ses(dm)-dm


array([[3.71428571, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [2.28571429, 0.        , 0.        , 0.        ],
       [3.33333333, 3.33333333, 0.        , 0.        ]])

In [9]:
# Local Temporal View - ICF (Item-based Collaborative filtering)

def icf(data_matrix,window=5):
    return_matrix = data_matrix.copy().astype(float)

    if data_matrix is not None:
        spatial_length, temporal_length = data_matrix.shape
        zero_index = np.where(data_matrix.flatten() == 0)[0]

        res_array = np.zeros_like(zero_index,dtype=float)

        temporal_index = zero_index % temporal_length
        spatial_index = zero_index / temporal_length

        spatial_index = spatial_index.astype(int)
        left_margin = temporal_index - (window-1)/2
        right_margin = temporal_index + (window -1)/2

        left_margin[np.where(left_margin < 0)[0]] = 0
        right_margin[np.where(right_margin >= temporal_length)] = temporal_length-1
        left_margin = left_margin.astype(int)
        right_margin = right_margin.astype(int)
        
        index = np.arange(temporal_length)
        for i in range(len(zero_index)):
            sim_array = np.zeros(temporal_length)
            sim_array[left_margin[i]:right_margin[i]+1] = 1
            sim_array[temporal_index[i]] = 0

            sim_array_index = np.where(sim_array == 1)[0]
            
            for ii in sim_array_index :
                sim_array[ii] = sim_temporal(data_matrix,temporal_index[i],ii)

            res_array[i] = np.dot(sim_array,data_matrix[spatial_index[i]]) / np.sum(sim_array)
            if np.isinf(res_array[i]) or np.isnan(res_array[i]):
                res_array[i] = 0


        return_matrix.flat[zero_index] = res_array

    return return_matrix

def sim_temporal(sub_data,source_index,target_index):
    source_non_zero_index = np.where(sub_data.T[source_index] != 0)[0]
    target_non_zero_index = np.where(sub_data.T[target_index] != 0)[0]

    intersect = np.intersect1d(source_non_zero_index,target_non_zero_index)

    diff1 = sub_data.T[source_index][intersect]
    diff2 = sub_data.T[target_index][intersect]

    diff_count = np.sqrt(len(diff1-diff2))
    diff_sum = 1 / np.sqrt(np.sum(np.power(diff1-diff2,2)))
    sim = diff_count*diff_sum

    if np.isinf(sim):
        sim = 999
    if np.isnan(sim):
        sim = 0
    return sim


origin,dm = generate_data_matrix(5,4,0.2)
icf(dm,3)-dm


array([[0. , 2.5, 0. , 0. ],
       [0. , 2.5, 0. , 2. ],
       [0. , 2.5, 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ]])

In [10]:
# Local Spatial View - UCF (User-based Collaborative filtering)

def ucf(data_matrix,window=5):
    return_matrix = data_matrix.copy().astype(float) 
    if data_matrix is not None:
        spatial_length, temporal_length = data_matrix.shape
        zero_index = np.where(data_matrix.flatten() == 0)[0]
        
        res_array = np.zeros_like(zero_index,dtype=float)
        temporal_index = zero_index % temporal_length
        spatial_index = zero_index / temporal_length
        spatial_index = spatial_index.astype(int)

        left_margin = temporal_index - (window-1)/2
        right_margin = temporal_index + (window -1)/2

        left_margin[np.where(left_margin < 0)[0]] = 0
        right_margin[np.where(right_margin >= temporal_length)] = temporal_length-1
        left_margin = left_margin.astype(int)
        right_margin = right_margin.astype(int)
        for i in range(len(zero_index)):

            sub = ((data_matrix.T[left_margin[i]:right_margin[i]+1]).T)
            sim_array = np.ones(spatial_length)
            sim_array[spatial_index[i]] = 0
            sim_array_index = np.where(sim_array != 0)[0]
            for ii in sim_array_index:
                sim_array[ii] = sim_local(sub,spatial_index[i],ii)
            
            spatial_data_for_certain_time = data_matrix.T[temporal_index[i]]
            res_array[i] = np.dot(spatial_data_for_certain_time,sim_array) / np.sum(sim_array)
            if np.isinf(res_array[i]) or np.isnan(res_array[i]):
                res_array[i] = 0
            
        return_matrix.flat[zero_index] = res_array
    return return_matrix

def sim_local(sub_data,spatial_index,current_index):

    sp_non_zero_index = np.where(sub_data[spatial_index] != 0)[0]
    ci_non_zero_index = np.where(sub_data[current_index] != 0)[0]

    intersect = np.intersect1d(sp_non_zero_index,ci_non_zero_index)

    diff1 = sub_data[spatial_index][intersect]
    diff2 = sub_data[current_index][intersect]
    diff_count = np.sqrt(len(diff1-diff2))
    diff_sum = 1 / np.sqrt(np.sum(np.power(diff1-diff2,2)))
    sim = diff_count*diff_sum
    if np.isinf(sim):
        sim = 999
    if np.isnan(sim):
        sim = 0
    return sim


origin,dm = generate_data_matrix(5,4,0.2)
ucf(dm,3)-dm

array([[0.        , 3.99671692, 0.        , 0.        ],
       [2.        , 0.        , 0.        , 0.        ],
       [1.5       , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 1.00199867],
       [0.        , 0.        , 0.        , 0.        ]])

In [11]:
def is_block_missing_1d(data_matrix,window):
    flag = False
    # mis-spatial
    spaital_length, temporal_length = data_matrix.shape

    zero_index = np.where(data_matrix.flatten() == 0)[0]
    spatial_index = zero_index / temporal_length
    spatial_index = spatial_index.astype(int)
    temporal_index = zero_index % temporal_length

    left_margin = temporal_index - (window-1)/2
    right_margin = temporal_index + (window -1)/2
    left_margin[np.where(left_margin < 0)[0]] = 0
    right_margin[np.where(right_margin >= temporal_length)] = temporal_length-1
    left_margin = left_margin.astype(int)
    right_margin = right_margin.astype(int)

    for i in range(len(zero_index)):
        sub_w = data_matrix[spatial_index[i]][left_margin[i]:right_margin[i]+1]
        if not sub_w.any() and len(sub_w) == window:
            flag = True
    return flag

def is_block_missing(data_matrix,window=3):
    flag = 0
    if is_block_missing_1d(data_matrix,window):
        flag = flag +1
    if is_block_missing_1d(data_matrix.T,window):
        flag = flag +2
    return flag

mis_dm_s = np.array(([0,0,1,0],[0,2,3,4],[1,6,7,8]))
mis_dm_t = mis_dm_s.T
is_block_missing(mis_dm_s)

0

In [12]:
# ST_MVL (Spatial Temporal Multiview-based learning)

def st_mvl(data_matrix, window=5, alpha=1, beta=0.5,distance_matrix=None):
    if distance_matrix is None:
        distance_matrix = get_distance_matrix(data_matrix,True)

    return_matrix = data_matrix.copy().astype(float)

    zero_index = np.where(return_matrix.flatten() == 0)[0]
    
    if is_block_missing(return_matrix,3) :
        return_matrix = ses(return_matrix,beta)
    dm1 = ucf(return_matrix,window)
    dm2 = icf(return_matrix,window)
    dm3 = idw(return_matrix,alpha,distance_matrix=distance_matrix)
    dm4 = ses(return_matrix,window)

    # weight & b should be learing, while remains here as mean value
    weight = np.ones(4)
    b = 0
    
    for i in zero_index:
        v = (dm1.flat[i],dm2.flat[i],dm3.flat[i],dm4.flat[i])
        print(v)
        return_matrix.flat[i]=(np.dot(v,weight) / np.sum(weight))+b
    return return_matrix


origin,dm = generate_data_matrix(5,4,0.1)
st_mvl(dm,3)-dm

(2.0869510458335268, 2.5241998455109975, 1.84, 0.0)
(2.998667110963012, 1.0, 1.8235294117647056, 4.333333333333333)


array([[0.        , 0.        , 1.61278772, 0.        ],
       [0.        , 0.        , 0.        , 2.53888246],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ]])