In [3]:
%matplotlib inline
import numpy as np
import pandas as pd


In [4]:
def generate_data_matrix(spatial_size, temporal_size, miss_ratio):
    data_matrix = np.random.randint(1,5,size=(spatial_size,temporal_size))
    origin = data_matrix.copy()
    k = round(spatial_size*temporal_size*miss_ratio)
    ri = np.random.choice(spatial_size*temporal_size,k,replace=False)
    data_matrix.flat[ri] = 0
    return origin,data_matrix

In [5]:
def is_diagonal(matrix):
    result = matrix.T == matrix
    if type(result) == bool:
        return result
    else:
        return result.all()

In [6]:
def get_distance_matrix(data_matrix,dis_loc=None):
    spatial_length = data_matrix.shape[0]
    # print(spatial_length)
    dis_matrix = np.random.rand(spatial_length,spatial_length)
    dis_matrix += dis_matrix.T
    np.fill_diagonal(dis_matrix,0)
    return dis_matrix

In [7]:
def get_time_interval_matrix(data_matrix):
    temporal_length = data_matrix.shape[1]
    print(temporal_length)
    ti_matrix = np.random.rand(temporal_length,temporal_length)
    


In [None]:
origin,dm = generate_data_matrix(5,4,0.3)
print(origin)
print(dm)
sum = 0 
for i in dm.flatten():
    if i == 0:
        sum +=1
print(sum)

In [None]:
# Global Spatial View - IDW (Inverse distance weighting)
# Global means using all sensors in the sensor network
# Local means using a subset of sensors in the sensor network

def idw(data_matrix,alpha=1,distance_matrix=None):
    return_matrix = data_matrix.copy().astype(float)
    # return_matrix = data_matrix
    if distance_matrix is not None:
        spaital_length,temporal_length = data_matrix.shape

        for i in range(temporal_length):
            tmp = data_matrix.T[i]
            # print(tmp)
            zero_index = np.where(tmp == 0)[0]
            true_index = np.where(tmp != 0)[0]
            res_array = np.zeros_like(zero_index,dtype=float)
            for j,mis in enumerate(zero_index):                
                dis_array = np.zeros_like(true_index,dtype=np.float)
                for ii,solid in enumerate(true_index):
                    dis_array[ii] = distance_matrix[mis][solid]
                dis_array = np.power(dis_array,-alpha)
                res_array[j] = np.dot(dis_array,tmp[true_index]) / np.sum(dis_array)

            return_matrix.T[i][zero_index] = res_array
    else:
        pass
    return return_matrix

origin,dm = generate_data_matrix(5,4,0.3)
ds_matrix = get_distance_matrix(dm,0)
res = idw(dm,distance_matrix=ds_matrix)
print(res-dm)
# print(dm)


In [None]:
# Global Temporal View -SES(Simple Exponential Smoothing)

def ses(data_matrix,beta=0.5,time_interval_array=None):
    return_matrix = data_matrix.copy().astype(float)
    
    spatial_length,temporal_length = data_matrix.shape
    for i in range(spatial_length):
        tmp = data_matrix[i]
        # print(tmp)
        zero_index = np.where(tmp == 0)[0]
        true_index = np.where(tmp != 0)[0]
        res_array = np.zeros_like(zero_index,dtype=float)
        # print('-')
        for j,mis in enumerate(zero_index):
            # print('--')
            time_interval = abs(true_index-mis)
            # print(time_interval)
            # coef_array = time_interval
            coef_array = np.power(1-beta,time_interval-1)*beta
            res_array[j] = np.dot(coef_array,tmp[true_index]) / (np.sum(coef_array))
            # print('res',res_array[j])
        # print(zero_index)
        return_matrix[i][zero_index] = res_array
    return return_matrix

origin,dm = generate_data_matrix(5,4,0.3)
ses(dm)-dm


In [96]:
# Local Temporal View - ICF (Item-based Collaborative filtering)

def icf(data_matrix,window=5):
    return_matrix = data_matrix.copy().astype(float)

    if data_matrix is not None:
        spatial_length, temporal_length = data_matrix.shape
        zero_index = np.where(data_matrix.flatten() == 0)[0]

        res_array = np.zeros_like(zero_index,dtype=float)

        temporal_index = zero_index % temporal_length
        spatial_index = zero_index / temporal_length

        spatial_index = spatial_index.astype(int)
        left_margin = temporal_index - (window-1)/2
        right_margin = temporal_index + (window -1)/2

        left_margin[np.where(left_margin < 0)[0]] = 0
        right_margin[np.where(right_margin >= temporal_length)] = temporal_length-1
        left_margin = left_margin.astype(int)
        right_margin = right_margin.astype(int)
        
        index = np.arange(temporal_length)
        for i in range(len(zero_index)):
            sim_array = np.zeros(temporal_length)
            sim_array[left_margin[i]:right_margin[i]+1] = 1
            sim_array[temporal_index[i]] = 0

            sim_array_index = np.where(sim_array == 1)[0]
            
            for ii in sim_array_index :
                sim_array[ii] = sim_temporal(data_matrix,temporal_index[i],ii)

            

            res_array[i] = np.dot(sim_array,data_matrix[spatial_index[i]]) / np.sum(sim_array)


        return_matrix.flat[zero_index] = res_array

    return return_matrix

def sim_temporal(sub_data,source_index,target_index):
    source_non_zero_index = np.where(sub_data.T[source_index] != 0)[0]
    target_non_zero_index = np.where(sub_data.T[target_index] != 0)[0]

    intersect = np.intersect1d(source_non_zero_index,target_non_zero_index)

    diff1 = sub_data.T[source_index][intersect]
    diff2 = sub_data.T[target_index][intersect]

    diff_count = np.sqrt(len(diff1-diff2))
    diff_sum = 1 / np.sqrt(np.sum(np.power(diff1-diff2,2)))
    sim = diff_count*diff_sum

    if np.isinf(sim):
        sim = 999
    if np.isnan(sim):
        sim = 0
    return sim


origin,dm = generate_data_matrix(5,4,0.2)
icf(dm,3)-dm


array([[0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 2.61257411, 0.        ],
       [0.        , 0.8       , 0.38742589, 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 2.61257411, 0.        ]])

In [None]:
# Local Spatial View - UCF (User-based Collaborative filtering)

def ucf(data_matrix,window=5):
    return_matrix = data_matrix.copy().astype(float) 
    if data_matrix is not None:
        spatial_length, temporal_length = data_matrix.shape
        zero_index = np.where(data_matrix.flatten() == 0)[0]
        
        res_array = np.zeros_like(zero_index,dtype=float)
        temporal_index = zero_index % temporal_length
        spatial_index = zero_index / temporal_length
        spatial_index = spatial_index.astype(int)
        
        left_margin = temporal_index - (window-1)/2
        right_margin = temporal_index + (window -1)/2

        left_margin[np.where(left_margin < 0)[0]] = 0
        right_margin[np.where(right_margin >= temporal_length)] = temporal_length-1
        left_margin = left_margin.astype(int)
        right_margin = right_margin.astype(int)
        for i in range(len(zero_index)):

            sub = ((data_matrix.T[left_margin[i]:right_margin[i]+1]).T)
            sim_array = np.ones(spatial_length)
            sim_array[spatial_index[i]] = 0
            sim_array_index = np.where(sim_array != 0)[0]
            for ii in sim_array_index:
                sim_array[ii] = sim_local(sub,spatial_index[i],ii)
            
            spatial_data_for_certain_time = data_matrix.T[temporal_index[i]]
            res_array[i] = np.dot(spatial_data_for_certain_time,sim_array) / np.sum(sim_array)
            
        return_matrix.flat[zero_index] = res_array
    return return_matrix

def sim_local(sub_data,spatial_index,current_index):

    sp_non_zero_index = np.where(sub_data[spatial_index] != 0)[0]
    ci_non_zero_index = np.where(sub_data[current_index] != 0)[0]

    intersect = np.intersect1d(sp_non_zero_index,ci_non_zero_index)

    diff1 = sub_data[spatial_index][intersect]
    diff2 = sub_data[current_index][intersect]
    diff_count = np.sqrt(len(diff1-diff2))
    diff_sum = 1 / np.sqrt(np.sum(np.power(diff1-diff2,2)))
    sim = diff_count*diff_sum
    if np.isinf(sim):
        sim = 999
    if np.isnan(sim):
        sim = 0
    return sim


origin,dm = generate_data_matrix(5,4,0.2)
ucf(dm,3)-dm