In [43]:
import numpy as np
# Toy rating matrix: one row per user, one column per item.
# The similarity functions below treat a 0 entry as "not rated" and drop it.
A = np.array([[4,1,1,4,2,3,5,0,4,0],
              [0,4,2,0,3,2,5,0,4,3],
              [2,0,4,5,2,0,1,3,4,2],
              [1,4,0,1,4,5,0,3,1,2],
              [1,2,3,4,0,3,4,4,2,5]])

In [61]:
# user-similarity
def COS(mat):
    """Pairwise cosine similarity between the rows (users) of ``mat``.

    For every pair of users, only the columns in which *both* rows are
    non-zero are compared; a 0 entry is treated as "not rated" and removed
    from both vectors before the cosine is taken.

    Parameters
    ----------
    mat : numpy array with one row per user.

    Returns
    -------
    Symmetric float array of shape (users, users). The diagonal is left at
    0.0 (a user is never compared with itself), and a pair whose reduced
    vectors have zero length also scores 0.
    """
    n_users = np.size(mat, axis=0)
    Sim = np.zeros((n_users, n_users))
    print('COS calculation start!')

    for u in range(n_users):
        row_u = mat[u,]
        zeros_u = np.flatnonzero(row_u == 0)
        for v in range(u + 1, n_users):
            row_v = mat[v,]
            # Columns that at least one of the two users left unrated.
            skip = np.union1d(zeros_u, np.flatnonzero(row_v == 0))

            vec_u = np.delete(row_u, skip)
            vec_v = np.delete(row_v, skip)

            len_u = np.linalg.norm(vec_u)
            len_v = np.linalg.norm(vec_v)
            if len_u != 0 and len_v != 0:
                score = np.dot(vec_u, vec_v) / (len_u * len_v)
            else:
                score = 0
            # Fill both triangles so the matrix stays symmetric.
            Sim[u, v] = Sim[v, u] = score

    print('COS calculation end!')
    return Sim

In [45]:
# Cosine similarity between every pair of users (rows) of A.
COS(A)

COS calculation start!
COS calculation end!
[[0.         0.9165204  0.79442991 0.63943019 0.88069603]
 [0.9165204  0.         0.80671783 0.82956136 0.89472029]
 [0.79442991 0.80671783 0.         0.71842121 0.84181887]
 [0.63943019 0.82956136 0.71842121 0.         0.79530788]
 [0.88069603 0.89472029 0.84181887 0.79530788 0.        ]]


In [62]:
# user-similarity
def PCC(mat):
    """Pairwise Pearson-style similarity between the rows (users) of ``mat``.

    Each user's mean is taken over all of that user's non-zero entries.
    For a pair of users, the columns where either row is 0 are removed,
    the respective user mean is subtracted from what remains, and the
    cosine of the two centred vectors is the similarity.

    Parameters
    ----------
    mat : numpy array with one row per user; 0 is treated as "not rated".

    Returns
    -------
    Symmetric float array of shape (users, users). The diagonal is left at
    0.0, and a pair whose centred vectors have zero length also scores 0.
    """
    n_users = np.size(mat, axis=0)
    Sim = np.zeros((n_users, n_users))

    # Per-user mean over rated (non-zero) entries only.
    rated_only = np.where(mat != 0.0, mat, np.nan)
    mean = np.nanmean(rated_only, axis=1)
    print('PCC calculation start!')

    for u in range(n_users):
        row_u = mat[u,]
        zeros_u = np.flatnonzero(row_u == 0)
        for v in range(u + 1, n_users):
            row_v = mat[v,]
            # Columns that at least one of the two users left unrated.
            skip = np.union1d(zeros_u, np.flatnonzero(row_v == 0))

            dev_u = np.delete(row_u, skip) - mean[u]
            dev_v = np.delete(row_v, skip) - mean[v]

            len_u = np.linalg.norm(dev_u)
            len_v = np.linalg.norm(dev_v)
            if len_u != 0 and len_v != 0:
                score = np.dot(dev_u, dev_v) / (len_u * len_v)
            else:
                score = 0
            # Fill both triangles so the matrix stays symmetric.
            Sim[u, v] = Sim[v, u] = score

    print('PCC calculation end!')
    return Sim

In [15]:
# Mean-centred (Pearson-style) similarity between every pair of users (rows) of A.
PCC(A)

[3.         3.28571429 2.875      2.625      3.11111111]
PCC calculation start!
PCC calculation end!
[[ 0.          0.54935027 -0.2282767  -0.76405745  0.16697993]
 [ 0.54935027  0.         -0.53146801 -0.57985679 -0.04474374]
 [-0.2282767  -0.53146801  0.         -0.48839607 -0.07135479]
 [-0.76405745 -0.57985679 -0.48839607  0.          0.08293712]
 [ 0.16697993 -0.04474374 -0.07135479  0.08293712  0.        ]]


In [63]:
# memory_based CF using similarity measure(COS, PCC)
def basic_CF(mat,sim,k):
    """Predict ratings as a similarity-weighted average of the top-k neighbours.

    Parameters
    ----------
    mat : 2-D numpy array, one row per user and one column per item.
    sim : str, either 'COS' or 'PCC', selecting the similarity function.
    k   : int, number of highest-similarity users kept per target user.

    Returns
    -------
    Float array with the same shape as ``mat`` holding the predicted rating
    of every user for every item.

    Raises
    ------
    ValueError
        If ``sim`` is neither 'COS' nor 'PCC'.
    """
    # call similarity
    if sim == 'COS':
        Sim = COS(mat)
    elif sim == 'PCC':
        Sim = PCC(mat)
    else:
        raise ValueError("sim must be 'COS' or 'PCC', got {!r}".format(sim))

    NumUsers = np.size(mat, axis=0)
    NumItems = np.size(mat, axis=1)

    # Size the output from the input instead of assuming a 5x10 matrix.
    predicted_rating = np.zeros((NumUsers, NumItems))

    # Indices of each user's k most similar users (descending similarity).
    k_neighbors = np.argsort(-Sim)[:, :k]

    for u in range(0, NumUsers):
        list_sim = Sim[u, k_neighbors[u,]] # similarity list
        list_rating = mat[k_neighbors[u,],].astype('float64') # rating list
        # NOTE(review): neighbours' unrated entries (0) enter the sum as
        # literal zeros, and the denominator is the signed sum of the
        # similarities, so it can be zero or negative (e.g. with PCC).
        predicted_rating[u,] = np.sum(list_sim.reshape(-1,1) * list_rating, axis=0) / np.sum(list_sim)

    return predicted_rating
    
# Weighted-average predictions from the 2 most similar users, once per similarity measure.
print(basic_CF(A,'COS',2))
print(basic_CF(A,'PCC',2))

COS calculation start!
COS calculation end!
[3.         3.28571429 2.875      2.625      3.11111111]
PCC calculation start!
PCC calculation end!


array([[ 0.23310469,  3.53379062,  2.23310469,  0.93241876,  2.30068593,
         2.23310469,  4.76689531,  0.93241876,  3.53379062,  3.46620938],
       [ 4.        ,  1.        ,  1.        ,  4.        ,  2.        ,
         3.        ,  5.        ,  0.        ,  4.        ,  0.        ],
       [ 1.        ,  2.        ,  3.        ,  4.        , -0.        ,
         3.        ,  4.        ,  4.        ,  2.        ,  5.        ],
       [ 1.        ,  2.        ,  3.        ,  4.        ,  0.        ,
         3.        ,  4.        ,  4.        ,  2.        ,  5.        ],
       [ 3.00442425,  1.99557575,  0.66814142,  3.00442425,  2.66371717,
         3.66371717,  3.34070708,  0.99557575,  3.00442425,  0.66371717]])

In [64]:
# CF algorithm with mean
def basic_mean(mat,sim,k):
    """Predict ratings from the top-k neighbours' mean-centred ratings.

    The prediction for user ``u`` is ``mean[u]`` plus the
    similarity-weighted average of each neighbour's deviation from that
    neighbour's own mean rating.

    Parameters
    ----------
    mat : 2-D numpy array, one row per user and one column per item;
          0 is treated as "not rated" when computing the user means.
    sim : str, either 'COS' or 'PCC', selecting the similarity function.
    k   : int, number of highest-similarity users kept per target user.

    Returns
    -------
    Float array with the same shape as ``mat`` holding the predicted rating
    of every user for every item.

    Raises
    ------
    ValueError
        If ``sim`` is neither 'COS' nor 'PCC'.
    """
    # call similarity
    if sim == 'COS':
        Sim = COS(mat)
    elif sim == 'PCC':
        Sim = PCC(mat)
    else:
        raise ValueError("sim must be 'COS' or 'PCC', got {!r}".format(sim))

    NumUsers = np.size(mat, axis=0)
    NumItems = np.size(mat, axis=1)

    # Size the output from the input instead of assuming a 5x10 matrix.
    predicted_rating = np.zeros((NumUsers, NumItems))

    # mean of users rating, taken over non-zero entries only
    mean = np.nanmean(np.where(mat!=0, mat, np.nan), axis=1)

    # Indices of each user's k most similar users (descending similarity).
    k_neighbors = np.argsort(-Sim)[:, :k]

    for u in range(0, NumUsers):
        list_sim = Sim[u, k_neighbors[u,]] # similarity list
        list_rating = mat[k_neighbors[u,],].astype('float64') # rating list
        list_mean = mean[k_neighbors[u,],] # mean list
        # predicted rating calculation
        # NOTE(review): neighbours' unrated entries (0) are centred like real
        # ratings, and the denominator is the signed sum of the similarities,
        # so it can be zero or negative (e.g. with PCC).
        denominator = np.sum(list_sim)
        numerator = np.sum(list_sim.reshape(-1,1) * (list_rating-list_mean.reshape(-1,1)), axis=0)
        predicted_rating[u,] = mean[u] + numerator / denominator

    return predicted_rating
    
# Mean-centred predictions from the 2 most similar users, once per similarity measure.
print(basic_mean(A,'COS',2))
print(basic_mean(A,'PCC',2))

COS calculation start!
COS calculation end!
[[ 0.28988047  2.81978035  2.28988047  1.75998059  1.32974698  2.28988047
   4.30981372  1.75998059  2.81978035  3.77991384]
 [ 2.74888141  1.72480939  2.21879139  4.2308274   1.24286341  3.2308274
   4.7368454   2.20675538  3.24286341  2.70073737]
 [ 0.18909227  2.65715391  2.18909227  1.72103063  1.14650779  2.18909227
   4.16780003  1.72103063  2.65715391  3.69973839]
 [-0.08579346  2.44582768  1.91420654  1.3825854   0.95636806  1.91420654
   3.9352873   1.3825854   2.44582768  3.40366616]
 [ 1.95132724  2.47897423  1.47107509  1.95132724  2.47107509  2.46317595
   4.96712552 -0.03287448  3.96712552  1.47897423]]
[3.         3.28571429 2.875      2.625      3.11111111]
PCC calculation start!
PCC calculation end!
[[-0.01190878  3.28877715  1.98809122  0.68740529  2.05567246  1.98809122
   4.52188184  0.68740529  3.28877715  3.22119591]
 [ 4.28571429  1.28571429  1.28571429  4.28571429  2.28571429  3.28571429
   5.28571429  0.28571429  4.28

In [None]:
# CF algorithm with z-score
def basic_zscore(mat,sim,k):
    """Predict ratings from the top-k neighbours' z-scored ratings.

    Each neighbour's ratings are standardised with that neighbour's own
    mean and standard deviation, combined as a similarity-weighted average,
    and mapped back to user ``u``'s scale via ``mean[u] + std[u] * average``.

    Parameters
    ----------
    mat : 2-D numpy array, one row per user and one column per item;
          0 is treated as "not rated" when computing the user mean and std.
    sim : str, either 'COS' or 'PCC', selecting the similarity function.
    k   : int, number of highest-similarity users kept per target user.

    Returns
    -------
    Float array with the same shape as ``mat`` holding the predicted rating
    of every user for every item.

    Raises
    ------
    ValueError
        If ``sim`` is neither 'COS' nor 'PCC'.
    """
    # call similarity
    if sim == 'COS':
        Sim = COS(mat)
    elif sim == 'PCC':
        Sim = PCC(mat)
    else:
        raise ValueError("sim must be 'COS' or 'PCC', got {!r}".format(sim))

    NumUsers = np.size(mat, axis=0)
    NumItems = np.size(mat, axis=1)

    # Size the output from the input instead of assuming a 5x10 matrix.
    predicted_rating = np.zeros((NumUsers, NumItems))

    # mean & std of users rating, taken over non-zero entries only
    rated = np.where(mat!=0, mat, np.nan)
    mean = np.nanmean(rated, axis=1)
    std = np.nanstd(rated, axis=1)

    # Indices of each user's k most similar users (descending similarity).
    k_neighbors = np.argsort(-Sim)[:, :k]

    for u in range(0, NumUsers):
        list_sim = Sim[u, k_neighbors[u,]] # similarity list
        list_rating = mat[k_neighbors[u,],].astype('float64') # rating list
        list_mean = mean[k_neighbors[u,],] # mean list
        list_std = std[k_neighbors[u,],] # std list
        # predicted rating calculation
        # The weights are the neighbours' similarities (the original read an
        # undefined name `list_sum` here and raised NameError).
        # NOTE(review): the signed sum can be zero or negative (e.g. with
        # PCC), and a neighbour with std 0 causes a division by zero below.
        denominator = np.sum(list_sim)
        numerator = np.sum(list_sim.reshape(-1,1) * ((list_rating-list_mean.reshape(-1,1))/list_std.reshape(-1,1)), axis=0)
        predicted_rating[u,] = mean[u]+std[u]*numerator/denominator

    return predicted_rating
    
# Z-score-based predictions from the 2 most similar users, once per similarity measure.
print(basic_zscore(A,'COS',2))
print(basic_zscore(A,'PCC',2))