In [199]:
# Models from Introduction to Algorithmic Marketing
# https://algorithmicweb.wordpress.com/
#
# Nearest-neighbor user-based collaborative filtering

In [200]:
%matplotlib inline
import sympy as sy
import numpy as np
import matplotlib.pyplot as plt

In [201]:
# Input rating matrix: one row per user, one column per item.
# X marks a missing (unknown) rating; the code that consumes R detects
# missing entries with `is not None`. Because of the None entries NumPy
# stores this array with dtype=object rather than a numeric dtype.
X = None
R = np.array([
    [5, 4, X, 1, 2, 1],
    [4, X, 3, 1, 1, 2],
    [X, 5, 5, X, 3, 3],
    [2, X, 1, 4, 5, 4],
    [2, 2, 2, X, 4, X],
    [1, 2, 1, X, 5, 4]
])

In [202]:
# Define common variables and helper functions
# n = number of rows of R, m = number of columns of R
n, m = R.shape
def user_common_ratings(R, u, v):
    """Return the ratings of the items that both user ``u`` and user ``v`` rated.

    Parameters
    ----------
    R : 2-D array whose missing ratings are stored as ``None``.
    u, v : row indices of the two users to compare.

    Returns
    -------
    Array of shape ``(2, c)`` where ``c`` is the number of co-rated items:
    row 0 holds ``u``'s ratings and row 1 holds ``v``'s ratings, in the
    original column order. When the users share no rated item the result
    has shape ``(2, 0)``, so ``result.shape[1]`` is always valid.
    """
    row_u, row_v = R[u, :], R[v, :]
    # Boolean mask of the columns in which neither user has a missing rating
    both_known = np.array(
        [a is not None and b is not None for a, b in zip(row_u, row_v)],
        dtype=bool,
    )
    # vstack keeps the 2-row layout even when the mask selects nothing
    return np.vstack((row_u[both_known], row_v[both_known]))

def known_user_ratings(R, u):
    """Return, as a list, the entries of row ``u`` of ``R`` that are not ``None``."""
    present = []
    for rating in R[u, :]:
        if rating is not None:
            present.append(rating)
    return present

def known(r):
    """Tell whether a rating is present, i.e. is anything other than ``None``."""
    return not (r is None)

def mean_r(R, u):
    """Average of the ratings present in row ``u`` of ``R`` (``None`` entries are skipped)."""
    rated = known_user_ratings(R, u)
    return np.mean(rated)

In [203]:
# Calculate the matrix of user similarities
def similarity(R, u, v):
    """Correlation-style similarity between users ``u`` and ``v``.

    The deviations are taken over the items both users rated, but each
    user's mean (``mean_r``) is computed over *all* of that user's known
    ratings, not only the co-rated ones. The result is the dot product of
    the two deviation vectors divided by the product of their Euclidean
    norms.

    Parameters
    ----------
    R : 2-D rating array with ``None`` for missing ratings.
    u, v : row indices of the two users.

    Returns
    -------
    float
        The similarity, or ``0.0`` when it is undefined: the users share no
        rated item, or one of the deviation vectors has zero norm. Returning
        0.0 instead of NaN keeps the similarity matrix safe to sort.
    """
    I_uv = user_common_ratings(R, u, v)
    # No co-rated items: the helper may hand back an empty 1-D array here,
    # so check the rank before touching shape[1].
    if I_uv.ndim < 2 or I_uv.shape[1] == 0:
        return 0.0

    # Cast away the object dtype so the arithmetic below is plain float math
    dev_u = I_uv[0, :].astype(float) - mean_r(R, u)
    dev_v = I_uv[1, :].astype(float) - mean_r(R, v)

    # Norms are loop-invariant; compute them once instead of once per item
    denom = np.linalg.norm(dev_u) * np.linalg.norm(dev_v)
    if denom == 0:
        return 0.0
    return float(np.dot(dev_u, dev_v) / denom)

# Pairwise similarity for every pair of users. The row index is passed to
# similarity() as its second user argument and the column index as its first.
user_similarity = np.array([
    [similarity(R, col_user, row_user) for col_user in range(n)]
    for row_user in range(n)
])

In [204]:
# Show the n x n user-to-user similarity matrix.
# NOTE(review): the saved output below shows 3 decimals, so a print precision
# was presumably configured (e.g. np.set_printoptions) in a cell not visible
# here — confirm before relying on Restart & Run All reproducing it.
print(user_similarity)

[[ 1.     0.875  0.941 -0.797 -0.594 -0.786]
 [ 0.875  1.     0.872 -0.84  -0.81  -0.882]
 [ 0.941  0.872  1.    -0.938 -0.87  -0.92 ]
 [-0.797 -0.84  -0.938  1.     0.86   0.953]
 [-0.594 -0.81  -0.87   0.86   1.     0.947]
 [-0.786 -0.882 -0.92   0.953  0.947  1.   ]]


In [206]:
# Predict ratings based on the user similarities
# k is read as a module-level global by predict_rating, where it caps the
# number of neighbors used for each prediction.
k = 2 # neighborhood size

def predict_rating(R, u, i):
    """Predict the rating of user ``u`` for item ``i`` from similar users.

    Reads the module-level ``user_similarity`` matrix and neighborhood size
    ``k``. The candidates are all users other than ``u`` who have a known
    rating for item ``i``; the ``k`` with the highest similarity to ``u`` are
    used. The prediction is ``u``'s mean rating plus the similarity-weighted
    average of the neighbors' deviations from their own means, normalised by
    the sum of absolute similarities.

    Parameters
    ----------
    R : 2-D rating array with ``None`` for missing ratings.
    u : row index of the user to predict for.
    i : column index of the item to predict.

    Returns
    -------
    The predicted rating. Falls back to ``u``'s mean rating when no neighbor
    rated the item or when every selected similarity is zero, instead of
    dividing by zero.

    Notes
    -----
    * Prints one line listing the neighbors that were used.
    * Neighbors with negative similarity are used when fewer than ``k``
      positively similar candidates exist.
    * The result is not clipped to any rating range (the saved notebook
      output contains a prediction of 6.111).
    """
    # neighbors sorted by similarity, most similar first
    all_neighbors = np.argsort(user_similarity[u])[::-1]
    # keep users other than u who rated item i, then take the top k;
    # int() turns NumPy integer scalars into plain ints so the printed list
    # reads "[2, 1]" regardless of the NumPy version's scalar repr
    neighbors = [int(v) for v in all_neighbors if known(R[v, i]) and v != u][:k]
    mu_u = mean_r(R, u)
    print("user %s, item %s <- neighbors %s" % (u, i, neighbors))

    score = 0
    norm = 0
    for v in neighbors:
        mu_v = mean_r(R, v)
        score = score + user_similarity[u, v] * (R[v, i] - mu_v)
        norm = norm + abs(user_similarity[u, v])

    # Nothing to weight by: no usable neighbor, or all similarities are zero
    if norm == 0:
        return mu_u
    return mu_u + score / norm

# Keep every observed rating as is and fill each missing cell with its
# prediction; cells are visited user by user, item by item.
ratings = np.array([
    [predict_rating(R, user, item) if not known(R[user, item]) else R[user, item]
     for item in range(m)]
    for user in range(n)
])

print("\nComplete rating matrix:")
print(ratings)

user 0, item 2 <- neighbors [2, 1]
user 1, item 1 <- neighbors [0, 2]
user 2, item 0 <- neighbors [0, 1]
user 2, item 3 <- neighbors [0, 1]
user 3, item 1 <- neighbors [5, 4]
user 4, item 3 <- neighbors [3, 0]
user 4, item 5 <- neighbors [5, 3]
user 5, item 3 <- neighbors [3, 0]

Complete rating matrix:
[[ 5.     4.     3.504  1.     2.     1.   ]
 [ 4.     3.4    3.     1.     1.     2.   ]
 [ 6.111  5.     5.     2.592  3.     3.   ]
 [ 2.     2.647  1.     4.     5.     4.   ]
 [ 2.     2.     2.     3.627  4.     3.614]
 [ 1.     2.     1.     3.762  5.     4.   ]]
