In [None]:
import numpy as np

In [53]:
# Movie titles. A title's position in this list is the integer movie index
# translated by get_movie_index / get_movie_name.
movies = [
    "The Shawshank Redemption",
    "The Godfather",
    "The Hunger Games",
    "Pulp Fiction",
    "Transformers: Revenge of the Fallen",
    "Frozen",
    "Batman v Superman: Dawn of Justice",
    "Cats (2019)"
]

# User names. A name's position in this list is the integer user index
# translated by get_user_index / get_user_name.
users = ["Bart", "Anton", "Daniel", "Bob", "Lucas"]

def get_movie_index(movie_name):
    """Look up the position of ``movie_name`` in the module-level ``movies`` list.

    Raises:
        ValueError: if the title is not in ``movies`` (propagated from ``list.index``).
    """
    position = movies.index(movie_name)
    return position

def get_movie_name(movie_index):
    """Return the title stored at position ``movie_index`` of the module-level ``movies`` list.

    Raises:
        IndexError: if ``movie_index`` is outside the list.
    """
    title = movies[movie_index]
    return title

def get_user_index(user):
    """Look up the position of ``user`` in the module-level ``users`` list.

    Raises:
        ValueError: if the name is not in ``users`` (propagated from ``list.index``).
    """
    position = users.index(user)
    return position

def get_user_name(user_index):
    """Return the name stored at position ``user_index`` of the module-level ``users`` list.

    Raises:
        IndexError: if ``user_index`` is outside the list.
    """
    name = users[user_index]
    return name

def softmax(x):
    """Turn the scores in ``x`` into non-negative weights that sum to 1.

    The maximum of ``x`` is subtracted before exponentiating so that the
    largest exponent is 0, which keeps ``np.exp`` from overflowing on large
    scores. The normalisation runs over all elements of ``x``.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Computed as ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``.

    Args:
        vec1: array-like vector of numbers.
        vec2: array-like vector of the same length as ``vec1``.

    Returns:
        The cosine similarity. When either vector has zero norm the cosine is
        undefined; ``0.0`` ("no similarity") is returned in that case.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # An all-zero vector has no direction. Dividing here would yield nan
        # (with a RuntimeWarning), and a single nan makes every value of a
        # downstream softmax nan as well.
        return 0.0
    return np.dot(vec1, vec2) / norm_product

def max_n_indices(vec, n):
    """Return the indices of the ``n`` largest entries of ``vec``.

    Args:
        vec: array-like of comparable values.
        n: how many indices to return.

    Returns:
        An index array ordered from the smallest to the largest of the
        selected values. If ``n`` exceeds the length of ``vec`` every index is
        returned; if ``n`` is zero or negative the result is empty.
    """
    order = np.argsort(vec)
    if n <= 0:
        # order[-0:] is order[0:], i.e. *everything*, so "zero items" needs
        # an explicit empty slice.
        return order[:0]
    return order[-n:]

def max_n_values(vec, n):
    """Return the ``n`` largest values of ``vec`` in ascending order.

    Args:
        vec: array-like of comparable values.
        n: how many values to return.

    Returns:
        A sorted array holding the largest ``n`` values. If ``n`` exceeds the
        length of ``vec`` all values are returned; if ``n`` is zero or
        negative the result is empty.
    """
    ordered = np.sort(vec)
    if n <= 0:
        # ordered[-0:] is ordered[0:], i.e. *everything*, so "zero items"
        # needs an explicit empty slice.
        return ordered[:0]
    return ordered[-n:]

# Input data: one row per movie (same order as `movies`), one column per user
# (same order as `users`). np.nan marks a rating that is 'N/A'.
ratings = np.array(
    [
        [2, -1, 2, -2, 2],        # The Shawshank Redemption
        [1, -1, 2, 0, 2],         # The Godfather
        [1, np.nan, 1, -2, 1],    # The Hunger Games
        [1, -2, 1, -1, 1],        # Pulp Fiction
        [-1, np.nan, 0, -2, -1],  # Transformers: Revenge of the Fallen
        [0, np.nan, 0, -1, -1],   # Frozen
        [-2, 2, -1, 1, 0],        # Batman v Superman: Dawn of Justice
        [-2, np.nan, -2, -1, -2], # Cats (2019)
    ],
    dtype=float,
)

# Assuming 0s where ratings are 'N/A'
ratings = np.where(np.isnan(ratings), 0.0, ratings)

In [43]:
# Show the ratings matrix after the N/A entries were replaced with 0.
ratings

array([[ 2., -1.,  2., -2.,  2.],
       [ 1., -1.,  2.,  0.,  2.],
       [ 1.,  0.,  1., -2.,  1.],
       [ 1., -2.,  1., -1.,  1.],
       [-1.,  0.,  0., -2., -1.],
       [ 0.,  0.,  0., -1., -1.],
       [-2.,  2., -1.,  1.,  0.],
       [-2.,  0., -2., -1., -2.]])

In [44]:
# Indices of the target user (a column of `ratings`) and target movie (a row of `ratings`)
target_user, target_movie = 1, 2

# Number of similar users to consider
N = 2

# Preparing the modified ratings matrix (target movie's row removed) and, from
# it, the target vector (the target user's ratings of the remaining movies)
modified_ratings = np.delete(ratings, target_movie, axis=0)
target_vector = modified_ratings[:, target_user].copy()

In [45]:
# The target user's ratings for every movie except the target movie.
target_vector

array([-1., -1., -2.,  0.,  0.,  2.,  0.])

In [46]:
# The ratings matrix with the target movie's row removed.
modified_ratings

array([[ 2., -1.,  2., -2.,  2.],
       [ 1., -1.,  2.,  0.,  2.],
       [ 1., -2.,  1., -1.,  1.],
       [-1.,  0.,  0., -2., -1.],
       [ 0.,  0.,  0., -1., -1.],
       [-2.,  2., -1.,  1.,  0.],
       [-2.,  0., -2., -1., -2.]])

In [47]:
# Compute cosine similarity between the target user and all others.
# Each row of the transposed matrix is one user's column of ratings.
similarities = np.array([
    cosine_similarity(target_vector, user_column)
    for user_idx, user_column in enumerate(modified_ratings.T)
    if user_idx != target_user
])

# Setting the target user's similarity to self as invalid
similarities = np.insert(similarities, target_user, -999)

# Convert the similarity scores into weights that sum to 1.
prob_similarities = softmax(similarities)

In [48]:
# Cosine similarity of each user to the target user; the target user's own
# slot holds the -999 placeholder.
similarities

array([-7.34846923e-01, -9.99000000e+02, -6.76123404e-01,  5.47722558e-01,
       -4.89897949e-01])

In [49]:
# Softmax of the similarity scores.
prob_similarities

array([0.14401064, 0.        , 0.15272069, 0.51928698, 0.18398168])

In [50]:
# Pick the N users with the largest weights and fetch what they gave the target movie.
top_n_indices = max_n_indices(prob_similarities, N)
top_n_similarities = np.take(prob_similarities, top_n_indices)
top_n_ratings = ratings[target_movie][top_n_indices]

# Weighted average of those ratings; fall back to 0 when the weights sum to zero.
if np.sum(top_n_similarities) == 0:
    predicted_rating = 0
else:
    predicted_rating = np.dot(top_n_similarities, top_n_ratings) / np.sum(top_n_similarities)

print(f"Predicted rating for user {get_user_name(target_user)} for movie {get_movie_name(target_movie)} is: {predicted_rating:.2f}")

Predicted rating for user Anton for movie The Hunger Games is: -1.22


The cells above walk through the computation step by step. Next, we wrap the same steps in a function that computes the predicted rating for a given user and movie.

In [57]:
def predict_rating(target_user, target_movie, N):
    """Predict a user's rating of a movie from the ratings of similar users.

    Reads the module-level ``ratings`` matrix (rows are movies, columns are
    users) and proceeds as follows:

    1. Remove the target movie's row so it does not take part in the
       similarity computation.
    2. Compute the cosine similarity between the target user's remaining
       ratings and those of every other user.
    3. Turn the similarities into weights with ``softmax``; the target user's
       own slot is set to -999 so its weight underflows to 0.
    4. Take the ``N`` users with the largest weights and return the weighted
       average of their ratings of the target movie.

    Args:
        target_user: column index of the user to predict for.
        target_movie: row index of the movie to predict.
        N: number of most-similar users to average over.

    Returns:
        The predicted rating, or 0 when the selected weights sum to zero.
        The result is also printed with the user's and movie's names.
    """
    target_vector = np.delete(ratings[:, target_user], target_movie)
    modified_ratings = np.delete(ratings, target_movie, axis=0)

    similarities = np.array([
        cosine_similarity(target_vector, modified_ratings[:, i])
        for i in range(modified_ratings.shape[1])
        if i != target_user
    ])
    # Setting the target user's similarity to self as invalid
    similarities = np.insert(similarities, target_user, -999)

    prob_similarities = softmax(similarities)

    top_n_indices = max_n_indices(prob_similarities, N)
    top_n_similarities = prob_similarities[top_n_indices]
    top_n_ratings = ratings[target_movie, top_n_indices]

    # Compute the normaliser once and reuse it for both the guard and the division.
    weight_sum = np.sum(top_n_similarities)
    predicted_rating = np.dot(top_n_similarities, top_n_ratings) / weight_sum if weight_sum != 0 else 0

    print(f"Predicted rating for user {get_user_name(target_user)} for movie {get_movie_name(target_movie)} is: {predicted_rating:.2f}")

    # Hand the value back so callers can use it instead of only reading the printout.
    return predicted_rating

In [60]:
# Predict the rating of user index 1 for movie index 7 using the 2 most similar users.
predict_rating(target_user=1, target_movie=7, N=2)

Predicted rating for user Anton for movie Cats (2019) is: -1.26
